From 0f8ad5f2e93425812c393c91ceb5af3d95e79b10 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 3 Jul 2020 15:40:29 +0200 Subject: kcsan: Add support for atomic builtins Some architectures (currently e.g. s390 partially) implement atomics using the compiler's atomic builtins (__atomic_*, __sync_*). To support enabling KCSAN on such architectures in future, or support experimental use of these builtins, implement support for them. We should also avoid breaking KCSAN kernels due to use (accidental or otherwise) of atomic builtins in drivers, as has happened in the past: https://lkml.kernel.org/r/5231d2c0-41d9-6721-e15f-a7eedf3ce69e@infradead.org The instrumentation is subtly different from regular reads/writes: TSAN instrumentation replaces the use of atomic builtins with a call into the runtime, and the runtime's job is to also execute the desired atomic operation. We rely on the __atomic_* compiler builtins, available with all KCSAN-supported compilers, to implement each TSAN atomic instrumentation function. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 9147ff6a12e5..682d9fd76733 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -879,3 +879,113 @@ void __tsan_init(void) { } EXPORT_SYMBOL(__tsan_init); + +/* + * Instrumentation for atomic builtins (__atomic_*, __sync_*). + * + * Normal kernel code _should not_ be using them directly, but some + * architectures may implement some or all atomics using the compilers' + * builtins. + * + * Note: If an architecture decides to fully implement atomics using the + * builtins, because they are implicitly instrumented by KCSAN (and KASAN, + * etc.), implementing the ARCH_ATOMIC interface (to get instrumentation via + * atomic-instrumented) is no longer necessary. + * + * TSAN instrumentation replaces atomic accesses with calls to any of the below + * functions, whose job is to also execute the operation itself. + */ + +#define DEFINE_TSAN_ATOMIC_LOAD_STORE(bits) \ + u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder); \ + u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder) \ + { \ + check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_ATOMIC); \ + return __atomic_load_n(ptr, memorder); \ + } \ + EXPORT_SYMBOL(__tsan_atomic##bits##_load); \ + void __tsan_atomic##bits##_store(u##bits *ptr, u##bits v, int memorder); \ + void __tsan_atomic##bits##_store(u##bits *ptr, u##bits v, int memorder) \ + { \ + check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + __atomic_store_n(ptr, v, memorder); \ + } \ + EXPORT_SYMBOL(__tsan_atomic##bits##_store) + +#define DEFINE_TSAN_ATOMIC_RMW(op, bits, suffix) \ + u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder); \ + u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder) \ + { \ + check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + return __atomic_##op##suffix(ptr, v, memorder); \ + } \ + EXPORT_SYMBOL(__tsan_atomic##bits##_##op) + +/* + * Note: CAS operations are always classified as write, even in case they + * fail. We cannot perform check_access() after a write, as it might lead to + * false positives, in cases such as: + * + * T0: __atomic_compare_exchange_n(&p->flag, &old, 1, ...) + * + * T1: if (__atomic_load_n(&p->flag, ...)) { + * modify *p; + * p->flag = 0; + * } + * + * The only downside is that, if there are 3 threads, with one CAS that + * succeeds, another CAS that fails, and an unmarked racing operation, we may + * point at the wrong CAS as the source of the race. However, if we assume that + * all CAS can succeed in some other execution, the data race is still valid. + */ +#define DEFINE_TSAN_ATOMIC_CMPXCHG(bits, strength, weak) \ + int __tsan_atomic##bits##_compare_exchange_##strength(u##bits *ptr, u##bits *exp, \ + u##bits val, int mo, int fail_mo); \ + int __tsan_atomic##bits##_compare_exchange_##strength(u##bits *ptr, u##bits *exp, \ + u##bits val, int mo, int fail_mo) \ + { \ + check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + return __atomic_compare_exchange_n(ptr, exp, val, weak, mo, fail_mo); \ + } \ + EXPORT_SYMBOL(__tsan_atomic##bits##_compare_exchange_##strength) + +#define DEFINE_TSAN_ATOMIC_CMPXCHG_VAL(bits) \ + u##bits __tsan_atomic##bits##_compare_exchange_val(u##bits *ptr, u##bits exp, u##bits val, \ + int mo, int fail_mo); \ + u##bits __tsan_atomic##bits##_compare_exchange_val(u##bits *ptr, u##bits exp, u##bits val, \ + int mo, int fail_mo) \ + { \ + check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + __atomic_compare_exchange_n(ptr, &exp, val, 0, mo, fail_mo); \ + return exp; \ + } \ + EXPORT_SYMBOL(__tsan_atomic##bits##_compare_exchange_val) + +#define DEFINE_TSAN_ATOMIC_OPS(bits) \ + DEFINE_TSAN_ATOMIC_LOAD_STORE(bits); \ + DEFINE_TSAN_ATOMIC_RMW(exchange, bits, _n); \ + DEFINE_TSAN_ATOMIC_RMW(fetch_add, bits, ); \ + DEFINE_TSAN_ATOMIC_RMW(fetch_sub, bits, ); \ + DEFINE_TSAN_ATOMIC_RMW(fetch_and, bits, ); \ + DEFINE_TSAN_ATOMIC_RMW(fetch_or, bits, ); \ + DEFINE_TSAN_ATOMIC_RMW(fetch_xor, bits, ); \ + DEFINE_TSAN_ATOMIC_RMW(fetch_nand, bits, ); \ + DEFINE_TSAN_ATOMIC_CMPXCHG(bits, strong, 0); \ + DEFINE_TSAN_ATOMIC_CMPXCHG(bits, weak, 1); \ + DEFINE_TSAN_ATOMIC_CMPXCHG_VAL(bits) + +DEFINE_TSAN_ATOMIC_OPS(8); +DEFINE_TSAN_ATOMIC_OPS(16); +DEFINE_TSAN_ATOMIC_OPS(32); +DEFINE_TSAN_ATOMIC_OPS(64); + +void __tsan_atomic_thread_fence(int memorder); +void __tsan_atomic_thread_fence(int memorder) +{ + __atomic_thread_fence(memorder); +} +EXPORT_SYMBOL(__tsan_atomic_thread_fence); + +void __tsan_atomic_signal_fence(int memorder); +void __tsan_atomic_signal_fence(int memorder) { } +EXPORT_SYMBOL(__tsan_atomic_signal_fence); -- cgit v1.2.3 From f9ea63193135473ed6b6ff06f016eb6248100041 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 3 Jul 2020 15:40:31 +0200 Subject: kcsan: Add atomic builtin test case Adds test case to kcsan-test module, to test atomic builtin instrumentation works. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/kcsan-test.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'kernel') diff --git a/kernel/kcsan/kcsan-test.c b/kernel/kcsan/kcsan-test.c index fed6fcb5768c..721180cbbab1 100644 --- a/kernel/kcsan/kcsan-test.c +++ b/kernel/kcsan/kcsan-test.c @@ -390,6 +390,15 @@ static noinline void test_kernel_seqlock_writer(void) write_sequnlock_irqrestore(&test_seqlock, flags); } +static noinline void test_kernel_atomic_builtins(void) +{ + /* + * Generate concurrent accesses, expecting no reports, ensuring KCSAN + * treats builtin atomics as actually atomic. + */ + __atomic_load_n(&test_var, __ATOMIC_RELAXED); +} + /* ===== Test cases ===== */ /* Simple test with normal data race. */ @@ -852,6 +861,59 @@ static void test_seqlock_noreport(struct kunit *test) KUNIT_EXPECT_FALSE(test, match_never); } +/* + * Test atomic builtins work and required instrumentation functions exist. We + * also test that KCSAN understands they're atomic by racing with them via + * test_kernel_atomic_builtins(), and expect no reports. + * + * The atomic builtins _SHOULD NOT_ be used in normal kernel code! + */ +static void test_atomic_builtins(struct kunit *test) +{ + bool match_never = false; + + begin_test_checks(test_kernel_atomic_builtins, test_kernel_atomic_builtins); + do { + long tmp; + + kcsan_enable_current(); + + __atomic_store_n(&test_var, 42L, __ATOMIC_RELAXED); + KUNIT_EXPECT_EQ(test, 42L, __atomic_load_n(&test_var, __ATOMIC_RELAXED)); + + KUNIT_EXPECT_EQ(test, 42L, __atomic_exchange_n(&test_var, 20, __ATOMIC_RELAXED)); + KUNIT_EXPECT_EQ(test, 20L, test_var); + + tmp = 20L; + KUNIT_EXPECT_TRUE(test, __atomic_compare_exchange_n(&test_var, &tmp, 30L, + 0, __ATOMIC_RELAXED, + __ATOMIC_RELAXED)); + KUNIT_EXPECT_EQ(test, tmp, 20L); + KUNIT_EXPECT_EQ(test, test_var, 30L); + KUNIT_EXPECT_FALSE(test, __atomic_compare_exchange_n(&test_var, &tmp, 40L, + 1, __ATOMIC_RELAXED, + __ATOMIC_RELAXED)); + KUNIT_EXPECT_EQ(test, tmp, 30L); + KUNIT_EXPECT_EQ(test, test_var, 30L); + + KUNIT_EXPECT_EQ(test, 30L, __atomic_fetch_add(&test_var, 1, __ATOMIC_RELAXED)); + KUNIT_EXPECT_EQ(test, 31L, __atomic_fetch_sub(&test_var, 1, __ATOMIC_RELAXED)); + KUNIT_EXPECT_EQ(test, 30L, __atomic_fetch_and(&test_var, 0xf, __ATOMIC_RELAXED)); + KUNIT_EXPECT_EQ(test, 14L, __atomic_fetch_xor(&test_var, 0xf, __ATOMIC_RELAXED)); + KUNIT_EXPECT_EQ(test, 1L, __atomic_fetch_or(&test_var, 0xf0, __ATOMIC_RELAXED)); + KUNIT_EXPECT_EQ(test, 241L, __atomic_fetch_nand(&test_var, 0xf, __ATOMIC_RELAXED)); + KUNIT_EXPECT_EQ(test, -2L, test_var); + + __atomic_thread_fence(__ATOMIC_SEQ_CST); + __atomic_signal_fence(__ATOMIC_SEQ_CST); + + kcsan_disable_current(); + + match_never = report_available(); + } while (!end_test_checks(match_never)); + KUNIT_EXPECT_FALSE(test, match_never); +} + /* * Each test case is run with different numbers of threads. Until KUnit supports * passing arguments for each test case, we encode #threads in the test case @@ -891,6 +953,7 @@ static struct kunit_case kcsan_test_cases[] = { KCSAN_KUNIT_CASE(test_assert_exclusive_access_scoped), KCSAN_KUNIT_CASE(test_jiffies_noreport), KCSAN_KUNIT_CASE(test_seqlock_noreport), + KCSAN_KUNIT_CASE(test_atomic_builtins), {}, }; -- cgit v1.2.3 From 14e2ac8de0f91f12122a49f09897b0cd05256460 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Jul 2020 09:00:01 +0200 Subject: kcsan: Support compounded read-write instrumentation Add support for compounded read-write instrumentation if supported by the compiler. Adds the necessary instrumentation functions, and a new type which is used to generate a more descriptive report. Furthermore, such compounded memory access instrumentation is excluded from the "assume aligned writes up to word size are atomic" rule, because we cannot assume that the compiler emits code that is atomic for compound ops. LLVM/Clang added support for the feature in: https://github.com/llvm/llvm-project/commit/785d41a261d136b64ab6c15c5d35f2adc5ad53e3 The new instrumentation is emitted for sets of memory accesses in the same basic block to the same address with at least one read appearing before a write. These typically result from compound operations such as ++, --, +=, -=, |=, &=, etc. but also equivalent forms such as "var = var + 1". Where the compiler determines that it is equivalent to emit a call to a single __tsan_read_write instead of separate __tsan_read and __tsan_write, we can then benefit from improved performance and better reporting for such access patterns. The new reports now show that the ops are both reads and writes, for example: read-write to 0xffffffff90548a38 of 8 bytes by task 143 on cpu 3: test_kernel_rmw_array+0x45/0xa0 access_thread+0x71/0xb0 kthread+0x21e/0x240 ret_from_fork+0x22/0x30 read-write to 0xffffffff90548a38 of 8 bytes by task 144 on cpu 2: test_kernel_rmw_array+0x45/0xa0 access_thread+0x71/0xb0 kthread+0x21e/0x240 ret_from_fork+0x22/0x30 Acked-by: Peter Zijlstra (Intel) Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 45 +++++++++++++++++++++++++++++--------------- kernel/kcsan/core.c | 23 +++++++++++++++++----- kernel/kcsan/report.c | 4 ++++ scripts/Makefile.kcsan | 2 +- 4 files changed, 53 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index c5f6c1dcf7e3..cf14840609ce 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -7,19 +7,13 @@ #include #include -/* - * ACCESS TYPE MODIFIERS - * - * : normal read access; - * WRITE : write access; - * ATOMIC: access is atomic; - * ASSERT: access is not a regular access, but an assertion; - * SCOPED: access is a scoped access; - */ -#define KCSAN_ACCESS_WRITE 0x1 -#define KCSAN_ACCESS_ATOMIC 0x2 -#define KCSAN_ACCESS_ASSERT 0x4 -#define KCSAN_ACCESS_SCOPED 0x8 +/* Access types -- if KCSAN_ACCESS_WRITE is not set, the access is a read. */ +#define KCSAN_ACCESS_WRITE (1 << 0) /* Access is a write. */ +#define KCSAN_ACCESS_COMPOUND (1 << 1) /* Compounded read-write instrumentation. */ +#define KCSAN_ACCESS_ATOMIC (1 << 2) /* Access is atomic. */ +/* The following are special, and never due to compiler instrumentation. */ +#define KCSAN_ACCESS_ASSERT (1 << 3) /* Access is an assertion. */ +#define KCSAN_ACCESS_SCOPED (1 << 4) /* Access is a scoped access. */ /* * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used @@ -204,6 +198,15 @@ static inline void __kcsan_disable_current(void) { } #define __kcsan_check_write(ptr, size) \ __kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) +/** + * __kcsan_check_read_write - check regular read-write access for races + * + * @ptr: address of access + * @size: size of access + */ +#define __kcsan_check_read_write(ptr, size) \ + __kcsan_check_access(ptr, size, KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE) + /** * kcsan_check_read - check regular read access for races * @@ -221,18 +224,30 @@ static inline void __kcsan_disable_current(void) { } #define kcsan_check_write(ptr, size) \ kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) +/** + * kcsan_check_read_write - check regular read-write access for races + * + * @ptr: address of access + * @size: size of access + */ +#define kcsan_check_read_write(ptr, size) \ + kcsan_check_access(ptr, size, KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE) + /* * Check for atomic accesses: if atomic accesses are not ignored, this simply * aliases to kcsan_check_access(), otherwise becomes a no-op. */ #ifdef CONFIG_KCSAN_IGNORE_ATOMICS -#define kcsan_check_atomic_read(...) do { } while (0) -#define kcsan_check_atomic_write(...) do { } while (0) +#define kcsan_check_atomic_read(...) do { } while (0) +#define kcsan_check_atomic_write(...) do { } while (0) +#define kcsan_check_atomic_read_write(...) do { } while (0) #else #define kcsan_check_atomic_read(ptr, size) \ kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC) #define kcsan_check_atomic_write(ptr, size) \ kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE) +#define kcsan_check_atomic_read_write(ptr, size) \ + kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_COMPOUND) #endif /** diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 682d9fd76733..4c8b40b14314 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -223,7 +223,7 @@ is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) && (type & KCSAN_ACCESS_WRITE) && size <= sizeof(long) && - IS_ALIGNED((unsigned long)ptr, size)) + !(type & KCSAN_ACCESS_COMPOUND) && IS_ALIGNED((unsigned long)ptr, size)) return true; /* Assume aligned writes up to word size are atomic. */ if (ctx->atomic_next > 0) { @@ -793,7 +793,17 @@ EXPORT_SYMBOL(__kcsan_check_access); EXPORT_SYMBOL(__tsan_write##size); \ void __tsan_unaligned_write##size(void *ptr) \ __alias(__tsan_write##size); \ - EXPORT_SYMBOL(__tsan_unaligned_write##size) + EXPORT_SYMBOL(__tsan_unaligned_write##size); \ + void __tsan_read_write##size(void *ptr); \ + void __tsan_read_write##size(void *ptr) \ + { \ + check_access(ptr, size, \ + KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE); \ + } \ + EXPORT_SYMBOL(__tsan_read_write##size); \ + void __tsan_unaligned_read_write##size(void *ptr) \ + __alias(__tsan_read_write##size); \ + EXPORT_SYMBOL(__tsan_unaligned_read_write##size) DEFINE_TSAN_READ_WRITE(1); DEFINE_TSAN_READ_WRITE(2); @@ -916,7 +926,8 @@ EXPORT_SYMBOL(__tsan_init); u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder); \ u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder) \ { \ - check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + check_access(ptr, bits / BITS_PER_BYTE, \ + KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ return __atomic_##op##suffix(ptr, v, memorder); \ } \ EXPORT_SYMBOL(__tsan_atomic##bits##_##op) @@ -944,7 +955,8 @@ EXPORT_SYMBOL(__tsan_init); int __tsan_atomic##bits##_compare_exchange_##strength(u##bits *ptr, u##bits *exp, \ u##bits val, int mo, int fail_mo) \ { \ - check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + check_access(ptr, bits / BITS_PER_BYTE, \ + KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ return __atomic_compare_exchange_n(ptr, exp, val, weak, mo, fail_mo); \ } \ EXPORT_SYMBOL(__tsan_atomic##bits##_compare_exchange_##strength) @@ -955,7 +967,8 @@ EXPORT_SYMBOL(__tsan_init); u##bits __tsan_atomic##bits##_compare_exchange_val(u##bits *ptr, u##bits exp, u##bits val, \ int mo, int fail_mo) \ { \ - check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + check_access(ptr, bits / BITS_PER_BYTE, \ + KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ __atomic_compare_exchange_n(ptr, &exp, val, 0, mo, fail_mo); \ return exp; \ } \ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 9d07e175de0f..3e83a69239fa 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -228,6 +228,10 @@ static const char *get_access_type(int type) return "write"; case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: return "write (marked)"; + case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE: + return "read-write"; + case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: + return "read-write (marked)"; case KCSAN_ACCESS_SCOPED: return "read (scoped)"; case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC: diff --git a/scripts/Makefile.kcsan b/scripts/Makefile.kcsan index c50f27b3ac56..c37f9518d5d9 100644 --- a/scripts/Makefile.kcsan +++ b/scripts/Makefile.kcsan @@ -11,5 +11,5 @@ endif # of some options does not break KCSAN nor causes false positive reports. CFLAGS_KCSAN := -fsanitize=thread \ $(call cc-option,$(call cc-param,tsan-instrument-func-entry-exit=0) -fno-optimize-sibling-calls) \ - $(call cc-option,$(call cc-param,tsan-instrument-read-before-write=1)) \ + $(call cc-option,$(call cc-param,tsan-compound-read-before-write=1),$(call cc-option,$(call cc-param,tsan-instrument-read-before-write=1))) \ $(call cc-param,tsan-distinguish-volatile=1) -- cgit v1.2.3 From 106a307fd0a762e2d47e1cf99e6da43763887a18 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Jul 2020 09:00:03 +0200 Subject: kcsan: Skew delay to be longer for certain access types For compound instrumentation and assert accesses, skew the watchpoint delay to be longer if randomized. This is useful to improve race detection for such accesses. For compound accesses we should increase the delay as we've aggregated both read and write instrumentation. By giving up 1 call into the runtime, we're less likely to set up a watchpoint and thus less likely to detect a race. We can balance this by increasing the watchpoint delay. For assert accesses, we know these are of increased interest, and we wish to increase our chances of detecting races for such checks. Note that, kcsan_udelay_{task,interrupt} define the upper bound delays. When randomized, delays are uniformly distributed between [0, delay]. Skewing the delay does not break this promise as long as the defined upper bounds are still adhered to. The current skew results in delays uniformly distributed between [delay/2, delay]. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 4c8b40b14314..95a364e22aab 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -283,11 +283,15 @@ static __always_inline bool kcsan_is_enabled(void) return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0; } -static inline unsigned int get_delay(void) +static inline unsigned int get_delay(int type) { unsigned int delay = in_task() ? kcsan_udelay_task : kcsan_udelay_interrupt; + /* For certain access types, skew the random delay to be longer. */ + unsigned int skew_delay_order = + (type & (KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_ASSERT)) ? 1 : 0; + return delay - (IS_ENABLED(CONFIG_KCSAN_DELAY_RANDOMIZE) ? - prandom_u32_max(delay) : + prandom_u32_max(delay >> skew_delay_order) : 0); } @@ -470,7 +474,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) * Delay this thread, to increase probability of observing a racy * conflicting access. */ - udelay(get_delay()); + udelay(get_delay(type)); /* * Re-read value, and check if it is as expected; if not, we infer a -- cgit v1.2.3 From 9d1335cc1e97cc3da0d14f640dd716e614083e8b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Jul 2020 09:00:04 +0200 Subject: kcsan: Add missing CONFIG_KCSAN_IGNORE_ATOMICS checks Add missing CONFIG_KCSAN_IGNORE_ATOMICS checks for the builtin atomics instrumentation. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 95a364e22aab..99e5044bc942 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -914,14 +914,19 @@ EXPORT_SYMBOL(__tsan_init); u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder); \ u##bits __tsan_atomic##bits##_load(const u##bits *ptr, int memorder) \ { \ - check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_ATOMIC); \ + if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ + check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_ATOMIC); \ + } \ return __atomic_load_n(ptr, memorder); \ } \ EXPORT_SYMBOL(__tsan_atomic##bits##_load); \ void __tsan_atomic##bits##_store(u##bits *ptr, u##bits v, int memorder); \ void __tsan_atomic##bits##_store(u##bits *ptr, u##bits v, int memorder) \ { \ - check_access(ptr, bits / BITS_PER_BYTE, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ + check_access(ptr, bits / BITS_PER_BYTE, \ + KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + } \ __atomic_store_n(ptr, v, memorder); \ } \ EXPORT_SYMBOL(__tsan_atomic##bits##_store) @@ -930,8 +935,11 @@ EXPORT_SYMBOL(__tsan_init); u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder); \ u##bits __tsan_atomic##bits##_##op(u##bits *ptr, u##bits v, int memorder) \ { \ - check_access(ptr, bits / BITS_PER_BYTE, \ - KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ + check_access(ptr, bits / BITS_PER_BYTE, \ + KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \ + KCSAN_ACCESS_ATOMIC); \ + } \ return __atomic_##op##suffix(ptr, v, memorder); \ } \ EXPORT_SYMBOL(__tsan_atomic##bits##_##op) @@ -959,8 +967,11 @@ EXPORT_SYMBOL(__tsan_init); int __tsan_atomic##bits##_compare_exchange_##strength(u##bits *ptr, u##bits *exp, \ u##bits val, int mo, int fail_mo) \ { \ - check_access(ptr, bits / BITS_PER_BYTE, \ - KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ + check_access(ptr, bits / BITS_PER_BYTE, \ + KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \ + KCSAN_ACCESS_ATOMIC); \ + } \ return __atomic_compare_exchange_n(ptr, exp, val, weak, mo, fail_mo); \ } \ EXPORT_SYMBOL(__tsan_atomic##bits##_compare_exchange_##strength) @@ -971,8 +982,11 @@ EXPORT_SYMBOL(__tsan_init); u##bits __tsan_atomic##bits##_compare_exchange_val(u##bits *ptr, u##bits exp, u##bits val, \ int mo, int fail_mo) \ { \ - check_access(ptr, bits / BITS_PER_BYTE, \ - KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC); \ + if (!IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { \ + check_access(ptr, bits / BITS_PER_BYTE, \ + KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | \ + KCSAN_ACCESS_ATOMIC); \ + } \ __atomic_compare_exchange_n(ptr, &exp, val, 0, mo, fail_mo); \ return exp; \ } \ -- cgit v1.2.3 From bec4a2474890a6884eb890c778ea02bccaaae6eb Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Jul 2020 09:00:05 +0200 Subject: kcsan: Test support for compound instrumentation Changes kcsan-test module to support checking reports that include compound instrumentation. Since we should not fail the test if this support is unavailable, we have to add a config variable that the test can use to decide what to check for. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/kcsan-test.c | 65 +++++++++++++++++++++++++++++++++++++---------- lib/Kconfig.kcsan | 5 ++++ 2 files changed, 56 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/kcsan-test.c b/kernel/kcsan/kcsan-test.c index 721180cbbab1..ebe7fd245104 100644 --- a/kernel/kcsan/kcsan-test.c +++ b/kernel/kcsan/kcsan-test.c @@ -27,6 +27,12 @@ #include #include +#ifdef CONFIG_CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE +#define __KCSAN_ACCESS_RW(alt) (KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE) +#else +#define __KCSAN_ACCESS_RW(alt) (alt) +#endif + /* Points to current test-case memory access "kernels". */ static void (*access_kernels[2])(void); @@ -186,20 +192,21 @@ static bool report_matches(const struct expect_report *r) /* Access 1 & 2 */ for (i = 0; i < 2; ++i) { + const int ty = r->access[i].type; const char *const access_type = - (r->access[i].type & KCSAN_ACCESS_ASSERT) ? - ((r->access[i].type & KCSAN_ACCESS_WRITE) ? - "assert no accesses" : - "assert no writes") : - ((r->access[i].type & KCSAN_ACCESS_WRITE) ? - "write" : - "read"); + (ty & KCSAN_ACCESS_ASSERT) ? + ((ty & KCSAN_ACCESS_WRITE) ? + "assert no accesses" : + "assert no writes") : + ((ty & KCSAN_ACCESS_WRITE) ? + ((ty & KCSAN_ACCESS_COMPOUND) ? + "read-write" : + "write") : + "read"); const char *const access_type_aux = - (r->access[i].type & KCSAN_ACCESS_ATOMIC) ? - " (marked)" : - ((r->access[i].type & KCSAN_ACCESS_SCOPED) ? - " (scoped)" : - ""); + (ty & KCSAN_ACCESS_ATOMIC) ? + " (marked)" : + ((ty & KCSAN_ACCESS_SCOPED) ? " (scoped)" : ""); if (i == 1) { /* Access 2 */ @@ -277,6 +284,12 @@ static noinline void test_kernel_write_atomic(void) WRITE_ONCE(test_var, READ_ONCE_NOCHECK(test_sink) + 1); } +static noinline void test_kernel_atomic_rmw(void) +{ + /* Use builtin, so we can set up the "bad" atomic/non-atomic scenario. */ + __atomic_fetch_add(&test_var, 1, __ATOMIC_RELAXED); +} + __no_kcsan static noinline void test_kernel_write_uninstrumented(void) { test_var++; } @@ -439,8 +452,8 @@ static void test_concurrent_races(struct kunit *test) const struct expect_report expect = { .access = { /* NULL will match any address. */ - { test_kernel_rmw_array, NULL, 0, KCSAN_ACCESS_WRITE }, - { test_kernel_rmw_array, NULL, 0, 0 }, + { test_kernel_rmw_array, NULL, 0, __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) }, + { test_kernel_rmw_array, NULL, 0, __KCSAN_ACCESS_RW(0) }, }, }; static const struct expect_report never = { @@ -629,6 +642,29 @@ static void test_read_plain_atomic_write(struct kunit *test) KUNIT_EXPECT_TRUE(test, match_expect); } +/* Test that atomic RMWs generate correct report. */ +__no_kcsan +static void test_read_plain_atomic_rmw(struct kunit *test) +{ + const struct expect_report expect = { + .access = { + { test_kernel_read, &test_var, sizeof(test_var), 0 }, + { test_kernel_atomic_rmw, &test_var, sizeof(test_var), + KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC }, + }, + }; + bool match_expect = false; + + if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) + return; + + begin_test_checks(test_kernel_read, test_kernel_atomic_rmw); + do { + match_expect = report_matches(&expect); + } while (!end_test_checks(match_expect)); + KUNIT_EXPECT_TRUE(test, match_expect); +} + /* Zero-sized accesses should never cause data race reports. */ __no_kcsan static void test_zero_size_access(struct kunit *test) @@ -942,6 +978,7 @@ static struct kunit_case kcsan_test_cases[] = { KCSAN_KUNIT_CASE(test_write_write_struct_part), KCSAN_KUNIT_CASE(test_read_atomic_write_atomic), KCSAN_KUNIT_CASE(test_read_plain_atomic_write), + KCSAN_KUNIT_CASE(test_read_plain_atomic_rmw), KCSAN_KUNIT_CASE(test_zero_size_access), KCSAN_KUNIT_CASE(test_data_race), KCSAN_KUNIT_CASE(test_assert_exclusive_writer), diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 3d282d51849b..f271ff5fbb5a 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -40,6 +40,11 @@ menuconfig KCSAN if KCSAN +# Compiler capabilities that should not fail the test if they are unavailable. +config CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE + def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-compound-read-before-write=1)) || \ + (CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-compound-read-before-write=1)) + config KCSAN_VERBOSE bool "Show verbose reports with more information about system state" depends on PROVE_LOCKING -- cgit v1.2.3 From 69b2c81bc894606670204f0ae08f406dbcce836d Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 31 Jul 2020 10:17:19 +0200 Subject: kcsan: Simplify debugfs counter to name mapping Simplify counter ID to name mapping by using an array with designated inits. This way, we can turn a run-time BUG() into a compile-time static assertion failure if a counter name is missing. No functional change intended. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/debugfs.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 023e49c58d55..3a9566addeff 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -19,6 +19,18 @@ * Statistics counters. */ static atomic_long_t counters[KCSAN_COUNTER_COUNT]; +static const char *const counter_names[] = { + [KCSAN_COUNTER_USED_WATCHPOINTS] = "used_watchpoints", + [KCSAN_COUNTER_SETUP_WATCHPOINTS] = "setup_watchpoints", + [KCSAN_COUNTER_DATA_RACES] = "data_races", + [KCSAN_COUNTER_ASSERT_FAILURES] = "assert_failures", + [KCSAN_COUNTER_NO_CAPACITY] = "no_capacity", + [KCSAN_COUNTER_REPORT_RACES] = "report_races", + [KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN] = "races_unknown_origin", + [KCSAN_COUNTER_UNENCODABLE_ACCESSES] = "unencodable_accesses", + [KCSAN_COUNTER_ENCODING_FALSE_POSITIVES] = "encoding_false_positives", +}; +static_assert(ARRAY_SIZE(counter_names) == KCSAN_COUNTER_COUNT); /* * Addresses for filtering functions from reporting. This list can be used as a @@ -39,24 +51,6 @@ static struct { }; static DEFINE_SPINLOCK(report_filterlist_lock); -static const char *counter_to_name(enum kcsan_counter_id id) -{ - switch (id) { - case KCSAN_COUNTER_USED_WATCHPOINTS: return "used_watchpoints"; - case KCSAN_COUNTER_SETUP_WATCHPOINTS: return "setup_watchpoints"; - case KCSAN_COUNTER_DATA_RACES: return "data_races"; - case KCSAN_COUNTER_ASSERT_FAILURES: return "assert_failures"; - case KCSAN_COUNTER_NO_CAPACITY: return "no_capacity"; - case KCSAN_COUNTER_REPORT_RACES: return "report_races"; - case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: return "races_unknown_origin"; - case KCSAN_COUNTER_UNENCODABLE_ACCESSES: return "unencodable_accesses"; - case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES: return "encoding_false_positives"; - case KCSAN_COUNTER_COUNT: - BUG(); - } - return NULL; -} - void kcsan_counter_inc(enum kcsan_counter_id id) { atomic_long_inc(&counters[id]); @@ -271,8 +265,7 @@ static int show_info(struct seq_file *file, void *v) /* show stats */ seq_printf(file, "enabled: %i\n", READ_ONCE(kcsan_enabled)); for (i = 0; i < KCSAN_COUNTER_COUNT; ++i) - seq_printf(file, "%s: %ld\n", counter_to_name(i), - atomic_long_read(&counters[i])); + seq_printf(file, "%s: %ld\n", counter_names[i], atomic_long_read(&counters[i])); /* show filter functions, and filter type */ spin_lock_irqsave(&report_filterlist_lock, flags); -- cgit v1.2.3 From a4e74fa5f0d3e2a11f2d0deb522681d219a81426 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 31 Jul 2020 10:17:20 +0200 Subject: kcsan: Simplify constant string handling Simplify checking prefixes and length calculation of constant strings. For the former, the kernel provides str_has_prefix(), and the latter we should just use strlen("..") because GCC and Clang have optimizations that optimize these into constants. No functional change intended. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/debugfs.c | 8 ++++---- kernel/kcsan/report.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 3a9566addeff..116bdd8f050c 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -300,16 +300,16 @@ debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *o WRITE_ONCE(kcsan_enabled, true); } else if (!strcmp(arg, "off")) { WRITE_ONCE(kcsan_enabled, false); - } else if (!strncmp(arg, "microbench=", sizeof("microbench=") - 1)) { + } else if (str_has_prefix(arg, "microbench=")) { unsigned long iters; - if (kstrtoul(&arg[sizeof("microbench=") - 1], 0, &iters)) + if (kstrtoul(&arg[strlen("microbench=")], 0, &iters)) return -EINVAL; microbenchmark(iters); - } else if (!strncmp(arg, "test=", sizeof("test=") - 1)) { + } else if (str_has_prefix(arg, "test=")) { unsigned long iters; - if (kstrtoul(&arg[sizeof("test=") - 1], 0, &iters)) + if (kstrtoul(&arg[strlen("test=")], 0, &iters)) return -EINVAL; test_thread(iters); } else if (!strcmp(arg, "whitelist")) { diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 3e83a69239fa..bf1d59449805 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -279,8 +279,8 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries cur = strnstr(buf, "kcsan_", len); if (cur) { - cur += sizeof("kcsan_") - 1; - if (strncmp(cur, "test", sizeof("test") - 1)) + cur += strlen("kcsan_"); + if (!str_has_prefix(cur, "test")) continue; /* KCSAN runtime function. */ /* KCSAN related test. */ } -- cgit v1.2.3 From 4700ccdf18fa3002d66769b69cf715cc58beea37 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 31 Jul 2020 10:17:21 +0200 Subject: kcsan: Remove debugfs test command Remove the debugfs test command, as it is no longer needed now that we have the KUnit+Torture based kcsan-test module. This is to avoid confusion around how KCSAN should be tested, as only the kcsan-test module is maintained. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/debugfs.c | 66 -------------------------------------------------- 1 file changed, 66 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 116bdd8f050c..de1da1b01aa4 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -98,66 +98,6 @@ static noinline void microbenchmark(unsigned long iters) current->kcsan_ctx = ctx_save; } -/* - * Simple test to create conflicting accesses. Write 'test=' to KCSAN's - * debugfs file from multiple tasks to generate real conflicts and show reports. - */ -static long test_dummy; -static long test_flags; -static long test_scoped; -static noinline void test_thread(unsigned long iters) -{ - const long CHANGE_BITS = 0xff00ff00ff00ff00L; - const struct kcsan_ctx ctx_save = current->kcsan_ctx; - cycles_t cycles; - - /* We may have been called from an atomic region; reset context. */ - memset(¤t->kcsan_ctx, 0, sizeof(current->kcsan_ctx)); - - pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters); - pr_info("test_dummy@%px, test_flags@%px, test_scoped@%px,\n", - &test_dummy, &test_flags, &test_scoped); - - cycles = get_cycles(); - while (iters--) { - /* These all should generate reports. */ - __kcsan_check_read(&test_dummy, sizeof(test_dummy)); - ASSERT_EXCLUSIVE_WRITER(test_dummy); - ASSERT_EXCLUSIVE_ACCESS(test_dummy); - - ASSERT_EXCLUSIVE_BITS(test_flags, ~CHANGE_BITS); /* no report */ - __kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */ - - ASSERT_EXCLUSIVE_BITS(test_flags, CHANGE_BITS); /* report */ - __kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */ - - /* not actually instrumented */ - WRITE_ONCE(test_dummy, iters); /* to observe value-change */ - __kcsan_check_write(&test_dummy, sizeof(test_dummy)); - - test_flags ^= CHANGE_BITS; /* generate value-change */ - __kcsan_check_write(&test_flags, sizeof(test_flags)); - - BUG_ON(current->kcsan_ctx.scoped_accesses.prev); - { - /* Should generate reports anywhere in this block. */ - ASSERT_EXCLUSIVE_WRITER_SCOPED(test_scoped); - ASSERT_EXCLUSIVE_ACCESS_SCOPED(test_scoped); - BUG_ON(!current->kcsan_ctx.scoped_accesses.prev); - /* Unrelated accesses. */ - __kcsan_check_access(&cycles, sizeof(cycles), 0); - __kcsan_check_access(&cycles, sizeof(cycles), KCSAN_ACCESS_ATOMIC); - } - BUG_ON(current->kcsan_ctx.scoped_accesses.prev); - } - cycles = get_cycles() - cycles; - - pr_info("KCSAN: %s end | cycles: %llu\n", __func__, cycles); - - /* restore context */ - current->kcsan_ctx = ctx_save; -} - static int cmp_filterlist_addrs(const void *rhs, const void *lhs) { const unsigned long a = *(const unsigned long *)rhs; @@ -306,12 +246,6 @@ debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *o if (kstrtoul(&arg[strlen("microbench=")], 0, &iters)) return -EINVAL; microbenchmark(iters); - } else if (str_has_prefix(arg, "test=")) { - unsigned long iters; - - if (kstrtoul(&arg[strlen("test=")], 0, &iters)) - return -EINVAL; - test_thread(iters); } else if (!strcmp(arg, "whitelist")) { set_report_filterlist_whitelist(true); } else if (!strcmp(arg, "blacklist")) { -- cgit v1.2.3 From 2778793072c31e3eb33842f3bd7da82dfc7efc6b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 31 Jul 2020 10:17:22 +0200 Subject: kcsan: Show message if enabled early Show a message in the kernel log if KCSAN was enabled early. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 99e5044bc942..b176400a2735 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) "kcsan: " fmt + #include #include #include @@ -463,7 +465,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (IS_ENABLED(CONFIG_KCSAN_DEBUG)) { kcsan_disable_current(); - pr_err("KCSAN: watching %s, size: %zu, addr: %px [slot: %d, encoded: %lx]\n", + pr_err("watching %s, size: %zu, addr: %px [slot: %d, encoded: %lx]\n", is_write ? "write" : "read", size, ptr, watchpoint_slot((unsigned long)ptr), encode_watchpoint((unsigned long)ptr, size, is_write)); @@ -623,8 +625,10 @@ void __init kcsan_init(void) * We are in the init task, and no other tasks should be running; * WRITE_ONCE without memory barrier is sufficient. */ - if (kcsan_early_enable) + if (kcsan_early_enable) { + pr_info("enabled early\n"); WRITE_ONCE(kcsan_enabled, true); + } } /* === Exported interface =================================================== */ -- cgit v1.2.3 From 178a1877d782c034f466edd80e30a107af5469df Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 31 Jul 2020 10:17:23 +0200 Subject: kcsan: Use pr_fmt for consistency Use the same pr_fmt throughout for consistency. [ The only exception is report.c, where the format must be kept precisely as-is. ] Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/debugfs.c | 8 +++++--- kernel/kcsan/selftest.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index de1da1b01aa4..6c4914fa2fad 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) "kcsan: " fmt + #include #include #include @@ -80,7 +82,7 @@ static noinline void microbenchmark(unsigned long iters) */ WRITE_ONCE(kcsan_enabled, false); - pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters); + pr_info("%s begin | iters: %lu\n", __func__, iters); cycles = get_cycles(); while (iters--) { @@ -91,7 +93,7 @@ static noinline void microbenchmark(unsigned long iters) } cycles = get_cycles() - cycles; - pr_info("KCSAN: %s end | cycles: %llu\n", __func__, cycles); + pr_info("%s end | cycles: %llu\n", __func__, cycles); WRITE_ONCE(kcsan_enabled, was_enabled); /* restore context */ @@ -154,7 +156,7 @@ static ssize_t insert_report_filterlist(const char *func) ssize_t ret = 0; if (!addr) { - pr_err("KCSAN: could not find function: '%s'\n", func); + pr_err("could not find function: '%s'\n", func); return -ENOENT; } diff --git a/kernel/kcsan/selftest.c b/kernel/kcsan/selftest.c index d26a052d3383..d98bc208d06d 100644 --- a/kernel/kcsan/selftest.c +++ b/kernel/kcsan/selftest.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) "kcsan: " fmt + #include #include #include @@ -116,16 +118,16 @@ static int __init kcsan_selftest(void) if (do_test()) \ ++passed; \ else \ - pr_err("KCSAN selftest: " #do_test " failed"); \ + pr_err("selftest: " #do_test " failed"); \ } while (0) RUN_TEST(test_requires); RUN_TEST(test_encode_decode); RUN_TEST(test_matching_access); - pr_info("KCSAN selftest: %d/%d tests passed\n", passed, total); + pr_info("selftest: %d/%d tests passed\n", passed, total); if (passed != total) - panic("KCSAN selftests failed"); + panic("selftests failed"); return 0; } postcore_initcall(kcsan_selftest); -- cgit v1.2.3 From 2e986b81f698e73c95e6456183f27b861f47bb87 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 10 Aug 2020 10:06:25 +0200 Subject: kcsan: Optimize debugfs stats counters Remove kcsan_counter_inc/dec() functions, as they perform no other logic, and are no longer needed. This avoids several calls in kcsan_setup_watchpoint() and kcsan_found_watchpoint(), as well as lets the compiler warn us about potential out-of-bounds accesses as the array's size is known at all usage sites at compile-time. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 22 +++++++++++----------- kernel/kcsan/debugfs.c | 21 +++++---------------- kernel/kcsan/kcsan.h | 12 ++++++------ kernel/kcsan/report.c | 2 +- 4 files changed, 23 insertions(+), 34 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index b176400a2735..8a1ff605ff2d 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -367,13 +367,13 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, * already removed the watchpoint, or another thread consumed * the watchpoint before this thread. */ - kcsan_counter_inc(KCSAN_COUNTER_REPORT_RACES); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_REPORT_RACES]); } if ((type & KCSAN_ACCESS_ASSERT) != 0) - kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]); else - kcsan_counter_inc(KCSAN_COUNTER_DATA_RACES); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_DATA_RACES]); user_access_restore(flags); } @@ -414,7 +414,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) goto out; if (!check_encodable((unsigned long)ptr, size)) { - kcsan_counter_inc(KCSAN_COUNTER_UNENCODABLE_ACCESSES); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_UNENCODABLE_ACCESSES]); goto out; } @@ -434,12 +434,12 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) * with which should_watch() returns true should be tweaked so * that this case happens very rarely. */ - kcsan_counter_inc(KCSAN_COUNTER_NO_CAPACITY); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_NO_CAPACITY]); goto out_unlock; } - kcsan_counter_inc(KCSAN_COUNTER_SETUP_WATCHPOINTS); - kcsan_counter_inc(KCSAN_COUNTER_USED_WATCHPOINTS); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_SETUP_WATCHPOINTS]); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_USED_WATCHPOINTS]); /* * Read the current value, to later check and infer a race if the data @@ -541,16 +541,16 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) * increment this counter. */ if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) - kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]); kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL, watchpoint - watchpoints); } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { /* Inferring a race, since the value should not have changed. */ - kcsan_counter_inc(KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN]); if (is_assert) - kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]); if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE, @@ -563,7 +563,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) * reused after this point. */ remove_watchpoint(watchpoint); - kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS); + atomic_long_dec(&kcsan_counters[KCSAN_COUNTER_USED_WATCHPOINTS]); out_unlock: if (!kcsan_interrupt_watcher) local_irq_restore(irq_flags); diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 6c4914fa2fad..3c8093a371b1 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -17,10 +17,7 @@ #include "kcsan.h" -/* - * Statistics counters. - */ -static atomic_long_t counters[KCSAN_COUNTER_COUNT]; +atomic_long_t kcsan_counters[KCSAN_COUNTER_COUNT]; static const char *const counter_names[] = { [KCSAN_COUNTER_USED_WATCHPOINTS] = "used_watchpoints", [KCSAN_COUNTER_SETUP_WATCHPOINTS] = "setup_watchpoints", @@ -53,16 +50,6 @@ static struct { }; static DEFINE_SPINLOCK(report_filterlist_lock); -void kcsan_counter_inc(enum kcsan_counter_id id) -{ - atomic_long_inc(&counters[id]); -} - -void kcsan_counter_dec(enum kcsan_counter_id id) -{ - atomic_long_dec(&counters[id]); -} - /* * The microbenchmark allows benchmarking KCSAN core runtime only. To run * multiple threads, pipe 'microbench=' from multiple tasks into the @@ -206,8 +193,10 @@ static int show_info(struct seq_file *file, void *v) /* show stats */ seq_printf(file, "enabled: %i\n", READ_ONCE(kcsan_enabled)); - for (i = 0; i < KCSAN_COUNTER_COUNT; ++i) - seq_printf(file, "%s: %ld\n", counter_names[i], atomic_long_read(&counters[i])); + for (i = 0; i < KCSAN_COUNTER_COUNT; ++i) { + seq_printf(file, "%s: %ld\n", counter_names[i], + atomic_long_read(&kcsan_counters[i])); + } /* show filter functions, and filter type */ spin_lock_irqsave(&report_filterlist_lock, flags); diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 29480010dc30..8d4bf3431b3c 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -8,6 +8,7 @@ #ifndef _KERNEL_KCSAN_KCSAN_H #define _KERNEL_KCSAN_KCSAN_H +#include #include #include @@ -34,6 +35,10 @@ void kcsan_restore_irqtrace(struct task_struct *task); */ void kcsan_debugfs_init(void); +/* + * Statistics counters displayed via debugfs; should only be modified in + * slow-paths. + */ enum kcsan_counter_id { /* * Number of watchpoints currently in use. @@ -86,12 +91,7 @@ enum kcsan_counter_id { KCSAN_COUNTER_COUNT, /* number of counters */ }; - -/* - * Increment/decrement counter with given id; avoid calling these in fast-path. - */ -extern void kcsan_counter_inc(enum kcsan_counter_id id); -extern void kcsan_counter_dec(enum kcsan_counter_id id); +extern atomic_long_t kcsan_counters[KCSAN_COUNTER_COUNT]; /* * Returns true if data races in the function symbol that maps to func_addr diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index bf1d59449805..d3bf87e6007c 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -559,7 +559,7 @@ static bool prepare_report_consumer(unsigned long *flags, * If the actual accesses to not match, this was a false * positive due to watchpoint encoding. */ - kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); + atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ENCODING_FALSE_POSITIVES]); goto discard; } -- cgit v1.2.3 From e918188611f073063415f40fae568fa4d86d9044 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:20 +0800 Subject: locking: More accurate annotations for read_lock() On the archs using QUEUED_RWLOCKS, read_lock() is not always a recursive read lock, actually it's only recursive if in_interrupt() is true. So change the annotation accordingly to catch more deadlocks. Note we used to treat read_lock() as pure recursive read locks in lib/locking-seftest.c, and this is useful, especially for the lockdep development selftest, so we keep this via a variable to force switching lock annotation for read_lock(). Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-2-boqun.feng@gmail.com --- include/linux/lockdep.h | 23 ++++++++++++++++++++++- kernel/locking/lockdep.c | 14 ++++++++++++++ lib/locking-selftest.c | 11 +++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 6a584b3e5c74..7cae5ea00d59 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -469,6 +469,20 @@ static inline void print_irqtrace_events(struct task_struct *curr) } #endif +/* Variable used to make lockdep treat read_lock() as recursive in selftests */ +#ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS +extern unsigned int force_read_lock_recursive; +#else /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */ +#define force_read_lock_recursive 0 +#endif /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */ + +#ifdef CONFIG_LOCKDEP +extern bool read_lock_is_recursive(void); +#else /* CONFIG_LOCKDEP */ +/* If !LOCKDEP, the value is meaningless */ +#define read_lock_is_recursive() 0 +#endif + /* * For trivial one-depth nesting of a lock-class, the following * global define can be used. (Subsystems with multiple levels @@ -490,7 +504,14 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define spin_release(l, i) lock_release(l, i) #define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) -#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) +#define rwlock_acquire_read(l, s, t, i) \ +do { \ + if (read_lock_is_recursive()) \ + lock_acquire_shared_recursive(l, s, t, NULL, i); \ + else \ + lock_acquire_shared(l, s, t, NULL, i); \ +} while (0) + #define rwlock_release(l, i) lock_release(l, i) #define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 54b74fabf40c..77cd9e6520c4 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4967,6 +4967,20 @@ static bool lockdep_nmi(void) return true; } +/* + * read_lock() is recursive if: + * 1. We force lockdep think this way in selftests or + * 2. The implementation is not queued read/write lock or + * 3. The locker is at an in_interrupt() context. + */ +bool read_lock_is_recursive(void) +{ + return force_read_lock_recursive || + !IS_ENABLED(CONFIG_QUEUED_RWLOCKS) || + in_interrupt(); +} +EXPORT_SYMBOL_GPL(read_lock_is_recursive); + /* * We are not always called with irqs disabled - do that here, * and also avoid lockdep recursion: diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 14f44f59e733..caadc4dd3368 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -28,6 +28,7 @@ * Change this to 1 if you want to see the failure printouts: */ static unsigned int debug_locks_verbose; +unsigned int force_read_lock_recursive; static DEFINE_WD_CLASS(ww_lockdep); @@ -1978,6 +1979,11 @@ void locking_selftest(void) return; } + /* + * treats read_lock() as recursive read locks for testing purpose + */ + force_read_lock_recursive = 1; + /* * Run the testsuite: */ @@ -2073,6 +2079,11 @@ void locking_selftest(void) ww_tests(); + force_read_lock_recursive = 0; + /* + * queued_read_lock() specific test cases can be put here + */ + if (unexpected_testcase_failures) { printk("-----------------------------------------------------------------\n"); debug_locks = 0; -- cgit v1.2.3 From b11be024de164213f6338973d76ab9ab139120cd Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:22 +0800 Subject: lockdep: Demagic the return value of BFS __bfs() could return four magic numbers: 1: search succeeds, but none match. 0: search succeeds, find one match. -1: search fails because of the cq is full. -2: search fails because a invalid node is found. This patch cleans things up by using a enum type for the return value of __bfs() and its friends, this improves the code readability of the code, and further, could help if we want to extend the BFS. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-4-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 155 +++++++++++++++++++++++++++-------------------- 1 file changed, 89 insertions(+), 66 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 77cd9e6520c4..462c68cfb378 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1471,28 +1471,58 @@ static inline struct list_head *get_dep_list(struct lock_list *lock, int offset) return lock_class + offset; } +/* + * Return values of a bfs search: + * + * BFS_E* indicates an error + * BFS_R* indicates a result (match or not) + * + * BFS_EINVALIDNODE: Find a invalid node in the graph. + * + * BFS_EQUEUEFULL: The queue is full while doing the bfs. + * + * BFS_RMATCH: Find the matched node in the graph, and put that node into + * *@target_entry. + * + * BFS_RNOMATCH: Haven't found the matched node and keep *@target_entry + * _unchanged_. + */ +enum bfs_result { + BFS_EINVALIDNODE = -2, + BFS_EQUEUEFULL = -1, + BFS_RMATCH = 0, + BFS_RNOMATCH = 1, +}; + +/* + * bfs_result < 0 means error + */ +static inline bool bfs_error(enum bfs_result res) +{ + return res < 0; +} /* * Forward- or backward-dependency search, used for both circular dependency * checking and hardirq-unsafe/softirq-unsafe checking. */ -static int __bfs(struct lock_list *source_entry, - void *data, - int (*match)(struct lock_list *entry, void *data), - struct lock_list **target_entry, - int offset) +static enum bfs_result __bfs(struct lock_list *source_entry, + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry, + int offset) { struct lock_list *entry; struct lock_list *lock; struct list_head *head; struct circular_queue *cq = &lock_cq; - int ret = 1; + enum bfs_result ret = BFS_RNOMATCH; lockdep_assert_locked(); if (match(source_entry, data)) { *target_entry = source_entry; - ret = 0; + ret = BFS_RMATCH; goto exit; } @@ -1506,7 +1536,7 @@ static int __bfs(struct lock_list *source_entry, while ((lock = __cq_dequeue(cq))) { if (!lock->class) { - ret = -2; + ret = BFS_EINVALIDNODE; goto exit; } @@ -1518,12 +1548,12 @@ static int __bfs(struct lock_list *source_entry, mark_lock_accessed(entry, lock); if (match(entry, data)) { *target_entry = entry; - ret = 0; + ret = BFS_RMATCH; goto exit; } if (__cq_enqueue(cq, entry)) { - ret = -1; + ret = BFS_EQUEUEFULL; goto exit; } cq_depth = __cq_get_elem_count(cq); @@ -1536,20 +1566,22 @@ exit: return ret; } -static inline int __bfs_forwards(struct lock_list *src_entry, - void *data, - int (*match)(struct lock_list *entry, void *data), - struct lock_list **target_entry) +static inline enum bfs_result +__bfs_forwards(struct lock_list *src_entry, + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry) { return __bfs(src_entry, data, match, target_entry, offsetof(struct lock_class, locks_after)); } -static inline int __bfs_backwards(struct lock_list *src_entry, - void *data, - int (*match)(struct lock_list *entry, void *data), - struct lock_list **target_entry) +static inline enum bfs_result +__bfs_backwards(struct lock_list *src_entry, + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry) { return __bfs(src_entry, data, match, target_entry, offsetof(struct lock_class, locks_before)); @@ -1775,18 +1807,18 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class) /* * Check that the dependency graph starting at can lead to - * or not. Print an error and return 0 if it does. + * or not. */ -static noinline int +static noinline enum bfs_result check_path(struct lock_class *target, struct lock_list *src_entry, struct lock_list **target_entry) { - int ret; + enum bfs_result ret; ret = __bfs_forwards(src_entry, (void *)target, class_equal, target_entry); - if (unlikely(ret < 0)) + if (unlikely(bfs_error(ret))) print_bfs_bug(ret); return ret; @@ -1797,13 +1829,13 @@ check_path(struct lock_class *target, struct lock_list *src_entry, * lead to . If it can, there is a circle when adding * -> dependency. * - * Print an error and return 0 if it does. + * Print an error and return BFS_RMATCH if it does. */ -static noinline int +static noinline enum bfs_result check_noncircular(struct held_lock *src, struct held_lock *target, struct lock_trace **const trace) { - int ret; + enum bfs_result ret; struct lock_list *target_entry; struct lock_list src_entry = { .class = hlock_class(src), @@ -1814,7 +1846,7 @@ check_noncircular(struct held_lock *src, struct held_lock *target, ret = check_path(hlock_class(target), &src_entry, &target_entry); - if (unlikely(!ret)) { + if (unlikely(ret == BFS_RMATCH)) { if (!*trace) { /* * If save_trace fails here, the printing might @@ -1836,12 +1868,13 @@ check_noncircular(struct held_lock *src, struct held_lock *target, * or not. If it can, -> dependency is already * in the graph. * - * Print an error and return 2 if it does or 1 if it does not. + * Return BFS_RMATCH if it does, or BFS_RMATCH if it does not, return BFS_E* if + * any error appears in the bfs search. */ -static noinline int +static noinline enum bfs_result check_redundant(struct held_lock *src, struct held_lock *target) { - int ret; + enum bfs_result ret; struct lock_list *target_entry; struct lock_list src_entry = { .class = hlock_class(src), @@ -1852,11 +1885,8 @@ check_redundant(struct held_lock *src, struct held_lock *target) ret = check_path(hlock_class(target), &src_entry, &target_entry); - if (!ret) { + if (ret == BFS_RMATCH) debug_atomic_inc(nr_redundant); - ret = 2; - } else if (ret < 0) - ret = 0; return ret; } @@ -1886,17 +1916,14 @@ static inline int usage_match(struct lock_list *entry, void *mask) * Find a node in the forwards-direction dependency sub-graph starting * at @root->class that matches @bit. * - * Return 0 if such a node exists in the subgraph, and put that node + * Return BFS_MATCH if such a node exists in the subgraph, and put that node * into *@target_entry. - * - * Return 1 otherwise and keep *@target_entry unchanged. - * Return <0 on error. */ -static int +static enum bfs_result find_usage_forwards(struct lock_list *root, unsigned long usage_mask, struct lock_list **target_entry) { - int result; + enum bfs_result result; debug_atomic_inc(nr_find_usage_forwards_checks); @@ -1908,18 +1935,12 @@ find_usage_forwards(struct lock_list *root, unsigned long usage_mask, /* * Find a node in the backwards-direction dependency sub-graph starting * at @root->class that matches @bit. - * - * Return 0 if such a node exists in the subgraph, and put that node - * into *@target_entry. - * - * Return 1 otherwise and keep *@target_entry unchanged. - * Return <0 on error. */ -static int +static enum bfs_result find_usage_backwards(struct lock_list *root, unsigned long usage_mask, struct lock_list **target_entry) { - int result; + enum bfs_result result; debug_atomic_inc(nr_find_usage_backwards_checks); @@ -2247,7 +2268,7 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, struct lock_list *target_entry1; struct lock_list *target_entry; struct lock_list this, that; - int ret; + enum bfs_result ret; /* * Step 1: gather all hard/soft IRQs usages backward in an @@ -2257,7 +2278,7 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, this.class = hlock_class(prev); ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL); - if (ret < 0) { + if (bfs_error(ret)) { print_bfs_bug(ret); return 0; } @@ -2276,12 +2297,12 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, that.class = hlock_class(next); ret = find_usage_forwards(&that, forward_mask, &target_entry1); - if (ret < 0) { + if (bfs_error(ret)) { print_bfs_bug(ret); return 0; } - if (ret == 1) - return ret; + if (ret == BFS_RNOMATCH) + return 1; /* * Step 3: we found a bad match! Now retrieve a lock from the backward @@ -2291,11 +2312,11 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, backward_mask = original_mask(target_entry1->class->usage_mask); ret = find_usage_backwards(&this, backward_mask, &target_entry); - if (ret < 0) { + if (bfs_error(ret)) { print_bfs_bug(ret); return 0; } - if (DEBUG_LOCKS_WARN_ON(ret == 1)) + if (DEBUG_LOCKS_WARN_ON(ret == BFS_RNOMATCH)) return 1; /* @@ -2463,7 +2484,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, struct lock_trace **const trace) { struct lock_list *entry; - int ret; + enum bfs_result ret; if (!hlock_class(prev)->key || !hlock_class(next)->key) { /* @@ -2494,7 +2515,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * in the graph whose neighbours are to be checked. */ ret = check_noncircular(next, prev, trace); - if (unlikely(ret <= 0)) + if (unlikely(bfs_error(ret) || ret == BFS_RMATCH)) return 0; if (!check_irq_usage(curr, prev, next)) @@ -2531,8 +2552,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * Is the -> link redundant? */ ret = check_redundant(prev, next); - if (ret != 1) - return ret; + if (bfs_error(ret)) + return 0; + else if (ret == BFS_RMATCH) + return 2; #endif if (!*trace) { @@ -3436,19 +3459,19 @@ static int check_usage_forwards(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit bit, const char *irqclass) { - int ret; + enum bfs_result ret; struct lock_list root; struct lock_list *target_entry; root.parent = NULL; root.class = hlock_class(this); ret = find_usage_forwards(&root, lock_flag(bit), &target_entry); - if (ret < 0) { + if (bfs_error(ret)) { print_bfs_bug(ret); return 0; } - if (ret == 1) - return ret; + if (ret == BFS_RNOMATCH) + return 1; print_irq_inversion_bug(curr, &root, target_entry, this, 1, irqclass); @@ -3463,19 +3486,19 @@ static int check_usage_backwards(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit bit, const char *irqclass) { - int ret; + enum bfs_result ret; struct lock_list root; struct lock_list *target_entry; root.parent = NULL; root.class = hlock_class(this); ret = find_usage_backwards(&root, lock_flag(bit), &target_entry); - if (ret < 0) { + if (bfs_error(ret)) { print_bfs_bug(ret); return 0; } - if (ret == 1) - return ret; + if (ret == BFS_RNOMATCH) + return 1; print_irq_inversion_bug(curr, &root, target_entry, this, 0, irqclass); -- cgit v1.2.3 From d563bc6ead9e79be37067d58509a605b67378184 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:23 +0800 Subject: lockdep: Make __bfs() visit every dependency until a match Currently, __bfs() will do a breadth-first search in the dependency graph and visit each lock class in the graph exactly once, so for example, in the following graph: A ---------> B | ^ | | +----------> C a __bfs() call starts at A, will visit B through dependency A -> B and visit C through dependency A -> C and that's it, IOW, __bfs() will not visit dependency C -> B. This is OK for now, as we only have strong dependencies in the dependency graph, so whenever there is a traverse path from A to B in __bfs(), it means A has strong dependencies to B (IOW, B depends on A strongly). So no need to visit all dependencies in the graph. However, as we are going to add recursive-read lock into the dependency graph, as a result, not all the paths mean strong dependencies, in the same example above, dependency A -> B may be a weak dependency and traverse A -> C -> B may be a strong dependency path. And with the old way of __bfs() (i.e. visiting every lock class exactly once), we will miss the strong dependency path, which will result into failing to find a deadlock. To cure this for the future, we need to find a way for __bfs() to visit each dependency, rather than each class, exactly once in the search until we find a match. The solution is simple: We used to mark lock_class::lockdep_dependency_gen_id to indicate a class has been visited in __bfs(), now we change the semantics a little bit: we now mark lock_class::lockdep_dependency_gen_id to indicate _all the dependencies_ in its lock_{after,before} have been visited in the __bfs() (note we only take one direction in a __bfs() search). In this way, every dependency is guaranteed to be visited until we find a match. Note: the checks in mark_lock_accessed() and lock_accessed() are removed, because after this modification, we may call these two functions on @source_entry of __bfs(), which may not be the entry in "list_entries" Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-5-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 61 +++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 26 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 462c68cfb378..150686a71be0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1421,23 +1421,19 @@ static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) return (cq->rear - cq->front) & CQ_MASK; } -static inline void mark_lock_accessed(struct lock_list *lock, - struct lock_list *parent) +static inline void mark_lock_accessed(struct lock_list *lock) { - unsigned long nr; + lock->class->dep_gen_id = lockdep_dependency_gen_id; +} - nr = lock - list_entries; - WARN_ON(nr >= ARRAY_SIZE(list_entries)); /* Out-of-bounds, input fail */ +static inline void visit_lock_entry(struct lock_list *lock, + struct lock_list *parent) +{ lock->parent = parent; - lock->class->dep_gen_id = lockdep_dependency_gen_id; } static inline unsigned long lock_accessed(struct lock_list *lock) { - unsigned long nr; - - nr = lock - list_entries; - WARN_ON(nr >= ARRAY_SIZE(list_entries)); /* Out-of-bounds, input fail */ return lock->class->dep_gen_id == lockdep_dependency_gen_id; } @@ -1540,26 +1536,39 @@ static enum bfs_result __bfs(struct lock_list *source_entry, goto exit; } + /* + * If we have visited all the dependencies from this @lock to + * others (iow, if we have visited all lock_list entries in + * @lock->class->locks_{after,before}) we skip, otherwise go + * and visit all the dependencies in the list and mark this + * list accessed. + */ + if (lock_accessed(lock)) + continue; + else + mark_lock_accessed(lock); + head = get_dep_list(lock, offset); + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + list_for_each_entry_rcu(entry, head, entry) { - if (!lock_accessed(entry)) { - unsigned int cq_depth; - mark_lock_accessed(entry, lock); - if (match(entry, data)) { - *target_entry = entry; - ret = BFS_RMATCH; - goto exit; - } - - if (__cq_enqueue(cq, entry)) { - ret = BFS_EQUEUEFULL; - goto exit; - } - cq_depth = __cq_get_elem_count(cq); - if (max_bfs_queue_depth < cq_depth) - max_bfs_queue_depth = cq_depth; + unsigned int cq_depth; + + visit_lock_entry(entry, lock); + if (match(entry, data)) { + *target_entry = entry; + ret = BFS_RMATCH; + goto exit; + } + + if (__cq_enqueue(cq, entry)) { + ret = BFS_EQUEUEFULL; + goto exit; } + cq_depth = __cq_get_elem_count(cq); + if (max_bfs_queue_depth < cq_depth) + max_bfs_queue_depth = cq_depth; } } exit: -- cgit v1.2.3 From bd76eca10de2eb9998d5125b08e8997cbf5508d5 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:24 +0800 Subject: lockdep: Reduce the size of lock_list::distance lock_list::distance is always not greater than MAX_LOCK_DEPTH (which is 48 right now), so a u16 will fit. This patch reduces the size of lock_list::distance to save space, so that we can introduce other fields to help detect recursive read lock deadlocks without increasing the size of lock_list structure. Suggested-by: Peter Zijlstra Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-6-boqun.feng@gmail.com --- include/linux/lockdep.h | 2 +- kernel/locking/lockdep.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 7cae5ea00d59..22750102b5fe 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -54,7 +54,7 @@ struct lock_list { struct lock_class *class; struct lock_class *links_to; const struct lock_trace *trace; - int distance; + u16 distance; /* * The parent field is used to implement breadth-first search, and the diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 150686a71be0..668a983d6405 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1320,7 +1320,7 @@ static struct lock_list *alloc_list_entry(void) */ static int add_lock_to_list(struct lock_class *this, struct lock_class *links_to, struct list_head *head, - unsigned long ip, int distance, + unsigned long ip, u16 distance, const struct lock_trace *trace) { struct lock_list *entry; @@ -2489,7 +2489,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next) */ static int check_prev_add(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, int distance, + struct held_lock *next, u16 distance, struct lock_trace **const trace) { struct lock_list *entry; @@ -2622,7 +2622,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) goto out_bug; for (;;) { - int distance = curr->lockdep_depth - depth + 1; + u16 distance = curr->lockdep_depth - depth + 1; hlock = curr->held_locks + depth - 1; /* -- cgit v1.2.3 From 3454a36d6a39186de508dd43df590a6363364176 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:25 +0800 Subject: lockdep: Introduce lock_list::dep To add recursive read locks into the dependency graph, we need to store the types of dependencies for the BFS later. There are four types of dependencies: * Exclusive -> Non-recursive dependencies: EN e.g. write_lock(prev) held and try to acquire write_lock(next) or non-recursive read_lock(next), which can be represented as "prev -(EN)-> next" * Shared -> Non-recursive dependencies: SN e.g. read_lock(prev) held and try to acquire write_lock(next) or non-recursive read_lock(next), which can be represented as "prev -(SN)-> next" * Exclusive -> Recursive dependencies: ER e.g. write_lock(prev) held and try to acquire recursive read_lock(next), which can be represented as "prev -(ER)-> next" * Shared -> Recursive dependencies: SR e.g. read_lock(prev) held and try to acquire recursive read_lock(next), which can be represented as "prev -(SR)-> next" So we use 4 bits for the presence of each type in lock_list::dep. Helper functions and macros are also introduced to convert a pair of locks into lock_list::dep bit and maintain the addition of different types of dependencies. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-7-boqun.feng@gmail.com --- include/linux/lockdep.h | 2 ++ kernel/locking/lockdep.c | 92 +++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 22750102b5fe..35c8bb0108dd 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -55,6 +55,8 @@ struct lock_list { struct lock_class *links_to; const struct lock_trace *trace; u16 distance; + /* bitmap of different dependencies from head to this */ + u8 dep; /* * The parent field is used to implement breadth-first search, and the diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 668a983d6405..16ad1b74aa30 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1320,7 +1320,7 @@ static struct lock_list *alloc_list_entry(void) */ static int add_lock_to_list(struct lock_class *this, struct lock_class *links_to, struct list_head *head, - unsigned long ip, u16 distance, + unsigned long ip, u16 distance, u8 dep, const struct lock_trace *trace) { struct lock_list *entry; @@ -1334,6 +1334,7 @@ static int add_lock_to_list(struct lock_class *this, entry->class = this; entry->links_to = links_to; + entry->dep = dep; entry->distance = distance; entry->trace = trace; /* @@ -1498,6 +1499,57 @@ static inline bool bfs_error(enum bfs_result res) return res < 0; } +/* + * DEP_*_BIT in lock_list::dep + * + * For dependency @prev -> @next: + * + * SR: @prev is shared reader (->read != 0) and @next is recursive reader + * (->read == 2) + * ER: @prev is exclusive locker (->read == 0) and @next is recursive reader + * SN: @prev is shared reader and @next is non-recursive locker (->read != 2) + * EN: @prev is exclusive locker and @next is non-recursive locker + * + * Note that we define the value of DEP_*_BITs so that: + * bit0 is prev->read == 0 + * bit1 is next->read != 2 + */ +#define DEP_SR_BIT (0 + (0 << 1)) /* 0 */ +#define DEP_ER_BIT (1 + (0 << 1)) /* 1 */ +#define DEP_SN_BIT (0 + (1 << 1)) /* 2 */ +#define DEP_EN_BIT (1 + (1 << 1)) /* 3 */ + +#define DEP_SR_MASK (1U << (DEP_SR_BIT)) +#define DEP_ER_MASK (1U << (DEP_ER_BIT)) +#define DEP_SN_MASK (1U << (DEP_SN_BIT)) +#define DEP_EN_MASK (1U << (DEP_EN_BIT)) + +static inline unsigned int +__calc_dep_bit(struct held_lock *prev, struct held_lock *next) +{ + return (prev->read == 0) + ((next->read != 2) << 1); +} + +static inline u8 calc_dep(struct held_lock *prev, struct held_lock *next) +{ + return 1U << __calc_dep_bit(prev, next); +} + +/* + * calculate the dep_bit for backwards edges. We care about whether @prev is + * shared and whether @next is recursive. + */ +static inline unsigned int +__calc_dep_bitb(struct held_lock *prev, struct held_lock *next) +{ + return (next->read != 2) + ((prev->read == 0) << 1); +} + +static inline u8 calc_depb(struct held_lock *prev, struct held_lock *next) +{ + return 1U << __calc_dep_bitb(prev, next); +} + /* * Forward- or backward-dependency search, used for both circular dependency * checking and hardirq-unsafe/softirq-unsafe checking. @@ -2552,7 +2604,35 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, if (entry->class == hlock_class(next)) { if (distance == 1) entry->distance = 1; - return 1; + entry->dep |= calc_dep(prev, next); + + /* + * Also, update the reverse dependency in @next's + * ->locks_before list. + * + * Here we reuse @entry as the cursor, which is fine + * because we won't go to the next iteration of the + * outer loop: + * + * For normal cases, we return in the inner loop. + * + * If we fail to return, we have inconsistency, i.e. + * ::locks_after contains while + * ::locks_before doesn't contain . In + * that case, we return after the inner and indicate + * something is wrong. + */ + list_for_each_entry(entry, &hlock_class(next)->locks_before, entry) { + if (entry->class == hlock_class(prev)) { + if (distance == 1) + entry->distance = 1; + entry->dep |= calc_depb(prev, next); + return 1; + } + } + + /* is not found in ::locks_before */ + return 0; } } @@ -2579,14 +2659,18 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, */ ret = add_lock_to_list(hlock_class(next), hlock_class(prev), &hlock_class(prev)->locks_after, - next->acquire_ip, distance, *trace); + next->acquire_ip, distance, + calc_dep(prev, next), + *trace); if (!ret) return 0; ret = add_lock_to_list(hlock_class(prev), hlock_class(next), &hlock_class(next)->locks_before, - next->acquire_ip, distance, *trace); + next->acquire_ip, distance, + calc_depb(prev, next), + *trace); if (!ret) return 0; -- cgit v1.2.3 From 6971c0f345620aae5e6172207a57b7524603a34e Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:26 +0800 Subject: lockdep: Extend __bfs() to work with multiple types of dependencies Now we have four types of dependencies in the dependency graph, and not all the pathes carry real dependencies (the dependencies that may cause a deadlock), for example: Given lock A and B, if we have: CPU1 CPU2 ============= ============== write_lock(A); read_lock(B); read_lock(B); write_lock(A); (assuming read_lock(B) is a recursive reader) then we have dependencies A -(ER)-> B, and B -(SN)-> A, and a dependency path A -(ER)-> B -(SN)-> A. In lockdep w/o recursive locks, a dependency path from A to A means a deadlock. However, the above case is obviously not a deadlock, because no one holds B exclusively, therefore no one waits for the other to release B, so who get A first in CPU1 and CPU2 will run non-blockingly. As a result, dependency path A -(ER)-> B -(SN)-> A is not a real/strong dependency that could cause a deadlock. From the observation above, we know that for a dependency path to be real/strong, no two adjacent dependencies can be as -(*R)-> -(S*)->. Now our mission is to make __bfs() traverse only the strong dependency paths, which is simple: we record whether we only have -(*R)-> for the previous lock_list of the path in lock_list::only_xr, and when we pick a dependency in the traverse, we 1) filter out -(S*)-> dependency if the previous lock_list only has -(*R)-> dependency (i.e. ->only_xr is true) and 2) set the next lock_list::only_xr to true if we only have -(*R)-> left after we filter out dependencies based on 1), otherwise, set it to false. With this extension for __bfs(), we now need to initialize the root of __bfs() properly (with a correct ->only_xr), to do so, we introduce some helper functions, which also cleans up a little bit for the __bfs() root initialization code. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-8-boqun.feng@gmail.com --- include/linux/lockdep.h | 2 + kernel/locking/lockdep.c | 113 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 96 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 35c8bb0108dd..57d642d378c7 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -57,6 +57,8 @@ struct lock_list { u16 distance; /* bitmap of different dependencies from head to this */ u8 dep; + /* used by BFS to record whether "prev -> this" only has -(*R)-> */ + u8 only_xr; /* * The parent field is used to implement breadth-first search, and the diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 16ad1b74aa30..5abc227db0e9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1551,8 +1551,72 @@ static inline u8 calc_depb(struct held_lock *prev, struct held_lock *next) } /* - * Forward- or backward-dependency search, used for both circular dependency - * checking and hardirq-unsafe/softirq-unsafe checking. + * Initialize a lock_list entry @lock belonging to @class as the root for a BFS + * search. + */ +static inline void __bfs_init_root(struct lock_list *lock, + struct lock_class *class) +{ + lock->class = class; + lock->parent = NULL; + lock->only_xr = 0; +} + +/* + * Initialize a lock_list entry @lock based on a lock acquisition @hlock as the + * root for a BFS search. + * + * ->only_xr of the initial lock node is set to @hlock->read == 2, to make sure + * that -> @hlock and @hlock -> is not -(*R)-> + * and -(S*)->. + */ +static inline void bfs_init_root(struct lock_list *lock, + struct held_lock *hlock) +{ + __bfs_init_root(lock, hlock_class(hlock)); + lock->only_xr = (hlock->read == 2); +} + +/* + * Similar to bfs_init_root() but initialize the root for backwards BFS. + * + * ->only_xr of the initial lock node is set to @hlock->read != 0, to make sure + * that -> @hlock and @hlock -> is not + * -(*S)-> and -(R*)-> (reverse order of -(*R)-> and -(S*)->). + */ +static inline void bfs_init_rootb(struct lock_list *lock, + struct held_lock *hlock) +{ + __bfs_init_root(lock, hlock_class(hlock)); + lock->only_xr = (hlock->read != 0); +} + +/* + * Breadth-First Search to find a strong path in the dependency graph. + * + * @source_entry: the source of the path we are searching for. + * @data: data used for the second parameter of @match function + * @match: match function for the search + * @target_entry: pointer to the target of a matched path + * @offset: the offset to struct lock_class to determine whether it is + * locks_after or locks_before + * + * We may have multiple edges (considering different kinds of dependencies, + * e.g. ER and SN) between two nodes in the dependency graph. But + * only the strong dependency path in the graph is relevant to deadlocks. A + * strong dependency path is a dependency path that doesn't have two adjacent + * dependencies as -(*R)-> -(S*)->, please see: + * + * Documentation/locking/lockdep-design.rst + * + * for more explanation of the definition of strong dependency paths + * + * In __bfs(), we only traverse in the strong dependency path: + * + * In lock_list::only_xr, we record whether the previous dependency only + * has -(*R)-> in the search, and if it does (prev only has -(*R)->), we + * filter out any -(S*)-> in the current dependency and after that, the + * ->only_xr is set according to whether we only have -(*R)-> left. */ static enum bfs_result __bfs(struct lock_list *source_entry, void *data, @@ -1582,6 +1646,7 @@ static enum bfs_result __bfs(struct lock_list *source_entry, __cq_enqueue(cq, source_entry); while ((lock = __cq_dequeue(cq))) { + bool prev_only_xr; if (!lock->class) { ret = BFS_EINVALIDNODE; @@ -1602,10 +1667,26 @@ static enum bfs_result __bfs(struct lock_list *source_entry, head = get_dep_list(lock, offset); - DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + prev_only_xr = lock->only_xr; list_for_each_entry_rcu(entry, head, entry) { unsigned int cq_depth; + u8 dep = entry->dep; + + /* + * Mask out all -(S*)-> if we only have *R in previous + * step, because -(*R)-> -(S*)-> don't make up a strong + * dependency. + */ + if (prev_only_xr) + dep &= ~(DEP_SR_MASK | DEP_SN_MASK); + + /* If nothing left, we skip */ + if (!dep) + continue; + + /* If there are only -(*R)-> left, set that for the next step */ + entry->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK)); visit_lock_entry(entry, lock); if (match(entry, data)) { @@ -1827,8 +1908,7 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class) unsigned long ret, flags; struct lock_list this; - this.parent = NULL; - this.class = class; + __bfs_init_root(&this, class); raw_local_irq_save(flags); lockdep_lock(); @@ -1854,8 +1934,7 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class) unsigned long ret, flags; struct lock_list this; - this.parent = NULL; - this.class = class; + __bfs_init_root(&this, class); raw_local_irq_save(flags); lockdep_lock(); @@ -1898,10 +1977,9 @@ check_noncircular(struct held_lock *src, struct held_lock *target, { enum bfs_result ret; struct lock_list *target_entry; - struct lock_list src_entry = { - .class = hlock_class(src), - .parent = NULL, - }; + struct lock_list src_entry; + + bfs_init_root(&src_entry, src); debug_atomic_inc(nr_cyclic_checks); @@ -1937,10 +2015,9 @@ check_redundant(struct held_lock *src, struct held_lock *target) { enum bfs_result ret; struct lock_list *target_entry; - struct lock_list src_entry = { - .class = hlock_class(src), - .parent = NULL, - }; + struct lock_list src_entry; + + bfs_init_root(&src_entry, src); debug_atomic_inc(nr_redundant_checks); @@ -3556,8 +3633,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this, struct lock_list root; struct lock_list *target_entry; - root.parent = NULL; - root.class = hlock_class(this); + bfs_init_root(&root, this); ret = find_usage_forwards(&root, lock_flag(bit), &target_entry); if (bfs_error(ret)) { print_bfs_bug(ret); @@ -3583,8 +3659,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, struct lock_list root; struct lock_list *target_entry; - root.parent = NULL; - root.class = hlock_class(this); + bfs_init_rootb(&root, this); ret = find_usage_backwards(&root, lock_flag(bit), &target_entry); if (bfs_error(ret)) { print_bfs_bug(ret); -- cgit v1.2.3 From 61775ed243433ff0556c4f76905929fe01e92922 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:27 +0800 Subject: lockdep: Make __bfs(.match) return bool The "match" parameter of __bfs() is used for checking whether we hit a match in the search, therefore it should return a boolean value rather than an integer for better readability. This patch then changes the return type of the function parameter and the match functions to bool. Suggested-by: Peter Zijlstra Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-9-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 5abc227db0e9..78cd74d77be9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1620,7 +1620,7 @@ static inline void bfs_init_rootb(struct lock_list *lock, */ static enum bfs_result __bfs(struct lock_list *source_entry, void *data, - int (*match)(struct lock_list *entry, void *data), + bool (*match)(struct lock_list *entry, void *data), struct lock_list **target_entry, int offset) { @@ -1711,7 +1711,7 @@ exit: static inline enum bfs_result __bfs_forwards(struct lock_list *src_entry, void *data, - int (*match)(struct lock_list *entry, void *data), + bool (*match)(struct lock_list *entry, void *data), struct lock_list **target_entry) { return __bfs(src_entry, data, match, target_entry, @@ -1722,7 +1722,7 @@ __bfs_forwards(struct lock_list *src_entry, static inline enum bfs_result __bfs_backwards(struct lock_list *src_entry, void *data, - int (*match)(struct lock_list *entry, void *data), + bool (*match)(struct lock_list *entry, void *data), struct lock_list **target_entry) { return __bfs(src_entry, data, match, target_entry, @@ -1833,7 +1833,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, print_circular_bug_entry(entry, depth); } -static inline int class_equal(struct lock_list *entry, void *data) +static inline bool class_equal(struct lock_list *entry, void *data) { return entry->class == data; } @@ -1888,10 +1888,10 @@ static noinline void print_bfs_bug(int ret) WARN(1, "lockdep bfs error:%d\n", ret); } -static int noop_count(struct lock_list *entry, void *data) +static bool noop_count(struct lock_list *entry, void *data) { (*(unsigned long *)data)++; - return 0; + return false; } static unsigned long __lockdep_count_forward_deps(struct lock_list *this) @@ -2032,11 +2032,11 @@ check_redundant(struct held_lock *src, struct held_lock *target) #ifdef CONFIG_TRACE_IRQFLAGS -static inline int usage_accumulate(struct lock_list *entry, void *mask) +static inline bool usage_accumulate(struct lock_list *entry, void *mask) { *(unsigned long *)mask |= entry->class->usage_mask; - return 0; + return false; } /* @@ -2045,9 +2045,9 @@ static inline int usage_accumulate(struct lock_list *entry, void *mask) * without creating any illegal irq-safe -> irq-unsafe lock dependency. */ -static inline int usage_match(struct lock_list *entry, void *mask) +static inline bool usage_match(struct lock_list *entry, void *mask) { - return entry->class->usage_mask & *(unsigned long *)mask; + return !!(entry->class->usage_mask & *(unsigned long *)mask); } /* -- cgit v1.2.3 From 9de0c9bbcedf752e762c67f105bff342e30f9105 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:28 +0800 Subject: lockdep: Support deadlock detection for recursive read locks in check_noncircular() Currently, lockdep only has limit support for deadlock detection for recursive read locks. This patch support deadlock detection for recursive read locks. The basic idea is: We are about to add dependency B -> A in to the dependency graph, we use check_noncircular() to find whether we have a strong dependency path A -> .. -> B so that we have a strong dependency circle (a closed strong dependency path): A -> .. -> B -> A , which doesn't have two adjacent dependencies as -(*R)-> L -(S*)->. Since A -> .. -> B is already a strong dependency path, so if either B -> A is -(E*)-> or A -> .. -> B is -(*N)->, the circle A -> .. -> B -> A is strong, otherwise not. So we introduce a new match function hlock_conflict() to replace the class_equal() for the deadlock check in check_noncircular(). Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-10-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 78cd74d77be9..9160f1ddc435 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1838,10 +1838,37 @@ static inline bool class_equal(struct lock_list *entry, void *data) return entry->class == data; } +/* + * We are about to add B -> A into the dependency graph, and in __bfs() a + * strong dependency path A -> .. -> B is found: hlock_class equals + * entry->class. + * + * We will have a deadlock case (conflict) if A -> .. -> B -> A is a strong + * dependency cycle, that means: + * + * Either + * + * a) B -> A is -(E*)-> + * + * or + * + * b) A -> .. -> B is -(*N)-> (i.e. A -> .. -(*N)-> B) + * + * as then we don't have -(*R)-> -(S*)-> in the cycle. + */ +static inline bool hlock_conflict(struct lock_list *entry, void *data) +{ + struct held_lock *hlock = (struct held_lock *)data; + + return hlock_class(hlock) == entry->class && /* Found A -> .. -> B */ + (hlock->read == 0 || /* B -> A is -(E*)-> */ + !entry->only_xr); /* A -> .. -> B is -(*N)-> */ +} + static noinline void print_circular_bug(struct lock_list *this, - struct lock_list *target, - struct held_lock *check_src, - struct held_lock *check_tgt) + struct lock_list *target, + struct held_lock *check_src, + struct held_lock *check_tgt) { struct task_struct *curr = current; struct lock_list *parent; @@ -1950,13 +1977,13 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class) * or not. */ static noinline enum bfs_result -check_path(struct lock_class *target, struct lock_list *src_entry, +check_path(struct held_lock *target, struct lock_list *src_entry, + bool (*match)(struct lock_list *entry, void *data), struct lock_list **target_entry) { enum bfs_result ret; - ret = __bfs_forwards(src_entry, (void *)target, class_equal, - target_entry); + ret = __bfs_forwards(src_entry, target, match, target_entry); if (unlikely(bfs_error(ret))) print_bfs_bug(ret); @@ -1983,7 +2010,7 @@ check_noncircular(struct held_lock *src, struct held_lock *target, debug_atomic_inc(nr_cyclic_checks); - ret = check_path(hlock_class(target), &src_entry, &target_entry); + ret = check_path(target, &src_entry, hlock_conflict, &target_entry); if (unlikely(ret == BFS_RMATCH)) { if (!*trace) { @@ -2021,7 +2048,7 @@ check_redundant(struct held_lock *src, struct held_lock *target) debug_atomic_inc(nr_redundant_checks); - ret = check_path(hlock_class(target), &src_entry, &target_entry); + ret = check_path(target, &src_entry, class_equal, &target_entry); if (ret == BFS_RMATCH) debug_atomic_inc(nr_redundant); -- cgit v1.2.3 From 68e305678583f13a67e2ce22088c2520bd4f97b4 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:29 +0800 Subject: lockdep: Adjust check_redundant() for recursive read change check_redundant() will report redundancy if it finds a path could replace the about-to-add dependency in the BFS search. With recursive read lock changes, we certainly need to change the match function for the check_redundant(), because the path needs to match not only the lock class but also the dependency kinds. For example, if the about-to-add dependency @prev -> @next is A -(SN)-> B, and we find a path A -(S*)-> .. -(*R)->B in the dependency graph with __bfs() (for simplicity, we can also say we find an -(SR)-> path from A to B), we can not replace the dependency with that path in the BFS search. Because the -(SN)-> dependency can make a strong path with a following -(S*)-> dependency, however an -(SR)-> path cannot. Further, we can replace an -(SN)-> dependency with a -(EN)-> path, that means if we find a path which is stronger than or equal to the about-to-add dependency, we can report the redundancy. By "stronger", it means both the start and the end of the path are not weaker than the start and the end of the dependency (E is "stronger" than S and N is "stronger" than R), so that we can replace the dependency with that path. To make sure we find a path whose start point is not weaker than the about-to-add dependency, we use a trick: the ->only_xr of the root (start point) of __bfs() is initialized as @prev-> == 0, therefore if @prev is E, __bfs() will pick only -(E*)-> for the first dependency, otherwise, __bfs() can pick -(E*)-> or -(S*)-> for the first dependency. To make sure we find a path whose end point is not weaker than the about-to-add dependency, we replace the match function for __bfs() check_redundant(), we check for the case that either @next is R (anything is not weaker than it) or the end point of the path is N (which is not weaker than anything). Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-11-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9160f1ddc435..42e2f1fbbd5f 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1833,9 +1833,39 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, print_circular_bug_entry(entry, depth); } -static inline bool class_equal(struct lock_list *entry, void *data) +/* + * We are about to add A -> B into the dependency graph, and in __bfs() a + * strong dependency path A -> .. -> B is found: hlock_class equals + * entry->class. + * + * If A -> .. -> B can replace A -> B in any __bfs() search (means the former + * is _stronger_ than or equal to the latter), we consider A -> B as redundant. + * For example if A -> .. -> B is -(EN)-> (i.e. A -(E*)-> .. -(*N)-> B), and A + * -> B is -(ER)-> or -(EN)->, then we don't need to add A -> B into the + * dependency graph, as any strong path ..-> A -> B ->.. we can get with + * having dependency A -> B, we could already get a equivalent path ..-> A -> + * .. -> B -> .. with A -> .. -> B. Therefore A -> B is reduntant. + * + * We need to make sure both the start and the end of A -> .. -> B is not + * weaker than A -> B. For the start part, please see the comment in + * check_redundant(). For the end part, we need: + * + * Either + * + * a) A -> B is -(*R)-> (everything is not weaker than that) + * + * or + * + * b) A -> .. -> B is -(*N)-> (nothing is stronger than this) + * + */ +static inline bool hlock_equal(struct lock_list *entry, void *data) { - return entry->class == data; + struct held_lock *hlock = (struct held_lock *)data; + + return hlock_class(hlock) == entry->class && /* Found A -> .. -> B */ + (hlock->read == 2 || /* A -> B is -(*R)-> */ + !entry->only_xr); /* A -> .. -> B is -(*N)-> */ } /* @@ -2045,10 +2075,21 @@ check_redundant(struct held_lock *src, struct held_lock *target) struct lock_list src_entry; bfs_init_root(&src_entry, src); + /* + * Special setup for check_redundant(). + * + * To report redundant, we need to find a strong dependency path that + * is equal to or stronger than -> . So if is E, + * we need to let __bfs() only search for a path starting at a -(E*)->, + * we achieve this by setting the initial node's ->only_xr to true in + * that case. And if is S, we set initial ->only_xr to false + * because both -(S*)-> (equal) and -(E*)-> (stronger) are redundant. + */ + src_entry.only_xr = src->read == 0; debug_atomic_inc(nr_redundant_checks); - ret = check_path(target, &src_entry, class_equal, &target_entry); + ret = check_path(target, &src_entry, hlock_equal, &target_entry); if (ret == BFS_RMATCH) debug_atomic_inc(nr_redundant); -- cgit v1.2.3 From f08e3888574d490b31481eef6d84c61bedba7a47 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:30 +0800 Subject: lockdep: Fix recursive read lock related safe->unsafe detection Currently, in safe->unsafe detection, lockdep misses the fact that a LOCK_ENABLED_IRQ_*_READ usage and a LOCK_USED_IN_IRQ_*_READ usage may cause deadlock too, for example: P1 P2 write_lock(l1); read_lock(l2); write_lock(l2); read_lock(l1); Actually, all of the following cases may cause deadlocks: LOCK_USED_IN_IRQ_* -> LOCK_ENABLED_IRQ_* LOCK_USED_IN_IRQ_*_READ -> LOCK_ENABLED_IRQ_* LOCK_USED_IN_IRQ_* -> LOCK_ENABLED_IRQ_*_READ LOCK_USED_IN_IRQ_*_READ -> LOCK_ENABLED_IRQ_*_READ To fix this, we need to 1) change the calculation of exclusive_mask() so that READ bits are not dropped and 2) always call usage() in mark_lock_irq() to check usage deadlocks, even when the new usage of the lock is READ. Besides, adjust usage_match() and usage_acculumate() to recursive read lock changes. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-12-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 188 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 141 insertions(+), 47 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 42e2f1fbbd5f..6644974a4f5a 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2100,22 +2100,72 @@ check_redundant(struct held_lock *src, struct held_lock *target) #ifdef CONFIG_TRACE_IRQFLAGS +/* + * Forwards and backwards subgraph searching, for the purposes of + * proving that two subgraphs can be connected by a new dependency + * without creating any illegal irq-safe -> irq-unsafe lock dependency. + * + * A irq safe->unsafe deadlock happens with the following conditions: + * + * 1) We have a strong dependency path A -> ... -> B + * + * 2) and we have ENABLED_IRQ usage of B and USED_IN_IRQ usage of A, therefore + * irq can create a new dependency B -> A (consider the case that a holder + * of B gets interrupted by an irq whose handler will try to acquire A). + * + * 3) the dependency circle A -> ... -> B -> A we get from 1) and 2) is a + * strong circle: + * + * For the usage bits of B: + * a) if A -> B is -(*N)->, then B -> A could be any type, so any + * ENABLED_IRQ usage suffices. + * b) if A -> B is -(*R)->, then B -> A must be -(E*)->, so only + * ENABLED_IRQ_*_READ usage suffices. + * + * For the usage bits of A: + * c) if A -> B is -(E*)->, then B -> A could be any type, so any + * USED_IN_IRQ usage suffices. + * d) if A -> B is -(S*)->, then B -> A must be -(*N)->, so only + * USED_IN_IRQ_*_READ usage suffices. + */ + +/* + * There is a strong dependency path in the dependency graph: A -> B, and now + * we need to decide which usage bit of A should be accumulated to detect + * safe->unsafe bugs. + * + * Note that usage_accumulate() is used in backwards search, so ->only_xr + * stands for whether A -> B only has -(S*)-> (in this case ->only_xr is true). + * + * As above, if only_xr is false, which means A -> B has -(E*)-> dependency + * path, any usage of A should be considered. Otherwise, we should only + * consider _READ usage. + */ static inline bool usage_accumulate(struct lock_list *entry, void *mask) { - *(unsigned long *)mask |= entry->class->usage_mask; + if (!entry->only_xr) + *(unsigned long *)mask |= entry->class->usage_mask; + else /* Mask out _READ usage bits */ + *(unsigned long *)mask |= (entry->class->usage_mask & LOCKF_IRQ); return false; } /* - * Forwards and backwards subgraph searching, for the purposes of - * proving that two subgraphs can be connected by a new dependency - * without creating any illegal irq-safe -> irq-unsafe lock dependency. + * There is a strong dependency path in the dependency graph: A -> B, and now + * we need to decide which usage bit of B conflicts with the usage bits of A, + * i.e. which usage bit of B may introduce safe->unsafe deadlocks. + * + * As above, if only_xr is false, which means A -> B has -(*N)-> dependency + * path, any usage of B should be considered. Otherwise, we should only + * consider _READ usage. */ - static inline bool usage_match(struct lock_list *entry, void *mask) { - return !!(entry->class->usage_mask & *(unsigned long *)mask); + if (!entry->only_xr) + return !!(entry->class->usage_mask & *(unsigned long *)mask); + else /* Mask out _READ usage bits */ + return !!((entry->class->usage_mask & LOCKF_IRQ) & *(unsigned long *)mask); } /* @@ -2406,17 +2456,39 @@ static unsigned long invert_dir_mask(unsigned long mask) } /* - * As above, we clear bitnr0 (LOCK_*_READ off) with bitmask ops. First, for all - * bits with bitnr0 set (LOCK_*_READ), add those with bitnr0 cleared (LOCK_*). - * And then mask out all bitnr0. + * Note that a LOCK_ENABLED_IRQ_*_READ usage and a LOCK_USED_IN_IRQ_*_READ + * usage may cause deadlock too, for example: + * + * P1 P2 + * + * write_lock(l1); + * read_lock(l2); + * write_lock(l2); + * + * read_lock(l1); + * + * , in above case, l1 will be marked as LOCK_USED_IN_IRQ_HARDIRQ_READ and l2 + * will marked as LOCK_ENABLE_IRQ_HARDIRQ_READ, and this is a possible + * deadlock. + * + * In fact, all of the following cases may cause deadlocks: + * + * LOCK_USED_IN_IRQ_* -> LOCK_ENABLED_IRQ_* + * LOCK_USED_IN_IRQ_*_READ -> LOCK_ENABLED_IRQ_* + * LOCK_USED_IN_IRQ_* -> LOCK_ENABLED_IRQ_*_READ + * LOCK_USED_IN_IRQ_*_READ -> LOCK_ENABLED_IRQ_*_READ + * + * As a result, to calculate the "exclusive mask", first we invert the + * direction (USED_IN/ENABLED) of the original mask, and 1) for all bits with + * bitnr0 set (LOCK_*_READ), add those with bitnr0 cleared (LOCK_*). 2) for all + * bits with bitnr0 cleared (LOCK_*_READ), add those with bitnr0 set (LOCK_*). */ static unsigned long exclusive_mask(unsigned long mask) { unsigned long excl = invert_dir_mask(mask); - /* Strip read */ excl |= (excl & LOCKF_IRQ_READ) >> LOCK_USAGE_READ_MASK; - excl &= ~LOCKF_IRQ_READ; + excl |= (excl & LOCKF_IRQ) << LOCK_USAGE_READ_MASK; return excl; } @@ -2433,6 +2505,7 @@ static unsigned long original_mask(unsigned long mask) unsigned long excl = invert_dir_mask(mask); /* Include read in existing usages */ + excl |= (excl & LOCKF_IRQ_READ) >> LOCK_USAGE_READ_MASK; excl |= (excl & LOCKF_IRQ) << LOCK_USAGE_READ_MASK; return excl; @@ -2447,14 +2520,24 @@ static int find_exclusive_match(unsigned long mask, enum lock_usage_bit *bitp, enum lock_usage_bit *excl_bitp) { - int bit, excl; + int bit, excl, excl_read; for_each_set_bit(bit, &mask, LOCK_USED) { + /* + * exclusive_bit() strips the read bit, however, + * LOCK_ENABLED_IRQ_*_READ may cause deadlocks too, so we need + * to search excl | LOCK_USAGE_READ_MASK as well. + */ excl = exclusive_bit(bit); + excl_read = excl | LOCK_USAGE_READ_MASK; if (excl_mask & lock_flag(excl)) { *bitp = bit; *excl_bitp = excl; return 0; + } else if (excl_mask & lock_flag(excl_read)) { + *bitp = bit; + *excl_bitp = excl_read; + return 0; } } return -1; @@ -2480,8 +2563,7 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, * Step 1: gather all hard/soft IRQs usages backward in an * accumulated usage mask. */ - this.parent = NULL; - this.class = hlock_class(prev); + bfs_init_rootb(&this, prev); ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL); if (bfs_error(ret)) { @@ -2499,8 +2581,7 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, */ forward_mask = exclusive_mask(usage_mask); - that.parent = NULL; - that.class = hlock_class(next); + bfs_init_root(&that, next); ret = find_usage_forwards(&that, forward_mask, &target_entry1); if (bfs_error(ret)) { @@ -3695,14 +3776,16 @@ print_irq_inversion_bug(struct task_struct *curr, */ static int check_usage_forwards(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit bit, const char *irqclass) + enum lock_usage_bit bit) { enum bfs_result ret; struct lock_list root; struct lock_list *target_entry; + enum lock_usage_bit read_bit = bit + LOCK_USAGE_READ_MASK; + unsigned usage_mask = lock_flag(bit) | lock_flag(read_bit); bfs_init_root(&root, this); - ret = find_usage_forwards(&root, lock_flag(bit), &target_entry); + ret = find_usage_forwards(&root, usage_mask, &target_entry); if (bfs_error(ret)) { print_bfs_bug(ret); return 0; @@ -3710,8 +3793,15 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this, if (ret == BFS_RNOMATCH) return 1; - print_irq_inversion_bug(curr, &root, target_entry, - this, 1, irqclass); + /* Check whether write or read usage is the match */ + if (target_entry->class->usage_mask & lock_flag(bit)) { + print_irq_inversion_bug(curr, &root, target_entry, + this, 1, state_name(bit)); + } else { + print_irq_inversion_bug(curr, &root, target_entry, + this, 1, state_name(read_bit)); + } + return 0; } @@ -3721,14 +3811,16 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this, */ static int check_usage_backwards(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit bit, const char *irqclass) + enum lock_usage_bit bit) { enum bfs_result ret; struct lock_list root; struct lock_list *target_entry; + enum lock_usage_bit read_bit = bit + LOCK_USAGE_READ_MASK; + unsigned usage_mask = lock_flag(bit) | lock_flag(read_bit); bfs_init_rootb(&root, this); - ret = find_usage_backwards(&root, lock_flag(bit), &target_entry); + ret = find_usage_backwards(&root, usage_mask, &target_entry); if (bfs_error(ret)) { print_bfs_bug(ret); return 0; @@ -3736,8 +3828,15 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, if (ret == BFS_RNOMATCH) return 1; - print_irq_inversion_bug(curr, &root, target_entry, - this, 0, irqclass); + /* Check whether write or read usage is the match */ + if (target_entry->class->usage_mask & lock_flag(bit)) { + print_irq_inversion_bug(curr, &root, target_entry, + this, 0, state_name(bit)); + } else { + print_irq_inversion_bug(curr, &root, target_entry, + this, 0, state_name(read_bit)); + } + return 0; } @@ -3776,8 +3875,6 @@ static int SOFTIRQ_verbose(struct lock_class *class) return 0; } -#define STRICT_READ_CHECKS 1 - static int (*state_verbose_f[])(struct lock_class *class) = { #define LOCKDEP_STATE(__STATE) \ __STATE##_verbose, @@ -3802,16 +3899,6 @@ mark_lock_irq(struct task_struct *curr, struct held_lock *this, int read = new_bit & LOCK_USAGE_READ_MASK; int dir = new_bit & LOCK_USAGE_DIR_MASK; - /* - * mark USED_IN has to look forwards -- to ensure no dependency - * has ENABLED state, which would allow recursion deadlocks. - * - * mark ENABLED has to look backwards -- to ensure no dependee - * has USED_IN state, which, again, would allow recursion deadlocks. - */ - check_usage_f usage = dir ? - check_usage_backwards : check_usage_forwards; - /* * Validate that this particular lock does not have conflicting * usage states. @@ -3820,23 +3907,30 @@ mark_lock_irq(struct task_struct *curr, struct held_lock *this, return 0; /* - * Validate that the lock dependencies don't have conflicting usage - * states. + * Check for read in write conflicts */ - if ((!read || STRICT_READ_CHECKS) && - !usage(curr, this, excl_bit, state_name(new_bit & ~LOCK_USAGE_READ_MASK))) + if (!read && !valid_state(curr, this, new_bit, + excl_bit + LOCK_USAGE_READ_MASK)) return 0; + /* - * Check for read in write conflicts + * Validate that the lock dependencies don't have conflicting usage + * states. */ - if (!read) { - if (!valid_state(curr, this, new_bit, excl_bit + LOCK_USAGE_READ_MASK)) + if (dir) { + /* + * mark ENABLED has to look backwards -- to ensure no dependee + * has USED_IN state, which, again, would allow recursion deadlocks. + */ + if (!check_usage_backwards(curr, this, excl_bit)) return 0; - - if (STRICT_READ_CHECKS && - !usage(curr, this, excl_bit + LOCK_USAGE_READ_MASK, - state_name(new_bit + LOCK_USAGE_READ_MASK))) + } else { + /* + * mark USED_IN has to look forwards -- to ensure no dependency + * has ENABLED state, which would allow recursion deadlocks. + */ + if (!check_usage_forwards(curr, this, excl_bit)) return 0; } -- cgit v1.2.3 From 621c9dac0eea7607cb9a57cc9ba47fbcd4e644c9 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:31 +0800 Subject: lockdep: Add recursive read locks into dependency graph Since we have all the fundamental to handle recursive read locks, we now add them into the dependency graph. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-13-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 6644974a4f5a..b87766e080f5 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2808,16 +2808,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, if (!check_irq_usage(curr, prev, next)) return 0; - /* - * For recursive read-locks we do all the dependency checks, - * but we dont store read-triggered dependencies (only - * write-triggered dependencies). This ensures that only the - * write-side dependencies matter, and that if for example a - * write-lock never takes any other locks, then the reads are - * equivalent to a NOP. - */ - if (next->read == 2 || prev->read == 2) - return 1; /* * Is the -> dependency already present? * @@ -2935,13 +2925,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) u16 distance = curr->lockdep_depth - depth + 1; hlock = curr->held_locks + depth - 1; - /* - * Only non-recursive-read entries get new dependencies - * added: - */ - if (hlock->read != 2 && hlock->check) { - int ret = check_prev_add(curr, hlock, next, distance, - &trace); + if (hlock->check) { + int ret = check_prev_add(curr, hlock, next, distance, &trace); if (!ret) return 0; -- cgit v1.2.3 From f611e8cf98ec908b9c2c0da6064a660fc6022487 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:33 +0800 Subject: lockdep: Take read/write status in consideration when generate chainkey Currently, the chainkey of a lock chain is a hash sum of the class_idx of all the held locks, the read/write status are not taken in to consideration while generating the chainkey. This could result into a problem, if we have: P1() { read_lock(B); lock(A); } P2() { lock(A); read_lock(B); } P3() { lock(A); write_lock(B); } , and P1(), P2(), P3() run one by one. And when running P2(), lockdep detects such a lock chain A -> B is not a deadlock, then it's added in the chain cache, and then when running P3(), even if it's a deadlock, we could miss it because of the hit of chain cache. This could be confirmed by self testcase "chain cached mixed R-L/L-W ". To resolve this, we use concept "hlock_id" to generate the chainkey, the hlock_id is a tuple (hlock->class_idx, hlock->read), which fits in a u16 type. With this, the chainkeys are different is the lock sequences have the same locks but different read/write status. Besides, since we use "hlock_id" to generate chainkeys, the chain_hlocks array now store the "hlock_id"s rather than lock_class indexes. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-15-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 53 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index b87766e080f5..cccf4bc759c6 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -371,6 +371,21 @@ static struct hlist_head classhash_table[CLASSHASH_SIZE]; static struct hlist_head chainhash_table[CHAINHASH_SIZE]; +/* + * the id of held_lock + */ +static inline u16 hlock_id(struct held_lock *hlock) +{ + BUILD_BUG_ON(MAX_LOCKDEP_KEYS_BITS + 2 > 16); + + return (hlock->class_idx | (hlock->read << MAX_LOCKDEP_KEYS_BITS)); +} + +static inline unsigned int chain_hlock_class_idx(u16 hlock_id) +{ + return hlock_id & (MAX_LOCKDEP_KEYS - 1); +} + /* * The hash key of the lock dependency chains is a hash itself too: * it's a hash of all locks taken up to that lock, including that lock. @@ -3202,7 +3217,10 @@ static inline void free_chain_hlocks(int base, int size) struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i) { - return lock_classes + chain_hlocks[chain->base + i]; + u16 chain_hlock = chain_hlocks[chain->base + i]; + unsigned int class_idx = chain_hlock_class_idx(chain_hlock); + + return lock_classes + class_idx - 1; } /* @@ -3228,12 +3246,12 @@ static inline int get_first_held_lock(struct task_struct *curr, /* * Returns the next chain_key iteration */ -static u64 print_chain_key_iteration(int class_idx, u64 chain_key) +static u64 print_chain_key_iteration(u16 hlock_id, u64 chain_key) { - u64 new_chain_key = iterate_chain_key(chain_key, class_idx); + u64 new_chain_key = iterate_chain_key(chain_key, hlock_id); - printk(" class_idx:%d -> chain_key:%016Lx", - class_idx, + printk(" hlock_id:%d -> chain_key:%016Lx", + (unsigned int)hlock_id, (unsigned long long)new_chain_key); return new_chain_key; } @@ -3250,12 +3268,12 @@ print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_ne hlock_next->irq_context); for (; i < depth; i++) { hlock = curr->held_locks + i; - chain_key = print_chain_key_iteration(hlock->class_idx, chain_key); + chain_key = print_chain_key_iteration(hlock_id(hlock), chain_key); print_lock(hlock); } - print_chain_key_iteration(hlock_next->class_idx, chain_key); + print_chain_key_iteration(hlock_id(hlock_next), chain_key); print_lock(hlock_next); } @@ -3263,14 +3281,14 @@ static void print_chain_keys_chain(struct lock_chain *chain) { int i; u64 chain_key = INITIAL_CHAIN_KEY; - int class_id; + u16 hlock_id; printk("depth: %u\n", chain->depth); for (i = 0; i < chain->depth; i++) { - class_id = chain_hlocks[chain->base + i]; - chain_key = print_chain_key_iteration(class_id, chain_key); + hlock_id = chain_hlocks[chain->base + i]; + chain_key = print_chain_key_iteration(hlock_id, chain_key); - print_lock_name(lock_classes + class_id); + print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id) - 1); printk("\n"); } } @@ -3319,7 +3337,7 @@ static int check_no_collision(struct task_struct *curr, } for (j = 0; j < chain->depth - 1; j++, i++) { - id = curr->held_locks[i].class_idx; + id = hlock_id(&curr->held_locks[i]); if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) { print_collision(curr, hlock, chain); @@ -3368,7 +3386,6 @@ static inline int add_chain_cache(struct task_struct *curr, struct held_lock *hlock, u64 chain_key) { - struct lock_class *class = hlock_class(hlock); struct hlist_head *hash_head = chainhashentry(chain_key); struct lock_chain *chain; int i, j; @@ -3411,11 +3428,11 @@ static inline int add_chain_cache(struct task_struct *curr, chain->base = j; for (j = 0; j < chain->depth - 1; j++, i++) { - int lock_id = curr->held_locks[i].class_idx; + int lock_id = hlock_id(curr->held_locks + i); chain_hlocks[chain->base + j] = lock_id; } - chain_hlocks[chain->base + j] = class - lock_classes; + chain_hlocks[chain->base + j] = hlock_id(hlock); hlist_add_head_rcu(&chain->entry, hash_head); debug_atomic_inc(chain_lookup_misses); inc_chains(chain->irq_context); @@ -3602,7 +3619,7 @@ static void check_chain_key(struct task_struct *curr) if (prev_hlock && (prev_hlock->irq_context != hlock->irq_context)) chain_key = INITIAL_CHAIN_KEY; - chain_key = iterate_chain_key(chain_key, hlock->class_idx); + chain_key = iterate_chain_key(chain_key, hlock_id(hlock)); prev_hlock = hlock; } if (chain_key != curr->curr_chain_key) { @@ -4749,7 +4766,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, chain_key = INITIAL_CHAIN_KEY; chain_head = 1; } - chain_key = iterate_chain_key(chain_key, class_idx); + chain_key = iterate_chain_key(chain_key, hlock_id(hlock)); if (nest_lock && !__lock_is_held(nest_lock, -1)) { print_lock_nested_lock_not_held(curr, hlock, ip); @@ -5648,7 +5665,7 @@ static void remove_class_from_lock_chain(struct pending_free *pf, int i; for (i = chain->base; i < chain->base + chain->depth; i++) { - if (chain_hlocks[i] != class - lock_classes) + if (chain_hlock_class_idx(chain_hlocks[i]) != class - lock_classes) continue; /* * Each lock class occurs at most once in a lock chain so once -- cgit v1.2.3 From cd290ec24633f51029dab0d25505fae7da0e1eda Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 21 Aug 2020 14:31:26 +0200 Subject: kcsan: Use tracing-safe version of prandom In the core runtime, we must minimize any calls to external library functions to avoid any kind of recursion. This can happen even though instrumentation is disabled for called functions, but tracing is enabled. Most recently, prandom_u32() added a tracepoint, which can cause problems for KCSAN even if the rcuidle variant is used. For example: kcsan -> prandom_u32() -> trace_prandom_u32_rcuidle -> srcu_read_lock_notrace -> __srcu_read_lock -> kcsan ... While we could disable KCSAN in kcsan_setup_watchpoint(), this does not solve other unexpected behaviour we may get due recursing into functions that may not be tolerant to such recursion: __srcu_read_lock -> kcsan -> ... -> __srcu_read_lock Therefore, switch to using prandom_u32_state(), which is uninstrumented, and does not have a tracepoint. Link: https://lkml.kernel.org/r/20200821063043.1949509-1-elver@google.com Link: https://lkml.kernel.org/r/20200820172046.GA177701@elver.google.com Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- kernel/kcsan/core.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 8a1ff605ff2d..3994a217bde7 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -100,6 +100,9 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; */ static DEFINE_PER_CPU(long, kcsan_skip); +/* For kcsan_prandom_u32_max(). */ +static DEFINE_PER_CPU(struct rnd_state, kcsan_rand_state); + static __always_inline atomic_long_t *find_watchpoint(unsigned long addr, size_t size, bool expect_write, @@ -271,11 +274,28 @@ should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx * return true; } +/* + * Returns a pseudo-random number in interval [0, ep_ro). See prandom_u32_max() + * for more details. + * + * The open-coded version here is using only safe primitives for all contexts + * where we can have KCSAN instrumentation. In particular, we cannot use + * prandom_u32() directly, as its tracepoint could cause recursion. + */ +static u32 kcsan_prandom_u32_max(u32 ep_ro) +{ + struct rnd_state *state = &get_cpu_var(kcsan_rand_state); + const u32 res = prandom_u32_state(state); + + put_cpu_var(kcsan_rand_state); + return (u32)(((u64) res * ep_ro) >> 32); +} + static inline void reset_kcsan_skip(void) { long skip_count = kcsan_skip_watch - (IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ? - prandom_u32_max(kcsan_skip_watch) : + kcsan_prandom_u32_max(kcsan_skip_watch) : 0); this_cpu_write(kcsan_skip, skip_count); } @@ -285,16 +305,18 @@ static __always_inline bool kcsan_is_enabled(void) return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0; } -static inline unsigned int get_delay(int type) +/* Introduce delay depending on context and configuration. */ +static void delay_access(int type) { unsigned int delay = in_task() ? kcsan_udelay_task : kcsan_udelay_interrupt; /* For certain access types, skew the random delay to be longer. */ unsigned int skew_delay_order = (type & (KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_ASSERT)) ? 1 : 0; - return delay - (IS_ENABLED(CONFIG_KCSAN_DELAY_RANDOMIZE) ? - prandom_u32_max(delay >> skew_delay_order) : - 0); + delay -= IS_ENABLED(CONFIG_KCSAN_DELAY_RANDOMIZE) ? + kcsan_prandom_u32_max(delay >> skew_delay_order) : + 0; + udelay(delay); } void kcsan_save_irqtrace(struct task_struct *task) @@ -476,7 +498,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) * Delay this thread, to increase probability of observing a racy * conflicting access. */ - udelay(get_delay(type)); + delay_access(type); /* * Re-read value, and check if it is as expected; if not, we infer a @@ -620,6 +642,7 @@ void __init kcsan_init(void) BUG_ON(!in_task()); kcsan_debugfs_init(); + prandom_seed_full_state(&kcsan_rand_state); /* * We are in the init task, and no other tasks should be running; -- cgit v1.2.3 From 58faf20a086bd34f91983609e26eac3d5fe76be3 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 27 Aug 2020 13:40:37 +0200 Subject: time/sched_clock: Use raw_read_seqcount_latch() during suspend sched_clock uses seqcount_t latching to switch between two storage places protected by the sequence counter. This allows it to have interruptible, NMI-safe, seqcount_t write side critical sections. Since 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()"), raw_read_seqcount_latch() became the standardized way for seqcount_t latch read paths. Due to the dependent load, it has one read memory barrier less than the currently used raw_read_seqcount() API. Use raw_read_seqcount_latch() for the suspend path. Commit aadd6e5caaac ("time/sched_clock: Use raw_read_seqcount_latch()") missed changing that instance of raw_read_seqcount(). References: 1809bfa44e10 ("timers, sched/clock: Avoid deadlock during read from NMI") Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200715092345.GA231464@debian-buster-darwi.lab.linutronix.de --- kernel/time/sched_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 1c03eec6ca9b..8c6b5febd7a0 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -258,7 +258,7 @@ void __init generic_sched_clock_init(void) */ static u64 notrace suspended_sched_clock_read(void) { - unsigned int seq = raw_read_seqcount(&cd.seq); + unsigned int seq = raw_read_seqcount_latch(&cd.seq); return cd.read_data[seq & 1].epoch_cyc; } -- cgit v1.2.3 From a690ed07353ec45f056b0a6f87c23a12a59c030d Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 27 Aug 2020 13:40:40 +0200 Subject: time/sched_clock: Use seqcount_latch_t Latch sequence counters have unique read and write APIs, and thus seqcount_latch_t was recently introduced at seqlock.h. Use that new data type instead of plain seqcount_t. This adds the necessary type-safety and ensures only latching-safe seqcount APIs are to be used. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200827114044.11173-5-a.darwish@linutronix.de --- kernel/time/sched_clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 8c6b5febd7a0..0642013dace4 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -35,7 +35,7 @@ * into a single 64-byte cache line. */ struct clock_data { - seqcount_t seq; + seqcount_latch_t seq; struct clock_read_data read_data[2]; ktime_t wrap_kt; unsigned long rate; @@ -76,7 +76,7 @@ struct clock_read_data *sched_clock_read_begin(unsigned int *seq) int sched_clock_read_retry(unsigned int seq) { - return read_seqcount_retry(&cd.seq, seq); + return read_seqcount_latch_retry(&cd.seq, seq); } unsigned long long notrace sched_clock(void) -- cgit v1.2.3 From 249d053835320cb3e7c00066cf085a6ba9b1f126 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 27 Aug 2020 13:40:41 +0200 Subject: timekeeping: Use seqcount_latch_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Latch sequence counters are a multiversion concurrency control mechanism where the seqcount_t counter even/odd value is used to switch between two data storage copies. This allows the seqcount_t read path to safely interrupt its write side critical section (e.g. from NMIs). Initially, latch sequence counters were implemented as a single write function, raw_write_seqcount_latch(), above plain seqcount_t. The read path was expected to use plain seqcount_t raw_read_seqcount(). A specialized read function was later added, raw_read_seqcount_latch(), and became the standardized way for latch read paths. Having unique read and write APIs meant that latch sequence counters are basically a data type of their own -- just inappropriately overloading plain seqcount_t. The seqcount_latch_t data type was thus introduced at seqlock.h. Use that new data type instead of seqcount_raw_spinlock_t. This ensures that only latch-safe APIs are to be used with the sequence counter. Note that the use of seqcount_raw_spinlock_t was not very useful in the first place. Only the "raw_" subset of seqcount_t APIs were used at timekeeping.c. This subset was created for contexts where lockdep cannot be used. seqcount_LOCKTYPE_t's raison d'ĂȘtre -- verifying that the seqcount_t writer serialization lock is held -- cannot thus be done. References: 0c3351d451ae ("seqlock: Use raw_ prefix instead of _no_lockdep") References: 55f3560df975 ("seqlock: Extend seqcount API with associated locks") Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200827114044.11173-6-a.darwish@linutronix.de --- kernel/time/timekeeping.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4c47f388a83f..999c981ae766 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -64,7 +64,7 @@ static struct timekeeper shadow_timekeeper; * See @update_fast_timekeeper() below. */ struct tk_fast { - seqcount_raw_spinlock_t seq; + seqcount_latch_t seq; struct tk_read_base base[2]; }; @@ -81,13 +81,13 @@ static struct clocksource dummy_clock = { }; static struct tk_fast tk_fast_mono ____cacheline_aligned = { - .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock), + .seq = SEQCNT_LATCH_ZERO(tk_fast_mono.seq), .base[0] = { .clock = &dummy_clock, }, .base[1] = { .clock = &dummy_clock, }, }; static struct tk_fast tk_fast_raw ____cacheline_aligned = { - .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock), + .seq = SEQCNT_LATCH_ZERO(tk_fast_raw.seq), .base[0] = { .clock = &dummy_clock, }, .base[1] = { .clock = &dummy_clock, }, }; @@ -467,7 +467,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) tk_clock_read(tkr), tkr->cycle_last, tkr->mask)); - } while (read_seqcount_retry(&tkf->seq, seq)); + } while (read_seqcount_latch_retry(&tkf->seq, seq)); return now; } @@ -533,7 +533,7 @@ static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf) tk_clock_read(tkr), tkr->cycle_last, tkr->mask)); - } while (read_seqcount_retry(&tkf->seq, seq)); + } while (read_seqcount_latch_retry(&tkf->seq, seq)); return now; } -- cgit v1.2.3 From 6d1823ccc480866e571ab1206665d693aeb600cf Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 17 Sep 2020 16:01:50 +0800 Subject: lockdep: Optimize the memory usage of circular queue Qian Cai reported a BFS_EQUEUEFULL warning [1] after read recursive deadlock detection merged into tip tree recently. Unlike the previous lockep graph searching, which iterate every lock class (every node in the graph) exactly once, the graph searching for read recurisve deadlock detection needs to iterate every lock dependency (every edge in the graph) once, as a result, the maximum memory cost of the circular queue changes from O(V), where V is the number of lock classes (nodes or vertices) in the graph, to O(E), where E is the number of lock dependencies (edges), because every lock class or dependency gets enqueued once in the BFS. Therefore we hit the BFS_EQUEUEFULL case. However, actually we don't need to enqueue all dependencies for the BFS, because every time we enqueue a dependency, we almostly enqueue all other dependencies in the same dependency list ("almostly" is because we currently check before enqueue, so if a dependency doesn't pass the check stage we won't enqueue it, however, we can always do in reverse ordering), based on this, we can only enqueue the first dependency from a dependency list and every time we want to fetch a new dependency to work, we can either: 1) fetch the dependency next to the current dependency in the dependency list or 2) if the dependency in 1) doesn't exist, fetch the dependency from the queue. With this approach, the "max bfs queue depth" for a x86_64_defconfig + lockdep and selftest config kernel can get descreased from: max bfs queue depth: 201 to (after apply this patch) max bfs queue depth: 61 While I'm at it, clean up the code logic a little (e.g. directly return other than set a "ret" value and goto the "exit" label). [1]: https://lore.kernel.org/lkml/17343f6f7f2438fc376125384133c5ba70c2a681.camel@redhat.com/ Reported-by: Qian Cai Reported-by: syzbot+62ebe501c1ce9a91f68c@syzkaller.appspotmail.com Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200917080210.108095-1-boqun.feng@gmail.com --- kernel/locking/lockdep.c | 99 +++++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 39 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index cccf4bc759c6..9560a4e55277 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1606,6 +1606,15 @@ static inline void bfs_init_rootb(struct lock_list *lock, lock->only_xr = (hlock->read != 0); } +static inline struct lock_list *__bfs_next(struct lock_list *lock, int offset) +{ + if (!lock || !lock->parent) + return NULL; + + return list_next_or_null_rcu(get_dep_list(lock->parent, offset), + &lock->entry, struct lock_list, entry); +} + /* * Breadth-First Search to find a strong path in the dependency graph. * @@ -1639,36 +1648,25 @@ static enum bfs_result __bfs(struct lock_list *source_entry, struct lock_list **target_entry, int offset) { + struct circular_queue *cq = &lock_cq; + struct lock_list *lock = NULL; struct lock_list *entry; - struct lock_list *lock; struct list_head *head; - struct circular_queue *cq = &lock_cq; - enum bfs_result ret = BFS_RNOMATCH; + unsigned int cq_depth; + bool first; lockdep_assert_locked(); - if (match(source_entry, data)) { - *target_entry = source_entry; - ret = BFS_RMATCH; - goto exit; - } - - head = get_dep_list(source_entry, offset); - if (list_empty(head)) - goto exit; - __cq_init(cq); __cq_enqueue(cq, source_entry); - while ((lock = __cq_dequeue(cq))) { - bool prev_only_xr; - - if (!lock->class) { - ret = BFS_EINVALIDNODE; - goto exit; - } + while ((lock = __bfs_next(lock, offset)) || (lock = __cq_dequeue(cq))) { + if (!lock->class) + return BFS_EINVALIDNODE; /* + * Step 1: check whether we already finish on this one. + * * If we have visited all the dependencies from this @lock to * others (iow, if we have visited all lock_list entries in * @lock->class->locks_{after,before}) we skip, otherwise go @@ -1680,13 +1678,13 @@ static enum bfs_result __bfs(struct lock_list *source_entry, else mark_lock_accessed(lock); - head = get_dep_list(lock, offset); - - prev_only_xr = lock->only_xr; - - list_for_each_entry_rcu(entry, head, entry) { - unsigned int cq_depth; - u8 dep = entry->dep; + /* + * Step 2: check whether prev dependency and this form a strong + * dependency path. + */ + if (lock->parent) { /* Parent exists, check prev dependency */ + u8 dep = lock->dep; + bool prev_only_xr = lock->parent->only_xr; /* * Mask out all -(S*)-> if we only have *R in previous @@ -1701,26 +1699,49 @@ static enum bfs_result __bfs(struct lock_list *source_entry, continue; /* If there are only -(*R)-> left, set that for the next step */ - entry->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK)); + lock->only_xr = !(dep & (DEP_SN_MASK | DEP_EN_MASK)); + } + /* + * Step 3: we haven't visited this and there is a strong + * dependency path to this, so check with @match. + */ + if (match(lock, data)) { + *target_entry = lock; + return BFS_RMATCH; + } + + /* + * Step 4: if not match, expand the path by adding the + * forward or backwards dependencis in the search + * + */ + first = true; + head = get_dep_list(lock, offset); + list_for_each_entry_rcu(entry, head, entry) { visit_lock_entry(entry, lock); - if (match(entry, data)) { - *target_entry = entry; - ret = BFS_RMATCH; - goto exit; - } - if (__cq_enqueue(cq, entry)) { - ret = BFS_EQUEUEFULL; - goto exit; - } + /* + * Note we only enqueue the first of the list into the + * queue, because we can always find a sibling + * dependency from one (see __bfs_next()), as a result + * the space of queue is saved. + */ + if (!first) + continue; + + first = false; + + if (__cq_enqueue(cq, entry)) + return BFS_EQUEUEFULL; + cq_depth = __cq_get_elem_count(cq); if (max_bfs_queue_depth < cq_depth) max_bfs_queue_depth = cq_depth; } } -exit: - return ret; + + return BFS_RNOMATCH; } static inline enum bfs_result -- cgit v1.2.3 From 2bb8945bcc1a768f2bc402a16c9610bba8d5187d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Sep 2020 11:49:37 +0200 Subject: lockdep: Fix usage_traceoverflow Basically print_lock_class_header()'s for loop is out of sync with the the size of of ->usage_traces[]. Also clean things up a bit while at it, to avoid such mishaps in the future. Fixes: 23870f122768 ("locking/lockdep: Fix "USED" <- "IN-NMI" inversions") Reported-by: Qian Cai Debugged-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Qian Cai Link: https://lkml.kernel.org/r/20200930094937.GE2651@hirez.programming.kicks-ass.net --- include/linux/lockdep_types.h | 8 ++++++-- kernel/locking/lockdep.c | 32 +++++++++++++++----------------- kernel/locking/lockdep_internals.h | 7 +++++-- 3 files changed, 26 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index bb35b449f533..9a1fd49df17f 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -35,8 +35,12 @@ enum lockdep_wait_type { /* * We'd rather not expose kernel/lockdep_states.h this wide, but we do need * the total number of states... :-( + * + * XXX_LOCK_USAGE_STATES is the number of lines in lockdep_states.h, for each + * of those we generates 4 states, Additionally we report on USED and USED_READ. */ -#define XXX_LOCK_USAGE_STATES (1+2*4) +#define XXX_LOCK_USAGE_STATES 2 +#define LOCK_TRACE_STATES (XXX_LOCK_USAGE_STATES*4 + 2) /* * NR_LOCKDEP_CACHING_CLASSES ... Number of classes @@ -106,7 +110,7 @@ struct lock_class { * IRQ/softirq usage tracking bits: */ unsigned long usage_mask; - const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES]; + const struct lock_trace *usage_traces[LOCK_TRACE_STATES]; /* * Generation counter, when doing certain classes of graph walking, diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 2facbbd146ec..a430fbb01eb1 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -585,6 +585,8 @@ static const char *usage_str[] = #include "lockdep_states.h" #undef LOCKDEP_STATE [LOCK_USED] = "INITIAL USE", + [LOCK_USED_READ] = "INITIAL READ USE", + /* abused as string storage for verify_lock_unused() */ [LOCK_USAGE_STATES] = "IN-NMI", }; #endif @@ -1939,7 +1941,7 @@ static void print_lock_class_header(struct lock_class *class, int depth) #endif printk(KERN_CONT " {\n"); - for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { + for (bit = 0; bit < LOCK_TRACE_STATES; bit++) { if (class->usage_mask & (1 << bit)) { int len = depth; @@ -3969,7 +3971,7 @@ static int separate_irq_context(struct task_struct *curr, static int mark_lock(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit new_bit) { - unsigned int old_mask, new_mask, ret = 1; + unsigned int new_mask, ret = 1; if (new_bit >= LOCK_USAGE_STATES) { DEBUG_LOCKS_WARN_ON(1); @@ -3996,30 +3998,26 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, if (unlikely(hlock_class(this)->usage_mask & new_mask)) goto unlock; - old_mask = hlock_class(this)->usage_mask; hlock_class(this)->usage_mask |= new_mask; - /* - * Save one usage_traces[] entry and map both LOCK_USED and - * LOCK_USED_READ onto the same entry. - */ - if (new_bit == LOCK_USED || new_bit == LOCK_USED_READ) { - if (old_mask & (LOCKF_USED | LOCKF_USED_READ)) - goto unlock; - new_bit = LOCK_USED; + if (new_bit < LOCK_TRACE_STATES) { + if (!(hlock_class(this)->usage_traces[new_bit] = save_trace())) + return 0; } - if (!(hlock_class(this)->usage_traces[new_bit] = save_trace())) - return 0; - switch (new_bit) { + case 0 ... LOCK_USED-1: + ret = mark_lock_irq(curr, this, new_bit); + if (!ret) + return 0; + break; + case LOCK_USED: debug_atomic_dec(nr_unused_locks); break; + default: - ret = mark_lock_irq(curr, this, new_bit); - if (!ret) - return 0; + break; } unlock: diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index b0be1560ed17..de49f9e1c11b 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -20,9 +20,12 @@ enum lock_usage_bit { #undef LOCKDEP_STATE LOCK_USED, LOCK_USED_READ, - LOCK_USAGE_STATES + LOCK_USAGE_STATES, }; +/* states after LOCK_USED_READ are not traced and printed */ +static_assert(LOCK_TRACE_STATES == LOCK_USAGE_STATES); + #define LOCK_USAGE_READ_MASK 1 #define LOCK_USAGE_DIR_MASK 2 #define LOCK_USAGE_STATE_MASK (~(LOCK_USAGE_READ_MASK | LOCK_USAGE_DIR_MASK)) @@ -121,7 +124,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2) +#define LOCK_USAGE_CHARS (2*XXX_LOCK_USAGE_STATES + 1) extern void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS]); -- cgit v1.2.3 From 4d004099a668c41522242aa146a38cc4eb59cb1e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 2 Oct 2020 11:04:21 +0200 Subject: lockdep: Fix lockdep recursion Steve reported that lockdep_assert*irq*(), when nested inside lockdep itself, will trigger a false-positive. One example is the stack-trace code, as called from inside lockdep, triggering tracing, which in turn calls RCU, which then uses lockdep_assert_irqs_disabled(). Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables") Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 13 ++++--- kernel/locking/lockdep.c | 99 +++++++++++++++++++++++++++++------------------- 2 files changed, 67 insertions(+), 45 deletions(-) (limited to 'kernel') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 6a584b3e5c74..b1227be47496 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -534,6 +534,7 @@ do { \ DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); +DECLARE_PER_CPU(unsigned int, lockdep_recursion); /* * The below lockdep_assert_*() macros use raw_cpu_read() to access the above @@ -543,25 +544,27 @@ DECLARE_PER_CPU(int, hardirq_context); * read the value from our previous CPU. */ +#define __lockdep_enabled (debug_locks && !raw_cpu_read(lockdep_recursion)) + #define lockdep_assert_irqs_enabled() \ do { \ - WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(__lockdep_enabled && !raw_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_irqs_disabled() \ do { \ - WARN_ON_ONCE(debug_locks && raw_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(__lockdep_enabled && raw_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_in_irq() \ do { \ - WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirq_context)); \ + WARN_ON_ONCE(__lockdep_enabled && !raw_cpu_read(hardirq_context)); \ } while (0) #define lockdep_assert_preemption_enabled() \ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ - debug_locks && \ + __lockdep_enabled && \ (preempt_count() != 0 || \ !raw_cpu_read(hardirqs_enabled))); \ } while (0) @@ -569,7 +572,7 @@ do { \ #define lockdep_assert_preemption_disabled() \ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ - debug_locks && \ + __lockdep_enabled && \ (preempt_count() == 0 && \ raw_cpu_read(hardirqs_enabled))); \ } while (0) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index a430fbb01eb1..85d15f0362dc 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -76,6 +76,23 @@ module_param(lock_stat, int, 0644); #define lock_stat 0 #endif +DEFINE_PER_CPU(unsigned int, lockdep_recursion); +EXPORT_PER_CPU_SYMBOL_GPL(lockdep_recursion); + +static inline bool lockdep_enabled(void) +{ + if (!debug_locks) + return false; + + if (raw_cpu_read(lockdep_recursion)) + return false; + + if (current->lockdep_recursion) + return false; + + return true; +} + /* * lockdep_lock: protects the lockdep graph, the hashes and the * class/list/hash allocators. @@ -93,7 +110,7 @@ static inline void lockdep_lock(void) arch_spin_lock(&__lock); __owner = current; - current->lockdep_recursion++; + __this_cpu_inc(lockdep_recursion); } static inline void lockdep_unlock(void) @@ -101,7 +118,7 @@ static inline void lockdep_unlock(void) if (debug_locks && DEBUG_LOCKS_WARN_ON(__owner != current)) return; - current->lockdep_recursion--; + __this_cpu_dec(lockdep_recursion); __owner = NULL; arch_spin_unlock(&__lock); } @@ -393,10 +410,15 @@ void lockdep_init_task(struct task_struct *task) task->lockdep_recursion = 0; } +static __always_inline void lockdep_recursion_inc(void) +{ + __this_cpu_inc(lockdep_recursion); +} + static __always_inline void lockdep_recursion_finish(void) { - if (WARN_ON_ONCE((--current->lockdep_recursion) & LOCKDEP_RECURSION_MASK)) - current->lockdep_recursion = 0; + if (WARN_ON_ONCE(__this_cpu_dec_return(lockdep_recursion))) + __this_cpu_write(lockdep_recursion, 0); } void lockdep_set_selftest_task(struct task_struct *task) @@ -3659,7 +3681,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip) if (unlikely(in_nmi())) return; - if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) + if (unlikely(__this_cpu_read(lockdep_recursion))) return; if (unlikely(lockdep_hardirqs_enabled())) { @@ -3695,7 +3717,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip) current->hardirq_chain_key = current->curr_chain_key; - current->lockdep_recursion++; + lockdep_recursion_inc(); __trace_hardirqs_on_caller(); lockdep_recursion_finish(); } @@ -3728,7 +3750,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) goto skip_checks; } - if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) + if (unlikely(__this_cpu_read(lockdep_recursion))) return; if (lockdep_hardirqs_enabled()) { @@ -3781,7 +3803,7 @@ void noinstr lockdep_hardirqs_off(unsigned long ip) if (in_nmi()) { if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI)) return; - } else if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK) + } else if (__this_cpu_read(lockdep_recursion)) return; /* @@ -3814,7 +3836,7 @@ void lockdep_softirqs_on(unsigned long ip) { struct irqtrace_events *trace = ¤t->irqtrace; - if (unlikely(!debug_locks || current->lockdep_recursion)) + if (unlikely(!lockdep_enabled())) return; /* @@ -3829,7 +3851,7 @@ void lockdep_softirqs_on(unsigned long ip) return; } - current->lockdep_recursion++; + lockdep_recursion_inc(); /* * We'll do an OFF -> ON transition: */ @@ -3852,7 +3874,7 @@ void lockdep_softirqs_on(unsigned long ip) */ void lockdep_softirqs_off(unsigned long ip) { - if (unlikely(!debug_locks || current->lockdep_recursion)) + if (unlikely(!lockdep_enabled())) return; /* @@ -4233,11 +4255,11 @@ void lockdep_init_map_waits(struct lockdep_map *lock, const char *name, if (subclass) { unsigned long flags; - if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion)) + if (DEBUG_LOCKS_WARN_ON(!lockdep_enabled())) return; raw_local_irq_save(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); register_lock_class(lock, subclass, 1); lockdep_recursion_finish(); raw_local_irq_restore(flags); @@ -4920,11 +4942,11 @@ void lock_set_class(struct lockdep_map *lock, const char *name, { unsigned long flags; - if (unlikely(current->lockdep_recursion)) + if (unlikely(!lockdep_enabled())) return; raw_local_irq_save(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); check_flags(flags); if (__lock_set_class(lock, name, key, subclass, ip)) check_chain_key(current); @@ -4937,11 +4959,11 @@ void lock_downgrade(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - if (unlikely(current->lockdep_recursion)) + if (unlikely(!lockdep_enabled())) return; raw_local_irq_save(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); check_flags(flags); if (__lock_downgrade(lock, ip)) check_chain_key(current); @@ -4979,7 +5001,7 @@ static void verify_lock_unused(struct lockdep_map *lock, struct held_lock *hlock static bool lockdep_nmi(void) { - if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK) + if (raw_cpu_read(lockdep_recursion)) return false; if (!in_nmi()) @@ -5000,7 +5022,10 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); - if (unlikely(current->lockdep_recursion)) { + if (!debug_locks) + return; + + if (unlikely(!lockdep_enabled())) { /* XXX allow trylock from NMI ?!? */ if (lockdep_nmi() && !trylock) { struct held_lock hlock; @@ -5023,7 +5048,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, raw_local_irq_save(flags); check_flags(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); __lock_acquire(lock, subclass, trylock, read, check, irqs_disabled_flags(flags), nest_lock, ip, 0, 0); lockdep_recursion_finish(); @@ -5037,13 +5062,13 @@ void lock_release(struct lockdep_map *lock, unsigned long ip) trace_lock_release(lock, ip); - if (unlikely(current->lockdep_recursion)) + if (unlikely(!lockdep_enabled())) return; raw_local_irq_save(flags); check_flags(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); if (__lock_release(lock, ip)) check_chain_key(current); lockdep_recursion_finish(); @@ -5056,13 +5081,13 @@ noinstr int lock_is_held_type(const struct lockdep_map *lock, int read) unsigned long flags; int ret = 0; - if (unlikely(current->lockdep_recursion)) + if (unlikely(!lockdep_enabled())) return 1; /* avoid false negative lockdep_assert_held() */ raw_local_irq_save(flags); check_flags(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); ret = __lock_is_held(lock, read); lockdep_recursion_finish(); raw_local_irq_restore(flags); @@ -5077,13 +5102,13 @@ struct pin_cookie lock_pin_lock(struct lockdep_map *lock) struct pin_cookie cookie = NIL_COOKIE; unsigned long flags; - if (unlikely(current->lockdep_recursion)) + if (unlikely(!lockdep_enabled())) return cookie; raw_local_irq_save(flags); check_flags(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); cookie = __lock_pin_lock(lock); lockdep_recursion_finish(); raw_local_irq_restore(flags); @@ -5096,13 +5121,13 @@ void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie) { unsigned long flags; - if (unlikely(current->lockdep_recursion)) + if (unlikely(!lockdep_enabled())) return; raw_local_irq_save(flags); check_flags(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); __lock_repin_lock(lock, cookie); lockdep_recursion_finish(); raw_local_irq_restore(flags); @@ -5113,13 +5138,13 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie) { unsigned long flags; - if (unlikely(current->lockdep_recursion)) + if (unlikely(!lockdep_enabled())) return; raw_local_irq_save(flags); check_flags(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); __lock_unpin_lock(lock, cookie); lockdep_recursion_finish(); raw_local_irq_restore(flags); @@ -5249,15 +5274,12 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) trace_lock_acquired(lock, ip); - if (unlikely(!lock_stat || !debug_locks)) - return; - - if (unlikely(current->lockdep_recursion)) + if (unlikely(!lock_stat || !lockdep_enabled())) return; raw_local_irq_save(flags); check_flags(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); __lock_contended(lock, ip); lockdep_recursion_finish(); raw_local_irq_restore(flags); @@ -5270,15 +5292,12 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip) trace_lock_contended(lock, ip); - if (unlikely(!lock_stat || !debug_locks)) - return; - - if (unlikely(current->lockdep_recursion)) + if (unlikely(!lock_stat || !lockdep_enabled())) return; raw_local_irq_save(flags); check_flags(flags); - current->lockdep_recursion++; + lockdep_recursion_inc(); __lock_acquired(lock, ip); lockdep_recursion_finish(); raw_local_irq_restore(flags); -- cgit v1.2.3