From f92b070f2dc89a8ff1a0cc8b608e20abef894c7d Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 13 Sep 2018 14:34:06 +0200 Subject: printk: Do not miss new messages when replaying the log The variable "exclusive_console" is used to reply all existing messages on a newly registered console. It is cleared when all messages are out. The problem is that new messages might appear in the meantime. These are then visible only on the exclusive console. The obvious solution is to clear "exclusive_console" after we replay all messages that were already proceed before we started the reply. Reported-by: Sergey Senozhatsky Link: http://lkml.kernel.org/r/20180913123406.14378-1-pmladek@suse.com To: Steven Rostedt Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9bf5404397e0..cfaa211a8b54 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -423,6 +423,7 @@ static u32 log_next_idx; /* the next printk record to write to the console */ static u64 console_seq; static u32 console_idx; +static u64 exclusive_console_stop_seq; /* the next printk record to read after the last 'clear' command */ static u64 clear_seq; @@ -2009,6 +2010,7 @@ static u64 syslog_seq; static u32 syslog_idx; static u64 console_seq; static u32 console_idx; +static u64 exclusive_console_stop_seq; static u64 log_first_seq; static u32 log_first_idx; static u64 log_next_seq; @@ -2376,6 +2378,12 @@ skip: goto skip; } + /* Output to all consoles once old messages replayed. */ + if (unlikely(exclusive_console && + console_seq >= exclusive_console_stop_seq)) { + exclusive_console = NULL; + } + len += msg_print_text(msg, console_msg_format & MSG_FORMAT_SYSLOG, text + len, @@ -2418,10 +2426,6 @@ skip: console_locked = 0; - /* Release the exclusive_console once it is used */ - if (unlikely(exclusive_console)) - exclusive_console = NULL; - raw_spin_unlock(&logbuf_lock); up_console_sem(); @@ -2706,6 +2710,7 @@ void register_console(struct console *newcon) * the already-registered consoles. */ exclusive_console = newcon; + exclusive_console_stop_seq = console_seq; } console_unlock(); console_sysfs_notify(); -- cgit v1.2.3 From 884e370ea88c109a3b982f4eb9ecd82510a3a1fe Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 28 Sep 2018 18:53:04 +0900 Subject: printk: CON_PRINTBUFFER console registration is a bit racy CON_PRINTBUFFER console registration requires us to do several preparation steps: - Rollback console_seq to replay logbuf messages which were already seen on other consoles; - Set exclusive_console flag so console_unlock() will ->write() logbuf messages only to the exclusive_console driver. The way we do it, however, is a bit racy logbuf_lock_irqsave(flags); console_seq = syslog_seq; console_idx = syslog_idx; logbuf_unlock_irqrestore(flags); << preemption enabled << irqs enabled exclusive_console = newcon; console_unlock(); We rollback console_seq under logbuf_lock with IRQs disabled, but we set exclusive_console with local IRQs enabled and logbuf unlocked. If the system oops-es or panic-s before we set exclusive_console - and given that we have IRQs and preemption enabled there is such a possibility - we will re-play all logbuf messages to every registered console, which may be a bit annoying and time consuming. Move exclusive_console assignment to the same IRQs-disabled and logbuf_lock-protected section where we rollback console_seq. Link: http://lkml.kernel.org/r/20180928095304.9972-1-sergey.senozhatsky@gmail.com To: Steven Rostedt Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index cfaa211a8b54..c5b568c2d167 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2703,14 +2703,18 @@ void register_console(struct console *newcon) logbuf_lock_irqsave(flags); console_seq = syslog_seq; console_idx = syslog_idx; - logbuf_unlock_irqrestore(flags); /* * We're about to replay the log buffer. Only do this to the * just-registered console to avoid excessive message spam to * the already-registered consoles. + * + * Set exclusive_console with disabled interrupts to reduce + * race window with eventual console_flush_on_panic() that + * ignores console_lock. */ exclusive_console = newcon; exclusive_console_stop_seq = console_seq; + logbuf_unlock_irqrestore(flags); } console_unlock(); console_sysfs_notify(); -- cgit v1.2.3 From 277fcdb2cfee38ccdbe07e705dbd4896ba0c9930 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Sun, 30 Sep 2018 00:45:50 +0800 Subject: printk: Fix panic caused by passing log_buf_len to command line log_buf_len_setup does not check input argument before passing it to simple_strtoull. The argument would be a NULL pointer if "log_buf_len", without its value, is set in command line and thus causes the following panic. PANIC: early exception 0xe3 IP 10:ffffffffaaeacd0d error 0 cr2 0x0 [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.19.0-rc4-yocto-standard+ #1 [ 0.000000] RIP: 0010:_parse_integer_fixup_radix+0xd/0x70 ... [ 0.000000] Call Trace: [ 0.000000] simple_strtoull+0x29/0x70 [ 0.000000] memparse+0x26/0x90 [ 0.000000] log_buf_len_setup+0x17/0x22 [ 0.000000] do_early_param+0x57/0x8e [ 0.000000] parse_args+0x208/0x320 [ 0.000000] ? rdinit_setup+0x30/0x30 [ 0.000000] parse_early_options+0x29/0x2d [ 0.000000] ? rdinit_setup+0x30/0x30 [ 0.000000] parse_early_param+0x36/0x4d [ 0.000000] setup_arch+0x336/0x99e [ 0.000000] start_kernel+0x6f/0x4ee [ 0.000000] x86_64_start_reservations+0x24/0x26 [ 0.000000] x86_64_start_kernel+0x6f/0x72 [ 0.000000] secondary_startup_64+0xa4/0xb0 This patch adds a check to prevent the panic. Link: http://lkml.kernel.org/r/1538239553-81805-1-git-send-email-zhe.he@windriver.com Cc: stable@vger.kernel.org Cc: rostedt@goodmis.org Cc: linux-kernel@vger.kernel.org Signed-off-by: He Zhe Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c5b568c2d167..a14f15ad0f35 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1049,7 +1049,12 @@ static void __init log_buf_len_update(unsigned size) /* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { - unsigned size = memparse(str, &str); + unsigned int size; + + if (!str) + return -EINVAL; + + size = memparse(str, &str); log_buf_len_update(size); -- cgit v1.2.3 From 51a72ab7372d85c96104e58036f1b49ba11e5d2b Mon Sep 17 00:00:00 2001 From: He Zhe Date: Sun, 30 Sep 2018 00:45:51 +0800 Subject: printk: Correct wrong casting log_first_seq and console_seq are 64-bit unsigned integers. Correct a wrong casting that might cut off the output. Link: http://lkml.kernel.org/r/1538239553-81805-2-git-send-email-zhe.he@windriver.com Cc: rostedt@goodmis.org Cc: linux-kernel@vger.kernel.org Signed-off-by: He Zhe [sergey.senozhatsky@gmail.com: More descriptive commit message] Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a14f15ad0f35..d4704baf6aaf 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2358,8 +2358,9 @@ again: printk_safe_enter_irqsave(flags); raw_spin_lock(&logbuf_lock); if (console_seq < log_first_seq) { - len = sprintf(text, "** %u printk messages dropped **\n", - (unsigned)(log_first_seq - console_seq)); + len = sprintf(text, + "** %llu printk messages dropped **\n", + log_first_seq - console_seq); /* messages are gone, move to first one */ console_seq = log_first_seq; -- cgit v1.2.3 From dd5adbfbfc50ff998909660c6158c0488f7b6a2b Mon Sep 17 00:00:00 2001 From: He Zhe Date: Sun, 30 Sep 2018 00:45:52 +0800 Subject: printk: Add KBUILD_MODNAME and remove a redundant print prefix Add KBUILD_MODNAME to make prints more clear. Link: http://lkml.kernel.org/r/1538239553-81805-3-git-send-email-zhe.he@windriver.com Cc: rostedt@goodmis.org Cc: linux-kernel@vger.kernel.org Signed-off-by: He Zhe Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index d4704baf6aaf..15f3e70be448 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -16,6 +16,8 @@ * 01Mar01 Andrew Morton */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -2699,7 +2701,7 @@ void register_console(struct console *newcon) if (newcon->flags & CON_EXTENDED) if (!nr_ext_console_drivers++) - pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); + pr_info("continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); if (newcon->flags & CON_PRINTBUFFER) { /* -- cgit v1.2.3 From e6fe3e5b7d16e8f146a4ae7fe481bc6e97acde1e Mon Sep 17 00:00:00 2001 From: He Zhe Date: Sun, 30 Sep 2018 00:45:53 +0800 Subject: printk: Give error on attempt to set log buffer length to over 2G The current printk() is ready to handle log buffer size up to 2G. Give an explicit error for users who want to use larger log buffer. Also fix printk formatting to show the 2G as a positive number. Link: http://lkml.kernel.org/r/20181008135916.gg4kkmoki5bgtco5@pathway.suse.cz Cc: rostedt@goodmis.org Cc: linux-kernel@vger.kernel.org Suggested-by: Sergey Senozhatsky Signed-off-by: He Zhe Reviewed-by: Sergey Senozhatsky [pmladek: Fixed to the really safe limit 2GB.] Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 15f3e70be448..fce696d80e09 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -440,6 +440,7 @@ static u32 clear_idx; /* record buffer */ #define LOG_ALIGN __alignof__(struct printk_log) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) +#define LOG_BUF_LEN_MAX (u32)(1 << 31) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; @@ -1040,18 +1041,23 @@ void log_buf_vmcoreinfo_setup(void) static unsigned long __initdata new_log_buf_len; /* we practice scaling the ring buffer by powers of 2 */ -static void __init log_buf_len_update(unsigned size) +static void __init log_buf_len_update(u64 size) { + if (size > (u64)LOG_BUF_LEN_MAX) { + size = (u64)LOG_BUF_LEN_MAX; + pr_err("log_buf over 2G is not supported.\n"); + } + if (size) size = roundup_pow_of_two(size); if (size > log_buf_len) - new_log_buf_len = size; + new_log_buf_len = (unsigned long)size; } /* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { - unsigned int size; + u64 size; if (!str) return -EINVAL; @@ -1121,7 +1127,7 @@ void __init setup_log_buf(int early) } if (unlikely(!new_log_buf)) { - pr_err("log_buf_len: %ld bytes not available\n", + pr_err("log_buf_len: %lu bytes not available\n", new_log_buf_len); return; } @@ -1134,8 +1140,8 @@ void __init setup_log_buf(int early) memcpy(log_buf, __log_buf, __LOG_BUF_LEN); logbuf_unlock_irqrestore(flags); - pr_info("log_buf_len: %d bytes\n", log_buf_len); - pr_info("early log buf free: %d(%d%%)\n", + pr_info("log_buf_len: %u bytes\n", log_buf_len); + pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); } -- cgit v1.2.3 From 9627808d2d409279cea3fb334212d04a83ff6371 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 2 Oct 2018 11:38:34 +0900 Subject: printk: keep kernel cont support always enabled Since commit 5c2992ee7fd8a29 ("printk: remove console flushing special cases for partial buffered lines") we don't print cont fragments to the consoles; cont lines are now proper log_buf entries and there is no "consecutive continuation flag" anymore: we either have 'c' entries that mark continuation lines without fragments; or '-' entries that mark normal logbuf entries. There are no '+' entries anymore. However, we still have a small leftover - presence of ext_console drivers disables kernel cont support and we flush each pr_cont() and store it as a separate log_buf entry. Previously, it worked because msg_print_ext_header() had that "an optional external merge of the records" functionality: if (msg->flags & LOG_CONT) cont = (prev_flags & LOG_CONT) ? '+' : 'c'; We don't do this as of now, so keep kernel cont always enabled. Note from pmladek: The original purpose was to get full information including the metadata and dictionary via extended console drivers, see commit 6fe29354befe4c46e ("printk: implement support for extended console drivers"). The dictionary probably was the most important part but it was actually lost: static void cont_flush(void) { [...] log_store(cont.facility, cont.level, cont.flags, cont.ts_nsec, NULL, 0, cont.buf, cont.len); Nobody noticed because the only dictionary user is dev_printk() and dev_cont() is _not_ defined. Link: http://lkml.kernel.org/r/20181002023836.4487-2-sergey.senozhatsky@gmail.com To: Steven Rostedt Cc: Andrew Morton Cc: Dmitriy Vyukov Cc: Tetsuo Handa Cc: Tejun Heo Cc: Peter Zijlstra Cc: LKML Cc: Sergey Senozhatsky Signed-off-by: Sergey Senozhatsky [pmladek@suse.com: Updated commit message] Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index fce696d80e09..f717656c0fac 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -194,16 +194,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, return 0; } -/* - * Number of registered extended console drivers. - * - * If extended consoles are present, in-kernel cont reassembly is disabled - * and each fragment is stored as a separate log entry with proper - * continuation flag so that every emitted message has full metadata. This - * doesn't change the result for regular consoles or /proc/kmsg. For - * /dev/kmsg, as long as the reader concatenates messages according to - * consecutive continuation flags, the end result should be the same too. - */ +/* Number of registered extended console drivers. */ static int nr_ext_console_drivers; /* @@ -1781,12 +1772,8 @@ static void cont_flush(void) static bool cont_add(int facility, int level, enum log_flags flags, const char *text, size_t len) { - /* - * If ext consoles are present, flush and skip in-kernel - * continuation. See nr_ext_console_drivers definition. Also, if - * the line gets too long, split it up in separate records. - */ - if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) { + /* If the line gets too long, split it up in separate records. */ + if (cont.len + len > sizeof(cont.buf)) { cont_flush(); return false; } @@ -2706,8 +2693,7 @@ void register_console(struct console *newcon) } if (newcon->flags & CON_EXTENDED) - if (!nr_ext_console_drivers++) - pr_info("continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); + nr_ext_console_drivers++; if (newcon->flags & CON_PRINTBUFFER) { /* -- cgit v1.2.3 From 3ac37a93fa9217e576bebfd4ba3e80edaaeb2289 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 2 Oct 2018 11:38:35 +0900 Subject: printk: lock/unlock console only for new logbuf entries Prior to commit 5c2992ee7fd8a29 ("printk: remove console flushing special cases for partial buffered lines") we would do console_cont_flush() for each pr_cont() to print cont fragments, so console_unlock() would actually print data: pr_cont(); console_lock(); console_unlock() console_cont_flush(); // print cont fragment ... pr_cont(); console_lock(); console_unlock() console_cont_flush(); // print cont fragment We don't do console_cont_flush() anymore, so when we do pr_cont() console_unlock() does nothing (unless we flushed the cont buffer): pr_cont(); console_lock(); console_unlock(); // noop ... pr_cont(); console_lock(); console_unlock(); // noop ... pr_cont(); cont_flush(); console_lock(); console_unlock(); // print data We also wakeup klogd purposelessly for pr_cont() output - un-flushed cont buffer is not stored in log_buf; there is nothing to pull. Thus we can console_lock()/console_unlock()/wake_up_klogd() only when we know that we log_store()-ed a message and there is something to print to the consoles/syslog. Link: http://lkml.kernel.org/r/20181002023836.4487-3-sergey.senozhatsky@gmail.com To: Steven Rostedt Cc: Andrew Morton Cc: Dmitriy Vyukov Cc: Tetsuo Handa Cc: Tejun Heo Cc: Peter Zijlstra Cc: LKML Cc: Sergey Senozhatsky Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f717656c0fac..e9a7e50ed60a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1890,8 +1890,9 @@ asmlinkage int vprintk_emit(int facility, int level, const char *fmt, va_list args) { int printed_len; - bool in_sched = false; + bool in_sched = false, pending_output; unsigned long flags; + u64 curr_log_seq; if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; @@ -1903,11 +1904,13 @@ asmlinkage int vprintk_emit(int facility, int level, /* This stops the holder of console_sem just where we want him */ logbuf_lock_irqsave(flags); + curr_log_seq = log_next_seq; printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); + pending_output = (curr_log_seq != log_next_seq); logbuf_unlock_irqrestore(flags); /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { + if (!in_sched && pending_output) { /* * Disable preemption to avoid being preempted while holding * console_sem which would prevent anyone from printing to @@ -1924,7 +1927,8 @@ asmlinkage int vprintk_emit(int facility, int level, preempt_enable(); } - wake_up_klogd(); + if (pending_output) + wake_up_klogd(); return printed_len; } EXPORT_SYMBOL(vprintk_emit); -- cgit v1.2.3 From 0e96a19c4450253c3ddcff69140b1096f2c2adaf Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 2 Oct 2018 11:38:36 +0900 Subject: printk: do not preliminary split up cont buffer We have a proper 'overflow' check which tells us that we need to split up existing cont buffer in separate records: if (cont.len + len > sizeof(cont.buf)) cont_flush(); At the same time we also have one extra flush: "if cont buffer is 80% full then split it up" in cont_add(): if (cont.len > (sizeof(cont.buf) * 80) / 100) cont_flush(); This looks to be redundant, since the existing "overflow" check should work just fine, so remove this 80% check and wait for either a normal cont termination \n, for preliminary flush due to possible buffer overflow or for preliminary flush due to cont race. Link: http://lkml.kernel.org/r/20181002023836.4487-4-sergey.senozhatsky@gmail.com To: Steven Rostedt Cc: Andrew Morton Cc: Dmitriy Vyukov Cc: Tetsuo Handa Cc: Tejun Heo Cc: Peter Zijlstra Cc: LKML Cc: Sergey Senozhatsky Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e9a7e50ed60a..505663bb4f1f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1796,9 +1796,6 @@ static bool cont_add(int facility, int level, enum log_flags flags, const char * cont_flush(); } - if (cont.len > (sizeof(cont.buf) * 80) / 100) - cont_flush(); - return true; } -- cgit v1.2.3 From d2130e82e9454304e9b91ba9da551b5989af8c27 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 10 Oct 2018 20:33:08 +0900 Subject: printk: fix integer overflow in setup_log_buf() The way we calculate logbuf free space percentage overflows signed integer: int free; free = __LOG_BUF_LEN - log_next_idx; pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); We support LOG_BUF_LEN of up to 1<<25 bytes. Since setup_log_buf() is called during early init, logbuf is mostly empty, so __LOG_BUF_LEN - log_next_idx is close to 1<<25. Thus when we multiply it by 100, we overflow signed integer value range: 100 is 2^6 + 2^5 + 2^2. Example, booting with LOG_BUF_LEN 1<<25 and log_buf_len=2G boot param: [ 0.075317] log_buf_len: -2147483648 bytes [ 0.075319] early log buf free: 33549896(-28%) Make "free" unsigned integer and use appropriate printk() specifier. Link: http://lkml.kernel.org/r/20181010113308.9337-1-sergey.senozhatsky@gmail.com To: Steven Rostedt Cc: linux-kernel@vger.kernel.org Cc: Sergey Senozhatsky Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 505663bb4f1f..b77150ad1965 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1098,7 +1098,7 @@ void __init setup_log_buf(int early) { unsigned long flags; char *new_log_buf; - int free; + unsigned int free; if (log_buf != __log_buf) return; -- cgit v1.2.3