From 6778948f9de86c3cfaf36725a7c87dcff9ba247f Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Mon, 11 Dec 2023 08:20:59 +0000 Subject: [PATCH] kernel_5.10 no rt --- kernel/kernel/printk/printk.c | 1833 ++++++++++++++++++++++++++++------------------------------- 1 files changed, 872 insertions(+), 961 deletions(-) diff --git a/kernel/kernel/printk/printk.c b/kernel/kernel/printk/printk.c index 13cf2d7..e253475 100644 --- a/kernel/kernel/printk/printk.c +++ b/kernel/kernel/printk/printk.c @@ -44,9 +44,6 @@ #include <linux/irq_work.h> #include <linux/ctype.h> #include <linux/uio.h> -#include <linux/kthread.h> -#include <linux/kdb.h> -#include <linux/clocksource.h> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> @@ -64,6 +61,25 @@ #include "printk_ringbuffer.h" #include "console_cmdline.h" #include "braille.h" +#include "internal.h" + +#ifdef CONFIG_PRINTK_TIME_FROM_ARM_ARCH_TIMER +#include <clocksource/arm_arch_timer.h> +static u64 get_local_clock(void) +{ + u64 ns; + + ns = arch_timer_read_counter() * 1000; + do_div(ns, 24); + + return ns; +} +#else +static inline u64 get_local_clock(void) +{ + return local_clock(); +} +#endif int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ @@ -232,7 +248,19 @@ static int __down_trylock_console_sem(unsigned long ip) { - if (down_trylock(&console_sem)) + int lock_failed; + unsigned long flags; + + /* + * Here and in __up_console_sem() we need to be in safe mode, + * because spindump/WARN/etc from under console ->lock will + * deadlock in printk()->down_trylock_console_sem() otherwise. + */ + printk_safe_enter_irqsave(flags); + lock_failed = down_trylock(&console_sem); + printk_safe_exit_irqrestore(flags); + + if (lock_failed) return 1; mutex_acquire(&console_lock_dep_map, 0, 1, ip); return 0; @@ -241,9 +269,13 @@ static void __up_console_sem(unsigned long ip) { + unsigned long flags; + mutex_release(&console_lock_dep_map, ip); + printk_safe_enter_irqsave(flags); up(&console_sem); + printk_safe_exit_irqrestore(flags); } #define up_console_sem() __up_console_sem(_RET_IP_) @@ -256,6 +288,11 @@ * locked without the console sempahore held). */ static int console_locked, console_suspended; + +/* + * If exclusive_console is non-NULL then only this console is to be printed to. + */ +static struct console *exclusive_console; /* * Array of consoles built from command line options (console=) @@ -341,43 +378,61 @@ LOG_CONT = 8, /* text is a fragment of a continuation line */ }; +/* + * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken + * within the scheduler's rq lock. It must be released before calling + * console_unlock() or anything else that might wake up a process. + */ +DEFINE_RAW_SPINLOCK(logbuf_lock); + +/* + * Helper macros to lock/unlock logbuf_lock and switch between + * printk-safe/unsafe modes. + */ +#define logbuf_lock_irq() \ + do { \ + printk_safe_enter_irq(); \ + raw_spin_lock(&logbuf_lock); \ + } while (0) + +#define logbuf_unlock_irq() \ + do { \ + raw_spin_unlock(&logbuf_lock); \ + printk_safe_exit_irq(); \ + } while (0) + +#define logbuf_lock_irqsave(flags) \ + do { \ + printk_safe_enter_irqsave(flags); \ + raw_spin_lock(&logbuf_lock); \ + } while (0) + +#define logbuf_unlock_irqrestore(flags) \ + do { \ + raw_spin_unlock(&logbuf_lock); \ + printk_safe_exit_irqrestore(flags); \ + } while (0) + #ifdef CONFIG_PRINTK -/* syslog_lock protects syslog_* variables and write access to clear_seq. 
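 *
 * [Editorial aside, illustrative only, not part of this patch: the
 *  syslog_lock/clear_seq scheme removed here is superseded by the global
 *  logbuf_lock restored above. Note how the logbuf_lock_irq*() helpers
 *  always pair the raw spinlock with printk-safe mode, so a printk()
 *  issued while the lock is held (say, from a WARN_ON()) lands in the
 *  per-CPU safe buffers instead of recursing into the lock:
 *
 *      unsigned long flags;
 *      u64 seq;
 *
 *      logbuf_lock_irqsave(flags);     // printk_safe_enter_irqsave() + lock
 *      seq = prb_next_seq(prb);        // a WARN() here cannot deadlock
 *      logbuf_unlock_irqrestore(flags);
 *
 *  end of editorial aside]
 *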
*/ -static DEFINE_SPINLOCK(syslog_lock); - -/* Set to enable sync mode. Once set, it is never cleared. */ -static bool sync_mode; - DECLARE_WAIT_QUEUE_HEAD(log_wait); -/* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; static size_t syslog_partial; static bool syslog_time; -struct latched_seq { - seqcount_latch_t latch; - u64 val[2]; -}; +/* the next printk record to write to the console */ +static u64 console_seq; +static u64 exclusive_console_stop_seq; +static unsigned long console_dropped; -/* - * The next printk record to read after the last 'clear' command. There are - * two copies (updated with seqcount_latch) so that reads can locklessly - * access a valid value. Writers are synchronized by @syslog_lock. - */ -static struct latched_seq clear_seq = { - .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), - .val[0] = 0, - .val[1] = 0, -}; +/* the next printk record to read after the last 'clear' command */ +static u64 clear_seq; #ifdef CONFIG_PRINTK_CALLER #define PREFIX_MAX 48 #else #define PREFIX_MAX 32 #endif - -/* the maximum size allowed to be reserved for a record */ #define LOG_LINE_MAX (1024 - PREFIX_MAX) #define LOG_LEVEL(v) ((v) & 0x07) @@ -415,34 +470,9 @@ */ static bool __printk_percpu_data_ready __read_mostly; -static bool printk_percpu_data_ready(void) +bool printk_percpu_data_ready(void) { return __printk_percpu_data_ready; -} - -/* Must be called under syslog_lock. */ -static void latched_seq_write(struct latched_seq *ls, u64 val) -{ - raw_write_seqcount_latch(&ls->latch); - ls->val[0] = val; - raw_write_seqcount_latch(&ls->latch); - ls->val[1] = val; -} - -/* Can be called from any context. */ -static u64 latched_seq_read_nolock(struct latched_seq *ls) -{ - unsigned int seq; - unsigned int idx; - u64 val; - - do { - seq = raw_read_seqcount_latch(&ls->latch); - idx = seq & 0x1; - val = ls->val[idx]; - } while (read_seqcount_latch_retry(&ls->latch, seq)); - - return val; } /* Return log buffer address */ @@ -484,6 +514,54 @@ *text_len -= *trunc_msg_len; else *trunc_msg_len = 0; +} + +/* insert record into the buffer, discard old ones, update heads */ +static int log_store(u32 caller_id, int facility, int level, + enum log_flags flags, u64 ts_nsec, + const struct dev_printk_info *dev_info, + const char *text, u16 text_len) +{ + struct prb_reserved_entry e; + struct printk_record r; + u16 trunc_msg_len = 0; + + prb_rec_init_wr(&r, text_len); + + if (!prb_reserve(&e, prb, &r)) { + /* truncate the message if it is too long for empty buffer */ + truncate_msg(&text_len, &trunc_msg_len); + prb_rec_init_wr(&r, text_len + trunc_msg_len); + /* survive when the log buffer is too small for trunc_msg */ + if (!prb_reserve(&e, prb, &r)) + return 0; + } + + /* fill message */ + memcpy(&r.text_buf[0], text, text_len); + if (trunc_msg_len) + memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); + r.info->text_len = text_len + trunc_msg_len; + r.info->facility = facility; + r.info->level = level & 7; + r.info->flags = flags & 0x1f; + if (ts_nsec > 0) + r.info->ts_nsec = ts_nsec; + else + r.info->ts_nsec = get_local_clock(); + r.info->caller_id = caller_id; + if (dev_info) + memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); + + /* A message without a trailing newline can be continued. 
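 *
 * [Editorial aside, illustrative only, not part of this patch: a record
 *  committed with prb_commit() stays open, so a later KERN_CONT fragment
 *  can still be appended to it with prb_reserve_in_last(); only
 *  prb_final_commit() marks it finished. A caller-side view:
 *
 *      pr_info("checking device");     // no '\n': prb_commit(), record open
 *      pr_cont(" ... ok\n");           // LOG_CONT: appended to the same
 *                                      // record, then finalized
 *
 *  end of editorial aside]
 *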
*/ + if (!(flags & LOG_NEWLINE)) + prb_commit(&e); + else + prb_final_commit(&e); + + trace_android_vh_logbuf(prb, &r); + + return (text_len + trunc_msg_len); } int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); @@ -614,7 +692,7 @@ /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { - atomic64_t seq; + u64 seq; struct ratelimit_state rs; struct mutex lock; char buf[CONSOLE_EXT_LOG_MAX]; @@ -715,22 +793,27 @@ if (ret) return ret; - if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { + logbuf_lock_irq(); + if (!prb_read_valid(prb, user->seq, r)) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; + logbuf_unlock_irq(); goto out; } + logbuf_unlock_irq(); ret = wait_event_interruptible(log_wait, - prb_read_valid(prb, atomic64_read(&user->seq), r)); + prb_read_valid(prb, user->seq, r)); if (ret) goto out; + logbuf_lock_irq(); } - if (r->info->seq != atomic64_read(&user->seq)) { + if (r->info->seq != user->seq) { /* our last seen message is gone, return error and reset */ - atomic64_set(&user->seq, r->info->seq); + user->seq = r->info->seq; ret = -EPIPE; + logbuf_unlock_irq(); goto out; } @@ -739,7 +822,8 @@ &r->text_buf[0], r->info->text_len, &r->info->dev_info); - atomic64_set(&user->seq, r->info->seq + 1); + user->seq = r->info->seq + 1; + logbuf_unlock_irq(); if (len > count) { ret = -EINVAL; @@ -774,10 +858,11 @@ if (offset) return -ESPIPE; + logbuf_lock_irq(); switch (whence) { case SEEK_SET: /* the first record */ - atomic64_set(&user->seq, prb_first_valid_seq(prb)); + user->seq = prb_first_valid_seq(prb); break; case SEEK_DATA: /* @@ -785,15 +870,16 @@ * like issued by 'dmesg -c'. Reading /dev/kmsg itself * changes no global state, and does not clear anything. */ - atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq)); + user->seq = clear_seq; break; case SEEK_END: /* after the last record */ - atomic64_set(&user->seq, prb_next_seq(prb)); + user->seq = prb_next_seq(prb); break; default: ret = -EINVAL; } + logbuf_unlock_irq(); return ret; } @@ -808,13 +894,15 @@ poll_wait(file, &log_wait, wait); - if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { + logbuf_lock_irq(); + if (prb_read_valid_info(prb, user->seq, &info, NULL)) { /* return error when data has vanished underneath us */ - if (info.seq != atomic64_read(&user->seq)) + if (info.seq != user->seq) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; else ret = EPOLLIN|EPOLLRDNORM; } + logbuf_unlock_irq(); return ret; } @@ -847,7 +935,9 @@ prb_rec_init_rd(&user->record, &user->info, &user->text_buf[0], sizeof(user->text_buf)); - atomic64_set(&user->seq, prb_first_valid_seq(prb)); + logbuf_lock_irq(); + user->seq = prb_first_valid_seq(prb); + logbuf_unlock_irq(); file->private_data = user; return 0; @@ -939,9 +1029,6 @@ VMCOREINFO_SIZE(atomic_long_t); VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); - - VMCOREINFO_STRUCT_SIZE(latched_seq); - VMCOREINFO_OFFSET(latched_seq, val); } #endif @@ -1013,6 +1100,9 @@ static void __init set_percpu_data_ready(void) { + printk_safe_init(); + /* Make sure we set this flag only after printk_safe() init is done */ + barrier(); __printk_percpu_data_ready = true; } @@ -1052,6 +1142,7 @@ struct printk_record r; size_t new_descs_size; size_t new_infos_size; + unsigned long flags; char *new_log_buf; unsigned int free; u64 seq; @@ -1109,6 +1200,8 @@ new_descs, ilog2(new_descs_count), new_infos); + logbuf_lock_irqsave(flags); + log_buf_len = new_log_buf_len; log_buf = new_log_buf; new_log_buf_len = 0; @@ -1123,6 +1216,8 @@ * appear during 
the transition to the dynamic buffer. */ prb = &printk_rb_dynamic; + + logbuf_unlock_irqrestore(flags); if (seq != prb_next_seq(&printk_rb_static)) { pr_err("dropped %llu messages\n", @@ -1400,50 +1495,6 @@ return ((prefix_len * line_count) + info->text_len + 1); } -/* - * Beginning with @start_seq, find the first record where it and all following - * records up to (but not including) @max_seq fit into @size. - * - * @max_seq is simply an upper bound and does not need to exist. If the caller - * does not require an upper bound, -1 can be used for @max_seq. - */ -static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, - bool syslog, bool time) -{ - struct printk_info info; - unsigned int line_count; - size_t len = 0; - u64 seq; - - /* Determine the size of the records up to @max_seq. */ - prb_for_each_info(start_seq, prb, seq, &info, &line_count) { - if (info.seq >= max_seq) - break; - len += get_record_print_text_size(&info, line_count, syslog, time); - } - - /* - * Adjust the upper bound for the next loop to avoid subtracting - * lengths that were never added. - */ - if (seq < max_seq) - max_seq = seq; - - /* - * Move first record forward until length fits into the buffer. Ignore - * newest messages that were not counted in the above cycle. Messages - * might appear and get lost in the meantime. This is a best effort - * that prevents an infinite loop that could occur with a retry. - */ - prb_for_each_info(start_seq, prb, seq, &info, &line_count) { - if (len <= size || info.seq >= max_seq) - break; - len -= get_record_print_text_size(&info, line_count, syslog, time); - } - - return seq; -} - static int syslog_print(char __user *buf, int size) { struct printk_info info; @@ -1451,19 +1502,19 @@ char *text; int len = 0; - text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); if (!text) return -ENOMEM; - prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); + prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); while (size > 0) { size_t n; size_t skip; - spin_lock_irq(&syslog_lock); + logbuf_lock_irq(); if (!prb_read_valid(prb, syslog_seq, &r)) { - spin_unlock_irq(&syslog_lock); + logbuf_unlock_irq(); break; } if (r.info->seq != syslog_seq) { @@ -1492,7 +1543,7 @@ syslog_partial += n; } else n = 0; - spin_unlock_irq(&syslog_lock); + logbuf_unlock_irq(); if (!n) break; @@ -1515,25 +1566,34 @@ static int syslog_print_all(char __user *buf, int size, bool clear) { struct printk_info info; + unsigned int line_count; struct printk_record r; char *text; int len = 0; u64 seq; bool time; - text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); if (!text) return -ENOMEM; time = printk_time; + logbuf_lock_irq(); /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. 
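 *
 * [Editorial aside, not part of this patch: the restored two-pass scheme
 *  below first sums the rendered size of all records, then advances the
 *  starting point until the remainder fits. For example, with records
 *  rendering to 300, 500 and 400 bytes and a 1000-byte user buffer, pass
 *  one computes len = 1200; pass two drops the 300-byte record (900 <=
 *  1000), so the dump starts at the second record and the newest messages
 *  are preserved.
 *  end of editorial aside]
 *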
*/ - seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1, - size, true, time); + prb_for_each_info(clear_seq, prb, seq, &info, &line_count) + len += get_record_print_text_size(&info, line_count, true, time); - prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); + /* move first record forward until length fits into the buffer */ + prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { + if (len <= size) + break; + len -= get_record_print_text_size(&info, line_count, true, time); + } + + prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); len = 0; prb_for_each_record(seq, prb, seq, &r) { @@ -1546,20 +1606,20 @@ break; } + logbuf_unlock_irq(); if (copy_to_user(buf + len, text, textlen)) len = -EFAULT; else len += textlen; + logbuf_lock_irq(); if (len < 0) break; } - if (clear) { - spin_lock_irq(&syslog_lock); - latched_seq_write(&clear_seq, seq); - spin_unlock_irq(&syslog_lock); - } + if (clear) + clear_seq = seq; + logbuf_unlock_irq(); kfree(text); return len; @@ -1567,21 +1627,9 @@ static void syslog_clear(void) { - spin_lock_irq(&syslog_lock); - latched_seq_write(&clear_seq, prb_next_seq(prb)); - spin_unlock_irq(&syslog_lock); -} - -/* Return a consistent copy of @syslog_seq. */ -static u64 read_syslog_seq_irq(void) -{ - u64 seq; - - spin_lock_irq(&syslog_lock); - seq = syslog_seq; - spin_unlock_irq(&syslog_lock); - - return seq; + logbuf_lock_irq(); + clear_seq = prb_next_seq(prb); + logbuf_unlock_irq(); } int do_syslog(int type, char __user *buf, int len, int source) @@ -1607,9 +1655,8 @@ return 0; if (!access_ok(buf, len)) return -EFAULT; - error = wait_event_interruptible(log_wait, - prb_read_valid(prb, read_syslog_seq_irq(), NULL)); + prb_read_valid(prb, syslog_seq, NULL)); if (error) return error; error = syslog_print(buf, len); @@ -1657,10 +1704,10 @@ break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: - spin_lock_irq(&syslog_lock); + logbuf_lock_irq(); if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { /* No unread messages. */ - spin_unlock_irq(&syslog_lock); + logbuf_unlock_irq(); return 0; } if (info.seq != syslog_seq) { @@ -1688,7 +1735,7 @@ } error -= syslog_partial; } - spin_unlock_irq(&syslog_lock); + logbuf_unlock_irq(); break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: @@ -1707,12 +1754,194 @@ return do_syslog(type, buf, len, SYSLOG_FROM_READER); } +/* + * Special console_lock variants that help to reduce the risk of soft-lockups. + * They allow to pass console_lock to another printk() call using a busy wait. + */ + +#ifdef CONFIG_LOCKDEP +static struct lockdep_map console_owner_dep_map = { + .name = "console_owner" +}; +#endif + +static DEFINE_RAW_SPINLOCK(console_owner_lock); +static struct task_struct *console_owner; +static bool console_waiter; + +/** + * console_lock_spinning_enable - mark beginning of code where another + * thread might safely busy wait + * + * This basically converts console_lock into a spinlock. This marks + * the section where the console_lock owner can not sleep, because + * there may be a waiter spinning (like a spinlock). Also it must be + * ready to hand over the lock at the end of the section. 
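 *
 * [Editorial aside, illustrative only, not part of this patch: the
 *  owner/waiter handshake restored here runs roughly like this:
 *
 *      CPU0 (console_lock owner)          CPU1 (later printk() caller)
 *      console_lock_spinning_enable();
 *      call_console_drivers(...);         console_trylock_spinning();
 *                                           sets console_waiter, spins
 *      console_lock_spinning_disable_and_check();
 *        sees the waiter, clears it,        stops spinning; now owns
 *        returns 1 and backs off            console_lock and will up() it
 *
 *  end of editorial aside]
 *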
+ */ +static void console_lock_spinning_enable(void) +{ + raw_spin_lock(&console_owner_lock); + console_owner = current; + raw_spin_unlock(&console_owner_lock); + + /* The waiter may spin on us after setting console_owner */ + spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); +} + +/** + * console_lock_spinning_disable_and_check - mark end of code where another + * thread was able to busy wait and check if there is a waiter + * + * This is called at the end of the section where spinning is allowed. + * It has two functions. First, it is a signal that it is no longer + * safe to start busy waiting for the lock. Second, it checks if + * there is a busy waiter and passes the lock rights to her. + * + * Important: Callers lose the lock if there was a busy waiter. + * They must not touch items synchronized by console_lock + * in this case. + * + * Return: 1 if the lock rights were passed, 0 otherwise. + */ +static int console_lock_spinning_disable_and_check(void) +{ + int waiter; + + raw_spin_lock(&console_owner_lock); + waiter = READ_ONCE(console_waiter); + console_owner = NULL; + raw_spin_unlock(&console_owner_lock); + + if (!waiter) { + spin_release(&console_owner_dep_map, _THIS_IP_); + return 0; + } + + /* The waiter is now free to continue */ + WRITE_ONCE(console_waiter, false); + + spin_release(&console_owner_dep_map, _THIS_IP_); + + /* + * Hand off console_lock to waiter. The waiter will perform + * the up(). After this, the waiter is the console_lock owner. + */ + mutex_release(&console_lock_dep_map, _THIS_IP_); + return 1; +} + +/** + * console_trylock_spinning - try to get console_lock by busy waiting + * + * This allows to busy wait for the console_lock when the current + * owner is running in specially marked sections. It means that + * the current owner is running and cannot reschedule until it + * is ready to lose the lock. + * + * Return: 1 if we got the lock, 0 othrewise + */ +static int console_trylock_spinning(void) +{ + struct task_struct *owner = NULL; + bool waiter; + bool spin = false; + unsigned long flags; + + if (console_trylock()) + return 1; + + printk_safe_enter_irqsave(flags); + + raw_spin_lock(&console_owner_lock); + owner = READ_ONCE(console_owner); + waiter = READ_ONCE(console_waiter); + if (!waiter && owner && owner != current) { + WRITE_ONCE(console_waiter, true); + spin = true; + } + raw_spin_unlock(&console_owner_lock); + + /* + * If there is an active printk() writing to the + * consoles, instead of having it write our data too, + * see if we can offload that load from the active + * printer, and do some printing ourselves. + * Go into a spin only if there isn't already a waiter + * spinning, and there is an active printer, and + * that active printer isn't us (recursive printk?). + */ + if (!spin) { + printk_safe_exit_irqrestore(flags); + return 0; + } + + /* We spin waiting for the owner to release us */ + spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); + /* Owner will clear console_waiter on hand off */ + while (READ_ONCE(console_waiter)) + cpu_relax(); + spin_release(&console_owner_dep_map, _THIS_IP_); + + printk_safe_exit_irqrestore(flags); + /* + * The owner passed the console lock to us. + * Since we did not spin on console lock, annotate + * this as a trylock. Otherwise lockdep will + * complain. + */ + mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); + + return 1; +} + +/* + * Call the console drivers, asking them to write out + * log_buf[start] to log_buf[end - 1]. + * The console_lock must be held. 
+ */ +static void call_console_drivers(const char *ext_text, size_t ext_len, + const char *text, size_t len) +{ + static char dropped_text[64]; + size_t dropped_len = 0; + struct console *con; + + trace_console_rcuidle(text, len); + + if (!console_drivers) + return; + + if (console_dropped) { + dropped_len = snprintf(dropped_text, sizeof(dropped_text), + "** %lu printk messages dropped **\n", + console_dropped); + console_dropped = 0; + } + + for_each_console(con) { + if (exclusive_console && con != exclusive_console) + continue; + if (!(con->flags & CON_ENABLED)) + continue; + if (!con->write) + continue; + if (!cpu_online(smp_processor_id()) && + !(con->flags & CON_ANYTIME)) + continue; + if (con->flags & CON_EXTENDED) + con->write(con, ext_text, ext_len); + else { + if (dropped_len) + con->write(con, dropped_text, dropped_len); + con->write(con, text, len); + } + } +} + int printk_delay_msec __read_mostly; -static inline void printk_delay(int level) +static inline void printk_delay(void) { - boot_delay_msec(level); - if (unlikely(printk_delay_msec)) { int m = printk_delay_msec; @@ -1723,282 +1952,83 @@ } } -static bool kernel_sync_mode(void) -{ - return (oops_in_progress || sync_mode); -} - -static bool console_can_sync(struct console *con) -{ - if (!(con->flags & CON_ENABLED)) - return false; - if (con->write_atomic && kernel_sync_mode()) - return true; - if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) - return true; - if (con->write && (con->flags & CON_BOOT) && !con->thread) - return true; - return false; -} - -static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len) -{ - if (!(con->flags & CON_ENABLED)) - return false; - if (con->write_atomic && kernel_sync_mode()) - con->write_atomic(con, text, text_len); - else if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) - con->write_atomic(con, text, text_len); - else if (con->write && (con->flags & CON_BOOT) && !con->thread) - con->write(con, text, text_len); - else - return false; - - return true; -} - -static bool have_atomic_console(void) -{ - struct console *con; - - for_each_console(con) { - if (!(con->flags & CON_ENABLED)) - continue; - if (con->write_atomic) - return true; - } - return false; -} - -static bool print_sync(struct console *con, u64 *seq) -{ - struct printk_info info; - struct printk_record r; - size_t text_len; - - prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf)); - - if (!prb_read_valid(prb, *seq, &r)) - return false; - - text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); - - if (!call_sync_console_driver(con, &con->sync_buf[0], text_len)) - return false; - - *seq = r.info->seq; - - touch_softlockup_watchdog_sync(); - clocksource_touch_watchdog(); - rcu_cpu_stall_reset(); - touch_nmi_watchdog(); - - if (text_len) - printk_delay(r.info->level); - - return true; -} - -static void print_sync_until(struct console *con, u64 seq) -{ - unsigned int flags; - u64 printk_seq; - - console_atomic_lock(&flags); - for (;;) { - printk_seq = atomic64_read(&con->printk_seq); - if (printk_seq >= seq) - break; - if (!print_sync(con, &printk_seq)) - break; - atomic64_set(&con->printk_seq, printk_seq + 1); - } - console_atomic_unlock(flags); -} - -#ifdef CONFIG_PRINTK_NMI -#define NUM_RECURSION_CTX 2 -#else -#define NUM_RECURSION_CTX 1 -#endif - -struct printk_recursion { - char count[NUM_RECURSION_CTX]; -}; - -static DEFINE_PER_CPU(struct printk_recursion, percpu_printk_recursion); -static char 
printk_recursion_count[NUM_RECURSION_CTX]; - -static char *printk_recursion_counter(void) -{ - struct printk_recursion *rec; - char *count; - - if (!printk_percpu_data_ready()) { - count = &printk_recursion_count[0]; - } else { - rec = this_cpu_ptr(&percpu_printk_recursion); - - count = &rec->count[0]; - } - -#ifdef CONFIG_PRINTK_NMI - if (in_nmi()) - count++; -#endif - - return count; -} - -static bool printk_enter_irqsave(unsigned long *flags) -{ - char *count; - - local_irq_save(*flags); - count = printk_recursion_counter(); - /* Only 1 level of recursion allowed. */ - if (*count > 1) { - local_irq_restore(*flags); - return false; - } - (*count)++; - - return true; -} - -static void printk_exit_irqrestore(unsigned long flags) -{ - char *count; - - count = printk_recursion_counter(); - (*count)--; - local_irq_restore(flags); -} - static inline u32 printk_caller_id(void) { return in_task() ? task_pid_nr(current) : 0x80000000 + raw_smp_processor_id(); } -/** - * parse_prefix - Parse level and control flags. - * - * @text: The terminated text message. - * @level: A pointer to the current level value, will be updated. - * @lflags: A pointer to the current log flags, will be updated. - * - * @level may be NULL if the caller is not interested in the parsed value. - * Otherwise the variable pointed to by @level must be set to - * LOGLEVEL_DEFAULT in order to be updated with the parsed value. - * - * @lflags may be NULL if the caller is not interested in the parsed value. - * Otherwise the variable pointed to by @lflags will be OR'd with the parsed - * value. - * - * Return: The length of the parsed level and control flags. - */ -static u16 parse_prefix(char *text, int *level, enum log_flags *lflags) -{ - u16 prefix_len = 0; - int kern_level; - - while (*text) { - kern_level = printk_get_level(text); - if (!kern_level) - break; - - switch (kern_level) { - case '0' ... '7': - if (level && *level == LOGLEVEL_DEFAULT) - *level = kern_level - '0'; - break; - case 'c': /* KERN_CONT */ - if (lflags) - *lflags |= LOG_CONT; - } - - prefix_len += 2; - text += 2; - } - - return prefix_len; -} - -static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lflags, - const char *fmt, va_list args) -{ - u16 text_len; - - text_len = vscnprintf(text, size, fmt, args); - - /* Mark and strip a trailing newline. */ - if (text_len && text[text_len - 1] == '\n') { - text_len--; - *lflags |= LOG_NEWLINE; - } - - /* Strip log level and control flags. 
*/ - if (facility == 0) { - u16 prefix_len; - - prefix_len = parse_prefix(text, NULL, NULL); - if (prefix_len) { - text_len -= prefix_len; - memmove(text, text + prefix_len, text_len); - } - } - - return text_len; -} - -__printf(4, 0) -static int vprintk_store(int facility, int level, +static size_t log_output(int facility, int level, enum log_flags lflags, const struct dev_printk_info *dev_info, - const char *fmt, va_list args) + char *text, size_t text_len) { const u32 caller_id = printk_caller_id(); - struct prb_reserved_entry e; + + if (lflags & LOG_CONT) { + struct prb_reserved_entry e; + struct printk_record r; + + prb_rec_init_wr(&r, text_len); + if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { + memcpy(&r.text_buf[r.info->text_len], text, text_len); + r.info->text_len += text_len; + if (lflags & LOG_NEWLINE) { + r.info->flags |= LOG_NEWLINE; + prb_final_commit(&e); + } else { + prb_commit(&e); + } + + trace_android_vh_logbuf_pr_cont(&r, text_len); + return text_len; + } + } + + /* Store it in the record log */ + return log_store(caller_id, facility, level, lflags, 0, + dev_info, text, text_len); +} + +/* Must be called under logbuf_lock. */ +int vprintk_store(int facility, int level, + const struct dev_printk_info *dev_info, + const char *fmt, va_list args) +{ + static char textbuf[LOG_LINE_MAX]; + char *text = textbuf; + size_t text_len; enum log_flags lflags = 0; - bool final_commit = false; - struct printk_record r; - unsigned long irqflags; - u16 trunc_msg_len = 0; - char prefix_buf[8]; - u16 reserve_size; - va_list args2; - u16 text_len; - int ret = 0; - u64 ts_nsec; - u64 seq; /* - * Since the duration of printk() can vary depending on the message - * and state of the ringbuffer, grab the timestamp now so that it is - * close to the call of printk(). This provides a more deterministic - * timestamp with respect to the caller. + * The printf needs to come first; we need the syslog + * prefix which might be passed-in as a parameter. */ - ts_nsec = local_clock(); + text_len = vscnprintf(text, sizeof(textbuf), fmt, args); - if (!printk_enter_irqsave(&irqflags)) - return 0; + /* mark and strip a trailing newline */ + if (text_len && text[text_len-1] == '\n') { + text_len--; + lflags |= LOG_NEWLINE; + } - /* - * The sprintf needs to come first since the syslog prefix might be - * passed in as a parameter. An extra byte must be reserved so that - * later the vscnprintf() into the reserved buffer has room for the - * terminating '\0', which is not counted by vsnprintf(). - */ - va_copy(args2, args); - reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1; - va_end(args2); + /* strip kernel syslog prefix and extract log level or control flags */ + if (facility == 0) { + int kern_level; - if (reserve_size > LOG_LINE_MAX) - reserve_size = LOG_LINE_MAX; + while ((kern_level = printk_get_level(text)) != 0) { + switch (kern_level) { + case '0' ... '7': + if (level == LOGLEVEL_DEFAULT) + level = kern_level - '0'; + break; + case 'c': /* KERN_CONT */ + lflags |= LOG_CONT; + } - /* Extract log level or control flags. 
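 *
 * [Editorial aside, not part of this patch: the restored parsing loop
 *  above consumes KERN_SOH prefixes two bytes at a time. For
 *  printk(KERN_WARNING "disk full\n") the buffer begins "\001" "4",
 *  printk_get_level() returns '4', level becomes 4, and text is advanced
 *  past the prefix; KERN_CONT ("\001" "c") sets LOG_CONT the same way.
 *  end of editorial aside]
 *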
*/ - if (facility == 0) - parse_prefix(&prefix_buf[0], &level, &lflags); + text_len -= 2; + text += 2; + } + } if (level == LOGLEVEL_DEFAULT) level = default_message_loglevel; @@ -2006,79 +2036,7 @@ if (dev_info) lflags |= LOG_NEWLINE; - if (lflags & LOG_CONT) { - prb_rec_init_wr(&r, reserve_size); - if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { - seq = r.info->seq; - text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, - facility, &lflags, fmt, args); - r.info->text_len += text_len; - - if (lflags & LOG_NEWLINE) { - r.info->flags |= LOG_NEWLINE; - prb_final_commit(&e); - final_commit = true; - } else { - prb_commit(&e); - } - - ret = text_len; - goto out; - } - } - - /* - * Explicitly initialize the record before every prb_reserve() call. - * prb_reserve_in_last() and prb_reserve() purposely invalidate the - * structure when they fail. - */ - prb_rec_init_wr(&r, reserve_size); - if (!prb_reserve(&e, prb, &r)) { - /* truncate the message if it is too long for empty buffer */ - truncate_msg(&reserve_size, &trunc_msg_len); - - prb_rec_init_wr(&r, reserve_size + trunc_msg_len); - if (!prb_reserve(&e, prb, &r)) - goto out; - } - - seq = r.info->seq; - - /* fill message */ - text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &lflags, fmt, args); - if (trunc_msg_len) - memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); - r.info->text_len = text_len + trunc_msg_len; - r.info->facility = facility; - r.info->level = level & 7; - r.info->flags = lflags & 0x1f; - r.info->ts_nsec = ts_nsec; - r.info->caller_id = caller_id; - if (dev_info) - memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); - - /* A message without a trailing newline can be continued. */ - if (!(lflags & LOG_NEWLINE)) { - prb_commit(&e); - } else { - prb_final_commit(&e); - final_commit = true; - } - - ret = text_len + trunc_msg_len; -out: - /* only the kernel may perform synchronous printing */ - if (facility == 0 && final_commit) { - struct console *con; - - for_each_console(con) { - if (console_can_sync(con)) - print_sync_until(con, seq + 1); - } - } - - printk_exit_irqrestore(irqflags); - return ret; + return log_output(facility, level, lflags, dev_info, text, text_len); } asmlinkage int vprintk_emit(int facility, int level, @@ -2086,42 +2044,60 @@ const char *fmt, va_list args) { int printed_len; + bool in_sched = false; + unsigned long flags; /* Suppress unimportant messages after panic happens */ if (unlikely(suppress_printk)) return 0; - if (level == LOGLEVEL_SCHED) + if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; + in_sched = true; + } + boot_delay_msec(level); + printk_delay(); + + /* This stops the holder of console_sem just where we want him */ + logbuf_lock_irqsave(flags); printed_len = vprintk_store(facility, level, dev_info, fmt, args); + logbuf_unlock_irqrestore(flags); + + /* If called from the scheduler, we can not call up(). */ + if (!in_sched) { + /* + * Disable preemption to avoid being preempted while holding + * console_sem which would prevent anyone from printing to + * console + */ + preempt_disable(); + /* + * Try to acquire and then immediately release the console + * semaphore. The release will print out buffers and wake up + * /dev/kmsg and syslog() users. 
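 *
 * [Editorial aside, illustrative only, not part of this patch: this is
 *  the classic non-RT printing model the patch restores. Whichever
 *  context wins the trylock prints everyone's backlog:
 *
 *      printk(...)                        // any context
 *        vprintk_store(...);              // record enters the ringbuffer
 *        if (console_trylock_spinning())
 *              console_unlock();          // drains ALL pending records,
 *                                         // not just this caller's
 *
 *  end of editorial aside]
 *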
+ */ + if (console_trylock_spinning()) + console_unlock(); + preempt_enable(); + } wake_up_klogd(); return printed_len; } EXPORT_SYMBOL(vprintk_emit); -__printf(1, 0) -static int vprintk_default(const char *fmt, va_list args) -{ - return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); -} -__printf(1, 0) -static int vprintk_func(const char *fmt, va_list args) -{ -#ifdef CONFIG_KGDB_KDB - /* Allow to pass printk() to kdb but avoid a recursion. */ - if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) - return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); -#endif - return vprintk_default(fmt, args); -} - asmlinkage int vprintk(const char *fmt, va_list args) { return vprintk_func(fmt, args); } EXPORT_SYMBOL(vprintk); + +int vprintk_default(const char *fmt, va_list args) +{ + return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); +} +EXPORT_SYMBOL_GPL(vprintk_default); /** * printk - print a kernel message @@ -2157,162 +2133,38 @@ } EXPORT_SYMBOL(printk); -static int printk_kthread_func(void *data) -{ - struct console *con = data; - unsigned long dropped = 0; - char *dropped_text = NULL; - struct printk_info info; - struct printk_record r; - char *ext_text = NULL; - size_t dropped_len; - int ret = -ENOMEM; - char *text = NULL; - char *write_text; - u64 printk_seq; - size_t len; - int error; - u64 seq; - - if (con->flags & CON_EXTENDED) { - ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); - if (!ext_text) - goto out; - } - text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); - dropped_text = kmalloc(64, GFP_KERNEL); - if (!text || !dropped_text) - goto out; - - if (con->flags & CON_EXTENDED) - write_text = ext_text; - else - write_text = text; - - seq = atomic64_read(&con->printk_seq); - - prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); - - for (;;) { - error = wait_event_interruptible(log_wait, - prb_read_valid(prb, seq, &r) || kthread_should_stop()); - - if (kthread_should_stop()) - break; - - if (error) - continue; - - if (seq != r.info->seq) { - dropped += r.info->seq - seq; - seq = r.info->seq; - } - - seq++; - - if (!(con->flags & CON_ENABLED)) - continue; - - if (suppress_message_printing(r.info->level)) - continue; - - if (con->flags & CON_EXTENDED) { - len = info_print_ext_header(ext_text, - CONSOLE_EXT_LOG_MAX, - r.info); - len += msg_print_ext_body(ext_text + len, - CONSOLE_EXT_LOG_MAX - len, - &r.text_buf[0], r.info->text_len, - &r.info->dev_info); - } else { - len = record_print_text(&r, - console_msg_format & MSG_FORMAT_SYSLOG, - printk_time); - } - - printk_seq = atomic64_read(&con->printk_seq); - - console_lock(); - console_may_schedule = 0; - - if (kernel_sync_mode() && con->write_atomic) { - console_unlock(); - break; - } - - if (!(con->flags & CON_EXTENDED) && dropped) { - dropped_len = snprintf(dropped_text, 64, - "** %lu printk messages dropped **\n", - dropped); - dropped = 0; - - con->write(con, dropped_text, dropped_len); - printk_delay(r.info->level); - } - - con->write(con, write_text, len); - if (len) - printk_delay(r.info->level); - - atomic64_cmpxchg_relaxed(&con->printk_seq, printk_seq, seq); - - console_unlock(); - } -out: - kfree(dropped_text); - kfree(text); - kfree(ext_text); - pr_info("%sconsole [%s%d]: printing thread stopped\n", - (con->flags & CON_BOOT) ? "boot" : "", - con->name, con->index); - return ret; -} - -/* Must be called within console_lock(). */ -static void start_printk_kthread(struct console *con) -{ - /* No need to start a printing thread if the console cannot print. 
*/ - if (!con->write) - return; - - con->thread = kthread_run(printk_kthread_func, con, - "pr/%s%d", con->name, con->index); - if (IS_ERR(con->thread)) { - pr_err("%sconsole [%s%d]: unable to start printing thread\n", - (con->flags & CON_BOOT) ? "boot" : "", - con->name, con->index); - return; - } - pr_info("%sconsole [%s%d]: printing thread started\n", - (con->flags & CON_BOOT) ? "boot" : "", - con->name, con->index); -} - -/* protected by console_lock */ -static bool kthreads_started; - -/* Must be called within console_lock(). */ -static void console_try_thread(struct console *con) -{ - if (kthreads_started) { - start_printk_kthread(con); - return; - } - - /* - * The printing threads have not been started yet. If this console - * can print synchronously, print all unprinted messages. - */ - if (console_can_sync(con)) - print_sync_until(con, prb_next_seq(prb)); -} - #else /* CONFIG_PRINTK */ -#define prb_first_valid_seq(rb) 0 -#define prb_next_seq(rb) 0 +#define LOG_LINE_MAX 0 +#define PREFIX_MAX 0 +#define printk_time false -#define console_try_thread(con) +#define prb_read_valid(rb, seq, r) false +#define prb_first_valid_seq(rb) 0 + +static u64 syslog_seq; +static u64 console_seq; +static u64 exclusive_console_stop_seq; +static unsigned long console_dropped; + +static size_t record_print_text(const struct printk_record *r, + bool syslog, bool time) +{ + return 0; +} +static ssize_t info_print_ext_header(char *buf, size_t size, + struct printk_info *info) +{ + return 0; +} +static ssize_t msg_print_ext_body(char *buf, size_t size, + char *text, size_t text_len, + struct dev_printk_info *dev_info) { return 0; } +static void console_lock_spinning_enable(void) { } +static int console_lock_spinning_disable_and_check(void) { return 0; } +static void call_console_drivers(const char *ext_text, size_t ext_len, + const char *text, size_t len) {} +static bool suppress_message_printing(int level) { return false; } #endif /* CONFIG_PRINTK */ @@ -2563,6 +2415,34 @@ } EXPORT_SYMBOL(is_console_locked); +/* + * Check if we have any console that is capable of printing while cpu is + * booting or shutting down. Requires console_sem. + */ +static int have_callable_console(void) +{ + struct console *con; + + for_each_console(con) + if ((con->flags & CON_ENABLED) && + (con->flags & CON_ANYTIME)) + return 1; + + return 0; +} + +/* + * Can we actually use the console at this time on this cpu? + * + * Console drivers may assume that per-cpu resources have been allocated. So + * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't + * call them until this CPU is officially up. + */ +static inline int can_use_console(void) +{ + return cpu_online(raw_smp_processor_id()) || have_callable_console(); +} + /** * console_unlock - unlock the console system * @@ -2579,14 +2459,142 @@ */ void console_unlock(void) { + static char ext_text[CONSOLE_EXT_LOG_MAX]; + static char text[LOG_LINE_MAX + PREFIX_MAX]; + unsigned long flags; + bool do_cond_resched, retry; + struct printk_info info; + struct printk_record r; + if (console_suspended) { up_console_sem(); return; } + prb_rec_init_rd(&r, &info, text, sizeof(text)); + + /* + * Console drivers are called with interrupts disabled, so + * @console_may_schedule should be cleared before; however, we may + * end up dumping a lot of lines, for example, if called from + * console registration path, and should invoke cond_resched() + * between lines if allowable. 
Not doing so can cause a very long + * scheduling stall on a slow console leading to RCU stall and + * softlockup warnings which exacerbate the issue with more + * messages practically incapacitating the system. + * + * console_trylock() is not able to detect the preemptive + * context reliably. Therefore the value must be stored before + * and cleared after the "again" goto label. + */ + do_cond_resched = console_may_schedule; +again: + console_may_schedule = 0; + + /* + * We released the console_sem lock, so we need to recheck if + * cpu is online and (if not) is there at least one CON_ANYTIME + * console. + */ + if (!can_use_console()) { + console_locked = 0; + up_console_sem(); + return; + } + + for (;;) { + size_t ext_len = 0; + size_t len; + + printk_safe_enter_irqsave(flags); + raw_spin_lock(&logbuf_lock); +skip: + if (!prb_read_valid(prb, console_seq, &r)) + break; + + if (console_seq != r.info->seq) { + console_dropped += r.info->seq - console_seq; + console_seq = r.info->seq; + } + + if (suppress_message_printing(r.info->level)) { + /* + * Skip record we have buffered and already printed + * directly to the console when we received it, and + * record that has level above the console loglevel. + */ + console_seq++; + goto skip; + } + + /* Output to all consoles once old messages replayed. */ + if (unlikely(exclusive_console && + console_seq >= exclusive_console_stop_seq)) { + exclusive_console = NULL; + } + + /* + * Handle extended console text first because later + * record_print_text() will modify the record buffer in-place. + */ + if (nr_ext_console_drivers) { + ext_len = info_print_ext_header(ext_text, + sizeof(ext_text), + r.info); + ext_len += msg_print_ext_body(ext_text + ext_len, + sizeof(ext_text) - ext_len, + &r.text_buf[0], + r.info->text_len, + &r.info->dev_info); + } + len = record_print_text(&r, + console_msg_format & MSG_FORMAT_SYSLOG, + printk_time); + console_seq++; + raw_spin_unlock(&logbuf_lock); + + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + */ + console_lock_spinning_enable(); + + stop_critical_timings(); /* don't trace print latency */ + call_console_drivers(ext_text, ext_len, text, len); + start_critical_timings(); + + if (console_lock_spinning_disable_and_check()) { + printk_safe_exit_irqrestore(flags); + return; + } + + printk_safe_exit_irqrestore(flags); + + if (do_cond_resched) + cond_resched(); + } + console_locked = 0; + raw_spin_unlock(&logbuf_lock); + up_console_sem(); + + /* + * Someone could have filled up the buffer again, so re-check if there's + * something to flush. In case we cannot trylock the console_sem again, + * there's a new owner and the console_unlock() from them will do the + * flush, no worries. + */ + raw_spin_lock(&logbuf_lock); + retry = prb_read_valid(prb, console_seq, NULL); + raw_spin_unlock(&logbuf_lock); + printk_safe_exit_irqrestore(flags); + + if (retry && console_trylock()) + goto again; } EXPORT_SYMBOL(console_unlock); @@ -2636,20 +2644,23 @@ */ void console_flush_on_panic(enum con_flush_mode mode) { - struct console *c; - u64 seq; - - if (!console_trylock()) - return; - + /* + * If someone else is holding the console lock, trylock will fail + * and may_schedule may be set. Ignore and proceed to unlock so + * that messages are flushed out. 
As this can be called from any + * context and we don't want to get preempted while flushing, + * ensure may_schedule is cleared. + */ + console_trylock(); console_may_schedule = 0; if (mode == CONSOLE_REPLAY_ALL) { - seq = prb_first_valid_seq(prb); - for_each_console(c) - atomic64_set(&c->printk_seq, seq); - } + unsigned long flags; + logbuf_lock_irqsave(flags); + console_seq = prb_first_valid_seq(prb); + logbuf_unlock_irqrestore(flags); + } console_unlock(); } @@ -2784,6 +2795,7 @@ */ void register_console(struct console *newcon) { + unsigned long flags; struct console *bcon = NULL; int err; @@ -2806,8 +2818,6 @@ } } } - - newcon->thread = NULL; if (console_drivers && console_drivers->flags & CON_BOOT) bcon = console_drivers; @@ -2850,10 +2860,8 @@ * the real console are the same physical device, it's annoying to * see the beginning boot messages twice */ - if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { + if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) newcon->flags &= ~CON_PRINTBUFFER; - newcon->flags |= CON_HANDOVER; - } /* * Put this console in the list - keep the @@ -2875,12 +2883,26 @@ if (newcon->flags & CON_EXTENDED) nr_ext_console_drivers++; - if (newcon->flags & CON_PRINTBUFFER) - atomic64_set(&newcon->printk_seq, 0); - else - atomic64_set(&newcon->printk_seq, prb_next_seq(prb)); - - console_try_thread(newcon); + if (newcon->flags & CON_PRINTBUFFER) { + /* + * console_unlock(); will print out the buffered messages + * for us. + */ + logbuf_lock_irqsave(flags); + /* + * We're about to replay the log buffer. Only do this to the + * just-registered console to avoid excessive message spam to + * the already-registered consoles. + * + * Set exclusive_console with disabled interrupts to reduce + * race window with eventual console_flush_on_panic() that + * ignores console_lock. 
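 *
 * [Editorial aside, not part of this patch: the replay has two halves.
 *  call_console_drivers() skips every console other than
 *  exclusive_console, and console_unlock() lifts the exclusion once the
 *  rewound console_seq catches up:
 *
 *      if (unlikely(exclusive_console &&
 *                   console_seq >= exclusive_console_stop_seq))
 *              exclusive_console = NULL;
 *
 *  so the new console prints the backlog exactly once while the
 *  already-registered consoles stay quiet.
 *  end of editorial aside]
 *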
+ */ + exclusive_console = newcon; + exclusive_console_stop_seq = console_seq; + console_seq = syslog_seq; + logbuf_unlock_irqrestore(flags); + } console_unlock(); console_sysfs_notify(); @@ -2953,9 +2975,6 @@ console->flags &= ~CON_ENABLED; console_unlock(); console_sysfs_notify(); - - if (console->thread && !IS_ERR(console->thread)) - kthread_stop(console->thread); if (console->exit) res = console->exit(console); @@ -3039,15 +3058,6 @@ unregister_console(con); } } - -#ifdef CONFIG_PRINTK - console_lock(); - for_each_console(con) - start_printk_kthread(con); - kthreads_started = true; - console_unlock(); -#endif - ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, console_cpu_notify); WARN_ON(ret < 0); @@ -3063,6 +3073,7 @@ * Delayed printk version, for scheduler-internal messages: */ #define PRINTK_PENDING_WAKEUP 0x01 +#define PRINTK_PENDING_OUTPUT 0x02 static DEFINE_PER_CPU(int, printk_pending); @@ -3070,8 +3081,14 @@ { int pending = __this_cpu_xchg(printk_pending, 0); + if (pending & PRINTK_PENDING_OUTPUT) { + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); + } + if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible_all(&log_wait); + wake_up_interruptible(&log_wait); } static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { @@ -3092,10 +3109,25 @@ preempt_enable(); } -__printf(1, 0) -static int vprintk_deferred(const char *fmt, va_list args) +void defer_console_output(void) { - return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); + if (!printk_percpu_data_ready()) + return; + + preempt_disable(); + __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); + preempt_enable(); +} + +int vprintk_deferred(const char *fmt, va_list args) +{ + int r; + + r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); + defer_console_output(); + + return r; } int printk_deferred(const char *fmt, ...) @@ -3235,26 +3267,8 @@ */ void kmsg_dump(enum kmsg_dump_reason reason) { - struct kmsg_dumper_iter iter; struct kmsg_dumper *dumper; - - if (!oops_in_progress) { - /* - * If atomic consoles are available, activate kernel sync mode - * to make sure any final messages are visible. The trailing - * printk message is important to flush any pending messages. - */ - if (have_atomic_console()) { - sync_mode = true; - pr_info("enabled sync mode\n"); - } - - /* - * Give the printing threads time to flush, allowing up to - * 1s of no printing forward progress before giving up. 
- */ - pr_flush(1000, true); - } + unsigned long flags; rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { @@ -3272,18 +3286,81 @@ continue; /* initialize iterator with data about the stored records */ - iter.active = true; - kmsg_dump_rewind(&iter); + dumper->active = true; + + logbuf_lock_irqsave(flags); + dumper->cur_seq = clear_seq; + dumper->next_seq = prb_next_seq(prb); + logbuf_unlock_irqrestore(flags); /* invoke dumper which will iterate over records */ - dumper->dump(dumper, reason, &iter); + dumper->dump(dumper, reason); + + /* reset iterator */ + dumper->active = false; } rcu_read_unlock(); } /** + * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) + * @dumper: registered kmsg dumper + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to + * @size: maximum size of the buffer + * @len: length of line placed into buffer + * + * Start at the beginning of the kmsg buffer, with the oldest kmsg + * record, and copy one record into the provided buffer. + * + * Consecutive calls will return the next available record moving + * towards the end of the buffer with the youngest messages. + * + * A return value of FALSE indicates that there are no more records to + * read. + * + * The function is similar to kmsg_dump_get_line(), but grabs no locks. + */ +bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len) +{ + struct printk_info info; + unsigned int line_count; + struct printk_record r; + size_t l = 0; + bool ret = false; + + prb_rec_init_rd(&r, &info, line, size); + + if (!dumper->active) + goto out; + + /* Read text or count text lines? */ + if (line) { + if (!prb_read_valid(prb, dumper->cur_seq, &r)) + goto out; + l = record_print_text(&r, syslog, printk_time); + } else { + if (!prb_read_valid_info(prb, dumper->cur_seq, + &info, &line_count)) { + goto out; + } + l = get_record_print_text_size(&info, line_count, syslog, + printk_time); + + } + + dumper->cur_seq = r.info->seq + 1; + ret = true; +out: + if (len) + *len = l; + return ret; +} + +/** * kmsg_dump_get_line - retrieve one kmsg log line - * @iter: kmsg dumper iterator + * @dumper: registered kmsg dumper * @syslog: include the "<4>" prefixes * @line: buffer to copy the line to * @size: maximum size of the buffer @@ -3298,47 +3375,23 @@ * A return value of FALSE indicates that there are no more records to * read. */ -bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len) { - struct printk_info info; - unsigned int line_count; - struct printk_record r; - size_t l = 0; - bool ret = false; + unsigned long flags; + bool ret; - prb_rec_init_rd(&r, &info, line, size); + logbuf_lock_irqsave(flags); + ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); + logbuf_unlock_irqrestore(flags); - if (!iter->active) - goto out; - - /* Read text or count text lines? 
*/ - if (line) { - if (!prb_read_valid(prb, iter->cur_seq, &r)) - goto out; - l = record_print_text(&r, syslog, printk_time); - } else { - if (!prb_read_valid_info(prb, iter->cur_seq, - &info, &line_count)) { - goto out; - } - l = get_record_print_text_size(&info, line_count, syslog, - printk_time); - - } - - iter->cur_seq = r.info->seq + 1; - ret = true; -out: - if (len) - *len = l; return ret; } EXPORT_SYMBOL_GPL(kmsg_dump_get_line); /** * kmsg_dump_get_buffer - copy kmsg log lines - * @iter: kmsg dumper iterator + * @dumper: registered kmsg dumper * @syslog: include the "<4>" prefixes * @buf: buffer to copy the line to * @size: maximum size of the buffer @@ -3355,258 +3408,116 @@ * A return value of FALSE indicates that there are no more records to * read. */ -bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog, - char *buf, size_t size, size_t *len_out) +bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len) { struct printk_info info; + unsigned int line_count; struct printk_record r; + unsigned long flags; u64 seq; u64 next_seq; - size_t len = 0; + size_t l = 0; bool ret = false; bool time = printk_time; - if (!iter->active || !buf || !size) + prb_rec_init_rd(&r, &info, buf, size); + + if (!dumper->active || !buf || !size) goto out; - if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { - if (info.seq != iter->cur_seq) { + logbuf_lock_irqsave(flags); + if (prb_read_valid_info(prb, dumper->cur_seq, &info, NULL)) { + if (info.seq != dumper->cur_seq) { /* messages are gone, move to first available one */ - iter->cur_seq = info.seq; + dumper->cur_seq = info.seq; } } /* last entry */ - if (iter->cur_seq >= iter->next_seq) + if (dumper->cur_seq >= dumper->next_seq) { + logbuf_unlock_irqrestore(flags); goto out; - - /* - * Find first record that fits, including all following records, - * into the user-provided buffer for this dump. Pass in size-1 - * because this function (by way of record_print_text()) will - * not write more than size-1 bytes of text into @buf. - */ - seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, - size - 1, syslog, time); - - /* - * Next kmsg_dump_get_buffer() invocation will dump block of - * older records stored right before this one. - */ - next_seq = seq; - - prb_rec_init_rd(&r, &info, buf, size); - - len = 0; - prb_for_each_record(seq, prb, seq, &r) { - if (r.info->seq >= iter->next_seq) - break; - - len += record_print_text(&r, syslog, time); - - /* Adjust record to store to remaining buffer space. 
 */
- prb_rec_init_rd(&r, &info, buf + len, size - len);
 }
- iter->next_seq = next_seq;
+ /* calculate length of entire buffer */
+ seq = dumper->cur_seq;
+ while (prb_read_valid_info(prb, seq, &info, &line_count)) {
+ if (r.info->seq >= dumper->next_seq)
+ break;
+ l += get_record_print_text_size(&info, line_count, syslog, time);
+ seq = r.info->seq + 1;
+ }
+
+ /* move first record forward until length fits into the buffer */
+ seq = dumper->cur_seq;
+ while (l >= size && prb_read_valid_info(prb, seq,
+ &info, &line_count)) {
+ if (r.info->seq >= dumper->next_seq)
+ break;
+ l -= get_record_print_text_size(&info, line_count, syslog, time);
+ seq = r.info->seq + 1;
+ }
+
+ /* last message in next iteration */
+ next_seq = seq;
+
+ /* actually read text into the buffer now */
+ l = 0;
+ while (prb_read_valid(prb, seq, &r)) {
+ if (r.info->seq >= dumper->next_seq)
+ break;
+
+ l += record_print_text(&r, syslog, time);
+
+ /* adjust record to store to remaining buffer space */
+ prb_rec_init_rd(&r, &info, buf + l, size - l);
+
+ seq = r.info->seq + 1;
+ }
+
+ dumper->next_seq = next_seq;
 ret = true;
+ logbuf_unlock_irqrestore(flags);
out:
- if (len_out)
- *len_out = len;
+ if (len)
+ *len = l;
 return ret;
}
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);

/**
+ * kmsg_dump_rewind_nolock - reset the iterator (unlocked version)
+ * @dumper: registered kmsg dumper
+ *
+ * Reset the dumper's iterator so that kmsg_dump_get_line() and
+ * kmsg_dump_get_buffer() can be called again and used multiple
+ * times within the same dumper.dump() callback.
+ *
+ * The function is similar to kmsg_dump_rewind(), but grabs no locks.
+ */
+void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
+{
+ dumper->cur_seq = clear_seq;
+ dumper->next_seq = prb_next_seq(prb);
+}
+
+/**
 * kmsg_dump_rewind - reset the iterator
- * @iter: kmsg dumper iterator
+ * @dumper: registered kmsg dumper
 *
 * Reset the dumper's iterator so that kmsg_dump_get_line() and
 * kmsg_dump_get_buffer() can be called again and used multiple
 * times within the same dumper.dump() callback.
 */
-void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
+void kmsg_dump_rewind(struct kmsg_dumper *dumper)
{
- iter->cur_seq = latched_seq_read_nolock(&clear_seq);
- iter->next_seq = prb_next_seq(prb);
+ unsigned long flags;
+
+ logbuf_lock_irqsave(flags);
+ kmsg_dump_rewind_nolock(dumper);
+ logbuf_unlock_irqrestore(flags);
}
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);

#endif
-
-struct prb_cpulock {
- atomic_t owner;
- unsigned long __percpu *irqflags;
-};
-
-#define DECLARE_STATIC_PRINTKRB_CPULOCK(name) \
-static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags); \
-static struct prb_cpulock name = { \
- .owner = ATOMIC_INIT(-1), \
- .irqflags = &_##name##_percpu_irqflags, \
-}
-
-static bool __prb_trylock(struct prb_cpulock *cpu_lock,
- unsigned int *cpu_store)
-{
- unsigned long *flags;
- unsigned int cpu;
-
- cpu = get_cpu();
-
- *cpu_store = atomic_read(&cpu_lock->owner);
- /* memory barrier to ensure the current lock owner is visible */
- smp_rmb();
- if (*cpu_store == -1) {
- flags = per_cpu_ptr(cpu_lock->irqflags, cpu);
- local_irq_save(*flags);
- if (atomic_try_cmpxchg_acquire(&cpu_lock->owner,
- cpu_store, cpu)) {
- return true;
- }
- local_irq_restore(*flags);
- } else if (*cpu_store == cpu) {
- return true;
- }
-
- put_cpu();
- return false;
-}
-
-/*
- * prb_lock: Perform a processor-reentrant spin lock.
- * @cpu_lock: A pointer to the lock object.
- * @cpu_store: A "flags" pointer to store lock status information. 
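 *
 * [Editorial aside, not part of this patch: this RT-specific lock, removed
 *  here, is reentrant per CPU rather than per task. cpu_store remembers
 *  the previous owner, so a nested acquisition on the same CPU (e.g. from
 *  an NMI) falls straight through:
 *
 *      unsigned int flags, flags2;
 *
 *      console_atomic_lock(&flags);    // owner was -1: take it, IRQs off
 *      console_atomic_lock(&flags2);   // owner == this CPU: succeeds
 *      console_atomic_unlock(flags2);  // still held by this CPU
 *      console_atomic_unlock(flags);   // owner back to -1, IRQs restored
 *
 *  end of editorial aside]
 *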
- * - * If no processor has the lock, the calling processor takes the lock and - * becomes the owner. If the calling processor is already the owner of the - * lock, this function succeeds immediately. If lock is locked by another - * processor, this function spins until the calling processor becomes the - * owner. - * - * It is safe to call this function from any context and state. - */ -static void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store) -{ - for (;;) { - if (__prb_trylock(cpu_lock, cpu_store)) - break; - cpu_relax(); - } -} - -/* - * prb_unlock: Perform a processor-reentrant spin unlock. - * @cpu_lock: A pointer to the lock object. - * @cpu_store: A "flags" object storing lock status information. - * - * Release the lock. The calling processor must be the owner of the lock. - * - * It is safe to call this function from any context and state. - */ -static void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store) -{ - unsigned long *flags; - unsigned int cpu; - - cpu = atomic_read(&cpu_lock->owner); - atomic_set_release(&cpu_lock->owner, cpu_store); - - if (cpu_store == -1) { - flags = per_cpu_ptr(cpu_lock->irqflags, cpu); - local_irq_restore(*flags); - } - - put_cpu(); -} - -DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock); - -void console_atomic_lock(unsigned int *flags) -{ - prb_lock(&printk_cpulock, flags); -} -EXPORT_SYMBOL(console_atomic_lock); - -void console_atomic_unlock(unsigned int flags) -{ - prb_unlock(&printk_cpulock, flags); -} -EXPORT_SYMBOL(console_atomic_unlock); - -static void pr_msleep(bool may_sleep, int ms) -{ - if (may_sleep) { - msleep(ms); - } else { - while (ms--) - udelay(1000); - } -} - -/** - * pr_flush() - Wait for printing threads to catch up. - * - * @timeout_ms: The maximum time (in ms) to wait. - * @reset_on_progress: Reset the timeout if forward progress is seen. - * - * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 - * represents infinite waiting. - * - * If @reset_on_progress is true, the timeout will be reset whenever any - * printer has been seen to make some forward progress. - * - * Context: Any context. - * Return: true if all enabled printers are caught up. - */ -bool pr_flush(int timeout_ms, bool reset_on_progress) -{ - int remaining = timeout_ms; - struct console *con; - u64 last_diff = 0; - bool may_sleep; - u64 printk_seq; - u64 diff; - u64 seq; - - may_sleep = (preemptible() && - !in_softirq() && - system_state >= SYSTEM_RUNNING); - - seq = prb_next_seq(prb); - - for (;;) { - diff = 0; - - for_each_console(con) { - if (!(con->flags & CON_ENABLED)) - continue; - if (!con->write && !con->write_atomic) - continue; - printk_seq = atomic64_read(&con->printk_seq); - if (printk_seq < seq) - diff += seq - printk_seq; - } - - if (diff != last_diff && reset_on_progress) - remaining = timeout_ms; - - if (!diff || remaining == 0) - break; - - if (remaining < 0) { - pr_msleep(may_sleep, 100); - } else if (remaining < 100) { - pr_msleep(may_sleep, remaining); - remaining = 0; - } else { - pr_msleep(may_sleep, 100); - remaining -= 100; - } - - last_diff = diff; - } - - return (diff == 0); -} -EXPORT_SYMBOL(pr_flush); -- Gitblit v1.6.2