From f8ff195ef1b9b84111f1872d70919fbfb38f2a5f Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 14 Aug 2020 23:39:16 +0206 Subject: docs: vmcoreinfo: add lockless printk ringbuffer vmcoreinfo With the introduction of the lockless printk ringbuffer, the VMCOREINFO relating to the kernel log buffer was changed. Update the documentation to match those changes. Fixes: 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer") Reported-by: Nick Desaulniers Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200814213316.6394-1-john.ogness@linutronix.de --- Documentation/admin-guide/kdump/vmcoreinfo.rst | 131 +++++++++++++++++++------ 1 file changed, 102 insertions(+), 29 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index e4ee8b2db604..703bb9f92a39 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -184,50 +184,123 @@ from this. Free areas descriptor. User-space tools use this value to iterate the free_area ranges. MAX_ORDER is used by the zone buddy allocator. -log_first_idx +prb +--- + +A pointer to the printk ringbuffer (struct printk_ringbuffer). This +may be pointing to the static boot ringbuffer or the dynamically +allocated ringbuffer, depending on when the the core dump occurred. +Used by user-space tools to read the active kernel log buffer. + +printk_rb_static +---------------- + +A pointer to the static boot printk ringbuffer. If @prb has a +different value, this is useful for viewing the initial boot messages, +which may have been overwritten in the dynamically allocated +ringbuffer. + +clear_seq +--------- + +The sequence number of the printk() record after the last clear +command. It indicates the first record after the last +SYSLOG_ACTION_CLEAR, like issued by 'dmesg -c'. Used by user-space +tools to dump a subset of the dmesg log. + +printk_ringbuffer +----------------- + +The size of a printk_ringbuffer structure. This structure contains all +information required for accessing the various components of the +kernel log buffer. + +(printk_ringbuffer, desc_ring|text_data_ring|dict_data_ring|fail) +----------------------------------------------------------------- + +Offsets for the various components of the printk ringbuffer. Used by +user-space tools to view the kernel log buffer without requiring the +declaration of the structure. + +prb_desc_ring ------------- -Index of the first record stored in the buffer log_buf. Used by -user-space tools to read the strings in the log_buf. +The size of the prb_desc_ring structure. This structure contains +information about the set of record descriptors. -log_buf -------- +(prb_desc_ring, count_bits|descs|head_id|tail_id) +------------------------------------------------- + +Offsets for the fields describing the set of record descriptors. Used +by user-space tools to be able to traverse the descriptors without +requiring the declaration of the structure. + +prb_desc +-------- + +The size of the prb_desc structure. This structure contains +information about a single record descriptor. + +(prb_desc, info|state_var|text_blk_lpos|dict_blk_lpos) +------------------------------------------------------ + +Offsets for the fields describing a record descriptors. Used by +user-space tools to be able to read descriptors without requiring +the declaration of the structure. + +prb_data_blk_lpos +----------------- + +The size of the prb_data_blk_lpos structure. This structure contains +information about where the text or dictionary data (data block) is +located within the respective data ring. + +(prb_data_blk_lpos, begin|next) +------------------------------- -Console output is written to the ring buffer log_buf at index -log_first_idx. Used to get the kernel log. +Offsets for the fields describing the location of a data block. Used +by user-space tools to be able to locate data blocks without +requiring the declaration of the structure. -log_buf_len +printk_info ----------- -log_buf's length. +The size of the printk_info structure. This structure contains all +the meta-data for a record. -clear_idx ---------- +(printk_info, seq|ts_nsec|text_len|dict_len|caller_id) +------------------------------------------------------ -The index that the next printk() record to read after the last clear -command. It indicates the first record after the last SYSLOG_ACTION -_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump -the dmesg log. +Offsets for the fields providing the meta-data for a record. Used by +user-space tools to be able to read the information without requiring +the declaration of the structure. -log_next_idx ------------- +prb_data_ring +------------- -The index of the next record to store in the buffer log_buf. Used to -compute the index of the current buffer position. +The size of the prb_data_ring structure. This structure contains +information about a set of data blocks. -printk_log ----------- +(prb_data_ring, size_bits|data|head_lpos|tail_lpos) +--------------------------------------------------- -The size of a structure printk_log. Used to compute the size of -messages, and extract dmesg log. It encapsulates header information for -log_buf, such as timestamp, syslog level, etc. +Offsets for the fields describing a set of data blocks. Used by +user-space tools to be able to access the data blocks without +requiring the declaration of the structure. -(printk_log, ts_nsec|len|text_len|dict_len) -------------------------------------------- +atomic_long_t +------------- + +The size of the atomic_long_t structure. Used by user-space tools to +be able to copy the full structure, regardless of its +architecture-specific implementation. + +(atomic_long_t, counter) +------------------------ -It represents field offsets in struct printk_log. User space tools -parse it and check whether the values of printk_log's members have been -changed. +Offset for the long value of an atomic_long_t variable. Used by +user-space tools to access the long value without requiring the +architecture-specific declaration. (free_area.free_list, MIGRATE_TYPES) ------------------------------------ -- cgit v1.2.3 From e60768311af854734ce2bbfc50f24cff67b54a91 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Fri, 14 Aug 2020 23:31:25 +0206 Subject: scripts/gdb: update for lockless printk ringbuffer With the introduction of the lockless printk ringbuffer, the data structure for the kernel log buffer was changed. Update the gdb scripts to be able to parse/print the new log buffer structure. Fixes: 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer") Signed-off-by: John Ogness Reported-by: Nick Desaulniers Tested-by: Nick Desaulniers Tested-by: Petr Mladek [akpm@linux-foundation.org: A typo fix.] Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200814212525.6118-3-john.ogness@linutronix.de --- Documentation/admin-guide/kdump/gdbmacros.txt | 153 ++++++++++++++++++-------- 1 file changed, 107 insertions(+), 46 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt index 220d0a80ca2c..7adece30237e 100644 --- a/Documentation/admin-guide/kdump/gdbmacros.txt +++ b/Documentation/admin-guide/kdump/gdbmacros.txt @@ -170,57 +170,111 @@ document trapinfo address the kernel panicked. end -define dump_log_idx - set $idx = $arg0 +define dump_record + set var $desc = $arg0 if ($argc > 1) - set $prev_flags = $arg1 + set var $prev_flags = $arg1 else - set $prev_flags = 0 + set var $prev_flags = 0 end - set $msg = ((struct printk_log *) (log_buf + $idx)) - set $prefix = 1 - set $newline = 1 - set $log = log_buf + $idx + sizeof(*$msg) - - # prev & LOG_CONT && !(msg->flags & LOG_PREIX) - if (($prev_flags & 8) && !($msg->flags & 4)) - set $prefix = 0 + + set var $info = &$desc->info + set var $prefix = 1 + set var $newline = 1 + + set var $begin = $desc->text_blk_lpos.begin % (1U << prb->text_data_ring.size_bits) + set var $next = $desc->text_blk_lpos.next % (1U << prb->text_data_ring.size_bits) + + # handle data-less record + if ($begin & 1) + set var $text_len = 0 + set var $log = "" + else + # handle wrapping data block + if ($begin > $next) + set var $begin = 0 + end + + # skip over descriptor id + set var $begin = $begin + sizeof(long) + + # handle truncated message + if ($next - $begin < $info->text_len) + set var $text_len = $next - $begin + else + set var $text_len = $info->text_len + end + + set var $log = &prb->text_data_ring.data[$begin] + end + + # prev & LOG_CONT && !(info->flags & LOG_PREIX) + if (($prev_flags & 8) && !($info->flags & 4)) + set var $prefix = 0 end - # msg->flags & LOG_CONT - if ($msg->flags & 8) + # info->flags & LOG_CONT + if ($info->flags & 8) # (prev & LOG_CONT && !(prev & LOG_NEWLINE)) if (($prev_flags & 8) && !($prev_flags & 2)) - set $prefix = 0 + set var $prefix = 0 end - # (!(msg->flags & LOG_NEWLINE)) - if (!($msg->flags & 2)) - set $newline = 0 + # (!(info->flags & LOG_NEWLINE)) + if (!($info->flags & 2)) + set var $newline = 0 end end if ($prefix) - printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000 + printf "[%5lu.%06lu] ", $info->ts_nsec / 1000000000, $info->ts_nsec % 1000000000 end - if ($msg->text_len != 0) - eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len + if ($text_len) + eval "printf \"%%%d.%ds\", $log", $text_len, $text_len end if ($newline) printf "\n" end - if ($msg->dict_len > 0) - set $dict = $log + $msg->text_len - set $idx = 0 - set $line = 1 - while ($idx < $msg->dict_len) + + # handle dictionary data + + set var $begin = $desc->dict_blk_lpos.begin % (1U << prb->dict_data_ring.size_bits) + set var $next = $desc->dict_blk_lpos.next % (1U << prb->dict_data_ring.size_bits) + + # handle data-less record + if ($begin & 1) + set var $dict_len = 0 + set var $dict = "" + else + # handle wrapping data block + if ($begin > $next) + set var $begin = 0 + end + + # skip over descriptor id + set var $begin = $begin + sizeof(long) + + # handle truncated message + if ($next - $begin < $info->dict_len) + set var $dict_len = $next - $begin + else + set var $dict_len = $info->dict_len + end + + set var $dict = &prb->dict_data_ring.data[$begin] + end + + if ($dict_len > 0) + set var $idx = 0 + set var $line = 1 + while ($idx < $dict_len) if ($line) printf " " - set $line = 0 + set var $line = 0 end - set $c = $dict[$idx] + set var $c = $dict[$idx] if ($c == '\0') printf "\n" - set $line = 1 + set var $line = 1 else if ($c < ' ' || $c >= 127 || $c == '\\') printf "\\x%02x", $c @@ -228,33 +282,40 @@ define dump_log_idx printf "%c", $c end end - set $idx = $idx + 1 + set var $idx = $idx + 1 end printf "\n" end end -document dump_log_idx - Dump a single log given its index in the log buffer. The first - parameter is the index into log_buf, the second is optional and - specified the previous log buffer's flags, used for properly - formatting continued lines. +document dump_record + Dump a single record. The first parameter is the descriptor + sequence number, the second is optional and specifies the + previous record's flags, used for properly formatting + continued lines. end define dmesg - set $i = log_first_idx - set $end_idx = log_first_idx - set $prev_flags = 0 + set var $desc_committed = 1UL << ((sizeof(long) * 8) - 1) + set var $flags_mask = 3UL << ((sizeof(long) * 8) - 2) + set var $id_mask = ~$flags_mask + + set var $desc_count = 1U << prb->desc_ring.count_bits + set var $prev_flags = 0 + + set var $id = prb->desc_ring.tail_id.counter + set var $end_id = prb->desc_ring.head_id.counter while (1) - set $msg = ((struct printk_log *) (log_buf + $i)) - if ($msg->len == 0) - set $i = 0 - else - dump_log_idx $i $prev_flags - set $i = $i + $msg->len - set $prev_flags = $msg->flags + set var $desc = &prb->desc_ring.descs[$id % $desc_count] + + # skip non-committed record + if (($desc->state_var.counter & $flags_mask) == $desc_committed) + dump_record $desc $prev_flags + set var $prev_flags = $desc->info.flags end - if ($i == $end_idx) + + set var $id = ($id + 1) & $id_mask + if ($id == $end_id) loop_break end end -- cgit v1.2.3 From 10dcb06d40411a73e1ae111717e9a987bb760313 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 14 Sep 2020 14:39:52 +0206 Subject: printk: ringbuffer: change representation of states Rather than deriving the state by evaluating bits within the flags area of the state variable, assign the states explicit values and set those values in the flags area. Introduce macros to make it simple to read and write state values for the state variable. Although the functionality is preserved, the binary representation for the states is changed. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200914123354.832-5-john.ogness@linutronix.de --- Documentation/admin-guide/kdump/gdbmacros.txt | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt index 7adece30237e..8f533b751c46 100644 --- a/Documentation/admin-guide/kdump/gdbmacros.txt +++ b/Documentation/admin-guide/kdump/gdbmacros.txt @@ -295,9 +295,12 @@ document dump_record end define dmesg - set var $desc_committed = 1UL << ((sizeof(long) * 8) - 1) - set var $flags_mask = 3UL << ((sizeof(long) * 8) - 2) - set var $id_mask = ~$flags_mask + # definitions from kernel/printk/printk_ringbuffer.h + set var $desc_committed = 1 + set var $desc_sv_bits = sizeof(long) * 8 + set var $desc_flags_shift = $desc_sv_bits - 2 + set var $desc_flags_mask = 3 << $desc_flags_shift + set var $id_mask = ~$desc_flags_mask set var $desc_count = 1U << prb->desc_ring.count_bits set var $prev_flags = 0 @@ -309,7 +312,8 @@ define dmesg set var $desc = &prb->desc_ring.descs[$id % $desc_count] # skip non-committed record - if (($desc->state_var.counter & $flags_mask) == $desc_committed) + set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) + if ($state == $desc_committed) dump_record $desc $prev_flags set var $prev_flags = $desc->info.flags end -- cgit v1.2.3 From 4cfc7258f876a7feba673ac6d050f525b39cc84c Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 14 Sep 2020 14:39:53 +0206 Subject: printk: ringbuffer: add finalization/extension support Add support for extending the newest data block. For this, introduce a new finalization state (desc_finalized) denoting a committed descriptor that cannot be extended. Until a record is finalized, a writer can reopen that record to append new data. Reopening a record means transitioning from the desc_committed state back to the desc_reserved state. A writer can explicitly finalize a record if there is no intention of extending it. Also, records are automatically finalized when a new record is reserved. This relieves writers of needing to explicitly finalize while also making such records available to readers sooner. (Readers can only traverse finalized records.) Four new memory barrier pairs are introduced. Two of them are insignificant additions (data_realloc:A/desc_read:D and data_realloc:A/data_push_tail:B) because they are alternate path memory barriers that exactly match the purpose, pairing, and context of the two existing memory barrier pairs they provide an alternate path for. The other two new memory barrier pairs are significant additions: desc_reopen_last:A / _prb_commit:B - When reopening a descriptor, ensure the state transitions back to desc_reserved before fully trusting the descriptor data. _prb_commit:B / desc_reserve:D - When committing a descriptor, ensure the state transitions to desc_committed before checking the head ID to see if the descriptor needs to be finalized. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200914123354.832-6-john.ogness@linutronix.de --- Documentation/admin-guide/kdump/gdbmacros.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt index 8f533b751c46..94fabb165abf 100644 --- a/Documentation/admin-guide/kdump/gdbmacros.txt +++ b/Documentation/admin-guide/kdump/gdbmacros.txt @@ -297,6 +297,7 @@ end define dmesg # definitions from kernel/printk/printk_ringbuffer.h set var $desc_committed = 1 + set var $desc_finalized = 2 set var $desc_sv_bits = sizeof(long) * 8 set var $desc_flags_shift = $desc_sv_bits - 2 set var $desc_flags_mask = 3 << $desc_flags_shift @@ -313,7 +314,7 @@ define dmesg # skip non-committed record set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) - if ($state == $desc_committed) + if ($state == $desc_committed || $state == $desc_finalized) dump_record $desc $prev_flags set var $prev_flags = $desc->info.flags end -- cgit v1.2.3 From 74caba7f2a0685575b3ee5330a118f5922485e02 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 21 Sep 2020 13:24:45 +0206 Subject: printk: move dictionary keys to dev_printk_info Dictionaries are only used for SUBSYSTEM and DEVICE properties. The current implementation stores the property names each time they are used. This requires more space than otherwise necessary. Also, because the dictionary entries are currently considered optional, it cannot be relied upon that they are always available, even if the writer wanted to store them. These issues will increase should new dictionary properties be introduced. Rather than storing the subsystem and device properties in the dict ring, introduce a struct dev_printk_info with separate fields to store only the property values. Embed this struct within the struct printk_info to provide guaranteed availability. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/87mu1jl6ne.fsf@jogness.linutronix.de --- Documentation/admin-guide/kdump/gdbmacros.txt | 73 ++++++++++++--------------- 1 file changed, 33 insertions(+), 40 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt index 94fabb165abf..82aecdcae8a6 100644 --- a/Documentation/admin-guide/kdump/gdbmacros.txt +++ b/Documentation/admin-guide/kdump/gdbmacros.txt @@ -172,13 +172,13 @@ end define dump_record set var $desc = $arg0 - if ($argc > 1) - set var $prev_flags = $arg1 + set var $info = $arg1 + if ($argc > 2) + set var $prev_flags = $arg2 else set var $prev_flags = 0 end - set var $info = &$desc->info set var $prefix = 1 set var $newline = 1 @@ -237,44 +237,36 @@ define dump_record # handle dictionary data - set var $begin = $desc->dict_blk_lpos.begin % (1U << prb->dict_data_ring.size_bits) - set var $next = $desc->dict_blk_lpos.next % (1U << prb->dict_data_ring.size_bits) - - # handle data-less record - if ($begin & 1) - set var $dict_len = 0 - set var $dict = "" - else - # handle wrapping data block - if ($begin > $next) - set var $begin = 0 - end - - # skip over descriptor id - set var $begin = $begin + sizeof(long) - - # handle truncated message - if ($next - $begin < $info->dict_len) - set var $dict_len = $next - $begin - else - set var $dict_len = $info->dict_len + set var $dict = &$info->dev_info.subsystem[0] + set var $dict_len = sizeof($info->dev_info.subsystem) + if ($dict[0] != '\0') + printf " SUBSYSTEM=" + set var $idx = 0 + while ($idx < $dict_len) + set var $c = $dict[$idx] + if ($c == '\0') + loop_break + else + if ($c < ' ' || $c >= 127 || $c == '\\') + printf "\\x%02x", $c + else + printf "%c", $c + end + end + set var $idx = $idx + 1 end - - set var $dict = &prb->dict_data_ring.data[$begin] + printf "\n" end - if ($dict_len > 0) + set var $dict = &$info->dev_info.device[0] + set var $dict_len = sizeof($info->dev_info.device) + if ($dict[0] != '\0') + printf " DEVICE=" set var $idx = 0 - set var $line = 1 while ($idx < $dict_len) - if ($line) - printf " " - set var $line = 0 - end set var $c = $dict[$idx] if ($c == '\0') - printf "\n" - set var $line = 1 + loop_break else if ($c < ' ' || $c >= 127 || $c == '\\') printf "\\x%02x", $c @@ -288,10 +280,10 @@ define dump_record end end document dump_record - Dump a single record. The first parameter is the descriptor - sequence number, the second is optional and specifies the - previous record's flags, used for properly formatting - continued lines. + Dump a single record. The first parameter is the descriptor, + the second parameter is the info, the third parameter is + optional and specifies the previous record's flags, used for + properly formatting continued lines. end define dmesg @@ -311,12 +303,13 @@ define dmesg while (1) set var $desc = &prb->desc_ring.descs[$id % $desc_count] + set var $info = &prb->desc_ring.infos[$id % $desc_count] # skip non-committed record set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) if ($state == $desc_committed || $state == $desc_finalized) - dump_record $desc $prev_flags - set var $prev_flags = $desc->info.flags + dump_record $desc $info $prev_flags + set var $prev_flags = $info->flags end set var $id = ($id + 1) & $id_mask -- cgit v1.2.3