summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--kernel/trace/trace.c36
1 files changed, 35 insertions, 1 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a0a84ff46ecd..a0d66a056e59 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2734,10 +2734,44 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
if (!tr->no_filter_buffering_ref &&
(trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
(entry = this_cpu_read(trace_buffered_event))) {
- /* Try to use the per cpu buffer first */
+ /*
+ * Filtering is on, so try to use the per cpu buffer first.
+ * This buffer will simulate a ring_buffer_event,
+ * where the type_len is zero and the array[0] will
+ * hold the full length.
+ * (see include/linux/ring-buffer.h for details on
+ * how the ring_buffer_event is structured).
+ *
+ * Using a temp buffer during filtering and copying it
+ * on a matched filter is quicker than writing directly
+ * into the ring buffer and then discarding it when
+ * it doesn't match. That is because the discard
+ * requires several atomic operations to get right.
+ * Copying on match and doing nothing on a failed match
+ * is still quicker than no copy on match, but having
+ * to discard out of the ring buffer on a failed match.
+ */
int max_len = PAGE_SIZE - struct_size(entry, array, 1);
val = this_cpu_inc_return(trace_buffered_event_cnt);
+
+ /*
+ * Preemption is disabled, but interrupts and NMIs
+ * can still come in now. If that happens after
+ * the above increment, then it will have to go
+ * back to the old method of allocating the event
+ * on the ring buffer, and if the filter fails, it
+ * will have to call ring_buffer_discard_commit()
+ * to remove it.
+ *
+ * Need to also check the unlikely case that the
+ * length is bigger than the temp buffer size.
+ * If that happens, then the reserve is pretty much
+ * guaranteed to fail, as the ring buffer currently
+ * only allows events less than a page. But that may
+ * change in the future, so let the ring buffer reserve
+ * handle the failure in that case.
+ */
if (val == 1 && likely(len <= max_len)) {
trace_event_setup(entry, type, trace_ctx);
entry->array[0] = len;