From 244b2c5ca8b14627e4a17755e5922221e121c771 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:15:07 +0000
Subject: [PATCH] change system file
---
kernel/kernel/events/ring_buffer.c | 239 +++++++++++++++++++++++++++++++++++------------------------
1 files changed, 143 insertions(+), 96 deletions(-)
diff --git a/kernel/kernel/events/ring_buffer.c b/kernel/kernel/events/ring_buffer.c
index 12f351b..4032cd4 100644
--- a/kernel/kernel/events/ring_buffer.c
+++ b/kernel/kernel/events/ring_buffer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Performance events ring-buffer code:
*
@@ -5,8 +6,6 @@
* Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
- *
- * For licensing details see kernel-base/COPYING
*/
#include <linux/perf_event.h>
@@ -36,17 +35,33 @@
*/
static void perf_output_get_handle(struct perf_output_handle *handle)
{
- struct ring_buffer *rb = handle->rb;
+ struct perf_buffer *rb = handle->rb;
preempt_disable();
- local_inc(&rb->nest);
+
+ /*
+ * Avoid an explicit LOAD/STORE such that architectures with memops
+ * can use them.
+ */
+ (*(volatile unsigned int *)&rb->nest)++;
handle->wakeup = local_read(&rb->wakeup);
}
static void perf_output_put_handle(struct perf_output_handle *handle)
{
- struct ring_buffer *rb = handle->rb;
+ struct perf_buffer *rb = handle->rb;
unsigned long head;
+ unsigned int nest;
+
+ /*
+ * If this isn't the outermost nesting, we don't have to update
+ * @rb->user_page->data_head.
+ */
+ nest = READ_ONCE(rb->nest);
+ if (nest > 1) {
+ WRITE_ONCE(rb->nest, nest - 1);
+ goto out;
+ }
again:
/*
@@ -64,15 +79,6 @@
* IRQ/NMI can happen here and advance @rb->head, causing our
* load above to be stale.
*/
-
- /*
- * If this isn't the outermost nesting, we don't have to update
- * @rb->user_page->data_head.
- */
- if (local_read(&rb->nest) > 1) {
- local_dec(&rb->nest);
- goto out;
- }
/*
* Since the mmap() consumer (userspace) can run on a different CPU:
@@ -109,7 +115,7 @@
* write will (temporarily) publish a stale value.
*/
barrier();
- local_set(&rb->nest, 0);
+ WRITE_ONCE(rb->nest, 0);
/*
* Ensure we decrement @rb->nest before we validate the @rb->head.
@@ -117,7 +123,7 @@
*/
barrier();
if (unlikely(head != local_read(&rb->head))) {
- local_inc(&rb->nest);
+ WRITE_ONCE(rb->nest, 1);
goto again;
}
@@ -141,10 +147,11 @@
static __always_inline int
__perf_output_begin(struct perf_output_handle *handle,
+ struct perf_sample_data *data,
struct perf_event *event, unsigned int size,
bool backward)
{
- struct ring_buffer *rb;
+ struct perf_buffer *rb;
unsigned long tail, offset, head;
int have_lost, page_shift;
struct {
@@ -231,18 +238,16 @@
handle->size = (1UL << page_shift) - offset;
if (unlikely(have_lost)) {
- struct perf_sample_data sample_data;
-
lost_event.header.size = sizeof(lost_event);
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
lost_event.id = event->id;
lost_event.lost = local_xchg(&rb->lost, 0);
- perf_event_header__init_id(&lost_event.header,
- &sample_data, event);
+ /* XXX mostly redundant; @data is already fully initializes */
+ perf_event_header__init_id(&lost_event.header, data, event);
perf_output_put(handle, lost_event);
- perf_event__output_id_sample(event, handle, &sample_data);
+ perf_event__output_id_sample(event, handle, data);
}
return 0;
@@ -257,22 +262,25 @@
}
int perf_output_begin_forward(struct perf_output_handle *handle,
- struct perf_event *event, unsigned int size)
+ struct perf_sample_data *data,
+ struct perf_event *event, unsigned int size)
{
- return __perf_output_begin(handle, event, size, false);
+ return __perf_output_begin(handle, data, event, size, false);
}
int perf_output_begin_backward(struct perf_output_handle *handle,
+ struct perf_sample_data *data,
struct perf_event *event, unsigned int size)
{
- return __perf_output_begin(handle, event, size, true);
+ return __perf_output_begin(handle, data, event, size, true);
}
int perf_output_begin(struct perf_output_handle *handle,
+ struct perf_sample_data *data,
struct perf_event *event, unsigned int size)
{
- return __perf_output_begin(handle, event, size,
+ return __perf_output_begin(handle, data, event, size,
unlikely(is_write_backward(event)));
}
@@ -295,7 +303,7 @@
}
static void
-ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
+ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
{
long max_size = perf_data_size(rb);
@@ -310,7 +318,7 @@
else
rb->overwrite = 1;
- atomic_set(&rb->refcount, 1);
+ refcount_set(&rb->refcount, 1);
INIT_LIST_HEAD(&rb->event_list);
spin_lock_init(&rb->event_lock);
@@ -355,7 +363,8 @@
{
struct perf_event *output_event = event;
unsigned long aux_head, aux_tail;
- struct ring_buffer *rb;
+ struct perf_buffer *rb;
+ unsigned int nest;
if (output_event->parent)
output_event = output_event->parent;
@@ -383,15 +392,18 @@
if (!atomic_read(&rb->aux_mmap_count))
goto err;
- if (!atomic_inc_not_zero(&rb->aux_refcount))
+ if (!refcount_inc_not_zero(&rb->aux_refcount))
goto err;
+ nest = READ_ONCE(rb->aux_nest);
/*
* Nesting is not supported for AUX area, make sure nested
* writers are caught early
*/
- if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1)))
+ if (WARN_ON_ONCE(nest))
goto err_put;
+
+ WRITE_ONCE(rb->aux_nest, nest + 1);
aux_head = rb->aux_head;
@@ -420,7 +432,7 @@
if (!handle->size) { /* A, matches D */
event->pending_disable = smp_processor_id();
perf_output_wakeup(handle);
- local_set(&rb->aux_nest, 0);
+ WRITE_ONCE(rb->aux_nest, 0);
goto err_put;
}
}
@@ -439,7 +451,7 @@
}
EXPORT_SYMBOL_GPL(perf_aux_output_begin);
-static __always_inline bool rb_need_aux_wakeup(struct ring_buffer *rb)
+static __always_inline bool rb_need_aux_wakeup(struct perf_buffer *rb)
{
if (rb->aux_overwrite)
return false;
@@ -465,7 +477,7 @@
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
{
bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED);
- struct ring_buffer *rb = handle->rb;
+ struct perf_buffer *rb = handle->rb;
unsigned long aux_head;
/* in overwrite mode, driver provides aux_head via handle */
@@ -481,14 +493,21 @@
rb->aux_head += size;
}
- if (size || handle->aux_flags) {
- /*
- * Only send RECORD_AUX if we have something useful to communicate
- */
-
+ /*
+ * Only send RECORD_AUX if we have something useful to communicate
+ *
+ * Note: the OVERWRITE records by themselves are not considered
+ * useful, as they don't communicate any *new* information,
+ * aside from the short-lived offset, that becomes history at
+ * the next event sched-in and therefore isn't useful.
+ * The userspace that needs to copy out AUX data in overwrite
+ * mode should know to use user_page::aux_head for the actual
+ * offset. So, from now on we don't output AUX records that
+ * have *only* OVERWRITE flag set.
+ */
+ if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE))
perf_event_aux_event(handle->event, aux_head, size,
- handle->aux_flags);
- }
+ handle->aux_flags);
WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
if (rb_need_aux_wakeup(rb))
@@ -502,7 +521,7 @@
handle->event = NULL;
- local_set(&rb->aux_nest, 0);
+ WRITE_ONCE(rb->aux_nest, 0);
/* can't be last */
rb_free_aux(rb);
ring_buffer_put(rb);
@@ -515,7 +534,7 @@
*/
int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
{
- struct ring_buffer *rb = handle->rb;
+ struct perf_buffer *rb = handle->rb;
if (size > handle->size)
return -ENOSPC;
@@ -545,6 +564,42 @@
}
EXPORT_SYMBOL_GPL(perf_get_aux);
+/*
+ * Copy out AUX data from an AUX handle.
+ */
+long perf_output_copy_aux(struct perf_output_handle *aux_handle,
+ struct perf_output_handle *handle,
+ unsigned long from, unsigned long to)
+{
+ struct perf_buffer *rb = aux_handle->rb;
+ unsigned long tocopy, remainder, len = 0;
+ void *addr;
+
+ from &= (rb->aux_nr_pages << PAGE_SHIFT) - 1;
+ to &= (rb->aux_nr_pages << PAGE_SHIFT) - 1;
+
+ do {
+ tocopy = PAGE_SIZE - offset_in_page(from);
+ if (to > from)
+ tocopy = min(tocopy, to - from);
+ if (!tocopy)
+ break;
+
+ addr = rb->aux_pages[from >> PAGE_SHIFT];
+ addr += offset_in_page(from);
+
+ remainder = perf_output_copy(handle, addr, tocopy);
+ if (remainder)
+ return -EFAULT;
+
+ len += tocopy;
+ from += tocopy;
+ from &= (rb->aux_nr_pages << PAGE_SHIFT) - 1;
+ } while (to != from);
+
+ return len;
+}
+
#define PERF_AUX_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
static struct page *rb_alloc_aux_page(int node, int order)
@@ -573,7 +628,7 @@
return page;
}
-static void rb_free_aux_page(struct ring_buffer *rb, int idx)
+static void rb_free_aux_page(struct perf_buffer *rb, int idx)
{
struct page *page = virt_to_page(rb->aux_pages[idx]);
@@ -582,7 +637,7 @@
__free_page(page);
}
-static void __rb_free_aux(struct ring_buffer *rb)
+static void __rb_free_aux(struct perf_buffer *rb)
{
int pg;
@@ -609,34 +664,31 @@
}
}
-int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
+int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
pgoff_t pgoff, int nr_pages, long watermark, int flags)
{
bool overwrite = !(flags & RING_BUFFER_WRITABLE);
int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
- int ret = -ENOMEM, max_order = 0;
+ int ret = -ENOMEM, max_order;
if (!has_aux(event))
return -EOPNOTSUPP;
- if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
- /*
- * We need to start with the max_order that fits in nr_pages,
- * not the other way around, hence ilog2() and not get_order.
- */
- max_order = ilog2(nr_pages);
+ /*
+ * We need to start with the max_order that fits in nr_pages,
+ * not the other way around, hence ilog2() and not get_order.
+ */
+ max_order = ilog2(nr_pages);
- /*
- * PMU requests more than one contiguous chunks of memory
- * for SW double buffering
- */
- if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) &&
- !overwrite) {
- if (!max_order)
- return -EINVAL;
+ /*
+ * PMU requests more than one contiguous chunks of memory
+ * for SW double buffering
+ */
+ if (!overwrite) {
+ if (!max_order)
+ return -EINVAL;
- max_order--;
- }
+ max_order--;
}
rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL,
@@ -686,7 +738,7 @@
* we keep a refcount here to make sure either of the two can
* reference them safely.
*/
- atomic_set(&rb->aux_refcount, 1);
+ refcount_set(&rb->aux_refcount, 1);
rb->aux_overwrite = overwrite;
rb->aux_watermark = watermark;
@@ -703,9 +755,9 @@
return ret;
}
-void rb_free_aux(struct ring_buffer *rb)
+void rb_free_aux(struct perf_buffer *rb)
{
- if (atomic_dec_and_test(&rb->aux_refcount))
+ if (refcount_dec_and_test(&rb->aux_refcount))
__rb_free_aux(rb);
}
@@ -716,7 +768,7 @@
*/
static struct page *
-__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+__perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
{
if (pgoff > rb->nr_pages)
return NULL;
@@ -740,13 +792,21 @@
return page_address(page);
}
-struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
+static void perf_mmap_free_page(void *addr)
{
- struct ring_buffer *rb;
+ struct page *page = virt_to_page(addr);
+
+ page->mapping = NULL;
+ __free_page(page);
+}
+
+struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
+{
+ struct perf_buffer *rb;
unsigned long size;
int i;
- size = sizeof(struct ring_buffer);
+ size = sizeof(struct perf_buffer);
size += nr_pages * sizeof(void *);
if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER)
@@ -774,9 +834,9 @@
fail_data_pages:
for (i--; i >= 0; i--)
- free_page((unsigned long)rb->data_pages[i]);
+ perf_mmap_free_page(rb->data_pages[i]);
- free_page((unsigned long)rb->user_page);
+ perf_mmap_free_page(rb->user_page);
fail_user_page:
kfree(rb);
@@ -785,32 +845,19 @@
return NULL;
}
-static void perf_mmap_free_page(unsigned long addr)
-{
- struct page *page = virt_to_page((void *)addr);
-
- page->mapping = NULL;
- __free_page(page);
-}
-
-void rb_free(struct ring_buffer *rb)
+void rb_free(struct perf_buffer *rb)
{
int i;
- perf_mmap_free_page((unsigned long)rb->user_page);
+ perf_mmap_free_page(rb->user_page);
for (i = 0; i < rb->nr_pages; i++)
- perf_mmap_free_page((unsigned long)rb->data_pages[i]);
+ perf_mmap_free_page(rb->data_pages[i]);
kfree(rb);
}
#else
-static int data_page_nr(struct ring_buffer *rb)
-{
- return rb->nr_pages << page_order(rb);
-}
-
static struct page *
-__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+__perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
{
/* The '>' counts in the user page. */
if (pgoff > data_page_nr(rb))
@@ -828,11 +875,11 @@
static void rb_free_work(struct work_struct *work)
{
- struct ring_buffer *rb;
+ struct perf_buffer *rb;
void *base;
int i, nr;
- rb = container_of(work, struct ring_buffer, work);
+ rb = container_of(work, struct perf_buffer, work);
nr = data_page_nr(rb);
base = rb->user_page;
@@ -844,18 +891,18 @@
kfree(rb);
}
-void rb_free(struct ring_buffer *rb)
+void rb_free(struct perf_buffer *rb)
{
schedule_work(&rb->work);
}
-struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
+struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
{
- struct ring_buffer *rb;
+ struct perf_buffer *rb;
unsigned long size;
void *all_buf;
- size = sizeof(struct ring_buffer);
+ size = sizeof(struct perf_buffer);
size += sizeof(void *);
rb = kzalloc(size, GFP_KERNEL);
@@ -889,7 +936,7 @@
#endif
struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
{
if (rb->aux_nr_pages) {
/* above AUX space */
--
Gitblit v1.6.2