hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/kernel/trace/trace_syscalls.c
....@@ -7,6 +7,7 @@
77 #include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
88 #include <linux/ftrace.h>
99 #include <linux/perf_event.h>
10
+#include <linux/xarray.h>
1011 #include <asm/syscall.h>
1112
1213 #include "trace_output.h"
....@@ -30,6 +31,7 @@
3031 extern struct syscall_metadata *__start_syscalls_metadata[];
3132 extern struct syscall_metadata *__stop_syscalls_metadata[];
3233
34
+static DEFINE_XARRAY(syscalls_metadata_sparse);
3335 static struct syscall_metadata **syscalls_metadata;
3436
3537 #ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
....@@ -101,6 +103,9 @@
101103
102104 static struct syscall_metadata *syscall_nr_to_meta(int nr)
103105 {
106
+ if (IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR))
107
+ return xa_load(&syscalls_metadata_sparse, (unsigned long)nr);
108
+
104109 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
105110 return NULL;
106111
....@@ -198,11 +203,10 @@
198203
199204 extern char *__bad_type_size(void);
200205
201
-#define SYSCALL_FIELD(type, field, name) \
202
- sizeof(type) != sizeof(trace.field) ? \
203
- __bad_type_size() : \
204
- #type, #name, offsetof(typeof(trace), field), \
205
- sizeof(trace.field), is_signed_type(type)
206
+#define SYSCALL_FIELD(_type, _name) { \
207
+ .type = #_type, .name = #_name, \
208
+ .size = sizeof(_type), .align = __alignof__(_type), \
209
+ .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }
206210
207211 static int __init
208212 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
....@@ -269,38 +273,19 @@
269273 {
270274 struct syscall_trace_enter trace;
271275 struct syscall_metadata *meta = call->data;
272
- int ret;
273
- int i;
274276 int offset = offsetof(typeof(trace), args);
275
-
276
- ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
277
- FILTER_OTHER);
278
- if (ret)
279
- return ret;
277
+ int ret = 0;
278
+ int i;
280279
281280 for (i = 0; i < meta->nb_args; i++) {
282281 ret = trace_define_field(call, meta->types[i],
283282 meta->args[i], offset,
284283 sizeof(unsigned long), 0,
285284 FILTER_OTHER);
285
+ if (ret)
286
+ break;
286287 offset += sizeof(unsigned long);
287288 }
288
-
289
- return ret;
290
-}
291
-
292
-static int __init syscall_exit_define_fields(struct trace_event_call *call)
293
-{
294
- struct syscall_trace_exit trace;
295
- int ret;
296
-
297
- ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
298
- FILTER_OTHER);
299
- if (ret)
300
- return ret;
301
-
302
- ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
303
- FILTER_OTHER);
304289
305290 return ret;
306291 }
....@@ -312,8 +297,9 @@
312297 struct syscall_trace_enter *entry;
313298 struct syscall_metadata *sys_data;
314299 struct ring_buffer_event *event;
315
- struct ring_buffer *buffer;
300
+ struct trace_buffer *buffer;
316301 unsigned long irq_flags;
302
+ unsigned long args[6];
317303 int pc;
318304 int syscall_nr;
319305 int size;
....@@ -339,7 +325,7 @@
339325 local_save_flags(irq_flags);
340326 pc = preempt_count();
341327
342
- buffer = tr->trace_buffer.buffer;
328
+ buffer = tr->array_buffer.buffer;
343329 event = trace_buffer_lock_reserve(buffer,
344330 sys_data->enter_event->event.type, size, irq_flags, pc);
345331 if (!event)
....@@ -347,7 +333,8 @@
347333
348334 entry = ring_buffer_event_data(event);
349335 entry->nr = syscall_nr;
350
- syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
336
+ syscall_get_arguments(current, regs, args);
337
+ memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
351338
352339 event_trigger_unlock_commit(trace_file, buffer, event, entry,
353340 irq_flags, pc);
....@@ -360,7 +347,7 @@
360347 struct syscall_trace_exit *entry;
361348 struct syscall_metadata *sys_data;
362349 struct ring_buffer_event *event;
363
- struct ring_buffer *buffer;
350
+ struct trace_buffer *buffer;
364351 unsigned long irq_flags;
365352 int pc;
366353 int syscall_nr;
....@@ -384,7 +371,7 @@
384371 local_save_flags(irq_flags);
385372 pc = preempt_count();
386373
387
- buffer = tr->trace_buffer.buffer;
374
+ buffer = tr->array_buffer.buffer;
388375 event = trace_buffer_lock_reserve(buffer,
389376 sys_data->exit_event->event.type, sizeof(*entry),
390377 irq_flags, pc);
....@@ -500,6 +487,13 @@
500487 return id;
501488 }
502489
490
+static struct trace_event_fields __refdata syscall_enter_fields_array[] = {
491
+ SYSCALL_FIELD(int, __syscall_nr),
492
+ { .type = TRACE_FUNCTION_TYPE,
493
+ .define_fields = syscall_enter_define_fields },
494
+ {}
495
+};
496
+
503497 struct trace_event_functions enter_syscall_print_funcs = {
504498 .trace = print_syscall_enter,
505499 };
....@@ -511,7 +505,7 @@
511505 struct trace_event_class __refdata event_class_syscall_enter = {
512506 .system = "syscalls",
513507 .reg = syscall_enter_register,
514
- .define_fields = syscall_enter_define_fields,
508
+ .fields_array = syscall_enter_fields_array,
515509 .get_fields = syscall_get_enter_fields,
516510 .raw_init = init_syscall_trace,
517511 };
....@@ -519,7 +513,11 @@
519513 struct trace_event_class __refdata event_class_syscall_exit = {
520514 .system = "syscalls",
521515 .reg = syscall_exit_register,
522
- .define_fields = syscall_exit_define_fields,
516
+ .fields_array = (struct trace_event_fields[]){
517
+ SYSCALL_FIELD(int, __syscall_nr),
518
+ SYSCALL_FIELD(long, ret),
519
+ {}
520
+ },
523521 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
524522 .raw_init = init_syscall_trace,
525523 };
....@@ -534,12 +532,16 @@
534532 struct syscall_metadata *meta;
535533 unsigned long addr;
536534 int i;
535
+ void *ret;
537536
538
- syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
539
- GFP_KERNEL);
540
- if (!syscalls_metadata) {
541
- WARN_ON(1);
542
- return;
537
+ if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
538
+ syscalls_metadata = kcalloc(NR_syscalls,
539
+ sizeof(*syscalls_metadata),
540
+ GFP_KERNEL);
541
+ if (!syscalls_metadata) {
542
+ WARN_ON(1);
543
+ return;
544
+ }
543545 }
544546
545547 for (i = 0; i < NR_syscalls; i++) {
....@@ -549,7 +551,16 @@
549551 continue;
550552
551553 meta->syscall_nr = i;
552
- syscalls_metadata[i] = meta;
554
+
555
+ if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) {
556
+ syscalls_metadata[i] = meta;
557
+ } else {
558
+ ret = xa_store(&syscalls_metadata_sparse, i, meta,
559
+ GFP_KERNEL);
560
+ WARN(xa_is_err(ret),
561
+ "Syscall memory allocation failed\n");
562
+ }
563
+
553564 }
554565 }
555566
....@@ -583,6 +594,7 @@
583594 struct syscall_metadata *sys_data;
584595 struct syscall_trace_enter *rec;
585596 struct hlist_head *head;
597
+ unsigned long args[6];
586598 bool valid_prog_array;
587599 int syscall_nr;
588600 int rctx;
....@@ -613,8 +625,8 @@
613625 return;
614626
615627 rec->nr = syscall_nr;
616
- syscall_get_arguments(current, regs, 0, sys_data->nb_args,
617
- (unsigned long *)&rec->args);
628
+ syscall_get_arguments(current, regs, args);
629
+ memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
618630
619631 if ((valid_prog_array &&
620632 !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||