~hc/RK356X_SDK_RELEASE.git

..	..	@@ -7,18 +7,73 @@
7	7	#include <linux/slab.h>
8	8	#include <linux/bpf.h>
9	9	#include <linux/bpf_perf_event.h>
	10	+#include <linux/btf.h>
10	11	#include <linux/filter.h>
11	12	#include <linux/uaccess.h>
12	13	#include <linux/ctype.h>
13	14	#include <linux/kprobes.h>
	15	+#include <linux/spinlock.h>
14	16	#include <linux/syscalls.h>
15	17	#include <linux/error-injection.h>
	18	+#include <linux/btf_ids.h>
	19	+
	20	+#include <uapi/linux/bpf.h>
	21	+#include <uapi/linux/btf.h>
	22	+
	23	+#include <asm/tlb.h>
16	24
17	25	#include "trace_probe.h"
18	26	#include "trace.h"
19	27
	28	+#define CREATE_TRACE_POINTS
	29	+#include "bpf_trace.h"
	30	+
	31	+#define bpf_event_rcu_dereference(p) \
	32	+ rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
	33	+
	34	+#ifdef CONFIG_MODULES
	35	+struct bpf_trace_module { /* list node tying one module to bpf_trace_modules */
	36	+ struct module *module; /* module whose bpf_raw_events[] table gets searched */
	37	+ struct list_head list; /* linkage into bpf_trace_modules */
	38	+};
	39	+
	40	+static LIST_HEAD(bpf_trace_modules); /* iterated by bpf_get_raw_tracepoint_module() under bpf_module_mutex */
	41	+static DEFINE_MUTEX(bpf_module_mutex); /* held around the bpf_trace_modules walk */
	42	+
	43	+static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) /* look up a module raw tracepoint by name; NULL when not found or when the module reference cannot be taken */
	44	+{
	45	+ struct bpf_raw_event_map *btp, *ret = NULL;
	46	+ struct bpf_trace_module *btm;
	47	+ unsigned int i;
	48	+
	49	+ mutex_lock(&bpf_module_mutex);
	50	+ list_for_each_entry(btm, &bpf_trace_modules, list) {
	51	+ for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
	52	+ btp = &btm->module->bpf_raw_events[i];
	53	+ if (!strcmp(btp->tp->name, name)) {
	54	+ if (try_module_get(btm->module)) /* return the entry only with a module reference held */
	55	+ ret = btp;
	56	+ goto out; /* the first name match ends the search, even if the reference failed */
	57	+ }
	58	+ }
	59	+ }
	60	+out:
	61	+ mutex_unlock(&bpf_module_mutex);
	62	+ return ret;
	63	+}
	64	+#else
	65	+static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) /* !CONFIG_MODULES variant: there is no module list to search */
	66	+{
	67	+ return NULL;
	68	+}
	69	+#endif /* CONFIG_MODULES */
	70	+
20	71	u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
21	72	u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
	73	+
	74	+static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
	75	+ u64 flags, const struct btf **btf,
	76	+ s32 *btf_id);
22	77
23	78	/**
24	79	* trace_call_bpf - invoke BPF program
..	..	@@ -38,10 +93,7 @@
38	93	{
39	94	unsigned int ret;
40	95
41		- if (in_nmi()) /* not supported yet */
42		- return 1;
43		-
44		- preempt_disable();
	96	+ cant_sleep();
45	97
46	98	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
47	99	/*
..	..	@@ -73,11 +125,9 @@
73	125
74	126	out:
75	127	__this_cpu_dec(bpf_prog_active);
76		- preempt_enable();
77	128
78	129	return ret;
79	130	}
80		-EXPORT_SYMBOL_GPL(trace_call_bpf);
81	131
82	132	#ifdef CONFIG_BPF_KPROBE_OVERRIDE
83	133	BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
..	..	@@ -96,19 +146,25 @@
96	146	};
97	147	#endif
98	148
99		-BPF_CALL_3(bpf_probe_read, void , dst, u32, size, const void , unsafe_ptr)
	149	+static __always_inline int /* copy size bytes from a user pointer into dst; returns the copy routine's result */
	150	+bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
100	151	{
101	152	int ret;
102	153
103		- ret = probe_kernel_read(dst, unsafe_ptr, size);
	154	+ ret = copy_from_user_nofault(dst, unsafe_ptr, size);
104	155	if (unlikely(ret < 0))
105	156	memset(dst, 0, size); /* on failure hand back a zeroed buffer rather than partial data */
106		-
107	157	return ret;
108	158	}
109	159
110		-static const struct bpf_func_proto bpf_probe_read_proto = {
111		- .func = bpf_probe_read,
	160	+BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size, /* helper entry point: forwards unchanged to bpf_probe_read_user_common() */
	161	+ const void __user *, unsafe_ptr)
	162	+{
	163	+ return bpf_probe_read_user_common(dst, size, unsafe_ptr);
	164	+}
	165	+
	166	+const struct bpf_func_proto bpf_probe_read_user_proto = {
	167	+ .func = bpf_probe_read_user,
112	168	.gpl_only = true,
113	169	.ret_type = RET_INTEGER,
114	170	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
..	..	@@ -116,7 +172,145 @@
116	172	.arg3_type = ARG_ANYTHING,
117	173	};
118	174
119		-BPF_CALL_3(bpf_probe_write_user, void , unsafe_ptr, const void , src,
	175	+static __always_inline int /* copy a NUL-terminated string of at most size bytes from a user pointer into dst */
	176	+bpf_probe_read_user_str_common(void *dst, u32 size,
	177	+ const void __user *unsafe_ptr)
	178	+{
	179	+ int ret;
	180	+
	181	+ /*
	182	+ * NB: We rely on strncpy_from_user() not copying junk past the NUL
	183	+ * terminator into `dst`.
	184	+ *
	185	+ * strncpy_from_user() does long-sized strides in the fast path. If the
	186	+ * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
	187	+ * then there could be junk after the NUL in `dst`. If user takes `dst`
	188	+ * and keys a hash map with it, then semantically identical strings can
	189	+ * occupy multiple entries in the map.
	190	+ */
	191	+ ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
	192	+ if (unlikely(ret < 0))
	193	+ memset(dst, 0, size); /* error path only: clear the whole destination */
	194	+ return ret; /* negative error, otherwise whatever strncpy_from_user_nofault() reported */
	195	+}
	196	+
	197	+BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size, /* helper entry point: forwards unchanged to bpf_probe_read_user_str_common() */
	198	+ const void __user *, unsafe_ptr)
	199	+{
	200	+ return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
	201	+}
	202	+
	203	+const struct bpf_func_proto bpf_probe_read_user_str_proto = { /* non-static descriptor for the helper above */
	204	+ .func = bpf_probe_read_user_str,
	205	+ .gpl_only = true,
	206	+ .ret_type = RET_INTEGER,
	207	+ .arg1_type = ARG_PTR_TO_UNINIT_MEM, /* dst */
	208	+ .arg2_type = ARG_CONST_SIZE_OR_ZERO, /* size */
	209	+ .arg3_type = ARG_ANYTHING, /* unsafe_ptr */
	210	+};
	211	+
	212	+static __always_inline int /* copy size bytes from a kernel pointer into dst; returns the copy routine's result */
	213	+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
	214	+{
	215	+ int ret;
	216	+
	217	+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
	218	+ if (unlikely(ret < 0))
	219	+ memset(dst, 0, size); /* on failure hand back a zeroed buffer rather than partial data */
	220	+ return ret;
	221	+}
	222	+
	223	+BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size, /* helper entry point: forwards unchanged to bpf_probe_read_kernel_common() */
	224	+ const void *, unsafe_ptr)
	225	+{
	226	+ return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
	227	+}
	228	+
	229	+const struct bpf_func_proto bpf_probe_read_kernel_proto = { /* non-static descriptor for the helper above */
	230	+ .func = bpf_probe_read_kernel,
	231	+ .gpl_only = true,
	232	+ .ret_type = RET_INTEGER,
	233	+ .arg1_type = ARG_PTR_TO_UNINIT_MEM, /* dst */
	234	+ .arg2_type = ARG_CONST_SIZE_OR_ZERO, /* size */
	235	+ .arg3_type = ARG_ANYTHING, /* unsafe_ptr */
	236	+};
	237	+
	238	+static __always_inline int /* copy a NUL-terminated string of at most size bytes from a kernel pointer into dst */
	239	+bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
	240	+{
	241	+ int ret;
	242	+
	243	+ /*
	244	+ * The strncpy_from_kernel_nofault() call will likely not fill the
	245	+ * entire buffer, but that's okay in this circumstance as we're probing
	246	+ * arbitrary memory anyway similar to bpf_probe_read_*() and might
	247	+ * as well probe the stack. Thus, memory is explicitly cleared
	248	+ * only in error case, so that improper users ignoring return
	249	+ * code altogether don't copy garbage; otherwise length of string
	250	+ * is returned that can be used for bpf_perf_event_output() et al.
	251	+ */
	252	+ ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
	253	+ if (unlikely(ret < 0))
	254	+ memset(dst, 0, size); /* error path only: clear the whole destination */
	255	+ return ret;
	256	+}
	257	+
	258	+BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size, /* helper entry point: forwards unchanged to bpf_probe_read_kernel_str_common() */
	259	+ const void *, unsafe_ptr)
	260	+{
	261	+ return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
	262	+}
	263	+
	264	+const struct bpf_func_proto bpf_probe_read_kernel_str_proto = { /* non-static descriptor for the helper above */
	265	+ .func = bpf_probe_read_kernel_str,
	266	+ .gpl_only = true,
	267	+ .ret_type = RET_INTEGER,
	268	+ .arg1_type = ARG_PTR_TO_UNINIT_MEM, /* dst */
	269	+ .arg2_type = ARG_CONST_SIZE_OR_ZERO, /* size */
	270	+ .arg3_type = ARG_ANYTHING, /* unsafe_ptr */
	271	+};
	272	+
	273	+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	274	+BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size, /* picks the user or kernel read path from the numeric value of unsafe_ptr */
	275	+ const void *, unsafe_ptr)
	276	+{
	277	+ if ((unsigned long)unsafe_ptr < TASK_SIZE) { /* addresses below TASK_SIZE are read as user memory */
	278	+ return bpf_probe_read_user_common(dst, size,
	279	+ (__force void __user *)unsafe_ptr);
	280	+ }
	281	+ return bpf_probe_read_kernel_common(dst, size, unsafe_ptr); /* everything else is read as kernel memory */
	282	+}
	283	+
	284	+static const struct bpf_func_proto bpf_probe_read_compat_proto = { /* file-local descriptor for the helper above */
	285	+ .func = bpf_probe_read_compat,
	286	+ .gpl_only = true,
	287	+ .ret_type = RET_INTEGER,
	288	+ .arg1_type = ARG_PTR_TO_UNINIT_MEM, /* dst */
	289	+ .arg2_type = ARG_CONST_SIZE_OR_ZERO, /* size */
	290	+ .arg3_type = ARG_ANYTHING, /* unsafe_ptr */
	291	+};
	292	+
	293	+BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size, /* string variant: picks the user or kernel path from the numeric value of unsafe_ptr */
	294	+ const void *, unsafe_ptr)
	295	+{
	296	+ if ((unsigned long)unsafe_ptr < TASK_SIZE) { /* addresses below TASK_SIZE are read as user memory */
	297	+ return bpf_probe_read_user_str_common(dst, size,
	298	+ (__force void __user *)unsafe_ptr);
	299	+ }
	300	+ return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr); /* everything else is read as kernel memory */
	301	+}
	302	+
	303	+static const struct bpf_func_proto bpf_probe_read_compat_str_proto = { /* file-local descriptor for the helper above */
	304	+ .func = bpf_probe_read_compat_str,
	305	+ .gpl_only = true,
	306	+ .ret_type = RET_INTEGER,
	307	+ .arg1_type = ARG_PTR_TO_UNINIT_MEM, /* dst */
	308	+ .arg2_type = ARG_CONST_SIZE_OR_ZERO, /* size */
	309	+ .arg3_type = ARG_ANYTHING, /* unsafe_ptr */
	310	+};
	311	+#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
	312	+
	313	+BPF_CALL_3(bpf_probe_write_user, void __user , unsafe_ptr, const void , src,
120	314	u32, size)
121	315	{
122	316	/*
..	..	@@ -126,6 +320,10 @@
126	320	* access_ok() should prevent writing to non-user memory, but in
127	321	* some situations (nommu, temporary switch, etc) access_ok() does
128	322	* not provide enough validation, hence the check on KERNEL_DS.
	323	+ *
	324	+ * nmi_uaccess_okay() ensures the probe is not run in an interim
	325	+ * state, when the task or mm are switched. This is specifically
	326	+ * required to prevent the use of temporary mm.
129	327	*/
130	328
131	329	if (unlikely(in_interrupt() \|\|
..	..	@@ -133,10 +331,10 @@
133	331	return -EPERM;
134	332	if (unlikely(uaccess_kernel()))
135	333	return -EPERM;
136		- if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
	334	+ if (unlikely(!nmi_uaccess_okay()))
137	335	return -EPERM;
138	336
139		- return probe_kernel_write(unsafe_ptr, src, size);
	337	+ return copy_to_user_nofault(unsafe_ptr, src, size);
140	338	}
141	339
142	340	static const struct bpf_func_proto bpf_probe_write_user_proto = {
..	..	@@ -150,25 +348,75 @@
150	348
151	349	static const struct bpf_func_proto *bpf_get_probe_write_proto(void) /* returns the bpf_probe_write_user descriptor, or NULL without CAP_SYS_ADMIN */
152	350	{
	351	+ if (!capable(CAP_SYS_ADMIN))
	352	+ return NULL;
	353	+
153	354	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
154	355	current->comm, task_pid_nr(current)); /* logs the name and pid of the current task */
155	356
156	357	return &bpf_probe_write_user_proto;
157	358	}
158	359
	360	+static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, /* copy a string into buf from kernel or user memory, chosen by fmt_ptype ('s', 'k' or 'u') */
	361	+ size_t bufsz)
	362	+{
	363	+ void __user *user_ptr = (__force void __user *)unsafe_ptr;
	364	+
	365	+ buf[0] = 0; /* start from an empty string; the copy results below are not checked */
	366	+
	367	+ switch (fmt_ptype) {
	368	+ case 's':
	369	+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	370	+ if ((unsigned long)unsafe_ptr < TASK_SIZE) { /* plain %s: addresses below TASK_SIZE are read as user memory */
	371	+ strncpy_from_user_nofault(buf, user_ptr, bufsz);
	372	+ break;
	373	+ }
	374	+ fallthrough;
	375	+#endif
	376	+ case 'k':
	377	+ strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
	378	+ break;
	379	+ case 'u':
	380	+ strncpy_from_user_nofault(buf, user_ptr, bufsz);
	381	+ break;
	382	+ }
	383	+}
	384	+
	385	+static DEFINE_RAW_SPINLOCK(trace_printk_lock); /* taken around the shared static buffer in bpf_do_trace_printk() */
	386	+
	387	+#define BPF_TRACE_PRINTK_SIZE 1024 /* size in bytes of that buffer */
	388	+
	389	+static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) /* format into a static buffer and emit it via trace_bpf_trace_printk(); returns the vsnprintf() result */
	390	+{
	391	+ static char buf[BPF_TRACE_PRINTK_SIZE]; /* one buffer shared by all callers, hence the lock */
	392	+ unsigned long flags;
	393	+ va_list ap;
	394	+ int ret;
	395	+
	396	+ raw_spin_lock_irqsave(&trace_printk_lock, flags);
	397	+ va_start(ap, fmt);
	398	+ ret = vsnprintf(buf, sizeof(buf), fmt, ap);
	399	+ va_end(ap);
	400	+ /* vsnprintf() will not append null for zero-length strings */
	401	+ if (ret == 0)
	402	+ buf[0] = '\0';
	403	+ trace_bpf_trace_printk(buf); /* emitted while the lock is still held, before buf can be reused */
	404	+ raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
	405	+
	406	+ return ret;
	407	+}
	408	+
159	409	/*
160	410	* Only limited trace_printk() conversion specifiers allowed:
161		- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
	411	+ * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
162	412	*/
163	413	BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
164	414	u64, arg2, u64, arg3)
165	415	{
	416	+ int i, mod[3] = {}, fmt_cnt = 0;
	417	+ char buf[64], fmt_ptype;
	418	+ void *unsafe_ptr = NULL;
166	419	bool str_seen = false;
167		- int mod[3] = {};
168		- int fmt_cnt = 0;
169		- u64 unsafe_addr;
170		- char buf[64];
171		- int i;
172	420
173	421	/*
174	422	* bpf_check()->check_func_arg()->check_stack_boundary()
..	..	@@ -194,40 +442,60 @@
194	442	if (fmt[i] == 'l') {
195	443	mod[fmt_cnt]++;
196	444	i++;
197		- } else if (fmt[i] == 'p' \|\| fmt[i] == 's') {
	445	+ } else if (fmt[i] == 'p') {
198	446	mod[fmt_cnt]++;
	447	+ if ((fmt[i + 1] == 'k' \|\|
	448	+ fmt[i + 1] == 'u') &&
	449	+ fmt[i + 2] == 's') {
	450	+ fmt_ptype = fmt[i + 1];
	451	+ i += 2;
	452	+ goto fmt_str;
	453	+ }
	454	+
	455	+ if (fmt[i + 1] == 'B') {
	456	+ i++;
	457	+ goto fmt_next;
	458	+ }
	459	+
199	460	/* disallow any further format extensions */
200	461	if (fmt[i + 1] != 0 &&
201	462	!isspace(fmt[i + 1]) &&
202	463	!ispunct(fmt[i + 1]))
203	464	return -EINVAL;
204		- fmt_cnt++;
205		- if (fmt[i] == 's') {
206		- if (str_seen)
207		- /* allow only one '%s' per fmt string */
208		- return -EINVAL;
209		- str_seen = true;
210	465
211		- switch (fmt_cnt) {
212		- case 1:
213		- unsafe_addr = arg1;
214		- arg1 = (long) buf;
215		- break;
216		- case 2:
217		- unsafe_addr = arg2;
218		- arg2 = (long) buf;
219		- break;
220		- case 3:
221		- unsafe_addr = arg3;
222		- arg3 = (long) buf;
223		- break;
224		- }
225		- buf[0] = 0;
226		- strncpy_from_unsafe(buf,
227		- (void *) (long) unsafe_addr,
228		- sizeof(buf));
	466	+ goto fmt_next;
	467	+ } else if (fmt[i] == 's') {
	468	+ mod[fmt_cnt]++;
	469	+ fmt_ptype = fmt[i];
	470	+fmt_str:
	471	+ if (str_seen)
	472	+ /* allow only one '%s' per fmt string */
	473	+ return -EINVAL;
	474	+ str_seen = true;
	475	+
	476	+ if (fmt[i + 1] != 0 &&
	477	+ !isspace(fmt[i + 1]) &&
	478	+ !ispunct(fmt[i + 1]))
	479	+ return -EINVAL;
	480	+
	481	+ switch (fmt_cnt) {
	482	+ case 0:
	483	+ unsafe_ptr = (void *)(long)arg1;
	484	+ arg1 = (long)buf;
	485	+ break;
	486	+ case 1:
	487	+ unsafe_ptr = (void *)(long)arg2;
	488	+ arg2 = (long)buf;
	489	+ break;
	490	+ case 2:
	491	+ unsafe_ptr = (void *)(long)arg3;
	492	+ arg3 = (long)buf;
	493	+ break;
229	494	}
230		- continue;
	495	+
	496	+ bpf_trace_copy_string(buf, unsafe_ptr, fmt_ptype,
	497	+ sizeof(buf));
	498	+ goto fmt_next;
231	499	}
232	500
233	501	if (fmt[i] == 'l') {
..	..	@@ -238,6 +506,7 @@
238	506	if (fmt[i] != 'i' && fmt[i] != 'd' &&
239	507	fmt[i] != 'u' && fmt[i] != 'x')
240	508	return -EINVAL;
	509	+fmt_next:
241	510	fmt_cnt++;
242	511	}
243	512
..	..	@@ -246,8 +515,7 @@
246	515	*/
247	516	#define __BPF_TP_EMIT() __BPF_ARG3_TP()
248	517	#define __BPF_TP(...) \
249		- __trace_printk(0 /* Fake ip */, \
250		- fmt, ##__VA_ARGS__)
	518	+ bpf_do_trace_printk(fmt, ##__VA_ARGS__)
251	519
252	520	#define __BPF_ARG1_TP(...) \
253	521	((mod[0] == 2 \|\| (mod[0] == 1 && __BITS_PER_LONG == 64)) \
..	..	@@ -284,13 +552,253 @@
284	552	const struct bpf_func_proto *bpf_get_trace_printk_proto(void) /* enables the bpf_trace_printk event, then returns the helper descriptor */
285	553	{
286	554	/*
287		- * this program might be calling bpf_trace_printk,
288		- * so allocate per-cpu printk buffers
	555	+ * This program might be calling bpf_trace_printk,
	556	+ * so enable the associated bpf_trace/bpf_trace_printk event.
	557	+ * Repeat this each time as it is possible a user has
	558	+ * disabled bpf_trace_printk events. By loading a program
	559	+ * calling bpf_trace_printk() however the user has expressed
	560	+ * the intent to see such events.
289	561	*/
290		- trace_printk_init_buffers();
	562	+ if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
	563	+ pr_warn_ratelimited("could not enable bpf_trace_printk events"); /* failure is only logged; the descriptor is still returned */
291	564
292	565	return &bpf_trace_printk_proto;
293	566	}
	567	+
	568	+#define MAX_SEQ_PRINTF_VARARGS 12 /* upper bound on conversions per bpf_seq_printf() format */
	569	+#define MAX_SEQ_PRINTF_MAX_MEMCPY 6 /* number of scratch slots for copied strings/addresses */
	570	+#define MAX_SEQ_PRINTF_STR_LEN 128 /* bytes per scratch slot */
	571	+
	572	+struct bpf_seq_printf_buf { /* scratch storage used by bpf_seq_printf() */
	573	+ char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
	574	+};
	575	+static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf); /* one scratch area per CPU */
	576	+static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used); /* per-CPU in-use counter checked by bpf_seq_printf() */
	577	+
	578	+BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, /* validate fmt against data[], stage %s / %pI4 / %pI6 arguments in per-CPU scratch, then call seq_printf() */
	579	+ const void *, data, u32, data_len)
	580	+{
	581	+ int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
	582	+ int i, buf_used, copy_size, num_args;
	583	+ u64 params[MAX_SEQ_PRINTF_VARARGS];
	584	+ struct bpf_seq_printf_buf *bufs;
	585	+ const u64 *args = data;
	586	+
	587	+ buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);
	588	+ if (WARN_ON_ONCE(buf_used > 1)) { /* scratch area already in use on this CPU */
	589	+ err = -EBUSY;
	590	+ goto out;
	591	+ }
	592	+
	593	+ bufs = this_cpu_ptr(&bpf_seq_printf_buf);
	594	+
	595	+ /*
	596	+ * bpf_check()->check_func_arg()->check_stack_boundary()
	597	+ * guarantees that fmt points to bpf program stack,
	598	+ * fmt_size bytes of it were initialized and fmt_size > 0
	599	+ */
	600	+ if (fmt[--fmt_size] != 0) /* the last byte of fmt must be the NUL terminator */
	601	+ goto out;
	602	+
	603	+ if (data_len & 7) /* data is consumed as an array of u64, so the length must be a multiple of 8 */
	604	+ goto out;
	605	+
	606	+ for (i = 0; i < fmt_size; i++) {
	607	+ if (fmt[i] == '%') {
	608	+ if (fmt[i + 1] == '%')
	609	+ i++;
	610	+ else if (!data || !data_len) /* a real conversion needs argument data */
	611	+ goto out;
	612	+ }
	613	+ }
	614	+
	615	+ num_args = data_len / 8;
	616	+
	617	+ /* check format string for allowed specifiers */
	618	+ for (i = 0; i < fmt_size; i++) {
	619	+ /* only printable ascii for now. */
	620	+ if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
	621	+ err = -EINVAL;
	622	+ goto out;
	623	+ }
	624	+
	625	+ if (fmt[i] != '%')
	626	+ continue;
	627	+
	628	+ if (fmt[i + 1] == '%') {
	629	+ i++;
	630	+ continue;
	631	+ }
	632	+
	633	+ if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
	634	+ err = -E2BIG;
	635	+ goto out;
	636	+ }
	637	+
	638	+ if (fmt_cnt >= num_args) {
	639	+ err = -EINVAL;
	640	+ goto out;
	641	+ }
	642	+
	643	+ /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
	644	+ i++;
	645	+
	646	+ /* skip optional "[0 +-][num]" width formating field */
	647	+ while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
	648	+ fmt[i] == ' ')
	649	+ i++;
	650	+ if (fmt[i] >= '1' && fmt[i] <= '9') {
	651	+ i++;
	652	+ while (fmt[i] >= '0' && fmt[i] <= '9')
	653	+ i++;
	654	+ }
	655	+
	656	+ if (fmt[i] == 's') {
	657	+ void *unsafe_ptr;
	658	+
	659	+ /* try our best to copy */
	660	+ if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
	661	+ err = -E2BIG;
	662	+ goto out;
	663	+ }
	664	+
	665	+ unsafe_ptr = (void *)(long)args[fmt_cnt];
	666	+ err = strncpy_from_kernel_nofault(bufs->buf[memcpy_cnt],
	667	+ unsafe_ptr, MAX_SEQ_PRINTF_STR_LEN);
	668	+ if (err < 0)
	669	+ bufs->buf[memcpy_cnt][0] = '\0'; /* a failed copy prints as an empty string */
	670	+ params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
	671	+
	672	+ fmt_cnt++;
	673	+ memcpy_cnt++;
	674	+ continue;
	675	+ }
	676	+
	677	+ if (fmt[i] == 'p') {
	678	+ if (fmt[i + 1] == 0 ||
	679	+ fmt[i + 1] == 'K' ||
	680	+ fmt[i + 1] == 'x' ||
	681	+ fmt[i + 1] == 'B') {
	682	+ /* just kernel pointers */
	683	+ params[fmt_cnt] = args[fmt_cnt];
	684	+ fmt_cnt++;
	685	+ continue;
	686	+ }
	687	+
	688	+ /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
	689	+ if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
	690	+ err = -EINVAL;
	691	+ goto out;
	692	+ }
	693	+ if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
	694	+ err = -EINVAL;
	695	+ goto out;
	696	+ }
	697	+
	698	+ if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
	699	+ err = -E2BIG;
	700	+ goto out;
	701	+ }
	702	+
	703	+
	704	+ copy_size = (fmt[i + 2] == '4') ? 4 : 16;
	705	+
	706	+ err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
	707	+ (void *) (long) args[fmt_cnt],
	708	+ copy_size);
	709	+ if (err < 0)
	710	+ memset(bufs->buf[memcpy_cnt], 0, copy_size); /* a failed copy prints as an all-zero address */
	711	+ params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
	712	+
	713	+ i += 2;
	714	+ fmt_cnt++;
	715	+ memcpy_cnt++;
	716	+ continue;
	717	+ }
	718	+
	719	+ if (fmt[i] == 'l') {
	720	+ i++;
	721	+ if (fmt[i] == 'l')
	722	+ i++;
	723	+ }
	724	+
	725	+ if (fmt[i] != 'i' && fmt[i] != 'd' &&
	726	+ fmt[i] != 'u' && fmt[i] != 'x' &&
	727	+ fmt[i] != 'X') {
	728	+ err = -EINVAL;
	729	+ goto out;
	730	+ }
	731	+
	732	+ params[fmt_cnt] = args[fmt_cnt];
	733	+ fmt_cnt++;
	734	+ }
	735	+
	736	+ /* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
	737	+ * all of them to seq_printf().
	738	+ */
	739	+ seq_printf(m, fmt, params[0], params[1], params[2], params[3],
	740	+ params[4], params[5], params[6], params[7], params[8],
	741	+ params[9], params[10], params[11]);
	742	+
	743	+ err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
	744	+out:
	745	+ this_cpu_dec(bpf_seq_printf_buf_used); /* every exit path undoes the increment made on entry */
	746	+ return err;
	747	+}
	748	+
	749	+BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file) /* BTF id list for struct seq_file, referenced by the seq_* protos in this file */
	750	+
	751	+static const struct bpf_func_proto bpf_seq_printf_proto = { /* file-local descriptor for bpf_seq_printf */
	752	+ .func = bpf_seq_printf,
	753	+ .gpl_only = true,
	754	+ .ret_type = RET_INTEGER,
	755	+ .arg1_type = ARG_PTR_TO_BTF_ID, /* m */
	756	+ .arg1_btf_id = &btf_seq_file_ids[0],
	757	+ .arg2_type = ARG_PTR_TO_MEM, /* fmt */
	758	+ .arg3_type = ARG_CONST_SIZE, /* fmt_size */
	759	+ .arg4_type = ARG_PTR_TO_MEM_OR_NULL, /* data */
	760	+ .arg5_type = ARG_CONST_SIZE_OR_ZERO, /* data_len */
	761	+};
	762	+
	763	+BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len) /* write len bytes of data to m; -EOVERFLOW when seq_write() returns non-zero */
	764	+{
	765	+ return seq_write(m, data, len) ? -EOVERFLOW : 0;
	766	+}
	767	+
	768	+static const struct bpf_func_proto bpf_seq_write_proto = { /* file-local descriptor for bpf_seq_write */
	769	+ .func = bpf_seq_write,
	770	+ .gpl_only = true,
	771	+ .ret_type = RET_INTEGER,
	772	+ .arg1_type = ARG_PTR_TO_BTF_ID, /* m */
	773	+ .arg1_btf_id = &btf_seq_file_ids[0],
	774	+ .arg2_type = ARG_PTR_TO_MEM, /* data */
	775	+ .arg3_type = ARG_CONST_SIZE_OR_ZERO, /* len */
	776	+};
	777	+
	778	+BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr, /* validate ptr/flags via bpf_btf_printf_prepare(), then show the typed object on m */
	779	+ u32, btf_ptr_size, u64, flags)
	780	+{
	781	+ const struct btf *btf;
	782	+ s32 btf_id;
	783	+ int ret;
	784	+
	785	+ ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
	786	+ if (ret) /* non-zero means validation failed; pass the error through */
	787	+ return ret;
	788	+
	789	+ return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
	790	+}
	791	+
	792	+static const struct bpf_func_proto bpf_seq_printf_btf_proto = { /* file-local descriptor for bpf_seq_printf_btf */
	793	+ .func = bpf_seq_printf_btf,
	794	+ .gpl_only = true,
	795	+ .ret_type = RET_INTEGER,
	796	+ .arg1_type = ARG_PTR_TO_BTF_ID, /* m */
	797	+ .arg1_btf_id = &btf_seq_file_ids[0],
	798	+ .arg2_type = ARG_PTR_TO_MEM, /* ptr */
	799	+ .arg3_type = ARG_CONST_SIZE_OR_ZERO, /* btf_ptr_size */
	800	+ .arg4_type = ARG_ANYTHING, /* flags */
	801	+};
294	802
295	803	static __always_inline int
296	804	get_map_perf_counter(struct bpf_map *map, u64 flags,
..	..	@@ -392,8 +900,7 @@
392	900	if (unlikely(event->oncpu != cpu))
393	901	return -EOPNOTSUPP;
394	902
395		- perf_event_output(event, sd, regs);
396		- return 0;
	903	+ return perf_event_output(event, sd, regs);
397	904	}
398	905
399	906	/*
..	..	@@ -453,14 +960,16 @@
453	960	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
454	961	};
455	962
456		-static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
457		-static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
	963	+static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
	964	+struct bpf_nested_pt_regs {
	965	+ struct pt_regs regs[3];
	966	+};
	967	+static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
	968	+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
458	969
459	970	u64 bpf_event_output(struct bpf_map map, u64 flags, void meta, u64 meta_size,
460	971	void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
461	972	{
462		- struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
463		- struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
464	973	struct perf_raw_frag frag = {
465	974	.copy = ctx_copy,
466	975	.size = ctx_size,
..	..	@@ -475,12 +984,30 @@
475	984	.data = meta,
476	985	},
477	986	};
	987	+ struct perf_sample_data *sd;
	988	+ struct pt_regs *regs;
	989	+ int nest_level;
	990	+ u64 ret;
	991	+
	992	+ preempt_disable();
	993	+ nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
	994	+
	995	+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
	996	+ ret = -EBUSY;
	997	+ goto out;
	998	+ }
	999	+ sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
	1000	+ regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);
478	1001
479	1002	perf_fetch_caller_regs(regs);
480	1003	perf_sample_data_init(sd, 0, 0);
481	1004	sd->raw = &raw;
482	1005
483		- return __bpf_perf_event_output(regs, map, flags, sd);
	1006	+ ret = __bpf_perf_event_output(regs, map, flags, sd);
	1007	+out:
	1008	+ this_cpu_dec(bpf_event_output_nest_level);
	1009	+ preempt_enable();
	1010	+ return ret;
484	1011	}
485	1012
486	1013	BPF_CALL_0(bpf_get_current_task)
..	..	@@ -488,7 +1015,7 @@
488	1015	return (long) current;
489	1016	}
490	1017
491		-static const struct bpf_func_proto bpf_get_current_task_proto = {
	1018	+const struct bpf_func_proto bpf_get_current_task_proto = {
492	1019	.func = bpf_get_current_task,
493	1020	.gpl_only = true,
494	1021	.ret_type = RET_INTEGER,
..	..	@@ -517,38 +1044,216 @@
517	1044	.arg2_type = ARG_ANYTHING,
518	1045	};
519	1046
520		-BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
521		- const void *, unsafe_ptr)
522		-{
523		- int ret;
524		-
525		- /*
526		- * The strncpy_from_unsafe() call will likely not fill the entire
527		- * buffer, but that's okay in this circumstance as we're probing
528		- * arbitrary memory anyway similar to bpf_probe_read() and might
529		- * as well probe the stack. Thus, memory is explicitly cleared
530		- * only in error case, so that improper users ignoring return
531		- * code altogether don't copy garbage; otherwise length of string
532		- * is returned that can be used for bpf_perf_event_output() et al.
533		- */
534		- ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
535		- if (unlikely(ret < 0))
536		- memset(dst, 0, size);
537		-
538		- return ret;
539		-}
540		-
541		-static const struct bpf_func_proto bpf_probe_read_str_proto = {
542		- .func = bpf_probe_read_str,
543		- .gpl_only = true,
544		- .ret_type = RET_INTEGER,
545		- .arg1_type = ARG_PTR_TO_UNINIT_MEM,
546		- .arg2_type = ARG_CONST_SIZE_OR_ZERO,
547		- .arg3_type = ARG_ANYTHING,
	1047	+struct send_signal_irq_work { /* deferred signal request filled in by bpf_send_signal_common() */
	1048	+ struct irq_work irq_work; /* embedded work item; do_bpf_send_signal() recovers the container from it */
	1049	+ struct task_struct *task; /* target task, stored with a reference taken */
	1050	+ u32 sig; /* signal number to deliver */
	1051	+ enum pid_type type; /* pid_type handed to group_send_sig_info() */
548	1052	};
549	1053
550		-static const struct bpf_func_proto *
551		-tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
	1054	+static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work); /* one request slot per CPU */
	1055	+
	1056	+static void do_bpf_send_signal(struct irq_work *entry) /* delivers the signal recorded in the enclosing send_signal_irq_work */
	1057	+{
	1058	+ struct send_signal_irq_work *work;
	1059	+
	1060	+ work = container_of(entry, struct send_signal_irq_work, irq_work);
	1061	+ group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type); /* result is not checked here */
	1062	+ put_task_struct(work->task); /* drops the reference taken when the request was queued */
	1063	+}
	1064	+
	1065	+static int bpf_send_signal_common(u32 sig, enum pid_type type) /* send sig to current: directly, or via per-CPU irq_work when IRQs are disabled */
	1066	+{
	1067	+ struct send_signal_irq_work *work = NULL;
	1068	+
	1069	+ /* Similar to bpf_probe_write_user, task needs to be
	1070	+ * in a sound condition and kernel memory access be
	1071	+ * permitted in order to send signal to the current
	1072	+ * task.
	1073	+ */
	1074	+ if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
	1075	+ return -EPERM;
	1076	+ if (unlikely(uaccess_kernel()))
	1077	+ return -EPERM;
	1078	+ if (unlikely(!nmi_uaccess_okay()))
	1079	+ return -EPERM;
	1080	+ /* Task should not be pid=1 to avoid kernel panic. */
	1081	+ if (unlikely(is_global_init(current)))
	1082	+ return -EPERM;
	1083	+
	1084	+ if (irqs_disabled()) {
	1085	+ /* Do an early check on signal validity. Otherwise,
	1086	+ * the error is lost in deferred irq_work.
	1087	+ */
	1088	+ if (unlikely(!valid_signal(sig)))
	1089	+ return -EINVAL;
	1090	+
	1091	+ work = this_cpu_ptr(&send_signal_work);
	1092	+ if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) /* this CPU's slot still holds an earlier request */
	1093	+ return -EBUSY;
	1094	+
	1095	+ /* Add the current task, which is the target of sending signal,
	1096	+ * to the irq_work. The current task may change when queued
	1097	+ * irq works get executed.
	1098	+ */
	1099	+ work->task = get_task_struct(current);
	1100	+ work->sig = sig;
	1101	+ work->type = type;
	1102	+ irq_work_queue(&work->irq_work);
	1103	+ return 0; /* queued; the delivery result is not reported to the caller */
	1104	+ }
	1105	+
	1106	+ return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
	1107	+}
	1108	+
	1109	+BPF_CALL_1(bpf_send_signal, u32, sig) /* helper entry point: bpf_send_signal_common() with PIDTYPE_TGID */
	1110	+{
	1111	+ return bpf_send_signal_common(sig, PIDTYPE_TGID);
	1112	+}
	1113	+
	1114	+static const struct bpf_func_proto bpf_send_signal_proto = { /* file-local descriptor for bpf_send_signal */
	1115	+ .func = bpf_send_signal,
	1116	+ .gpl_only = false,
	1117	+ .ret_type = RET_INTEGER,
	1118	+ .arg1_type = ARG_ANYTHING, /* sig */
	1119	+};
	1120	+
	1121	+BPF_CALL_1(bpf_send_signal_thread, u32, sig) /* helper entry point: bpf_send_signal_common() with PIDTYPE_PID */
	1122	+{
	1123	+ return bpf_send_signal_common(sig, PIDTYPE_PID);
	1124	+}
	1125	+
	1126	+static const struct bpf_func_proto bpf_send_signal_thread_proto = { /* file-local descriptor for bpf_send_signal_thread */
	1127	+ .func = bpf_send_signal_thread,
	1128	+ .gpl_only = false,
	1129	+ .ret_type = RET_INTEGER,
	1130	+ .arg1_type = ARG_ANYTHING, /* sig */
	1131	+};
	1132	+
	1133	+BPF_CALL_3(bpf_d_path, struct path , path, char , buf, u32, sz)
	1134	+{
	1135	+ struct path copy;
	1136	+ long len;
	1137	+ char *p;
	1138	+
	1139	+ if (!sz)
	1140	+ return 0;
	1141	+
	1142	+ /*
	1143	+ * The path pointer is verified as trusted and safe to use,
	1144	+ * but let's double check it's valid anyway to workaround
	1145	+ * potentially broken verifier.
	1146	+ */
	1147	+ len = copy_from_kernel_nofault(&copy, path, sizeof(*path));
	1148	+ if (len < 0)
	1149	+ return len;
	1150	+
	1151	+ p = d_path(&copy, buf, sz);
	1152	+ if (IS_ERR(p)) {
	1153	+ len = PTR_ERR(p);
	1154	+ } else {
	1155	+ len = buf + sz - p;
	1156	+ memmove(buf, p, len);
	1157	+ }
	1158	+
	1159	+ return len;
	1160	+}
	1161	+
	1162	+BTF_SET_START(btf_allowlist_d_path) /* set of function BTF ids consulted by bpf_d_path_allowed() */
	1163	+#ifdef CONFIG_SECURITY
	1164	+BTF_ID(func, security_file_permission)
	1165	+BTF_ID(func, security_inode_getattr)
	1166	+BTF_ID(func, security_file_open)
	1167	+#endif
	1168	+#ifdef CONFIG_SECURITY_PATH
	1169	+BTF_ID(func, security_path_truncate)
	1170	+#endif
	1171	+BTF_ID(func, vfs_truncate) /* the remaining entries are listed regardless of the CONFIG_SECURITY* options */
	1172	+BTF_ID(func, vfs_fallocate)
	1173	+BTF_ID(func, dentry_open)
	1174	+BTF_ID(func, vfs_getattr)
	1175	+BTF_ID(func, filp_close)
	1176	+BTF_SET_END(btf_allowlist_d_path)
	1177	+
	1178	+static bool bpf_d_path_allowed(const struct bpf_prog *prog) /* true when the program's attach_btf_id is in btf_allowlist_d_path */
	1179	+{
	1180	+ return btf_id_set_contains(&btf_allowlist_d_path, prog->aux->attach_btf_id);
	1181	+}
	1182	+
	1183	+BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path) /* BTF id list for struct path, used as arg1_btf_id below */
	1184	+
	1185	+static const struct bpf_func_proto bpf_d_path_proto = { /* file-local descriptor for bpf_d_path */
	1186	+ .func = bpf_d_path,
	1187	+ .gpl_only = false,
	1188	+ .ret_type = RET_INTEGER,
	1189	+ .arg1_type = ARG_PTR_TO_BTF_ID, /* path */
	1190	+ .arg1_btf_id = &bpf_d_path_btf_ids[0],
	1191	+ .arg2_type = ARG_PTR_TO_MEM, /* buf */
	1192	+ .arg3_type = ARG_CONST_SIZE_OR_ZERO, /* sz */
	1193	+ .allowed = bpf_d_path_allowed, /* per-program gate defined in this file */
	1194	+};
	1195	+
	1196	+#define BTF_F_ALL (BTF_F_COMPACT | BTF_F_NONAME | \
	1197	+ BTF_F_PTR_RAW | BTF_F_ZERO) /* every flag bit bpf_btf_printf_prepare() accepts */
	1198	+
	1199	+static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size, /* validate flags and ptr, then fill *btf and *btf_id; 0 on success, negative errno otherwise */
	1200	+ u64 flags, const struct btf **btf,
	1201	+ s32 *btf_id)
	1202	+{
	1203	+ const struct btf_type *t;
	1204	+
	1205	+ if (unlikely(flags & ~(BTF_F_ALL))) /* reject any flag bit outside BTF_F_ALL */
	1206	+ return -EINVAL;
	1207	+
	1208	+ if (btf_ptr_size != sizeof(struct btf_ptr)) /* the caller's struct size must match exactly */
	1209	+ return -EINVAL;
	1210	+
	1211	+ *btf = bpf_get_btf_vmlinux();
	1212	+
	1213	+ if (IS_ERR_OR_NULL(*btf))
	1214	+ return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL; /* an error pointer yields its errno, NULL yields -EINVAL */
	1215	+
	1216	+ if (ptr->type_id > 0)
	1217	+ *btf_id = ptr->type_id;
	1218	+ else
	1219	+ return -EINVAL;
	1220	+
	1221	+ if (*btf_id > 0)
	1222	+ t = btf_type_by_id(*btf, *btf_id);
	1223	+ if (*btf_id <= 0 || !t) /* t is read only when the id was positive and the lookup above ran */
	1224	+ return -ENOENT;
	1225	+
	1226	+ return 0;
	1227	+}
	1228	+
	1229	+BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr, /* validate ptr/flags via bpf_btf_printf_prepare(), then format the typed object into str */
	1230	+ u32, btf_ptr_size, u64, flags)
	1231	+{
	1232	+ const struct btf *btf;
	1233	+ s32 btf_id;
	1234	+ int ret;
	1235	+
	1236	+ ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
	1237	+ if (ret) /* non-zero means validation failed; pass the error through */
	1238	+ return ret;
	1239	+
	1240	+ return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
	1241	+ flags);
	1242	+}
	1243	+
	1244	+const struct bpf_func_proto bpf_snprintf_btf_proto = { /* non-static descriptor for bpf_snprintf_btf */
	1245	+ .func = bpf_snprintf_btf,
	1246	+ .gpl_only = false,
	1247	+ .ret_type = RET_INTEGER,
	1248	+ .arg1_type = ARG_PTR_TO_MEM, /* str */
	1249	+ .arg2_type = ARG_CONST_SIZE, /* str_size */
	1250	+ .arg3_type = ARG_PTR_TO_MEM, /* ptr */
	1251	+ .arg4_type = ARG_CONST_SIZE, /* btf_ptr_size */
	1252	+ .arg5_type = ARG_ANYTHING, /* flags */
	1253	+};
	1254	+
	1255	+const struct bpf_func_proto *
	1256	+bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
552	1257	{
553	1258	switch (func_id) {
554	1259	case BPF_FUNC_map_lookup_elem:
..	..	@@ -557,8 +1262,12 @@
557	1262	return &bpf_map_update_elem_proto;
558	1263	case BPF_FUNC_map_delete_elem:
559	1264	return &bpf_map_delete_elem_proto;
560		- case BPF_FUNC_probe_read:
561		- return &bpf_probe_read_proto;
	1265	+ case BPF_FUNC_map_push_elem:
	1266	+ return &bpf_map_push_elem_proto;
	1267	+ case BPF_FUNC_map_pop_elem:
	1268	+ return &bpf_map_pop_elem_proto;
	1269	+ case BPF_FUNC_map_peek_elem:
	1270	+ return &bpf_map_peek_elem_proto;
562	1271	case BPF_FUNC_ktime_get_ns:
563	1272	return &bpf_ktime_get_ns_proto;
564	1273	case BPF_FUNC_ktime_get_boot_ns:
..	..	@@ -581,18 +1290,65 @@
581	1290	return &bpf_get_numa_node_id_proto;
582	1291	case BPF_FUNC_perf_event_read:
583	1292	return &bpf_perf_event_read_proto;
584		- case BPF_FUNC_probe_write_user:
585		- return bpf_get_probe_write_proto();
586	1293	case BPF_FUNC_current_task_under_cgroup:
587	1294	return &bpf_current_task_under_cgroup_proto;
588	1295	case BPF_FUNC_get_prandom_u32:
589	1296	return &bpf_get_prandom_u32_proto;
	1297	+ case BPF_FUNC_probe_write_user:
	1298	+ return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
	1299	+ NULL : bpf_get_probe_write_proto();
	1300	+ case BPF_FUNC_probe_read_user:
	1301	+ return &bpf_probe_read_user_proto;
	1302	+ case BPF_FUNC_probe_read_kernel:
	1303	+ return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
	1304	+ NULL : &bpf_probe_read_kernel_proto;
	1305	+ case BPF_FUNC_probe_read_user_str:
	1306	+ return &bpf_probe_read_user_str_proto;
	1307	+ case BPF_FUNC_probe_read_kernel_str:
	1308	+ return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
	1309	+ NULL : &bpf_probe_read_kernel_str_proto;
	1310	+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	1311	+ case BPF_FUNC_probe_read:
	1312	+ return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
	1313	+ NULL : &bpf_probe_read_compat_proto;
590	1314	case BPF_FUNC_probe_read_str:
591		- return &bpf_probe_read_str_proto;
	1315	+ return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
	1316	+ NULL : &bpf_probe_read_compat_str_proto;
	1317	+#endif
592	1318	#ifdef CONFIG_CGROUPS
593	1319	case BPF_FUNC_get_current_cgroup_id:
594	1320	return &bpf_get_current_cgroup_id_proto;
595	1321	#endif
	1322	+ case BPF_FUNC_send_signal:
	1323	+ return &bpf_send_signal_proto;
	1324	+ case BPF_FUNC_send_signal_thread:
	1325	+ return &bpf_send_signal_thread_proto;
	1326	+ case BPF_FUNC_perf_event_read_value:
	1327	+ return &bpf_perf_event_read_value_proto;
	1328	+ case BPF_FUNC_get_ns_current_pid_tgid:
	1329	+ return &bpf_get_ns_current_pid_tgid_proto;
	1330	+ case BPF_FUNC_ringbuf_output:
	1331	+ return &bpf_ringbuf_output_proto;
	1332	+ case BPF_FUNC_ringbuf_reserve:
	1333	+ return &bpf_ringbuf_reserve_proto;
	1334	+ case BPF_FUNC_ringbuf_submit:
	1335	+ return &bpf_ringbuf_submit_proto;
	1336	+ case BPF_FUNC_ringbuf_discard:
	1337	+ return &bpf_ringbuf_discard_proto;
	1338	+ case BPF_FUNC_ringbuf_query:
	1339	+ return &bpf_ringbuf_query_proto;
	1340	+ case BPF_FUNC_jiffies64:
	1341	+ return &bpf_jiffies64_proto;
	1342	+ case BPF_FUNC_get_task_stack:
	1343	+ return &bpf_get_task_stack_proto;
	1344	+ case BPF_FUNC_copy_from_user:
	1345	+ return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
	1346	+ case BPF_FUNC_snprintf_btf:
	1347	+ return &bpf_snprintf_btf_proto;
	1348	+ case BPF_FUNC_per_cpu_ptr:
	1349	+ return &bpf_per_cpu_ptr_proto;
	1350	+ case BPF_FUNC_this_cpu_ptr:
	1351	+ return &bpf_this_cpu_ptr_proto;
596	1352	default:
597	1353	return NULL;
598	1354	}
..	..	@@ -608,14 +1364,12 @@
608	1364	return &bpf_get_stackid_proto;
609	1365	case BPF_FUNC_get_stack:
610	1366	return &bpf_get_stack_proto;
611		- case BPF_FUNC_perf_event_read_value:
612		- return &bpf_perf_event_read_value_proto;
613	1367	#ifdef CONFIG_BPF_KPROBE_OVERRIDE
614	1368	case BPF_FUNC_override_return:
615	1369	return &bpf_override_return_proto;
616	1370	#endif
617	1371	default:
618		- return tracing_func_proto(func_id, prog);
	1372	+ return bpf_tracing_func_proto(func_id, prog);
619	1373	}
620	1374	}
621	1375
..	..	@@ -725,7 +1479,7 @@
725	1479	case BPF_FUNC_get_stack:
726	1480	return &bpf_get_stack_proto_tp;
727	1481	default:
728		- return tracing_func_proto(func_id, prog);
	1482	+ return bpf_tracing_func_proto(func_id, prog);
729	1483	}
730	1484	}
731	1485
..	..	@@ -778,6 +1532,41 @@
778	1532	.arg3_type = ARG_CONST_SIZE,
779	1533	};
780	1534
	1535	+BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
	1536	+ void *, buf, u32, size, u64, flags)
	1537	+{
	1538	+ static const u32 br_entry_size = sizeof(struct perf_branch_entry);
	1539	+ struct perf_branch_stack *br_stack = ctx->data->br_stack;
	1540	+ u32 to_copy;
	1541	+
	1542	+ if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
	1543	+ return -EINVAL;
	1544	+
	1545	+ if (unlikely(!br_stack))
	1546	+ return -ENOENT;
	1547	+
	1548	+ if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
	1549	+ return br_stack->nr * br_entry_size;
	1550	+
	1551	+ if (!buf || (size % br_entry_size != 0))
	1552	+ return -EINVAL;
	1553	+
	1554	+ to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
	1555	+ memcpy(buf, br_stack->entries, to_copy);
	1556	+
	1557	+ return to_copy;
	1558	+}
	1559	+
	1560	+static const struct bpf_func_proto bpf_read_branch_records_proto = {
	1561	+ .func = bpf_read_branch_records,
	1562	+ .gpl_only = true,
	1563	+ .ret_type = RET_INTEGER,
	1564	+ .arg1_type = ARG_PTR_TO_CTX,
	1565	+ .arg2_type = ARG_PTR_TO_MEM_OR_NULL,
	1566	+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
	1567	+ .arg4_type = ARG_ANYTHING,
	1568	+};
	1569	+
781	1570	static const struct bpf_func_proto *
782	1571	pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
783	1572	{
..	..	@@ -785,13 +1574,15 @@
785	1574	case BPF_FUNC_perf_event_output:
786	1575	return &bpf_perf_event_output_proto_tp;
787	1576	case BPF_FUNC_get_stackid:
788		- return &bpf_get_stackid_proto_tp;
	1577	+ return &bpf_get_stackid_proto_pe;
789	1578	case BPF_FUNC_get_stack:
790		- return &bpf_get_stack_proto_tp;
	1579	+ return &bpf_get_stack_proto_pe;
791	1580	case BPF_FUNC_perf_prog_read_value:
792	1581	return &bpf_perf_prog_read_value_proto;
	1582	+ case BPF_FUNC_read_branch_records:
	1583	+ return &bpf_read_branch_records_proto;
793	1584	default:
794		- return tracing_func_proto(func_id, prog);
	1585	+ return bpf_tracing_func_proto(func_id, prog);
795	1586	}
796	1587	}
797	1588
..	..	@@ -852,6 +1643,9 @@
852	1643	.arg4_type = ARG_PTR_TO_MEM,
853	1644	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
854	1645	};
	1646	+
	1647	+extern const struct bpf_func_proto bpf_skb_output_proto;
	1648	+extern const struct bpf_func_proto bpf_xdp_output_proto;
855	1649
856	1650	BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
857	1651	struct bpf_map *, map, u64, flags)
..	..	@@ -916,7 +1710,46 @@
916	1710	case BPF_FUNC_get_stack:
917	1711	return &bpf_get_stack_proto_raw_tp;
918	1712	default:
919		- return tracing_func_proto(func_id, prog);
	1713	+ return bpf_tracing_func_proto(func_id, prog);
	1714	+ }
	1715	+}
	1716	+
	1717	+const struct bpf_func_proto *
	1718	+tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
	1719	+{
	1720	+ switch (func_id) {
	1721	+#ifdef CONFIG_NET
	1722	+ case BPF_FUNC_skb_output:
	1723	+ return &bpf_skb_output_proto;
	1724	+ case BPF_FUNC_xdp_output:
	1725	+ return &bpf_xdp_output_proto;
	1726	+ case BPF_FUNC_skc_to_tcp6_sock:
	1727	+ return &bpf_skc_to_tcp6_sock_proto;
	1728	+ case BPF_FUNC_skc_to_tcp_sock:
	1729	+ return &bpf_skc_to_tcp_sock_proto;
	1730	+ case BPF_FUNC_skc_to_tcp_timewait_sock:
	1731	+ return &bpf_skc_to_tcp_timewait_sock_proto;
	1732	+ case BPF_FUNC_skc_to_tcp_request_sock:
	1733	+ return &bpf_skc_to_tcp_request_sock_proto;
	1734	+ case BPF_FUNC_skc_to_udp6_sock:
	1735	+ return &bpf_skc_to_udp6_sock_proto;
	1736	+#endif
	1737	+ case BPF_FUNC_seq_printf:
	1738	+ return prog->expected_attach_type == BPF_TRACE_ITER ?
	1739	+ &bpf_seq_printf_proto :
	1740	+ NULL;
	1741	+ case BPF_FUNC_seq_write:
	1742	+ return prog->expected_attach_type == BPF_TRACE_ITER ?
	1743	+ &bpf_seq_write_proto :
	1744	+ NULL;
	1745	+ case BPF_FUNC_seq_printf_btf:
	1746	+ return prog->expected_attach_type == BPF_TRACE_ITER ?
	1747	+ &bpf_seq_printf_btf_proto :
	1748	+ NULL;
	1749	+ case BPF_FUNC_d_path:
	1750	+ return &bpf_d_path_proto;
	1751	+ default:
	1752	+ return raw_tp_prog_func_proto(func_id, prog);
920	1753	}
921	1754	}
922	1755
..	..	@@ -925,8 +1758,7 @@
925	1758	const struct bpf_prog *prog,
926	1759	struct bpf_insn_access_aux *info)
927	1760	{
928		- /* largest tracepoint in the kernel has 12 args */
929		- if (off < 0 || off >= sizeof(__u64) * 12)
	1761	+ if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
930	1762	return false;
931	1763	if (type != BPF_READ)
932	1764	return false;
..	..	@@ -935,12 +1767,66 @@
935	1767	return true;
936	1768	}
937	1769
	1770	+static bool tracing_prog_is_valid_access(int off, int size,
	1771	+ enum bpf_access_type type,
	1772	+ const struct bpf_prog *prog,
	1773	+ struct bpf_insn_access_aux *info)
	1774	+{
	1775	+ if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
	1776	+ return false;
	1777	+ if (type != BPF_READ)
	1778	+ return false;
	1779	+ if (off % size != 0)
	1780	+ return false;
	1781	+ return btf_ctx_access(off, size, type, prog, info);
	1782	+}
	1783	+
	1784	+int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
	1785	+ const union bpf_attr *kattr,
	1786	+ union bpf_attr __user *uattr)
	1787	+{
	1788	+ return -ENOTSUPP;
	1789	+}
	1790	+
938	1791	const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
939	1792	.get_func_proto = raw_tp_prog_func_proto,
940	1793	.is_valid_access = raw_tp_prog_is_valid_access,
941	1794	};
942	1795
943	1796	const struct bpf_prog_ops raw_tracepoint_prog_ops = {
	1797	+#ifdef CONFIG_NET
	1798	+ .test_run = bpf_prog_test_run_raw_tp,
	1799	+#endif
	1800	+};
	1801	+
	1802	+const struct bpf_verifier_ops tracing_verifier_ops = {
	1803	+ .get_func_proto = tracing_prog_func_proto,
	1804	+ .is_valid_access = tracing_prog_is_valid_access,
	1805	+};
	1806	+
	1807	+const struct bpf_prog_ops tracing_prog_ops = {
	1808	+ .test_run = bpf_prog_test_run_tracing,
	1809	+};
	1810	+
	1811	+static bool raw_tp_writable_prog_is_valid_access(int off, int size,
	1812	+ enum bpf_access_type type,
	1813	+ const struct bpf_prog *prog,
	1814	+ struct bpf_insn_access_aux *info)
	1815	+{
	1816	+ if (off == 0) {
	1817	+ if (size != sizeof(u64) || type != BPF_READ)
	1818	+ return false;
	1819	+ info->reg_type = PTR_TO_TP_BUFFER;
	1820	+ }
	1821	+ return raw_tp_prog_is_valid_access(off, size, type, prog, info);
	1822	+}
	1823	+
	1824	+const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
	1825	+ .get_func_proto = raw_tp_prog_func_proto,
	1826	+ .is_valid_access = raw_tp_writable_prog_is_valid_access,
	1827	+};
	1828	+
	1829	+const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
944	1830	};
945	1831
946	1832	static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
..	..	@@ -1033,7 +1919,7 @@
1033	1919	int perf_event_attach_bpf_prog(struct perf_event *event,
1034	1920	struct bpf_prog *prog)
1035	1921	{
1036		- struct bpf_prog_array __rcu *old_array;
	1922	+ struct bpf_prog_array *old_array;
1037	1923	struct bpf_prog_array *new_array;
1038	1924	int ret = -EEXIST;
1039	1925
..	..	@@ -1051,7 +1937,7 @@
1051	1937	if (event->prog)
1052	1938	goto unlock;
1053	1939
1054		- old_array = event->tp_event->prog_array;
	1940	+ old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
1055	1941	if (old_array &&
1056	1942	bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
1057	1943	ret = -E2BIG;
..	..	@@ -1074,7 +1960,7 @@
1074	1960
1075	1961	void perf_event_detach_bpf_prog(struct perf_event *event)
1076	1962	{
1077		- struct bpf_prog_array __rcu *old_array;
	1963	+ struct bpf_prog_array *old_array;
1078	1964	struct bpf_prog_array *new_array;
1079	1965	int ret;
1080	1966
..	..	@@ -1083,7 +1969,7 @@
1083	1969	if (!event->prog)
1084	1970	goto unlock;
1085	1971
1086		- old_array = event->tp_event->prog_array;
	1972	+ old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
1087	1973	ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
1088	1974	if (ret == -ENOENT)
1089	1975	goto unlock;
..	..	@@ -1105,10 +1991,11 @@
1105	1991	{
1106	1992	struct perf_event_query_bpf __user *uquery = info;
1107	1993	struct perf_event_query_bpf query = {};
	1994	+ struct bpf_prog_array *progs;
1108	1995	u32 *ids, prog_cnt, ids_len;
1109	1996	int ret;
1110	1997
1111		- if (!capable(CAP_SYS_ADMIN))
	1998	+ if (!perfmon_capable())
1112	1999	return -EPERM;
1113	2000	if (event->attr.type != PERF_TYPE_TRACEPOINT)
1114	2001	return -EINVAL;
..	..	@@ -1129,10 +2016,8 @@
1129	2016	*/
1130	2017
1131	2018	mutex_lock(&bpf_event_mutex);
1132		- ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
1133		- ids,
1134		- ids_len,
1135		- &prog_cnt);
	2019	+ progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
	2020	+ ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
1136	2021	mutex_unlock(&bpf_event_mutex);
1137	2022
1138	2023	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
..	..	@@ -1146,7 +2031,7 @@
1146	2031	extern struct bpf_raw_event_map __start__bpf_raw_tp[];
1147	2032	extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
1148	2033
1149		-struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
	2034	+struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
1150	2035	{
1151	2036	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
1152	2037
..	..	@@ -1154,16 +2039,26 @@
1154	2039	if (!strcmp(btp->tp->name, name))
1155	2040	return btp;
1156	2041	}
1157		- return NULL;
	2042	+
	2043	+ return bpf_get_raw_tracepoint_module(name);
	2044	+}
	2045	+
	2046	+void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
	2047	+{
	2048	+ struct module *mod;
	2049	+
	2050	+ preempt_disable();
	2051	+ mod = __module_address((unsigned long)btp);
	2052	+ module_put(mod);
	2053	+ preempt_enable();
1158	2054	}
1159	2055
1160	2056	static __always_inline
1161	2057	void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
1162	2058	{
	2059	+ cant_sleep();
1163	2060	rcu_read_lock();
1164		- preempt_disable();
1165	2061	(void) BPF_PROG_RUN(prog, args);
1166		- preempt_enable();
1167	2062	rcu_read_unlock();
1168	2063	}
1169	2064
..	..	@@ -1223,6 +2118,9 @@
1223	2118	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
1224	2119	return -EINVAL;
1225	2120
	2121	+ if (prog->aux->max_tp_access > btp->writable_size)
	2122	+ return -EINVAL;
	2123	+
1226	2124	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
1227	2125	prog);
1228	2126	}
..	..	@@ -1276,10 +2174,78 @@
1276	2174	#ifdef CONFIG_UPROBE_EVENTS
1277	2175	if (flags & TRACE_EVENT_FL_UPROBE)
1278	2176	err = bpf_get_uprobe_info(event, fd_type, buf,
1279		- probe_offset,
	2177	+ probe_offset, probe_addr,
1280	2178	event->attr.type == PERF_TYPE_TRACEPOINT);
1281	2179	#endif
1282	2180	}
1283	2181
1284	2182	return err;
1285	2183	}
	2184	+
	2185	+static int __init send_signal_irq_work_init(void)
	2186	+{
	2187	+ int cpu;
	2188	+ struct send_signal_irq_work *work;
	2189	+
	2190	+ for_each_possible_cpu(cpu) {
	2191	+ work = per_cpu_ptr(&send_signal_work, cpu);
	2192	+ init_irq_work(&work->irq_work, do_bpf_send_signal);
	2193	+ }
	2194	+ return 0;
	2195	+}
	2196	+
	2197	+subsys_initcall(send_signal_irq_work_init);
	2198	+
	2199	+#ifdef CONFIG_MODULES
	2200	+static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
	2201	+ void *module)
	2202	+{
	2203	+ struct bpf_trace_module *btm, *tmp;
	2204	+ struct module *mod = module;
	2205	+ int ret = 0;
	2206	+
	2207	+ if (mod->num_bpf_raw_events == 0 ||
	2208	+ (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
	2209	+ goto out;
	2210	+
	2211	+ mutex_lock(&bpf_module_mutex);
	2212	+
	2213	+ switch (op) {
	2214	+ case MODULE_STATE_COMING:
	2215	+ btm = kzalloc(sizeof(*btm), GFP_KERNEL);
	2216	+ if (btm) {
	2217	+ btm->module = module;
	2218	+ list_add(&btm->list, &bpf_trace_modules);
	2219	+ } else {
	2220	+ ret = -ENOMEM;
	2221	+ }
	2222	+ break;
	2223	+ case MODULE_STATE_GOING:
	2224	+ list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
	2225	+ if (btm->module == module) {
	2226	+ list_del(&btm->list);
	2227	+ kfree(btm);
	2228	+ break;
	2229	+ }
	2230	+ }
	2231	+ break;
	2232	+ }
	2233	+
	2234	+ mutex_unlock(&bpf_module_mutex);
	2235	+
	2236	+out:
	2237	+ return notifier_from_errno(ret);
	2238	+}
	2239	+
	2240	+static struct notifier_block bpf_module_nb = {
	2241	+ .notifier_call = bpf_event_notify,
	2242	+};
	2243	+
	2244	+static int __init bpf_event_init(void)
	2245	+{
	2246	+ register_module_notifier(&bpf_module_nb);
	2247	+ return 0;
	2248	+}
	2249	+
	2250	+fs_initcall(bpf_event_init);
	2251	+#endif /* CONFIG_MODULES */