hc
2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/kernel/bpf/syscall.c
....@@ -1,17 +1,10 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2
- *
3
- * This program is free software; you can redistribute it and/or
4
- * modify it under the terms of version 2 of the GNU General Public
5
- * License as published by the Free Software Foundation.
6
- *
7
- * This program is distributed in the hope that it will be useful, but
8
- * WITHOUT ANY WARRANTY; without even the implied warranty of
9
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10
- * General Public License for more details.
113 */
124 #include <linux/bpf.h>
135 #include <linux/bpf_trace.h>
146 #include <linux/bpf_lirc.h>
7
+#include <linux/bpf_verifier.h>
158 #include <linux/btf.h>
169 #include <linux/syscalls.h>
1710 #include <linux/slab.h>
....@@ -30,15 +23,24 @@
3023 #include <linux/cred.h>
3124 #include <linux/timekeeping.h>
3225 #include <linux/ctype.h>
33
-#include <linux/btf.h>
3426 #include <linux/nospec.h>
27
+#include <linux/audit.h>
28
+#include <uapi/linux/btf.h>
29
+#include <linux/pgtable.h>
30
+#include <linux/bpf_lsm.h>
31
+#include <linux/poll.h>
32
+#include <linux/bpf-netns.h>
33
+#include <linux/rcupdate_trace.h>
3534
36
-#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
37
- (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
38
- (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
39
- (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
35
+#include <trace/hooks/syscall_check.h>
36
+
37
+#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
38
+ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
39
+ (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
40
+#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
4041 #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
41
-#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
42
+#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
43
+ IS_FD_HASH(map))
4244
4345 #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
4446
....@@ -47,17 +49,21 @@
4749 static DEFINE_SPINLOCK(prog_idr_lock);
4850 static DEFINE_IDR(map_idr);
4951 static DEFINE_SPINLOCK(map_idr_lock);
52
+static DEFINE_IDR(link_idr);
53
+static DEFINE_SPINLOCK(link_idr_lock);
5054
5155 int sysctl_unprivileged_bpf_disabled __read_mostly =
5256 IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
5357
5458 static const struct bpf_map_ops * const bpf_map_types[] = {
55
-#define BPF_PROG_TYPE(_id, _ops)
59
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
5660 #define BPF_MAP_TYPE(_id, _ops) \
5761 [_id] = &_ops,
62
+#define BPF_LINK_TYPE(_id, _name)
5863 #include <linux/bpf_types.h>
5964 #undef BPF_PROG_TYPE
6065 #undef BPF_MAP_TYPE
66
+#undef BPF_LINK_TYPE
6167 };
6268
6369 /*
....@@ -73,35 +79,23 @@
7379 size_t expected_size,
7480 size_t actual_size)
7581 {
76
- unsigned char __user *addr;
77
- unsigned char __user *end;
78
- unsigned char val;
79
- int err;
82
+ unsigned char __user *addr = uaddr + expected_size;
83
+ int res;
8084
8185 if (unlikely(actual_size > PAGE_SIZE)) /* silly large */
8286 return -E2BIG;
8387
84
- if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
85
- return -EFAULT;
86
-
8788 if (actual_size <= expected_size)
8889 return 0;
8990
90
- addr = uaddr + expected_size;
91
- end = uaddr + actual_size;
92
-
93
- for (; addr < end; addr++) {
94
- err = get_user(val, addr);
95
- if (err)
96
- return err;
97
- if (val)
98
- return -E2BIG;
99
- }
100
-
101
- return 0;
91
+ res = check_zeroed_user(addr, actual_size - expected_size);
92
+ if (res < 0)
93
+ return res;
94
+ return res ? 0 : -E2BIG;
10295 }
10396
10497 const struct bpf_map_ops bpf_map_offload_ops = {
98
+ .map_meta_equal = bpf_map_meta_equal,
10599 .map_alloc = bpf_map_offload_map_alloc,
106100 .map_free = bpf_map_offload_map_free,
107101 .map_check_btf = map_check_no_btf,
....@@ -136,28 +130,223 @@
136130 return map;
137131 }
138132
139
-void *bpf_map_area_alloc(size_t size, int numa_node)
133
+static void bpf_map_write_active_inc(struct bpf_map *map)
140134 {
141
- /* We definitely need __GFP_NORETRY, so OOM killer doesn't
142
- * trigger under memory pressure as we really just want to
143
- * fail instead.
135
+ atomic64_inc(&map->writecnt);
136
+}
137
+
138
+static void bpf_map_write_active_dec(struct bpf_map *map)
139
+{
140
+ atomic64_dec(&map->writecnt);
141
+}
142
+
143
+bool bpf_map_write_active(const struct bpf_map *map)
144
+{
145
+ return atomic64_read(&map->writecnt) != 0;
146
+}
147
+
148
+static u32 bpf_map_value_size(struct bpf_map *map)
149
+{
150
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
151
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
152
+ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
153
+ map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
154
+ return round_up(map->value_size, 8) * num_possible_cpus();
155
+ else if (IS_FD_MAP(map))
156
+ return sizeof(u32);
157
+ else
158
+ return map->value_size;
159
+}
160
+
161
+static void maybe_wait_bpf_programs(struct bpf_map *map)
162
+{
163
+ /* Wait for any running BPF programs to complete so that
164
+ * userspace, when we return to it, knows that all programs
165
+ * that could be running use the new map value.
144166 */
145
- const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
167
+ if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
168
+ map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
169
+ synchronize_rcu();
170
+}
171
+
172
+static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
173
+ void *value, __u64 flags)
174
+{
175
+ int err;
176
+
177
+ /* Need to create a kthread, thus must support schedule */
178
+ if (bpf_map_is_dev_bound(map)) {
179
+ return bpf_map_offload_update_elem(map, key, value, flags);
180
+ } else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
181
+ map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
182
+ return map->ops->map_update_elem(map, key, value, flags);
183
+ } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
184
+ map->map_type == BPF_MAP_TYPE_SOCKMAP) {
185
+ return sock_map_update_elem_sys(map, key, value, flags);
186
+ } else if (IS_FD_PROG_ARRAY(map)) {
187
+ return bpf_fd_array_map_update_elem(map, f.file, key, value,
188
+ flags);
189
+ }
190
+
191
+ bpf_disable_instrumentation();
192
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
193
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
194
+ err = bpf_percpu_hash_update(map, key, value, flags);
195
+ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
196
+ err = bpf_percpu_array_update(map, key, value, flags);
197
+ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
198
+ err = bpf_percpu_cgroup_storage_update(map, key, value,
199
+ flags);
200
+ } else if (IS_FD_ARRAY(map)) {
201
+ rcu_read_lock();
202
+ err = bpf_fd_array_map_update_elem(map, f.file, key, value,
203
+ flags);
204
+ rcu_read_unlock();
205
+ } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
206
+ rcu_read_lock();
207
+ err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
208
+ flags);
209
+ rcu_read_unlock();
210
+ } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
211
+ /* rcu_read_lock() is not needed */
212
+ err = bpf_fd_reuseport_array_update_elem(map, key, value,
213
+ flags);
214
+ } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
215
+ map->map_type == BPF_MAP_TYPE_STACK) {
216
+ err = map->ops->map_push_elem(map, value, flags);
217
+ } else {
218
+ rcu_read_lock();
219
+ err = map->ops->map_update_elem(map, key, value, flags);
220
+ rcu_read_unlock();
221
+ }
222
+ bpf_enable_instrumentation();
223
+ maybe_wait_bpf_programs(map);
224
+
225
+ return err;
226
+}
227
+
228
+static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
229
+ __u64 flags)
230
+{
231
+ void *ptr;
232
+ int err;
233
+
234
+ if (bpf_map_is_dev_bound(map))
235
+ return bpf_map_offload_lookup_elem(map, key, value);
236
+
237
+ bpf_disable_instrumentation();
238
+ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
239
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
240
+ err = bpf_percpu_hash_copy(map, key, value);
241
+ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
242
+ err = bpf_percpu_array_copy(map, key, value);
243
+ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
244
+ err = bpf_percpu_cgroup_storage_copy(map, key, value);
245
+ } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
246
+ err = bpf_stackmap_copy(map, key, value);
247
+ } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
248
+ err = bpf_fd_array_map_lookup_elem(map, key, value);
249
+ } else if (IS_FD_HASH(map)) {
250
+ err = bpf_fd_htab_map_lookup_elem(map, key, value);
251
+ } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
252
+ err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
253
+ } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
254
+ map->map_type == BPF_MAP_TYPE_STACK) {
255
+ err = map->ops->map_peek_elem(map, value);
256
+ } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
257
+ /* struct_ops map requires directly updating "value" */
258
+ err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
259
+ } else {
260
+ rcu_read_lock();
261
+ if (map->ops->map_lookup_elem_sys_only)
262
+ ptr = map->ops->map_lookup_elem_sys_only(map, key);
263
+ else
264
+ ptr = map->ops->map_lookup_elem(map, key);
265
+ if (IS_ERR(ptr)) {
266
+ err = PTR_ERR(ptr);
267
+ } else if (!ptr) {
268
+ err = -ENOENT;
269
+ } else {
270
+ err = 0;
271
+ if (flags & BPF_F_LOCK)
272
+ /* lock 'ptr' and copy everything but lock */
273
+ copy_map_value_locked(map, value, ptr, true);
274
+ else
275
+ copy_map_value(map, value, ptr);
276
+ /* mask lock, since value wasn't zero inited */
277
+ check_and_init_map_lock(map, value);
278
+ }
279
+ rcu_read_unlock();
280
+ }
281
+
282
+ bpf_enable_instrumentation();
283
+ maybe_wait_bpf_programs(map);
284
+
285
+ return err;
286
+}
287
+
288
+static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
289
+{
290
+ /* We really just want to fail instead of triggering OOM killer
291
+ * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
292
+ * which is used for lower order allocation requests.
293
+ *
294
+ * It has been observed that higher order allocation requests done by
295
+ * vmalloc with __GFP_NORETRY being set might fail due to not trying
296
+ * to reclaim memory from the page cache, thus we set
297
+ * __GFP_RETRY_MAYFAIL to avoid such situations.
298
+ */
299
+
300
+ const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO;
301
+ unsigned int flags = 0;
302
+ unsigned long align = 1;
146303 void *area;
147304
148
- if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
149
- area = kmalloc_node(size, GFP_USER | flags, numa_node);
305
+ if (size >= SIZE_MAX)
306
+ return NULL;
307
+
308
+ /* kmalloc()'ed memory can't be mmap()'ed */
309
+ if (mmapable) {
310
+ BUG_ON(!PAGE_ALIGNED(size));
311
+ align = SHMLBA;
312
+ flags = VM_USERMAP;
313
+ } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
314
+ area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
315
+ numa_node);
150316 if (area != NULL)
151317 return area;
152318 }
153319
154
- return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
155
- __builtin_return_address(0));
320
+ return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
321
+ gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
322
+ flags, numa_node, __builtin_return_address(0));
323
+}
324
+
325
+void *bpf_map_area_alloc(u64 size, int numa_node)
326
+{
327
+ return __bpf_map_area_alloc(size, numa_node, false);
328
+}
329
+
330
+void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
331
+{
332
+ return __bpf_map_area_alloc(size, numa_node, true);
156333 }
157334
158335 void bpf_map_area_free(void *area)
159336 {
160337 kvfree(area);
338
+}
339
+
340
+static u32 bpf_map_flags_retain_permanent(u32 flags)
341
+{
342
+ /* Some map creation flags are not tied to the map object but
343
+ * rather to the map fd instead, so they have no meaning upon
344
+ * map object inspection since multiple file descriptors with
345
+ * different (access) properties can exist here. Thus, given
346
+ * this has zero meaning for the map itself, lets clear these
347
+ * from here.
348
+ */
349
+ return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
161350 }
162351
163352 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
....@@ -166,21 +355,8 @@
166355 map->key_size = attr->key_size;
167356 map->value_size = attr->value_size;
168357 map->max_entries = attr->max_entries;
169
- map->map_flags = attr->map_flags;
358
+ map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
170359 map->numa_node = bpf_map_attr_numa_node(attr);
171
-}
172
-
173
-int bpf_map_precharge_memlock(u32 pages)
174
-{
175
- struct user_struct *user = get_current_user();
176
- unsigned long memlock_limit, cur;
177
-
178
- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
179
- cur = atomic_long_read(&user->locked_vm);
180
- free_uid(user);
181
- if (cur + pages > memlock_limit)
182
- return -EPERM;
183
- return 0;
184360 }
185361
186362 static int bpf_charge_memlock(struct user_struct *user, u32 pages)
....@@ -196,45 +372,62 @@
196372
197373 static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
198374 {
199
- atomic_long_sub(pages, &user->locked_vm);
375
+ if (user)
376
+ atomic_long_sub(pages, &user->locked_vm);
200377 }
201378
202
-static int bpf_map_init_memlock(struct bpf_map *map)
379
+int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
203380 {
204
- struct user_struct *user = get_current_user();
381
+ u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
382
+ struct user_struct *user;
205383 int ret;
206384
207
- ret = bpf_charge_memlock(user, map->pages);
385
+ if (size >= U32_MAX - PAGE_SIZE)
386
+ return -E2BIG;
387
+
388
+ user = get_current_user();
389
+ ret = bpf_charge_memlock(user, pages);
208390 if (ret) {
209391 free_uid(user);
210392 return ret;
211393 }
212
- map->user = user;
213
- return ret;
394
+
395
+ mem->pages = pages;
396
+ mem->user = user;
397
+
398
+ return 0;
214399 }
215400
216
-static void bpf_map_release_memlock(struct bpf_map *map)
401
+void bpf_map_charge_finish(struct bpf_map_memory *mem)
217402 {
218
- struct user_struct *user = map->user;
219
- bpf_uncharge_memlock(user, map->pages);
220
- free_uid(user);
403
+ bpf_uncharge_memlock(mem->user, mem->pages);
404
+ free_uid(mem->user);
405
+}
406
+
407
+void bpf_map_charge_move(struct bpf_map_memory *dst,
408
+ struct bpf_map_memory *src)
409
+{
410
+ *dst = *src;
411
+
412
+ /* Make sure src will not be used for the redundant uncharging. */
413
+ memset(src, 0, sizeof(struct bpf_map_memory));
221414 }
222415
223416 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
224417 {
225418 int ret;
226419
227
- ret = bpf_charge_memlock(map->user, pages);
420
+ ret = bpf_charge_memlock(map->memory.user, pages);
228421 if (ret)
229422 return ret;
230
- map->pages += pages;
423
+ map->memory.pages += pages;
231424 return ret;
232425 }
233426
234427 void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
235428 {
236
- bpf_uncharge_memlock(map->user, pages);
237
- map->pages -= pages;
429
+ bpf_uncharge_memlock(map->memory.user, pages);
430
+ map->memory.pages -= pages;
238431 }
239432
240433 static int bpf_map_alloc_id(struct bpf_map *map)
....@@ -285,16 +478,18 @@
285478 static void bpf_map_free_deferred(struct work_struct *work)
286479 {
287480 struct bpf_map *map = container_of(work, struct bpf_map, work);
481
+ struct bpf_map_memory mem;
288482
289
- bpf_map_release_memlock(map);
483
+ bpf_map_charge_move(&mem, &map->memory);
290484 security_bpf_map_free(map);
291485 /* implementation dependent freeing */
292486 map->ops->map_free(map);
487
+ bpf_map_charge_finish(&mem);
293488 }
294489
295490 static void bpf_map_put_uref(struct bpf_map *map)
296491 {
297
- if (atomic_dec_and_test(&map->usercnt)) {
492
+ if (atomic64_dec_and_test(&map->usercnt)) {
298493 if (map->ops->map_release_uref)
299494 map->ops->map_release_uref(map);
300495 }
....@@ -305,7 +500,7 @@
305500 */
306501 static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
307502 {
308
- if (atomic_dec_and_test(&map->refcnt)) {
503
+ if (atomic64_dec_and_test(&map->refcnt)) {
309504 /* bpf_map_free_id() must be called first */
310505 bpf_map_free_id(map, do_idr_lock);
311506 btf_put(map->btf);
....@@ -337,18 +532,31 @@
337532 return 0;
338533 }
339534
535
+static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
536
+{
537
+ fmode_t mode = f.file->f_mode;
538
+
539
+ /* Our file permissions may have been overridden by global
540
+ * map permissions facing syscall side.
541
+ */
542
+ if (READ_ONCE(map->frozen))
543
+ mode &= ~FMODE_CAN_WRITE;
544
+ return mode;
545
+}
546
+
340547 #ifdef CONFIG_PROC_FS
341548 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
342549 {
343550 const struct bpf_map *map = filp->private_data;
344551 const struct bpf_array *array;
345
- u32 owner_prog_type = 0;
346
- u32 owner_jited = 0;
552
+ u32 type = 0, jited = 0;
347553
348554 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
349555 array = container_of(map, struct bpf_array, map);
350
- owner_prog_type = array->owner_prog_type;
351
- owner_jited = array->owner_jited;
556
+ spin_lock(&array->aux->owner.lock);
557
+ type = array->aux->owner.type;
558
+ jited = array->aux->owner.jited;
559
+ spin_unlock(&array->aux->owner.lock);
352560 }
353561
354562 seq_printf(m,
....@@ -358,20 +566,19 @@
358566 "max_entries:\t%u\n"
359567 "map_flags:\t%#x\n"
360568 "memlock:\t%llu\n"
361
- "map_id:\t%u\n",
569
+ "map_id:\t%u\n"
570
+ "frozen:\t%u\n",
362571 map->map_type,
363572 map->key_size,
364573 map->value_size,
365574 map->max_entries,
366575 map->map_flags,
367
- map->pages * 1ULL << PAGE_SHIFT,
368
- map->id);
369
-
370
- if (owner_prog_type) {
371
- seq_printf(m, "owner_prog_type:\t%u\n",
372
- owner_prog_type);
373
- seq_printf(m, "owner_jited:\t%u\n",
374
- owner_jited);
576
+ map->memory.pages * 1ULL << PAGE_SHIFT,
577
+ map->id,
578
+ READ_ONCE(map->frozen));
579
+ if (type) {
580
+ seq_printf(m, "owner_prog_type:\t%u\n", type);
581
+ seq_printf(m, "owner_jited:\t%u\n", jited);
375582 }
376583 }
377584 #endif
....@@ -394,6 +601,87 @@
394601 return -EINVAL;
395602 }
396603
604
+/* called for any extra memory-mapped regions (except initial) */
605
+static void bpf_map_mmap_open(struct vm_area_struct *vma)
606
+{
607
+ struct bpf_map *map = vma->vm_file->private_data;
608
+
609
+ if (vma->vm_flags & VM_MAYWRITE)
610
+ bpf_map_write_active_inc(map);
611
+}
612
+
613
+/* called for all unmapped memory region (including initial) */
614
+static void bpf_map_mmap_close(struct vm_area_struct *vma)
615
+{
616
+ struct bpf_map *map = vma->vm_file->private_data;
617
+
618
+ if (vma->vm_flags & VM_MAYWRITE)
619
+ bpf_map_write_active_dec(map);
620
+}
621
+
622
+static const struct vm_operations_struct bpf_map_default_vmops = {
623
+ .open = bpf_map_mmap_open,
624
+ .close = bpf_map_mmap_close,
625
+};
626
+
627
+static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
628
+{
629
+ struct bpf_map *map = filp->private_data;
630
+ int err;
631
+
632
+ if (!map->ops->map_mmap || map_value_has_spin_lock(map))
633
+ return -ENOTSUPP;
634
+
635
+ if (!(vma->vm_flags & VM_SHARED))
636
+ return -EINVAL;
637
+
638
+ mutex_lock(&map->freeze_mutex);
639
+
640
+ if (vma->vm_flags & VM_WRITE) {
641
+ if (map->frozen) {
642
+ err = -EPERM;
643
+ goto out;
644
+ }
645
+ /* map is meant to be read-only, so do not allow mapping as
646
+ * writable, because it's possible to leak a writable page
647
+ * reference and allows user-space to still modify it after
648
+ * freezing, while verifier will assume contents do not change
649
+ */
650
+ if (map->map_flags & BPF_F_RDONLY_PROG) {
651
+ err = -EACCES;
652
+ goto out;
653
+ }
654
+ }
655
+
656
+ /* set default open/close callbacks */
657
+ vma->vm_ops = &bpf_map_default_vmops;
658
+ vma->vm_private_data = map;
659
+ vma->vm_flags &= ~VM_MAYEXEC;
660
+ if (!(vma->vm_flags & VM_WRITE))
661
+ /* disallow re-mapping with PROT_WRITE */
662
+ vma->vm_flags &= ~VM_MAYWRITE;
663
+
664
+ err = map->ops->map_mmap(map, vma);
665
+ if (err)
666
+ goto out;
667
+
668
+ if (vma->vm_flags & VM_MAYWRITE)
669
+ bpf_map_write_active_inc(map);
670
+out:
671
+ mutex_unlock(&map->freeze_mutex);
672
+ return err;
673
+}
674
+
675
+static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
676
+{
677
+ struct bpf_map *map = filp->private_data;
678
+
679
+ if (map->ops->map_poll)
680
+ return map->ops->map_poll(map, filp, pts);
681
+
682
+ return EPOLLERR;
683
+}
684
+
397685 const struct file_operations bpf_map_fops = {
398686 #ifdef CONFIG_PROC_FS
399687 .show_fdinfo = bpf_map_show_fdinfo,
....@@ -401,6 +689,8 @@
401689 .release = bpf_map_release,
402690 .read = bpf_dummy_read,
403691 .write = bpf_dummy_write,
692
+ .mmap = bpf_map_mmap,
693
+ .poll = bpf_map_poll,
404694 };
405695
406696 int bpf_map_new_fd(struct bpf_map *map, int flags)
....@@ -434,62 +724,92 @@
434724 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
435725 sizeof(attr->CMD##_LAST_FIELD)) != NULL
436726
437
-/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
438
- * Return 0 on success and < 0 on error.
727
+/* dst and src must have at least "size" number of bytes.
728
+ * Return strlen on success and < 0 on error.
439729 */
440
-static int bpf_obj_name_cpy(char *dst, const char *src)
730
+int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
441731 {
442
- const char *end = src + BPF_OBJ_NAME_LEN;
732
+ const char *end = src + size;
733
+ const char *orig_src = src;
443734
444
- memset(dst, 0, BPF_OBJ_NAME_LEN);
445
-
446
- /* Copy all isalnum() and '_' char */
735
+ memset(dst, 0, size);
736
+ /* Copy all isalnum(), '_' and '.' chars. */
447737 while (src < end && *src) {
448
- if (!isalnum(*src) && *src != '_')
738
+ if (!isalnum(*src) &&
739
+ *src != '_' && *src != '.')
449740 return -EINVAL;
450741 *dst++ = *src++;
451742 }
452743
453
- /* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
744
+ /* No '\0' found in "size" number of bytes */
454745 if (src == end)
455746 return -EINVAL;
456747
457
- return 0;
748
+ return src - orig_src;
458749 }
459750
460751 int map_check_no_btf(const struct bpf_map *map,
752
+ const struct btf *btf,
461753 const struct btf_type *key_type,
462754 const struct btf_type *value_type)
463755 {
464756 return -ENOTSUPP;
465757 }
466758
467
-static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
759
+static int map_check_btf(struct bpf_map *map, const struct btf *btf,
468760 u32 btf_key_id, u32 btf_value_id)
469761 {
470762 const struct btf_type *key_type, *value_type;
471763 u32 key_size, value_size;
472764 int ret = 0;
473765
474
- key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
475
- if (!key_type || key_size != map->key_size)
476
- return -EINVAL;
766
+ /* Some maps allow key to be unspecified. */
767
+ if (btf_key_id) {
768
+ key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
769
+ if (!key_type || key_size != map->key_size)
770
+ return -EINVAL;
771
+ } else {
772
+ key_type = btf_type_by_id(btf, 0);
773
+ if (!map->ops->map_check_btf)
774
+ return -EINVAL;
775
+ }
477776
478777 value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
479778 if (!value_type || value_size != map->value_size)
480779 return -EINVAL;
481780
781
+ map->spin_lock_off = btf_find_spin_lock(btf, value_type);
782
+
783
+ if (map_value_has_spin_lock(map)) {
784
+ if (map->map_flags & BPF_F_RDONLY_PROG)
785
+ return -EACCES;
786
+ if (map->map_type != BPF_MAP_TYPE_HASH &&
787
+ map->map_type != BPF_MAP_TYPE_ARRAY &&
788
+ map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
789
+ map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
790
+ map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
791
+ return -ENOTSUPP;
792
+ if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
793
+ map->value_size) {
794
+ WARN_ONCE(1,
795
+ "verifier bug spin_lock_off %d value_size %d\n",
796
+ map->spin_lock_off, map->value_size);
797
+ return -EFAULT;
798
+ }
799
+ }
800
+
482801 if (map->ops->map_check_btf)
483
- ret = map->ops->map_check_btf(map, key_type, value_type);
802
+ ret = map->ops->map_check_btf(map, btf, key_type, value_type);
484803
485804 return ret;
486805 }
487806
488
-#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
807
+#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
489808 /* called via syscall */
490809 static int map_create(union bpf_attr *attr)
491810 {
492811 int numa_node = bpf_map_attr_numa_node(attr);
812
+ struct bpf_map_memory mem;
493813 struct bpf_map *map;
494814 int f_flags;
495815 int err;
....@@ -497,6 +817,14 @@
497817 err = CHECK_ATTR(BPF_MAP_CREATE);
498818 if (err)
499819 return -EINVAL;
820
+
821
+ if (attr->btf_vmlinux_value_type_id) {
822
+ if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
823
+ attr->btf_key_type_id || attr->btf_value_type_id)
824
+ return -EINVAL;
825
+ } else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
826
+ return -EINVAL;
827
+ }
500828
501829 f_flags = bpf_get_file_flag(attr->map_flags);
502830 if (f_flags < 0)
....@@ -512,50 +840,53 @@
512840 if (IS_ERR(map))
513841 return PTR_ERR(map);
514842
515
- err = bpf_obj_name_cpy(map->name, attr->map_name);
516
- if (err)
517
- goto free_map_nouncharge;
843
+ err = bpf_obj_name_cpy(map->name, attr->map_name,
844
+ sizeof(attr->map_name));
845
+ if (err < 0)
846
+ goto free_map;
518847
519
- atomic_set(&map->refcnt, 1);
520
- atomic_set(&map->usercnt, 1);
848
+ atomic64_set(&map->refcnt, 1);
849
+ atomic64_set(&map->usercnt, 1);
850
+ mutex_init(&map->freeze_mutex);
521851
522
- if (attr->btf_key_type_id || attr->btf_value_type_id) {
852
+ map->spin_lock_off = -EINVAL;
853
+ if (attr->btf_key_type_id || attr->btf_value_type_id ||
854
+ /* Even the map's value is a kernel's struct,
855
+ * the bpf_prog.o must have BTF to begin with
856
+ * to figure out the corresponding kernel's
857
+ * counter part. Thus, attr->btf_fd has
858
+ * to be valid also.
859
+ */
860
+ attr->btf_vmlinux_value_type_id) {
523861 struct btf *btf;
524
-
525
- if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
526
- err = -EINVAL;
527
- goto free_map_nouncharge;
528
- }
529862
530863 btf = btf_get_by_fd(attr->btf_fd);
531864 if (IS_ERR(btf)) {
532865 err = PTR_ERR(btf);
533
- goto free_map_nouncharge;
866
+ goto free_map;
534867 }
535
-
536
- err = map_check_btf(map, btf, attr->btf_key_type_id,
537
- attr->btf_value_type_id);
538
- if (err) {
539
- btf_put(btf);
540
- goto free_map_nouncharge;
541
- }
542
-
543868 map->btf = btf;
869
+
870
+ if (attr->btf_value_type_id) {
871
+ err = map_check_btf(map, btf, attr->btf_key_type_id,
872
+ attr->btf_value_type_id);
873
+ if (err)
874
+ goto free_map;
875
+ }
876
+
544877 map->btf_key_type_id = attr->btf_key_type_id;
545878 map->btf_value_type_id = attr->btf_value_type_id;
879
+ map->btf_vmlinux_value_type_id =
880
+ attr->btf_vmlinux_value_type_id;
546881 }
547882
548883 err = security_bpf_map_alloc(map);
549884 if (err)
550
- goto free_map_nouncharge;
551
-
552
- err = bpf_map_init_memlock(map);
553
- if (err)
554
- goto free_map_sec;
885
+ goto free_map;
555886
556887 err = bpf_map_alloc_id(map);
557888 if (err)
558
- goto free_map;
889
+ goto free_map_sec;
559890
560891 err = bpf_map_new_fd(map, f_flags);
561892 if (err < 0) {
....@@ -571,13 +902,13 @@
571902
572903 return err;
573904
574
-free_map:
575
- bpf_map_release_memlock(map);
576905 free_map_sec:
577906 security_bpf_map_free(map);
578
-free_map_nouncharge:
907
+free_map:
579908 btf_put(map->btf);
909
+ bpf_map_charge_move(&mem, &map->memory);
580910 map->ops->map_free(map);
911
+ bpf_map_charge_finish(&mem);
581912 return err;
582913 }
583914
....@@ -596,20 +927,33 @@
596927 return f.file->private_data;
597928 }
598929
599
-/* prog's and map's refcnt limit */
600
-#define BPF_MAX_REFCNT 32768
601
-
602
-struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
930
+void bpf_map_inc(struct bpf_map *map)
603931 {
604
- if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
605
- atomic_dec(&map->refcnt);
606
- return ERR_PTR(-EBUSY);
607
- }
608
- if (uref)
609
- atomic_inc(&map->usercnt);
610
- return map;
932
+ atomic64_inc(&map->refcnt);
611933 }
612934 EXPORT_SYMBOL_GPL(bpf_map_inc);
935
+
936
+void bpf_map_inc_with_uref(struct bpf_map *map)
937
+{
938
+ atomic64_inc(&map->refcnt);
939
+ atomic64_inc(&map->usercnt);
940
+}
941
+EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
942
+
943
+struct bpf_map *bpf_map_get(u32 ufd)
944
+{
945
+ struct fd f = fdget(ufd);
946
+ struct bpf_map *map;
947
+
948
+ map = __bpf_map_get(f);
949
+ if (IS_ERR(map))
950
+ return map;
951
+
952
+ bpf_map_inc(map);
953
+ fdput(f);
954
+
955
+ return map;
956
+}
613957
614958 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
615959 {
....@@ -620,41 +964,54 @@
620964 if (IS_ERR(map))
621965 return map;
622966
623
- map = bpf_map_inc(map, true);
967
+ bpf_map_inc_with_uref(map);
624968 fdput(f);
625969
626970 return map;
627971 }
628972
629973 /* map_idr_lock should have been held */
630
-static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
631
- bool uref)
974
+static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
632975 {
633976 int refold;
634977
635
- refold = atomic_fetch_add_unless(&map->refcnt, 1, 0);
636
-
637
- if (refold >= BPF_MAX_REFCNT) {
638
- __bpf_map_put(map, false);
639
- return ERR_PTR(-EBUSY);
640
- }
641
-
978
+ refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
642979 if (!refold)
643980 return ERR_PTR(-ENOENT);
644
-
645981 if (uref)
646
- atomic_inc(&map->usercnt);
982
+ atomic64_inc(&map->usercnt);
647983
648984 return map;
649985 }
986
+
987
+struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
988
+{
989
+ spin_lock_bh(&map_idr_lock);
990
+ map = __bpf_map_inc_not_zero(map, false);
991
+ spin_unlock_bh(&map_idr_lock);
992
+
993
+ return map;
994
+}
995
+EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
650996
651997 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
652998 {
653999 return -ENOTSUPP;
6541000 }
6551001
1002
+static void *__bpf_copy_key(void __user *ukey, u64 key_size)
1003
+{
1004
+ if (key_size)
1005
+ return memdup_user(ukey, key_size);
1006
+
1007
+ if (ukey)
1008
+ return ERR_PTR(-EINVAL);
1009
+
1010
+ return NULL;
1011
+}
1012
+
6561013 /* last field in 'union bpf_attr' used by this command */
657
-#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
1014
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
6581015
6591016 static int map_lookup_elem(union bpf_attr *attr)
6601017 {
....@@ -662,7 +1019,7 @@
6621019 void __user *uvalue = u64_to_user_ptr(attr->value);
6631020 int ufd = attr->map_fd;
6641021 struct bpf_map *map;
665
- void *key, *value, *ptr;
1022
+ void *key, *value;
6661023 u32 value_size;
6671024 struct fd f;
6681025 int err;
....@@ -670,71 +1027,38 @@
6701027 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
6711028 return -EINVAL;
6721029
1030
+ if (attr->flags & ~BPF_F_LOCK)
1031
+ return -EINVAL;
1032
+
6731033 f = fdget(ufd);
6741034 map = __bpf_map_get(f);
6751035 if (IS_ERR(map))
6761036 return PTR_ERR(map);
677
-
678
- if (!(f.file->f_mode & FMODE_CAN_READ)) {
1037
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
6791038 err = -EPERM;
6801039 goto err_put;
6811040 }
6821041
683
- key = memdup_user(ukey, map->key_size);
1042
+ if ((attr->flags & BPF_F_LOCK) &&
1043
+ !map_value_has_spin_lock(map)) {
1044
+ err = -EINVAL;
1045
+ goto err_put;
1046
+ }
1047
+
1048
+ key = __bpf_copy_key(ukey, map->key_size);
6841049 if (IS_ERR(key)) {
6851050 err = PTR_ERR(key);
6861051 goto err_put;
6871052 }
6881053
689
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
690
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
691
- map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
692
- value_size = round_up(map->value_size, 8) * num_possible_cpus();
693
- else if (IS_FD_MAP(map))
694
- value_size = sizeof(u32);
695
- else
696
- value_size = map->value_size;
1054
+ value_size = bpf_map_value_size(map);
6971055
6981056 err = -ENOMEM;
6991057 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
7001058 if (!value)
7011059 goto free_key;
7021060
703
- if (bpf_map_is_dev_bound(map)) {
704
- err = bpf_map_offload_lookup_elem(map, key, value);
705
- goto done;
706
- }
707
-
708
- preempt_disable();
709
- this_cpu_inc(bpf_prog_active);
710
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
711
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
712
- err = bpf_percpu_hash_copy(map, key, value);
713
- } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
714
- err = bpf_percpu_array_copy(map, key, value);
715
- } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
716
- err = bpf_stackmap_copy(map, key, value);
717
- } else if (IS_FD_ARRAY(map)) {
718
- err = bpf_fd_array_map_lookup_elem(map, key, value);
719
- } else if (IS_FD_HASH(map)) {
720
- err = bpf_fd_htab_map_lookup_elem(map, key, value);
721
- } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
722
- err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
723
- } else {
724
- rcu_read_lock();
725
- if (map->ops->map_lookup_elem_sys_only)
726
- ptr = map->ops->map_lookup_elem_sys_only(map, key);
727
- else
728
- ptr = map->ops->map_lookup_elem(map, key);
729
- if (ptr)
730
- memcpy(value, ptr, value_size);
731
- rcu_read_unlock();
732
- err = ptr ? 0 : -ENOENT;
733
- }
734
- this_cpu_dec(bpf_prog_active);
735
- preempt_enable();
736
-
737
-done:
1061
+ err = bpf_map_copy_value(map, key, value, attr->flags);
7381062 if (err)
7391063 goto free_value;
7401064
....@@ -753,16 +1077,6 @@
7531077 return err;
7541078 }
7551079
756
-static void maybe_wait_bpf_programs(struct bpf_map *map)
757
-{
758
- /* Wait for any running BPF programs to complete so that
759
- * userspace, when we return to it, knows that all programs
760
- * that could be running use the new map value.
761
- */
762
- if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
763
- map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
764
- synchronize_rcu();
765
-}
7661080
7671081 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
7681082
....@@ -784,13 +1098,19 @@
7841098 map = __bpf_map_get(f);
7851099 if (IS_ERR(map))
7861100 return PTR_ERR(map);
787
-
788
- if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
1101
+ bpf_map_write_active_inc(map);
1102
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
7891103 err = -EPERM;
7901104 goto err_put;
7911105 }
7921106
793
- key = memdup_user(ukey, map->key_size);
1107
+ if ((attr->flags & BPF_F_LOCK) &&
1108
+ !map_value_has_spin_lock(map)) {
1109
+ err = -EINVAL;
1110
+ goto err_put;
1111
+ }
1112
+
1113
+ key = __bpf_copy_key(ukey, map->key_size);
7941114 if (IS_ERR(key)) {
7951115 err = PTR_ERR(key);
7961116 goto err_put;
....@@ -798,7 +1118,8 @@
7981118
7991119 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
8001120 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
801
- map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
1121
+ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
1122
+ map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
8021123 value_size = round_up(map->value_size, 8) * num_possible_cpus();
8031124 else
8041125 value_size = map->value_size;
....@@ -812,55 +1133,14 @@
8121133 if (copy_from_user(value, uvalue, value_size) != 0)
8131134 goto free_value;
8141135
815
- /* Need to create a kthread, thus must support schedule */
816
- if (bpf_map_is_dev_bound(map)) {
817
- err = bpf_map_offload_update_elem(map, key, value, attr->flags);
818
- goto out;
819
- } else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
820
- map->map_type == BPF_MAP_TYPE_SOCKHASH ||
821
- map->map_type == BPF_MAP_TYPE_SOCKMAP) {
822
- err = map->ops->map_update_elem(map, key, value, attr->flags);
823
- goto out;
824
- }
1136
+ err = bpf_map_update_value(map, f, key, value, attr->flags);
8251137
826
- /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
827
- * inside bpf map update or delete otherwise deadlocks are possible
828
- */
829
- preempt_disable();
830
- __this_cpu_inc(bpf_prog_active);
831
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
832
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
833
- err = bpf_percpu_hash_update(map, key, value, attr->flags);
834
- } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
835
- err = bpf_percpu_array_update(map, key, value, attr->flags);
836
- } else if (IS_FD_ARRAY(map)) {
837
- rcu_read_lock();
838
- err = bpf_fd_array_map_update_elem(map, f.file, key, value,
839
- attr->flags);
840
- rcu_read_unlock();
841
- } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
842
- rcu_read_lock();
843
- err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
844
- attr->flags);
845
- rcu_read_unlock();
846
- } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
847
- /* rcu_read_lock() is not needed */
848
- err = bpf_fd_reuseport_array_update_elem(map, key, value,
849
- attr->flags);
850
- } else {
851
- rcu_read_lock();
852
- err = map->ops->map_update_elem(map, key, value, attr->flags);
853
- rcu_read_unlock();
854
- }
855
- __this_cpu_dec(bpf_prog_active);
856
- preempt_enable();
857
- maybe_wait_bpf_programs(map);
858
-out:
8591138 free_value:
8601139 kfree(value);
8611140 free_key:
8621141 kfree(key);
8631142 err_put:
1143
+ bpf_map_write_active_dec(map);
8641144 fdput(f);
8651145 return err;
8661146 }
....@@ -883,13 +1163,13 @@
8831163 map = __bpf_map_get(f);
8841164 if (IS_ERR(map))
8851165 return PTR_ERR(map);
886
-
887
- if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
1166
+ bpf_map_write_active_inc(map);
1167
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
8881168 err = -EPERM;
8891169 goto err_put;
8901170 }
8911171
892
- key = memdup_user(ukey, map->key_size);
1172
+ key = __bpf_copy_key(ukey, map->key_size);
8931173 if (IS_ERR(key)) {
8941174 err = PTR_ERR(key);
8951175 goto err_put;
....@@ -898,19 +1178,23 @@
8981178 if (bpf_map_is_dev_bound(map)) {
8991179 err = bpf_map_offload_delete_elem(map, key);
9001180 goto out;
1181
+ } else if (IS_FD_PROG_ARRAY(map) ||
1182
+ map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1183
+ /* These maps require sleepable context */
1184
+ err = map->ops->map_delete_elem(map, key);
1185
+ goto out;
9011186 }
9021187
903
- preempt_disable();
904
- __this_cpu_inc(bpf_prog_active);
1188
+ bpf_disable_instrumentation();
9051189 rcu_read_lock();
9061190 err = map->ops->map_delete_elem(map, key);
9071191 rcu_read_unlock();
908
- __this_cpu_dec(bpf_prog_active);
909
- preempt_enable();
1192
+ bpf_enable_instrumentation();
9101193 maybe_wait_bpf_programs(map);
9111194 out:
9121195 kfree(key);
9131196 err_put:
1197
+ bpf_map_write_active_dec(map);
9141198 fdput(f);
9151199 return err;
9161200 }
....@@ -935,14 +1219,13 @@
9351219 map = __bpf_map_get(f);
9361220 if (IS_ERR(map))
9371221 return PTR_ERR(map);
938
-
939
- if (!(f.file->f_mode & FMODE_CAN_READ)) {
1222
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
9401223 err = -EPERM;
9411224 goto err_put;
9421225 }
9431226
9441227 if (ukey) {
945
- key = memdup_user(ukey, map->key_size);
1228
+ key = __bpf_copy_key(ukey, map->key_size);
9461229 if (IS_ERR(key)) {
9471230 err = PTR_ERR(key);
9481231 goto err_put;
....@@ -983,13 +1266,340 @@
9831266 return err;
9841267 }
9851268
1269
+int generic_map_delete_batch(struct bpf_map *map,
1270
+ const union bpf_attr *attr,
1271
+ union bpf_attr __user *uattr)
1272
+{
1273
+ void __user *keys = u64_to_user_ptr(attr->batch.keys);
1274
+ u32 cp, max_count;
1275
+ int err = 0;
1276
+ void *key;
1277
+
1278
+ if (attr->batch.elem_flags & ~BPF_F_LOCK)
1279
+ return -EINVAL;
1280
+
1281
+ if ((attr->batch.elem_flags & BPF_F_LOCK) &&
1282
+ !map_value_has_spin_lock(map)) {
1283
+ return -EINVAL;
1284
+ }
1285
+
1286
+ max_count = attr->batch.count;
1287
+ if (!max_count)
1288
+ return 0;
1289
+
1290
+ key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1291
+ if (!key)
1292
+ return -ENOMEM;
1293
+
1294
+ for (cp = 0; cp < max_count; cp++) {
1295
+ err = -EFAULT;
1296
+ if (copy_from_user(key, keys + cp * map->key_size,
1297
+ map->key_size))
1298
+ break;
1299
+
1300
+ if (bpf_map_is_dev_bound(map)) {
1301
+ err = bpf_map_offload_delete_elem(map, key);
1302
+ break;
1303
+ }
1304
+
1305
+ bpf_disable_instrumentation();
1306
+ rcu_read_lock();
1307
+ err = map->ops->map_delete_elem(map, key);
1308
+ rcu_read_unlock();
1309
+ bpf_enable_instrumentation();
1310
+ maybe_wait_bpf_programs(map);
1311
+ if (err)
1312
+ break;
1313
+ cond_resched();
1314
+ }
1315
+ if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
1316
+ err = -EFAULT;
1317
+
1318
+ kfree(key);
1319
+ return err;
1320
+}
1321
+
1322
+int generic_map_update_batch(struct bpf_map *map,
1323
+ const union bpf_attr *attr,
1324
+ union bpf_attr __user *uattr)
1325
+{
1326
+ void __user *values = u64_to_user_ptr(attr->batch.values);
1327
+ void __user *keys = u64_to_user_ptr(attr->batch.keys);
1328
+ u32 value_size, cp, max_count;
1329
+ int ufd = attr->batch.map_fd;
1330
+ void *key, *value;
1331
+ struct fd f;
1332
+ int err = 0;
1333
+
1334
+ if (attr->batch.elem_flags & ~BPF_F_LOCK)
1335
+ return -EINVAL;
1336
+
1337
+ if ((attr->batch.elem_flags & BPF_F_LOCK) &&
1338
+ !map_value_has_spin_lock(map)) {
1339
+ return -EINVAL;
1340
+ }
1341
+
1342
+ value_size = bpf_map_value_size(map);
1343
+
1344
+ max_count = attr->batch.count;
1345
+ if (!max_count)
1346
+ return 0;
1347
+
1348
+ key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1349
+ if (!key)
1350
+ return -ENOMEM;
1351
+
1352
+ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1353
+ if (!value) {
1354
+ kfree(key);
1355
+ return -ENOMEM;
1356
+ }
1357
+
1358
+ f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */
1359
+ for (cp = 0; cp < max_count; cp++) {
1360
+ err = -EFAULT;
1361
+ if (copy_from_user(key, keys + cp * map->key_size,
1362
+ map->key_size) ||
1363
+ copy_from_user(value, values + cp * value_size, value_size))
1364
+ break;
1365
+
1366
+ err = bpf_map_update_value(map, f, key, value,
1367
+ attr->batch.elem_flags);
1368
+
1369
+ if (err)
1370
+ break;
1371
+ cond_resched();
1372
+ }
1373
+
1374
+ if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
1375
+ err = -EFAULT;
1376
+
1377
+ kfree(value);
1378
+ kfree(key);
1379
+ fdput(f);
1380
+ return err;
1381
+}
1382
+
1383
+#define MAP_LOOKUP_RETRIES 3
1384
+
1385
+int generic_map_lookup_batch(struct bpf_map *map,
1386
+ const union bpf_attr *attr,
1387
+ union bpf_attr __user *uattr)
1388
+{
1389
+ void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
1390
+ void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
1391
+ void __user *values = u64_to_user_ptr(attr->batch.values);
1392
+ void __user *keys = u64_to_user_ptr(attr->batch.keys);
1393
+ void *buf, *buf_prevkey, *prev_key, *key, *value;
1394
+ int err, retry = MAP_LOOKUP_RETRIES;
1395
+ u32 value_size, cp, max_count;
1396
+
1397
+ if (attr->batch.elem_flags & ~BPF_F_LOCK)
1398
+ return -EINVAL;
1399
+
1400
+ if ((attr->batch.elem_flags & BPF_F_LOCK) &&
1401
+ !map_value_has_spin_lock(map))
1402
+ return -EINVAL;
1403
+
1404
+ value_size = bpf_map_value_size(map);
1405
+
1406
+ max_count = attr->batch.count;
1407
+ if (!max_count)
1408
+ return 0;
1409
+
1410
+ if (put_user(0, &uattr->batch.count))
1411
+ return -EFAULT;
1412
+
1413
+ buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1414
+ if (!buf_prevkey)
1415
+ return -ENOMEM;
1416
+
1417
+ buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
1418
+ if (!buf) {
1419
+ kfree(buf_prevkey);
1420
+ return -ENOMEM;
1421
+ }
1422
+
1423
+ err = -EFAULT;
1424
+ prev_key = NULL;
1425
+ if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
1426
+ goto free_buf;
1427
+ key = buf;
1428
+ value = key + map->key_size;
1429
+ if (ubatch)
1430
+ prev_key = buf_prevkey;
1431
+
1432
+ for (cp = 0; cp < max_count;) {
1433
+ rcu_read_lock();
1434
+ err = map->ops->map_get_next_key(map, prev_key, key);
1435
+ rcu_read_unlock();
1436
+ if (err)
1437
+ break;
1438
+ err = bpf_map_copy_value(map, key, value,
1439
+ attr->batch.elem_flags);
1440
+
1441
+ if (err == -ENOENT) {
1442
+ if (retry) {
1443
+ retry--;
1444
+ continue;
1445
+ }
1446
+ err = -EINTR;
1447
+ break;
1448
+ }
1449
+
1450
+ if (err)
1451
+ goto free_buf;
1452
+
1453
+ if (copy_to_user(keys + cp * map->key_size, key,
1454
+ map->key_size)) {
1455
+ err = -EFAULT;
1456
+ goto free_buf;
1457
+ }
1458
+ if (copy_to_user(values + cp * value_size, value, value_size)) {
1459
+ err = -EFAULT;
1460
+ goto free_buf;
1461
+ }
1462
+
1463
+ if (!prev_key)
1464
+ prev_key = buf_prevkey;
1465
+
1466
+ swap(prev_key, key);
1467
+ retry = MAP_LOOKUP_RETRIES;
1468
+ cp++;
1469
+ cond_resched();
1470
+ }
1471
+
1472
+ if (err == -EFAULT)
1473
+ goto free_buf;
1474
+
1475
+ if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
1476
+ (cp && copy_to_user(uobatch, prev_key, map->key_size))))
1477
+ err = -EFAULT;
1478
+
1479
+free_buf:
1480
+ kfree(buf_prevkey);
1481
+ kfree(buf);
1482
+ return err;
1483
+}
1484
+
1485
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
1486
+
1487
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
1488
+{
1489
+ void __user *ukey = u64_to_user_ptr(attr->key);
1490
+ void __user *uvalue = u64_to_user_ptr(attr->value);
1491
+ int ufd = attr->map_fd;
1492
+ struct bpf_map *map;
1493
+ void *key, *value;
1494
+ u32 value_size;
1495
+ struct fd f;
1496
+ int err;
1497
+
1498
+ if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
1499
+ return -EINVAL;
1500
+
1501
+ f = fdget(ufd);
1502
+ map = __bpf_map_get(f);
1503
+ if (IS_ERR(map))
1504
+ return PTR_ERR(map);
1505
+ bpf_map_write_active_inc(map);
1506
+ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
1507
+ !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1508
+ err = -EPERM;
1509
+ goto err_put;
1510
+ }
1511
+
1512
+ key = __bpf_copy_key(ukey, map->key_size);
1513
+ if (IS_ERR(key)) {
1514
+ err = PTR_ERR(key);
1515
+ goto err_put;
1516
+ }
1517
+
1518
+ value_size = map->value_size;
1519
+
1520
+ err = -ENOMEM;
1521
+ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1522
+ if (!value)
1523
+ goto free_key;
1524
+
1525
+ if (map->map_type == BPF_MAP_TYPE_QUEUE ||
1526
+ map->map_type == BPF_MAP_TYPE_STACK) {
1527
+ err = map->ops->map_pop_elem(map, value);
1528
+ } else {
1529
+ err = -ENOTSUPP;
1530
+ }
1531
+
1532
+ if (err)
1533
+ goto free_value;
1534
+
1535
+ if (copy_to_user(uvalue, value, value_size) != 0) {
1536
+ err = -EFAULT;
1537
+ goto free_value;
1538
+ }
1539
+
1540
+ err = 0;
1541
+
1542
+free_value:
1543
+ kfree(value);
1544
+free_key:
1545
+ kfree(key);
1546
+err_put:
1547
+ bpf_map_write_active_dec(map);
1548
+ fdput(f);
1549
+ return err;
1550
+}
1551
+
1552
+#define BPF_MAP_FREEZE_LAST_FIELD map_fd
1553
+
1554
+static int map_freeze(const union bpf_attr *attr)
1555
+{
1556
+ int err = 0, ufd = attr->map_fd;
1557
+ struct bpf_map *map;
1558
+ struct fd f;
1559
+
1560
+ if (CHECK_ATTR(BPF_MAP_FREEZE))
1561
+ return -EINVAL;
1562
+
1563
+ f = fdget(ufd);
1564
+ map = __bpf_map_get(f);
1565
+ if (IS_ERR(map))
1566
+ return PTR_ERR(map);
1567
+
1568
+ if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1569
+ fdput(f);
1570
+ return -ENOTSUPP;
1571
+ }
1572
+
1573
+ mutex_lock(&map->freeze_mutex);
1574
+ if (bpf_map_write_active(map)) {
1575
+ err = -EBUSY;
1576
+ goto err_put;
1577
+ }
1578
+ if (READ_ONCE(map->frozen)) {
1579
+ err = -EBUSY;
1580
+ goto err_put;
1581
+ }
1582
+ if (!bpf_capable()) {
1583
+ err = -EPERM;
1584
+ goto err_put;
1585
+ }
1586
+
1587
+ WRITE_ONCE(map->frozen, true);
1588
+err_put:
1589
+ mutex_unlock(&map->freeze_mutex);
1590
+ fdput(f);
1591
+ return err;
1592
+}
1593
+
9861594 static const struct bpf_prog_ops * const bpf_prog_types[] = {
987
-#define BPF_PROG_TYPE(_id, _name) \
1595
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
9881596 [_id] = & _name ## _prog_ops,
9891597 #define BPF_MAP_TYPE(_id, _ops)
1598
+#define BPF_LINK_TYPE(_id, _name)
9901599 #include <linux/bpf_types.h>
9911600 #undef BPF_PROG_TYPE
9921601 #undef BPF_MAP_TYPE
1602
+#undef BPF_LINK_TYPE
9931603 };
9941604
9951605 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
....@@ -1011,18 +1621,34 @@
10111621 return 0;
10121622 }
10131623
1014
-/* drop refcnt on maps used by eBPF program and free auxilary data */
1015
-static void free_used_maps(struct bpf_prog_aux *aux)
1624
+enum bpf_audit {
1625
+ BPF_AUDIT_LOAD,
1626
+ BPF_AUDIT_UNLOAD,
1627
+ BPF_AUDIT_MAX,
1628
+};
1629
+
1630
+static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
1631
+ [BPF_AUDIT_LOAD] = "LOAD",
1632
+ [BPF_AUDIT_UNLOAD] = "UNLOAD",
1633
+};
1634
+
1635
+static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
10161636 {
1017
- int i;
1637
+ struct audit_context *ctx = NULL;
1638
+ struct audit_buffer *ab;
10181639
1019
- if (aux->cgroup_storage)
1020
- bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
1021
-
1022
- for (i = 0; i < aux->used_map_cnt; i++)
1023
- bpf_map_put(aux->used_maps[i]);
1024
-
1025
- kfree(aux->used_maps);
1640
+ if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
1641
+ return;
1642
+ if (audit_enabled == AUDIT_OFF)
1643
+ return;
1644
+ if (op == BPF_AUDIT_LOAD)
1645
+ ctx = audit_context();
1646
+ ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
1647
+ if (unlikely(!ab))
1648
+ return;
1649
+ audit_log_format(ab, "prog-id=%u op=%s",
1650
+ prog->aux->id, bpf_audit_str[op]);
1651
+ audit_log_end(ab);
10261652 }
10271653
10281654 int __bpf_prog_charge(struct user_struct *user, u32 pages)
....@@ -1117,20 +1743,37 @@
11171743 {
11181744 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
11191745
1120
- free_used_maps(aux);
1746
+ kvfree(aux->func_info);
1747
+ kfree(aux->func_info_aux);
11211748 bpf_prog_uncharge_memlock(aux->prog);
11221749 security_bpf_prog_free(aux);
11231750 bpf_prog_free(aux->prog);
11241751 }
11251752
1753
+static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
1754
+{
1755
+ bpf_prog_kallsyms_del_all(prog);
1756
+ btf_put(prog->aux->btf);
1757
+ bpf_prog_free_linfo(prog);
1758
+
1759
+ if (deferred) {
1760
+ if (prog->aux->sleepable)
1761
+ call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
1762
+ else
1763
+ call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
1764
+ } else {
1765
+ __bpf_prog_put_rcu(&prog->aux->rcu);
1766
+ }
1767
+}
1768
+
11261769 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
11271770 {
1128
- if (atomic_dec_and_test(&prog->aux->refcnt)) {
1771
+ if (atomic64_dec_and_test(&prog->aux->refcnt)) {
1772
+ perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
1773
+ bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
11291774 /* bpf_prog_free_id() must be called first */
11301775 bpf_prog_free_id(prog, do_idr_lock);
1131
- bpf_prog_kallsyms_del_all(prog);
1132
-
1133
- call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
1776
+ __bpf_prog_put_noref(prog, true);
11341777 }
11351778 }
11361779
....@@ -1148,24 +1791,54 @@
11481791 return 0;
11491792 }
11501793
1794
+static void bpf_prog_get_stats(const struct bpf_prog *prog,
1795
+ struct bpf_prog_stats *stats)
1796
+{
1797
+ u64 nsecs = 0, cnt = 0;
1798
+ int cpu;
1799
+
1800
+ for_each_possible_cpu(cpu) {
1801
+ const struct bpf_prog_stats *st;
1802
+ unsigned int start;
1803
+ u64 tnsecs, tcnt;
1804
+
1805
+ st = per_cpu_ptr(prog->aux->stats, cpu);
1806
+ do {
1807
+ start = u64_stats_fetch_begin_irq(&st->syncp);
1808
+ tnsecs = st->nsecs;
1809
+ tcnt = st->cnt;
1810
+ } while (u64_stats_fetch_retry_irq(&st->syncp, start));
1811
+ nsecs += tnsecs;
1812
+ cnt += tcnt;
1813
+ }
1814
+ stats->nsecs = nsecs;
1815
+ stats->cnt = cnt;
1816
+}
1817
+
11511818 #ifdef CONFIG_PROC_FS
11521819 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
11531820 {
11541821 const struct bpf_prog *prog = filp->private_data;
11551822 char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
1823
+ struct bpf_prog_stats stats;
11561824
1825
+ bpf_prog_get_stats(prog, &stats);
11571826 bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
11581827 seq_printf(m,
11591828 "prog_type:\t%u\n"
11601829 "prog_jited:\t%u\n"
11611830 "prog_tag:\t%s\n"
11621831 "memlock:\t%llu\n"
1163
- "prog_id:\t%u\n",
1832
+ "prog_id:\t%u\n"
1833
+ "run_time_ns:\t%llu\n"
1834
+ "run_cnt:\t%llu\n",
11641835 prog->type,
11651836 prog->jited,
11661837 prog_tag,
11671838 prog->pages * 1ULL << PAGE_SHIFT,
1168
- prog->aux->id);
1839
+ prog->aux->id,
1840
+ stats.nsecs,
1841
+ stats.cnt);
11691842 }
11701843 #endif
11711844
....@@ -1202,13 +1875,9 @@
12021875 return f.file->private_data;
12031876 }
12041877
1205
-struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
1878
+void bpf_prog_add(struct bpf_prog *prog, int i)
12061879 {
1207
- if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
1208
- atomic_sub(i, &prog->aux->refcnt);
1209
- return ERR_PTR(-EBUSY);
1210
- }
1211
- return prog;
1880
+ atomic64_add(i, &prog->aux->refcnt);
12121881 }
12131882 EXPORT_SYMBOL_GPL(bpf_prog_add);
12141883
....@@ -1219,13 +1888,13 @@
12191888 * path holds a reference to the program, thus atomic_sub() can
12201889 * be safely used in such cases!
12211890 */
1222
- WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
1891
+ WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
12231892 }
12241893 EXPORT_SYMBOL_GPL(bpf_prog_sub);
12251894
1226
-struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
1895
+void bpf_prog_inc(struct bpf_prog *prog)
12271896 {
1228
- return bpf_prog_add(prog, 1);
1897
+ atomic64_inc(&prog->aux->refcnt);
12291898 }
12301899 EXPORT_SYMBOL_GPL(bpf_prog_inc);
12311900
....@@ -1234,12 +1903,7 @@
12341903 {
12351904 int refold;
12361905
1237
- refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0);
1238
-
1239
- if (refold >= BPF_MAX_REFCNT) {
1240
- __bpf_prog_put(prog, false);
1241
- return ERR_PTR(-EBUSY);
1242
- }
1906
+ refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
12431907
12441908 if (!refold)
12451909 return ERR_PTR(-ENOENT);
....@@ -1277,7 +1941,7 @@
12771941 goto out;
12781942 }
12791943
1280
- prog = bpf_prog_inc(prog);
1944
+ bpf_prog_inc(prog);
12811945 out:
12821946 fdput(f);
12831947 return prog;
....@@ -1322,13 +1986,34 @@
13221986 }
13231987
13241988 static int
1325
-bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
1326
- enum bpf_attach_type expected_attach_type)
1989
+bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
1990
+ enum bpf_attach_type expected_attach_type,
1991
+ u32 btf_id, u32 prog_fd)
13271992 {
1993
+ if (btf_id) {
1994
+ if (btf_id > BTF_MAX_TYPE)
1995
+ return -EINVAL;
1996
+
1997
+ switch (prog_type) {
1998
+ case BPF_PROG_TYPE_TRACING:
1999
+ case BPF_PROG_TYPE_LSM:
2000
+ case BPF_PROG_TYPE_STRUCT_OPS:
2001
+ case BPF_PROG_TYPE_EXT:
2002
+ break;
2003
+ default:
2004
+ return -EINVAL;
2005
+ }
2006
+ }
2007
+
2008
+ if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING &&
2009
+ prog_type != BPF_PROG_TYPE_EXT)
2010
+ return -EINVAL;
2011
+
13282012 switch (prog_type) {
13292013 case BPF_PROG_TYPE_CGROUP_SOCK:
13302014 switch (expected_attach_type) {
13312015 case BPF_CGROUP_INET_SOCK_CREATE:
2016
+ case BPF_CGROUP_INET_SOCK_RELEASE:
13322017 case BPF_CGROUP_INET4_POST_BIND:
13332018 case BPF_CGROUP_INET6_POST_BIND:
13342019 return 0;
....@@ -1341,6 +2026,10 @@
13412026 case BPF_CGROUP_INET6_BIND:
13422027 case BPF_CGROUP_INET4_CONNECT:
13432028 case BPF_CGROUP_INET6_CONNECT:
2029
+ case BPF_CGROUP_INET4_GETPEERNAME:
2030
+ case BPF_CGROUP_INET6_GETPEERNAME:
2031
+ case BPF_CGROUP_INET4_GETSOCKNAME:
2032
+ case BPF_CGROUP_INET6_GETSOCKNAME:
13442033 case BPF_CGROUP_UDP4_SENDMSG:
13452034 case BPF_CGROUP_UDP6_SENDMSG:
13462035 case BPF_CGROUP_UDP4_RECVMSG:
....@@ -1349,15 +2038,88 @@
13492038 default:
13502039 return -EINVAL;
13512040 }
2041
+ case BPF_PROG_TYPE_CGROUP_SKB:
2042
+ switch (expected_attach_type) {
2043
+ case BPF_CGROUP_INET_INGRESS:
2044
+ case BPF_CGROUP_INET_EGRESS:
2045
+ return 0;
2046
+ default:
2047
+ return -EINVAL;
2048
+ }
2049
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2050
+ switch (expected_attach_type) {
2051
+ case BPF_CGROUP_SETSOCKOPT:
2052
+ case BPF_CGROUP_GETSOCKOPT:
2053
+ return 0;
2054
+ default:
2055
+ return -EINVAL;
2056
+ }
2057
+ case BPF_PROG_TYPE_SK_LOOKUP:
2058
+ if (expected_attach_type == BPF_SK_LOOKUP)
2059
+ return 0;
2060
+ return -EINVAL;
2061
+ case BPF_PROG_TYPE_EXT:
2062
+ if (expected_attach_type)
2063
+ return -EINVAL;
2064
+ fallthrough;
13522065 default:
13532066 return 0;
13542067 }
13552068 }
13562069
1357
-/* last field in 'union bpf_attr' used by this command */
1358
-#define BPF_PROG_LOAD_LAST_FIELD expected_attach_type
2070
+static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
2071
+{
2072
+ switch (prog_type) {
2073
+ case BPF_PROG_TYPE_SCHED_CLS:
2074
+ case BPF_PROG_TYPE_SCHED_ACT:
2075
+ case BPF_PROG_TYPE_XDP:
2076
+ case BPF_PROG_TYPE_LWT_IN:
2077
+ case BPF_PROG_TYPE_LWT_OUT:
2078
+ case BPF_PROG_TYPE_LWT_XMIT:
2079
+ case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2080
+ case BPF_PROG_TYPE_SK_SKB:
2081
+ case BPF_PROG_TYPE_SK_MSG:
2082
+ case BPF_PROG_TYPE_LIRC_MODE2:
2083
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
2084
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
2085
+ case BPF_PROG_TYPE_CGROUP_SOCK:
2086
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2087
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2088
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
2089
+ case BPF_PROG_TYPE_SOCK_OPS:
2090
+ case BPF_PROG_TYPE_EXT: /* extends any prog */
2091
+ return true;
2092
+ case BPF_PROG_TYPE_CGROUP_SKB:
2093
+ /* always unpriv */
2094
+ case BPF_PROG_TYPE_SK_REUSEPORT:
2095
+ /* equivalent to SOCKET_FILTER. need CAP_BPF only */
2096
+ default:
2097
+ return false;
2098
+ }
2099
+}
13592100
1360
-static int bpf_prog_load(union bpf_attr *attr)
2101
+static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
2102
+{
2103
+ switch (prog_type) {
2104
+ case BPF_PROG_TYPE_KPROBE:
2105
+ case BPF_PROG_TYPE_TRACEPOINT:
2106
+ case BPF_PROG_TYPE_PERF_EVENT:
2107
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
2108
+ case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2109
+ case BPF_PROG_TYPE_TRACING:
2110
+ case BPF_PROG_TYPE_LSM:
2111
+ case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
2112
+ case BPF_PROG_TYPE_EXT: /* extends any prog */
2113
+ return true;
2114
+ default:
2115
+ return false;
2116
+ }
2117
+}
2118
+
2119
+/* last field in 'union bpf_attr' used by this command */
2120
+#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
2121
+
2122
+static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
13612123 {
13622124 enum bpf_prog_type type = attr->prog_type;
13632125 struct bpf_prog *prog;
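
is_net_admin_prog_type() and is_perfmon_prog_type() split the old all-or-nothing CAP_SYS_ADMIN model into per-type capability classes. A hedged summary of the resulting load-time gate, mirroring the checks added to bpf_prog_load() in the next hunks (bpf_capable() and perfmon_capable() are assumed to mean CAP_BPF-or-CAP_SYS_ADMIN and CAP_PERFMON-or-CAP_SYS_ADMIN respectively, as those helpers are defined elsewhere in the kernel; the function name is invented):

/* Illustrative only: which capabilities a loader needs per program type,
 * as implied by the checks in bpf_prog_load() below.
 */
static bool sketch_may_load(enum bpf_prog_type type)
{
	/* socket filters and cg-skb programs stay loadable without CAP_BPF */
	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
	    type != BPF_PROG_TYPE_CGROUP_SKB && !bpf_capable())
		return false;
	/* networking program types additionally need CAP_NET_ADMIN */
	if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) &&
	    !capable(CAP_SYS_ADMIN))
		return false;
	/* tracing-flavoured program types additionally need CAP_PERFMON */
	if (is_perfmon_prog_type(type) && !perfmon_capable())
		return false;
	return true;
}
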
....@@ -1368,12 +2130,16 @@
13682130 if (CHECK_ATTR(BPF_PROG_LOAD))
13692131 return -EINVAL;
13702132
1371
- if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT))
2133
+ if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
2134
+ BPF_F_ANY_ALIGNMENT |
2135
+ BPF_F_TEST_STATE_FREQ |
2136
+ BPF_F_SLEEPABLE |
2137
+ BPF_F_TEST_RND_HI32))
13722138 return -EINVAL;
13732139
13742140 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
13752141 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
1376
- !capable(CAP_SYS_ADMIN))
2142
+ !bpf_capable())
13772143 return -EPERM;
13782144
13792145 /* copy eBPF program license from user space */
....@@ -1385,20 +2151,23 @@
13852151 /* eBPF programs must be GPL compatible to use GPL-ed functions */
13862152 is_gpl = license_is_gpl_compatible(license);
13872153
1388
- if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
2154
+ if (attr->insn_cnt == 0 ||
2155
+ attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
13892156 return -E2BIG;
1390
-
1391
- if (type == BPF_PROG_TYPE_KPROBE &&
1392
- attr->kern_version != LINUX_VERSION_CODE)
1393
- return -EINVAL;
1394
-
13952157 if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
13962158 type != BPF_PROG_TYPE_CGROUP_SKB &&
1397
- !capable(CAP_SYS_ADMIN))
2159
+ !bpf_capable())
2160
+ return -EPERM;
2161
+
2162
+ if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN))
2163
+ return -EPERM;
2164
+ if (is_perfmon_prog_type(type) && !perfmon_capable())
13982165 return -EPERM;
13992166
14002167 bpf_prog_load_fixup_attach_type(attr);
1401
- if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
2168
+ if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
2169
+ attr->attach_btf_id,
2170
+ attr->attach_prog_fd))
14022171 return -EINVAL;
14032172
14042173 /* plain bpf_prog allocation */
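
For reference, a hedged userspace sketch of a BPF_PROG_LOAD call exercising the attributes validated above: a sleepable fentry program whose attach target is named via attach_btf_id. Field names follow the uapi union bpf_attr referenced by this patch; the instruction buffer, license string, BTF id and helper name are placeholders:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* target_btf_id: BTF id of the traced kernel function (resolved elsewhere). */
static int sketch_load_sleepable_fentry(const struct bpf_insn *insns,
					unsigned int insn_cnt, __u32 target_btf_id)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_TRACING;
	attr.expected_attach_type = BPF_TRACE_FENTRY;
	attr.attach_btf_id = target_btf_id;	/* checked against BTF_MAX_TYPE above */
	attr.prog_flags = BPF_F_SLEEPABLE;	/* ends up in prog->aux->sleepable */
	attr.insns = (unsigned long)insns;
	attr.insn_cnt = insn_cnt;
	attr.license = (unsigned long)"GPL";

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
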
....@@ -1407,8 +2176,20 @@
14072176 return -ENOMEM;
14082177
14092178 prog->expected_attach_type = attr->expected_attach_type;
2179
+ prog->aux->attach_btf_id = attr->attach_btf_id;
2180
+ if (attr->attach_prog_fd) {
2181
+ struct bpf_prog *dst_prog;
2182
+
2183
+ dst_prog = bpf_prog_get(attr->attach_prog_fd);
2184
+ if (IS_ERR(dst_prog)) {
2185
+ err = PTR_ERR(dst_prog);
2186
+ goto free_prog_nouncharge;
2187
+ }
2188
+ prog->aux->dst_prog = dst_prog;
2189
+ }
14102190
14112191 prog->aux->offload_requested = !!attr->prog_ifindex;
2192
+ prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
14122193
14132194 err = security_bpf_prog_alloc(prog->aux);
14142195 if (err)
....@@ -1428,7 +2209,7 @@
14282209 prog->orig_prog = NULL;
14292210 prog->jited = 0;
14302211
1431
- atomic_set(&prog->aux->refcnt, 1);
2212
+ atomic64_set(&prog->aux->refcnt, 1);
14322213 prog->gpl_compatible = is_gpl ? 1 : 0;
14332214
14342215 if (bpf_prog_is_dev_bound(prog->aux)) {
....@@ -1442,13 +2223,14 @@
14422223 if (err < 0)
14432224 goto free_prog;
14442225
1445
- prog->aux->load_time = ktime_get_boot_ns();
1446
- err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
1447
- if (err)
2226
+ prog->aux->load_time = ktime_get_boottime_ns();
2227
+ err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
2228
+ sizeof(attr->prog_name));
2229
+ if (err < 0)
14482230 goto free_prog;
14492231
14502232 /* run eBPF verifier */
1451
- err = bpf_check(&prog, attr);
2233
+ err = bpf_check(&prog, attr, uattr);
14522234 if (err < 0)
14532235 goto free_used_maps;
14542236
....@@ -1475,6 +2257,8 @@
14752257 * be using bpf_prog_put() given the program is exposed.
14762258 */
14772259 bpf_prog_kallsyms_add(prog);
2260
+ perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
2261
+ bpf_audit_prog(prog, BPF_AUDIT_LOAD);
14782262
14792263 err = bpf_prog_new_fd(prog);
14802264 if (err < 0)
....@@ -1482,8 +2266,12 @@
14822266 return err;
14832267
14842268 free_used_maps:
1485
- bpf_prog_kallsyms_del_subprogs(prog);
1486
- free_used_maps(prog->aux);
2269
+ /* In case we have subprogs, we need to wait for a grace
2270
+ * period before we can tear down JIT memory since symbols
2271
+ * are already exposed under kallsyms.
2272
+ */
2273
+ __bpf_prog_put_noref(prog, prog->aux->func_cnt);
2274
+ return err;
14872275 free_prog:
14882276 bpf_prog_uncharge_memlock(prog);
14892277 free_prog_sec:
....@@ -1513,78 +2301,610 @@
15132301 attr->file_flags);
15142302 }
15152303
1516
-struct bpf_raw_tracepoint {
1517
- struct bpf_raw_event_map *btp;
1518
- struct bpf_prog *prog;
1519
-};
1520
-
1521
-static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
2304
+void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
2305
+ const struct bpf_link_ops *ops, struct bpf_prog *prog)
15222306 {
1523
- struct bpf_raw_tracepoint *raw_tp = filp->private_data;
2307
+ atomic64_set(&link->refcnt, 1);
2308
+ link->type = type;
2309
+ link->id = 0;
2310
+ link->ops = ops;
2311
+ link->prog = prog;
2312
+}
15242313
1525
- if (raw_tp->prog) {
1526
- bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
1527
- bpf_prog_put(raw_tp->prog);
2314
+static void bpf_link_free_id(int id)
2315
+{
2316
+ if (!id)
2317
+ return;
2318
+
2319
+ spin_lock_bh(&link_idr_lock);
2320
+ idr_remove(&link_idr, id);
2321
+ spin_unlock_bh(&link_idr_lock);
2322
+}
2323
+
2324
+/* Clean up bpf_link and corresponding anon_inode file and FD. After
2325
+ * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
2326
+ * anon_inode's release() call. This helper marks bpf_link as
2327
+ * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
2328
+ * is not decremented, it's the responsibility of the calling code that failed
2329
+ * to complete bpf_link initialization.
2330
+ */
2331
+void bpf_link_cleanup(struct bpf_link_primer *primer)
2332
+{
2333
+ primer->link->prog = NULL;
2334
+ bpf_link_free_id(primer->id);
2335
+ fput(primer->file);
2336
+ put_unused_fd(primer->fd);
2337
+}
2338
+
2339
+void bpf_link_inc(struct bpf_link *link)
2340
+{
2341
+ atomic64_inc(&link->refcnt);
2342
+}
2343
+
2344
+/* bpf_link_free is guaranteed to be called from process context */
2345
+static void bpf_link_free(struct bpf_link *link)
2346
+{
2347
+ bpf_link_free_id(link->id);
2348
+ if (link->prog) {
2349
+ /* detach BPF program, clean up used resources */
2350
+ link->ops->release(link);
2351
+ bpf_prog_put(link->prog);
15282352 }
1529
- kfree(raw_tp);
2353
+ /* free bpf_link and its containing memory */
2354
+ link->ops->dealloc(link);
2355
+}
2356
+
2357
+static void bpf_link_put_deferred(struct work_struct *work)
2358
+{
2359
+ struct bpf_link *link = container_of(work, struct bpf_link, work);
2360
+
2361
+ bpf_link_free(link);
2362
+}
2363
+
2364
+/* bpf_link_put can be called from atomic context, but ensures that resources
2365
+ * are freed from process context
2366
+ */
2367
+void bpf_link_put(struct bpf_link *link)
2368
+{
2369
+ if (!atomic64_dec_and_test(&link->refcnt))
2370
+ return;
2371
+
2372
+ if (in_atomic()) {
2373
+ INIT_WORK(&link->work, bpf_link_put_deferred);
2374
+ schedule_work(&link->work);
2375
+ } else {
2376
+ bpf_link_free(link);
2377
+ }
2378
+}
2379
+
2380
+static int bpf_link_release(struct inode *inode, struct file *filp)
2381
+{
2382
+ struct bpf_link *link = filp->private_data;
2383
+
2384
+ bpf_link_put(link);
15302385 return 0;
15312386 }
15322387
1533
-static const struct file_operations bpf_raw_tp_fops = {
1534
- .release = bpf_raw_tracepoint_release,
2388
+#ifdef CONFIG_PROC_FS
2389
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
2390
+#define BPF_MAP_TYPE(_id, _ops)
2391
+#define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
2392
+static const char *bpf_link_type_strs[] = {
2393
+ [BPF_LINK_TYPE_UNSPEC] = "<invalid>",
2394
+#include <linux/bpf_types.h>
2395
+};
2396
+#undef BPF_PROG_TYPE
2397
+#undef BPF_MAP_TYPE
2398
+#undef BPF_LINK_TYPE
2399
+
2400
+static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
2401
+{
2402
+ const struct bpf_link *link = filp->private_data;
2403
+ const struct bpf_prog *prog = link->prog;
2404
+ char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
2405
+
2406
+ bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
2407
+ seq_printf(m,
2408
+ "link_type:\t%s\n"
2409
+ "link_id:\t%u\n"
2410
+ "prog_tag:\t%s\n"
2411
+ "prog_id:\t%u\n",
2412
+ bpf_link_type_strs[link->type],
2413
+ link->id,
2414
+ prog_tag,
2415
+ prog->aux->id);
2416
+ if (link->ops->show_fdinfo)
2417
+ link->ops->show_fdinfo(link, m);
2418
+}
2419
+#endif
2420
+
2421
+static const struct file_operations bpf_link_fops = {
2422
+#ifdef CONFIG_PROC_FS
2423
+ .show_fdinfo = bpf_link_show_fdinfo,
2424
+#endif
2425
+ .release = bpf_link_release,
15352426 .read = bpf_dummy_read,
15362427 .write = bpf_dummy_write,
2428
+};
2429
+
2430
+static int bpf_link_alloc_id(struct bpf_link *link)
2431
+{
2432
+ int id;
2433
+
2434
+ idr_preload(GFP_KERNEL);
2435
+ spin_lock_bh(&link_idr_lock);
2436
+ id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
2437
+ spin_unlock_bh(&link_idr_lock);
2438
+ idr_preload_end();
2439
+
2440
+ return id;
2441
+}
2442
+
2443
+/* Prepare bpf_link to be exposed to user-space by allocating anon_inode file,
2444
+ * reserving unused FD and allocating ID from link_idr. This is to be paired
2445
+ * with bpf_link_settle() to install FD and ID and expose bpf_link to
2446
+ * user-space, if bpf_link is successfully attached. If not, bpf_link and
2447
+ * pre-allocated resources are to be freed with bpf_link_cleanup() call. All the
2448
+ * transient state is passed around in struct bpf_link_primer.
2449
+ * This is the preferred way to create and initialize bpf_link, especially when
2450
+ * there are complicated and expensive operations in between creating bpf_link
2451
+ * itself and attaching it to a BPF hook. By using bpf_link_prime() and
2452
+ * bpf_link_settle() kernel code using bpf_link doesn't have to perform
2453
+ * expensive (and potentially failing) rollback operations in the rare case
2454
+ * that file, FD, or ID can't be allocated.
2455
+ */
2456
+int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
2457
+{
2458
+ struct file *file;
2459
+ int fd, id;
2460
+
2461
+ fd = get_unused_fd_flags(O_CLOEXEC);
2462
+ if (fd < 0)
2463
+ return fd;
2464
+
2465
+
2466
+ id = bpf_link_alloc_id(link);
2467
+ if (id < 0) {
2468
+ put_unused_fd(fd);
2469
+ return id;
2470
+ }
2471
+
2472
+ file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
2473
+ if (IS_ERR(file)) {
2474
+ bpf_link_free_id(id);
2475
+ put_unused_fd(fd);
2476
+ return PTR_ERR(file);
2477
+ }
2478
+
2479
+ primer->link = link;
2480
+ primer->file = file;
2481
+ primer->fd = fd;
2482
+ primer->id = id;
2483
+ return 0;
2484
+}
2485
+
2486
+int bpf_link_settle(struct bpf_link_primer *primer)
2487
+{
2488
+ /* make bpf_link fetchable by ID */
2489
+ spin_lock_bh(&link_idr_lock);
2490
+ primer->link->id = primer->id;
2491
+ spin_unlock_bh(&link_idr_lock);
2492
+ /* make bpf_link fetchable by FD */
2493
+ fd_install(primer->fd, primer->file);
2494
+ /* pass through installed FD */
2495
+ return primer->fd;
2496
+}
2497
+
2498
+int bpf_link_new_fd(struct bpf_link *link)
2499
+{
2500
+ return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
2501
+}
2502
+
2503
+struct bpf_link *bpf_link_get_from_fd(u32 ufd)
2504
+{
2505
+ struct fd f = fdget(ufd);
2506
+ struct bpf_link *link;
2507
+
2508
+ if (!f.file)
2509
+ return ERR_PTR(-EBADF);
2510
+ if (f.file->f_op != &bpf_link_fops) {
2511
+ fdput(f);
2512
+ return ERR_PTR(-EINVAL);
2513
+ }
2514
+
2515
+ link = f.file->private_data;
2516
+ bpf_link_inc(link);
2517
+ fdput(f);
2518
+
2519
+ return link;
2520
+}
2521
+
2522
+struct bpf_tracing_link {
2523
+ struct bpf_link link;
2524
+ enum bpf_attach_type attach_type;
2525
+ struct bpf_trampoline *trampoline;
2526
+ struct bpf_prog *tgt_prog;
2527
+};
2528
+
2529
+static void bpf_tracing_link_release(struct bpf_link *link)
2530
+{
2531
+ struct bpf_tracing_link *tr_link =
2532
+ container_of(link, struct bpf_tracing_link, link);
2533
+
2534
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog,
2535
+ tr_link->trampoline));
2536
+
2537
+ bpf_trampoline_put(tr_link->trampoline);
2538
+
2539
+ /* tgt_prog is NULL if target is a kernel function */
2540
+ if (tr_link->tgt_prog)
2541
+ bpf_prog_put(tr_link->tgt_prog);
2542
+}
2543
+
2544
+static void bpf_tracing_link_dealloc(struct bpf_link *link)
2545
+{
2546
+ struct bpf_tracing_link *tr_link =
2547
+ container_of(link, struct bpf_tracing_link, link);
2548
+
2549
+ kfree(tr_link);
2550
+}
2551
+
2552
+static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
2553
+ struct seq_file *seq)
2554
+{
2555
+ struct bpf_tracing_link *tr_link =
2556
+ container_of(link, struct bpf_tracing_link, link);
2557
+
2558
+ seq_printf(seq,
2559
+ "attach_type:\t%d\n",
2560
+ tr_link->attach_type);
2561
+}
2562
+
2563
+static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
2564
+ struct bpf_link_info *info)
2565
+{
2566
+ struct bpf_tracing_link *tr_link =
2567
+ container_of(link, struct bpf_tracing_link, link);
2568
+
2569
+ info->tracing.attach_type = tr_link->attach_type;
2570
+
2571
+ return 0;
2572
+}
2573
+
2574
+static const struct bpf_link_ops bpf_tracing_link_lops = {
2575
+ .release = bpf_tracing_link_release,
2576
+ .dealloc = bpf_tracing_link_dealloc,
2577
+ .show_fdinfo = bpf_tracing_link_show_fdinfo,
2578
+ .fill_link_info = bpf_tracing_link_fill_link_info,
2579
+};
2580
+
2581
+static int bpf_tracing_prog_attach(struct bpf_prog *prog,
2582
+ int tgt_prog_fd,
2583
+ u32 btf_id)
2584
+{
2585
+ struct bpf_link_primer link_primer;
2586
+ struct bpf_prog *tgt_prog = NULL;
2587
+ struct bpf_trampoline *tr = NULL;
2588
+ struct bpf_tracing_link *link;
2589
+ u64 key = 0;
2590
+ int err;
2591
+
2592
+ switch (prog->type) {
2593
+ case BPF_PROG_TYPE_TRACING:
2594
+ if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
2595
+ prog->expected_attach_type != BPF_TRACE_FEXIT &&
2596
+ prog->expected_attach_type != BPF_MODIFY_RETURN) {
2597
+ err = -EINVAL;
2598
+ goto out_put_prog;
2599
+ }
2600
+ break;
2601
+ case BPF_PROG_TYPE_EXT:
2602
+ if (prog->expected_attach_type != 0) {
2603
+ err = -EINVAL;
2604
+ goto out_put_prog;
2605
+ }
2606
+ break;
2607
+ case BPF_PROG_TYPE_LSM:
2608
+ if (prog->expected_attach_type != BPF_LSM_MAC) {
2609
+ err = -EINVAL;
2610
+ goto out_put_prog;
2611
+ }
2612
+ break;
2613
+ default:
2614
+ err = -EINVAL;
2615
+ goto out_put_prog;
2616
+ }
2617
+
2618
+ if (!!tgt_prog_fd != !!btf_id) {
2619
+ err = -EINVAL;
2620
+ goto out_put_prog;
2621
+ }
2622
+
2623
+ if (tgt_prog_fd) {
2624
+ /* For now we only allow new targets for BPF_PROG_TYPE_EXT */
2625
+ if (prog->type != BPF_PROG_TYPE_EXT) {
2626
+ err = -EINVAL;
2627
+ goto out_put_prog;
2628
+ }
2629
+
2630
+ tgt_prog = bpf_prog_get(tgt_prog_fd);
2631
+ if (IS_ERR(tgt_prog)) {
2632
+ err = PTR_ERR(tgt_prog);
2633
+ tgt_prog = NULL;
2634
+ goto out_put_prog;
2635
+ }
2636
+
2637
+ key = bpf_trampoline_compute_key(tgt_prog, btf_id);
2638
+ }
2639
+
2640
+ link = kzalloc(sizeof(*link), GFP_USER);
2641
+ if (!link) {
2642
+ err = -ENOMEM;
2643
+ goto out_put_prog;
2644
+ }
2645
+ bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING,
2646
+ &bpf_tracing_link_lops, prog);
2647
+ link->attach_type = prog->expected_attach_type;
2648
+
2649
+ mutex_lock(&prog->aux->dst_mutex);
2650
+
2651
+ /* There are a few possible cases here:
2652
+ *
2653
+ * - if prog->aux->dst_trampoline is set, the program was just loaded
2654
+ * and not yet attached to anything, so we can use the values stored
2655
+ * in prog->aux
2656
+ *
2657
+ * - if prog->aux->dst_trampoline is NULL, the program has already been
2658
+ * attached to a target and its initial target was cleared (below)
2659
+ *
2660
+ * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
2661
+ * target_btf_id using the link_create API.
2662
+ *
2663
+ * - if tgt_prog == NULL, this function was called using the old
2664
+ * raw_tracepoint_open API, and we need a target from prog->aux
2665
+ *
2666
+ * The combination of no saved target in prog->aux, and no target
2667
+ * specified on load is illegal, and we reject that here.
2668
+ */
2669
+ if (!prog->aux->dst_trampoline && !tgt_prog) {
2670
+ err = -ENOENT;
2671
+ goto out_unlock;
2672
+ }
2673
+
2674
+ if (!prog->aux->dst_trampoline ||
2675
+ (key && key != prog->aux->dst_trampoline->key)) {
2676
+ /* If there is no saved target, or the specified target is
2677
+ * different from the destination specified at load time, we
2678
+ * need a new trampoline and a check for compatibility
2679
+ */
2680
+ struct bpf_attach_target_info tgt_info = {};
2681
+
2682
+ err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
2683
+ &tgt_info);
2684
+ if (err)
2685
+ goto out_unlock;
2686
+
2687
+ tr = bpf_trampoline_get(key, &tgt_info);
2688
+ if (!tr) {
2689
+ err = -ENOMEM;
2690
+ goto out_unlock;
2691
+ }
2692
+ } else {
2693
+ /* The caller didn't specify a target, or the target was the
2694
+ * same as the destination supplied during program load. This
2695
+ * means we can reuse the trampoline and reference from program
2696
+ * load time, and there is no need to allocate a new one. This
2697
+ * can only happen once for any program, as the saved values in
2698
+ * prog->aux are cleared below.
2699
+ */
2700
+ tr = prog->aux->dst_trampoline;
2701
+ tgt_prog = prog->aux->dst_prog;
2702
+ }
2703
+
2704
+ err = bpf_link_prime(&link->link, &link_primer);
2705
+ if (err)
2706
+ goto out_unlock;
2707
+
2708
+ err = bpf_trampoline_link_prog(prog, tr);
2709
+ if (err) {
2710
+ bpf_link_cleanup(&link_primer);
2711
+ link = NULL;
2712
+ goto out_unlock;
2713
+ }
2714
+
2715
+ link->tgt_prog = tgt_prog;
2716
+ link->trampoline = tr;
2717
+
2718
+ /* Always clear the trampoline and target prog from prog->aux to make
2719
+ * sure the original attach destination is not kept alive after a
2720
+ * program is (re-)attached to another target.
2721
+ */
2722
+ if (prog->aux->dst_prog &&
2723
+ (tgt_prog_fd || tr != prog->aux->dst_trampoline))
2724
+ /* got extra prog ref from syscall, or attaching to different prog */
2725
+ bpf_prog_put(prog->aux->dst_prog);
2726
+ if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
2727
+ /* we allocated a new trampoline, so free the old one */
2728
+ bpf_trampoline_put(prog->aux->dst_trampoline);
2729
+
2730
+ prog->aux->dst_prog = NULL;
2731
+ prog->aux->dst_trampoline = NULL;
2732
+ mutex_unlock(&prog->aux->dst_mutex);
2733
+
2734
+ return bpf_link_settle(&link_primer);
2735
+out_unlock:
2736
+ if (tr && tr != prog->aux->dst_trampoline)
2737
+ bpf_trampoline_put(tr);
2738
+ mutex_unlock(&prog->aux->dst_mutex);
2739
+ kfree(link);
2740
+out_put_prog:
2741
+ if (tgt_prog_fd && tgt_prog)
2742
+ bpf_prog_put(tgt_prog);
2743
+ return err;
2744
+}
2745
+
2746
+struct bpf_raw_tp_link {
2747
+ struct bpf_link link;
2748
+ struct bpf_raw_event_map *btp;
2749
+};
2750
+
2751
+static void bpf_raw_tp_link_release(struct bpf_link *link)
2752
+{
2753
+ struct bpf_raw_tp_link *raw_tp =
2754
+ container_of(link, struct bpf_raw_tp_link, link);
2755
+
2756
+ bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
2757
+ bpf_put_raw_tracepoint(raw_tp->btp);
2758
+}
2759
+
2760
+static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
2761
+{
2762
+ struct bpf_raw_tp_link *raw_tp =
2763
+ container_of(link, struct bpf_raw_tp_link, link);
2764
+
2765
+ kfree(raw_tp);
2766
+}
2767
+
2768
+static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link,
2769
+ struct seq_file *seq)
2770
+{
2771
+ struct bpf_raw_tp_link *raw_tp_link =
2772
+ container_of(link, struct bpf_raw_tp_link, link);
2773
+
2774
+ seq_printf(seq,
2775
+ "tp_name:\t%s\n",
2776
+ raw_tp_link->btp->tp->name);
2777
+}
2778
+
2779
+static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
2780
+ struct bpf_link_info *info)
2781
+{
2782
+ struct bpf_raw_tp_link *raw_tp_link =
2783
+ container_of(link, struct bpf_raw_tp_link, link);
2784
+ char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name);
2785
+ const char *tp_name = raw_tp_link->btp->tp->name;
2786
+ u32 ulen = info->raw_tracepoint.tp_name_len;
2787
+ size_t tp_len = strlen(tp_name);
2788
+
2789
+ if (!ulen ^ !ubuf)
2790
+ return -EINVAL;
2791
+
2792
+ info->raw_tracepoint.tp_name_len = tp_len + 1;
2793
+
2794
+ if (!ubuf)
2795
+ return 0;
2796
+
2797
+ if (ulen >= tp_len + 1) {
2798
+ if (copy_to_user(ubuf, tp_name, tp_len + 1))
2799
+ return -EFAULT;
2800
+ } else {
2801
+ char zero = '\0';
2802
+
2803
+ if (copy_to_user(ubuf, tp_name, ulen - 1))
2804
+ return -EFAULT;
2805
+ if (put_user(zero, ubuf + ulen - 1))
2806
+ return -EFAULT;
2807
+ return -ENOSPC;
2808
+ }
2809
+
2810
+ return 0;
2811
+}
2812
+
2813
+static const struct bpf_link_ops bpf_raw_tp_link_lops = {
2814
+ .release = bpf_raw_tp_link_release,
2815
+ .dealloc = bpf_raw_tp_link_dealloc,
2816
+ .show_fdinfo = bpf_raw_tp_link_show_fdinfo,
2817
+ .fill_link_info = bpf_raw_tp_link_fill_link_info,
15372818 };
15382819
15392820 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
15402821
15412822 static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
15422823 {
1543
- struct bpf_raw_tracepoint *raw_tp;
2824
+ struct bpf_link_primer link_primer;
2825
+ struct bpf_raw_tp_link *link;
15442826 struct bpf_raw_event_map *btp;
15452827 struct bpf_prog *prog;
1546
- char tp_name[128];
1547
- int tp_fd, err;
2828
+ const char *tp_name;
2829
+ char buf[128];
2830
+ int err;
15482831
1549
- if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
1550
- sizeof(tp_name) - 1) < 0)
1551
- return -EFAULT;
1552
- tp_name[sizeof(tp_name) - 1] = 0;
2832
+ if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
2833
+ return -EINVAL;
15532834
1554
- btp = bpf_find_raw_tracepoint(tp_name);
1555
- if (!btp)
1556
- return -ENOENT;
2835
+ prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
2836
+ if (IS_ERR(prog))
2837
+ return PTR_ERR(prog);
15572838
1558
- raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
1559
- if (!raw_tp)
1560
- return -ENOMEM;
1561
- raw_tp->btp = btp;
1562
-
1563
- prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
1564
- BPF_PROG_TYPE_RAW_TRACEPOINT);
1565
- if (IS_ERR(prog)) {
1566
- err = PTR_ERR(prog);
1567
- goto out_free_tp;
1568
- }
1569
-
1570
- err = bpf_probe_register(raw_tp->btp, prog);
1571
- if (err)
2839
+ switch (prog->type) {
2840
+ case BPF_PROG_TYPE_TRACING:
2841
+ case BPF_PROG_TYPE_EXT:
2842
+ case BPF_PROG_TYPE_LSM:
2843
+ if (attr->raw_tracepoint.name) {
2844
+ /* The attach point for this category of programs
2845
+ * should be specified via btf_id during program load.
2846
+ */
2847
+ err = -EINVAL;
2848
+ goto out_put_prog;
2849
+ }
2850
+ if (prog->type == BPF_PROG_TYPE_TRACING &&
2851
+ prog->expected_attach_type == BPF_TRACE_RAW_TP) {
2852
+ tp_name = prog->aux->attach_func_name;
2853
+ break;
2854
+ }
2855
+ err = bpf_tracing_prog_attach(prog, 0, 0);
2856
+ if (err >= 0)
2857
+ return err;
15722858 goto out_put_prog;
1573
-
1574
- raw_tp->prog = prog;
1575
- tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
1576
- O_CLOEXEC);
1577
- if (tp_fd < 0) {
1578
- bpf_probe_unregister(raw_tp->btp, prog);
1579
- err = tp_fd;
2859
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
2860
+ case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2861
+ if (strncpy_from_user(buf,
2862
+ u64_to_user_ptr(attr->raw_tracepoint.name),
2863
+ sizeof(buf) - 1) < 0) {
2864
+ err = -EFAULT;
2865
+ goto out_put_prog;
2866
+ }
2867
+ buf[sizeof(buf) - 1] = 0;
2868
+ tp_name = buf;
2869
+ break;
2870
+ default:
2871
+ err = -EINVAL;
15802872 goto out_put_prog;
15812873 }
1582
- return tp_fd;
15832874
2875
+ btp = bpf_get_raw_tracepoint(tp_name);
2876
+ if (!btp) {
2877
+ err = -ENOENT;
2878
+ goto out_put_prog;
2879
+ }
2880
+
2881
+ link = kzalloc(sizeof(*link), GFP_USER);
2882
+ if (!link) {
2883
+ err = -ENOMEM;
2884
+ goto out_put_btp;
2885
+ }
2886
+ bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT,
2887
+ &bpf_raw_tp_link_lops, prog);
2888
+ link->btp = btp;
2889
+
2890
+ err = bpf_link_prime(&link->link, &link_primer);
2891
+ if (err) {
2892
+ kfree(link);
2893
+ goto out_put_btp;
2894
+ }
2895
+
2896
+ err = bpf_probe_register(link->btp, prog);
2897
+ if (err) {
2898
+ bpf_link_cleanup(&link_primer);
2899
+ goto out_put_btp;
2900
+ }
2901
+
2902
+ return bpf_link_settle(&link_primer);
2903
+
2904
+out_put_btp:
2905
+ bpf_put_raw_tracepoint(btp);
15842906 out_put_prog:
15852907 bpf_prog_put(prog);
1586
-out_free_tp:
1587
- kfree(raw_tp);
15882908 return err;
15892909 }
15902910
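
The primer API above is the pattern every link type is expected to follow, as the raw tracepoint conversion in this hunk shows. A hedged kernel-side sketch of a hypothetical link type; bpf_foo_link, attach_foo_hook() and the release/dealloc callbacks are invented for illustration, and only the bpf_link_* calls come from this patch:

struct bpf_foo_link {
	struct bpf_link link;
};

static int attach_foo_hook(struct bpf_foo_link *link);	/* hypothetical, may fail */

static void bpf_foo_link_release(struct bpf_link *link)
{
	/* detach link->prog from the hypothetical hook */
}

static void bpf_foo_link_dealloc(struct bpf_link *link)
{
	kfree(container_of(link, struct bpf_foo_link, link));
}

static const struct bpf_link_ops bpf_foo_link_lops = {
	.release = bpf_foo_link_release,
	.dealloc = bpf_foo_link_dealloc,
};

static int bpf_foo_link_attach(struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct bpf_foo_link *link;
	int err;

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link)
		return -ENOMEM;
	bpf_link_init(&link->link, BPF_LINK_TYPE_UNSPEC, &bpf_foo_link_lops, prog);

	err = bpf_link_prime(&link->link, &link_primer);	/* reserve FD, ID, file */
	if (err) {
		kfree(link);
		return err;
	}

	err = attach_foo_hook(link);
	if (err) {
		bpf_link_cleanup(&link_primer);	/* drops FD/ID/file, leaves prog refcnt alone */
		return err;
	}

	return bpf_link_settle(&link_primer);	/* install and return the FD */
}
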
....@@ -1594,16 +2914,81 @@
15942914 switch (prog->type) {
15952915 case BPF_PROG_TYPE_CGROUP_SOCK:
15962916 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2917
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2918
+ case BPF_PROG_TYPE_SK_LOOKUP:
15972919 return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
2920
+ case BPF_PROG_TYPE_CGROUP_SKB:
2921
+ if (!capable(CAP_NET_ADMIN))
2922
+ /* cg-skb progs can be loaded by unpriv user.
2923
+ * check permissions at attach time.
2924
+ */
2925
+ return -EPERM;
2926
+ return prog->enforce_expected_attach_type &&
2927
+ prog->expected_attach_type != attach_type ?
2928
+ -EINVAL : 0;
15982929 default:
15992930 return 0;
16002931 }
16012932 }
16022933
1603
-#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
2934
+static enum bpf_prog_type
2935
+attach_type_to_prog_type(enum bpf_attach_type attach_type)
2936
+{
2937
+ switch (attach_type) {
2938
+ case BPF_CGROUP_INET_INGRESS:
2939
+ case BPF_CGROUP_INET_EGRESS:
2940
+ return BPF_PROG_TYPE_CGROUP_SKB;
2941
+ case BPF_CGROUP_INET_SOCK_CREATE:
2942
+ case BPF_CGROUP_INET_SOCK_RELEASE:
2943
+ case BPF_CGROUP_INET4_POST_BIND:
2944
+ case BPF_CGROUP_INET6_POST_BIND:
2945
+ return BPF_PROG_TYPE_CGROUP_SOCK;
2946
+ case BPF_CGROUP_INET4_BIND:
2947
+ case BPF_CGROUP_INET6_BIND:
2948
+ case BPF_CGROUP_INET4_CONNECT:
2949
+ case BPF_CGROUP_INET6_CONNECT:
2950
+ case BPF_CGROUP_INET4_GETPEERNAME:
2951
+ case BPF_CGROUP_INET6_GETPEERNAME:
2952
+ case BPF_CGROUP_INET4_GETSOCKNAME:
2953
+ case BPF_CGROUP_INET6_GETSOCKNAME:
2954
+ case BPF_CGROUP_UDP4_SENDMSG:
2955
+ case BPF_CGROUP_UDP6_SENDMSG:
2956
+ case BPF_CGROUP_UDP4_RECVMSG:
2957
+ case BPF_CGROUP_UDP6_RECVMSG:
2958
+ return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
2959
+ case BPF_CGROUP_SOCK_OPS:
2960
+ return BPF_PROG_TYPE_SOCK_OPS;
2961
+ case BPF_CGROUP_DEVICE:
2962
+ return BPF_PROG_TYPE_CGROUP_DEVICE;
2963
+ case BPF_SK_MSG_VERDICT:
2964
+ return BPF_PROG_TYPE_SK_MSG;
2965
+ case BPF_SK_SKB_STREAM_PARSER:
2966
+ case BPF_SK_SKB_STREAM_VERDICT:
2967
+ return BPF_PROG_TYPE_SK_SKB;
2968
+ case BPF_LIRC_MODE2:
2969
+ return BPF_PROG_TYPE_LIRC_MODE2;
2970
+ case BPF_FLOW_DISSECTOR:
2971
+ return BPF_PROG_TYPE_FLOW_DISSECTOR;
2972
+ case BPF_CGROUP_SYSCTL:
2973
+ return BPF_PROG_TYPE_CGROUP_SYSCTL;
2974
+ case BPF_CGROUP_GETSOCKOPT:
2975
+ case BPF_CGROUP_SETSOCKOPT:
2976
+ return BPF_PROG_TYPE_CGROUP_SOCKOPT;
2977
+ case BPF_TRACE_ITER:
2978
+ return BPF_PROG_TYPE_TRACING;
2979
+ case BPF_SK_LOOKUP:
2980
+ return BPF_PROG_TYPE_SK_LOOKUP;
2981
+ case BPF_XDP:
2982
+ return BPF_PROG_TYPE_XDP;
2983
+ default:
2984
+ return BPF_PROG_TYPE_UNSPEC;
2985
+ }
2986
+}
2987
+
2988
+#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
16042989
16052990 #define BPF_F_ATTACH_MASK \
1606
- (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
2991
+ (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
16072992
16082993 static int bpf_prog_attach(const union bpf_attr *attr)
16092994 {
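
attach_type_to_prog_type() replaces the two hand-rolled switches that bpf_prog_attach() and bpf_prog_detach() used to carry, and the attach mask now accepts BPF_F_REPLACE together with replace_bpf_fd. A hedged userspace sketch of an atomic program replacement on a cgroup hook (helper name and error handling are illustrative):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Swap old_prog_fd for new_prog_fd on the cgroup egress hook in a single
 * BPF_PROG_ATTACH call instead of a detach/attach pair.
 */
static int sketch_replace_cgroup_egress(int cgroup_fd, int new_prog_fd, int old_prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd = cgroup_fd;
	attr.attach_bpf_fd = new_prog_fd;
	attr.attach_type = BPF_CGROUP_INET_EGRESS;	/* maps to BPF_PROG_TYPE_CGROUP_SKB */
	attr.attach_flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
	attr.replace_bpf_fd = old_prog_fd;

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}
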
....@@ -1611,54 +2996,15 @@
16112996 struct bpf_prog *prog;
16122997 int ret;
16132998
1614
- if (!capable(CAP_NET_ADMIN))
1615
- return -EPERM;
1616
-
16172999 if (CHECK_ATTR(BPF_PROG_ATTACH))
16183000 return -EINVAL;
16193001
16203002 if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
16213003 return -EINVAL;
16223004
1623
- switch (attr->attach_type) {
1624
- case BPF_CGROUP_INET_INGRESS:
1625
- case BPF_CGROUP_INET_EGRESS:
1626
- ptype = BPF_PROG_TYPE_CGROUP_SKB;
1627
- break;
1628
- case BPF_CGROUP_INET_SOCK_CREATE:
1629
- case BPF_CGROUP_INET4_POST_BIND:
1630
- case BPF_CGROUP_INET6_POST_BIND:
1631
- ptype = BPF_PROG_TYPE_CGROUP_SOCK;
1632
- break;
1633
- case BPF_CGROUP_INET4_BIND:
1634
- case BPF_CGROUP_INET6_BIND:
1635
- case BPF_CGROUP_INET4_CONNECT:
1636
- case BPF_CGROUP_INET6_CONNECT:
1637
- case BPF_CGROUP_UDP4_SENDMSG:
1638
- case BPF_CGROUP_UDP6_SENDMSG:
1639
- case BPF_CGROUP_UDP4_RECVMSG:
1640
- case BPF_CGROUP_UDP6_RECVMSG:
1641
- ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
1642
- break;
1643
- case BPF_CGROUP_SOCK_OPS:
1644
- ptype = BPF_PROG_TYPE_SOCK_OPS;
1645
- break;
1646
- case BPF_CGROUP_DEVICE:
1647
- ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
1648
- break;
1649
- case BPF_SK_MSG_VERDICT:
1650
- ptype = BPF_PROG_TYPE_SK_MSG;
1651
- break;
1652
- case BPF_SK_SKB_STREAM_PARSER:
1653
- case BPF_SK_SKB_STREAM_VERDICT:
1654
- ptype = BPF_PROG_TYPE_SK_SKB;
1655
- break;
1656
- case BPF_LIRC_MODE2:
1657
- ptype = BPF_PROG_TYPE_LIRC_MODE2;
1658
- break;
1659
- default:
3005
+ ptype = attach_type_to_prog_type(attr->attach_type);
3006
+ if (ptype == BPF_PROG_TYPE_UNSPEC)
16603007 return -EINVAL;
1661
- }
16623008
16633009 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
16643010 if (IS_ERR(prog))
....@@ -1672,13 +3018,25 @@
16723018 switch (ptype) {
16733019 case BPF_PROG_TYPE_SK_SKB:
16743020 case BPF_PROG_TYPE_SK_MSG:
1675
- ret = sockmap_get_from_fd(attr, ptype, prog);
3021
+ ret = sock_map_get_from_fd(attr, prog);
16763022 break;
16773023 case BPF_PROG_TYPE_LIRC_MODE2:
16783024 ret = lirc_prog_attach(attr, prog);
16793025 break;
1680
- default:
3026
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
3027
+ ret = netns_bpf_prog_attach(attr, prog);
3028
+ break;
3029
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
3030
+ case BPF_PROG_TYPE_CGROUP_SKB:
3031
+ case BPF_PROG_TYPE_CGROUP_SOCK:
3032
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3033
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3034
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
3035
+ case BPF_PROG_TYPE_SOCK_OPS:
16813036 ret = cgroup_bpf_prog_attach(attr, ptype, prog);
3037
+ break;
3038
+ default:
3039
+ ret = -EINVAL;
16823040 }
16833041
16843042 if (ret)
....@@ -1692,50 +3050,30 @@
16923050 {
16933051 enum bpf_prog_type ptype;
16943052
1695
- if (!capable(CAP_NET_ADMIN))
1696
- return -EPERM;
1697
-
16983053 if (CHECK_ATTR(BPF_PROG_DETACH))
16993054 return -EINVAL;
17003055
1701
- switch (attr->attach_type) {
1702
- case BPF_CGROUP_INET_INGRESS:
1703
- case BPF_CGROUP_INET_EGRESS:
1704
- ptype = BPF_PROG_TYPE_CGROUP_SKB;
1705
- break;
1706
- case BPF_CGROUP_INET_SOCK_CREATE:
1707
- case BPF_CGROUP_INET4_POST_BIND:
1708
- case BPF_CGROUP_INET6_POST_BIND:
1709
- ptype = BPF_PROG_TYPE_CGROUP_SOCK;
1710
- break;
1711
- case BPF_CGROUP_INET4_BIND:
1712
- case BPF_CGROUP_INET6_BIND:
1713
- case BPF_CGROUP_INET4_CONNECT:
1714
- case BPF_CGROUP_INET6_CONNECT:
1715
- case BPF_CGROUP_UDP4_SENDMSG:
1716
- case BPF_CGROUP_UDP6_SENDMSG:
1717
- case BPF_CGROUP_UDP4_RECVMSG:
1718
- case BPF_CGROUP_UDP6_RECVMSG:
1719
- ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
1720
- break;
1721
- case BPF_CGROUP_SOCK_OPS:
1722
- ptype = BPF_PROG_TYPE_SOCK_OPS;
1723
- break;
1724
- case BPF_CGROUP_DEVICE:
1725
- ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
1726
- break;
1727
- case BPF_SK_MSG_VERDICT:
1728
- return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
1729
- case BPF_SK_SKB_STREAM_PARSER:
1730
- case BPF_SK_SKB_STREAM_VERDICT:
1731
- return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
1732
- case BPF_LIRC_MODE2:
3056
+ ptype = attach_type_to_prog_type(attr->attach_type);
3057
+
3058
+ switch (ptype) {
3059
+ case BPF_PROG_TYPE_SK_MSG:
3060
+ case BPF_PROG_TYPE_SK_SKB:
3061
+ return sock_map_prog_detach(attr, ptype);
3062
+ case BPF_PROG_TYPE_LIRC_MODE2:
17333063 return lirc_prog_detach(attr);
3064
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
3065
+ return netns_bpf_prog_detach(attr, ptype);
3066
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
3067
+ case BPF_PROG_TYPE_CGROUP_SKB:
3068
+ case BPF_PROG_TYPE_CGROUP_SOCK:
3069
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3070
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3071
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
3072
+ case BPF_PROG_TYPE_SOCK_OPS:
3073
+ return cgroup_bpf_prog_detach(attr, ptype);
17343074 default:
17353075 return -EINVAL;
17363076 }
1737
-
1738
- return cgroup_bpf_prog_detach(attr, ptype);
17393077 }
17403078
17413079 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
....@@ -1754,29 +3092,38 @@
17543092 case BPF_CGROUP_INET_INGRESS:
17553093 case BPF_CGROUP_INET_EGRESS:
17563094 case BPF_CGROUP_INET_SOCK_CREATE:
3095
+ case BPF_CGROUP_INET_SOCK_RELEASE:
17573096 case BPF_CGROUP_INET4_BIND:
17583097 case BPF_CGROUP_INET6_BIND:
17593098 case BPF_CGROUP_INET4_POST_BIND:
17603099 case BPF_CGROUP_INET6_POST_BIND:
17613100 case BPF_CGROUP_INET4_CONNECT:
17623101 case BPF_CGROUP_INET6_CONNECT:
3102
+ case BPF_CGROUP_INET4_GETPEERNAME:
3103
+ case BPF_CGROUP_INET6_GETPEERNAME:
3104
+ case BPF_CGROUP_INET4_GETSOCKNAME:
3105
+ case BPF_CGROUP_INET6_GETSOCKNAME:
17633106 case BPF_CGROUP_UDP4_SENDMSG:
17643107 case BPF_CGROUP_UDP6_SENDMSG:
17653108 case BPF_CGROUP_UDP4_RECVMSG:
17663109 case BPF_CGROUP_UDP6_RECVMSG:
17673110 case BPF_CGROUP_SOCK_OPS:
17683111 case BPF_CGROUP_DEVICE:
1769
- break;
3112
+ case BPF_CGROUP_SYSCTL:
3113
+ case BPF_CGROUP_GETSOCKOPT:
3114
+ case BPF_CGROUP_SETSOCKOPT:
3115
+ return cgroup_bpf_prog_query(attr, uattr);
17703116 case BPF_LIRC_MODE2:
17713117 return lirc_prog_query(attr, uattr);
3118
+ case BPF_FLOW_DISSECTOR:
3119
+ case BPF_SK_LOOKUP:
3120
+ return netns_bpf_prog_query(attr, uattr);
17723121 default:
17733122 return -EINVAL;
17743123 }
1775
-
1776
- return cgroup_bpf_prog_query(attr, uattr);
17773124 }
17783125
1779
-#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
3126
+#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
17803127
17813128 static int bpf_prog_test_run(const union bpf_attr *attr,
17823129 union bpf_attr __user *uattr)
....@@ -1784,9 +3131,15 @@
17843131 struct bpf_prog *prog;
17853132 int ret = -ENOTSUPP;
17863133
1787
- if (!capable(CAP_SYS_ADMIN))
1788
- return -EPERM;
17893134 if (CHECK_ATTR(BPF_PROG_TEST_RUN))
3135
+ return -EINVAL;
3136
+
3137
+ if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
3138
+ (!attr->test.ctx_size_in && attr->test.ctx_in))
3139
+ return -EINVAL;
3140
+
3141
+ if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
3142
+ (!attr->test.ctx_size_out && attr->test.ctx_out))
17903143 return -EINVAL;
17913144
17923145 prog = bpf_prog_get(attr->test.prog_fd);
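
BPF_PROG_TEST_RUN gains an optional context buffer, and the checks above insist that ctx_in/ctx_size_in (and ctx_out/ctx_size_out) are either both set or both zero. A hedged userspace sketch; the context layout depends on the program type and the buffer here is only a placeholder:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sketch_test_run_with_ctx(int prog_fd, void *ctx, __u32 ctx_size)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.test.prog_fd = prog_fd;
	/* pointer and size must be paired, or the kernel returns -EINVAL */
	attr.test.ctx_in = (unsigned long)ctx;
	attr.test.ctx_size_in = ctx_size;
	attr.test.ctx_out = (unsigned long)ctx;
	attr.test.ctx_size_out = ctx_size;

	return syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr));
}
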
....@@ -1828,7 +3181,62 @@
18283181 return err;
18293182 }
18303183
3184
+struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
3185
+{
3186
+ struct bpf_map *map;
3187
+
3188
+ spin_lock_bh(&map_idr_lock);
3189
+again:
3190
+ map = idr_get_next(&map_idr, id);
3191
+ if (map) {
3192
+ map = __bpf_map_inc_not_zero(map, false);
3193
+ if (IS_ERR(map)) {
3194
+ (*id)++;
3195
+ goto again;
3196
+ }
3197
+ }
3198
+ spin_unlock_bh(&map_idr_lock);
3199
+
3200
+ return map;
3201
+}
3202
+
3203
+struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id)
3204
+{
3205
+ struct bpf_prog *prog;
3206
+
3207
+ spin_lock_bh(&prog_idr_lock);
3208
+again:
3209
+ prog = idr_get_next(&prog_idr, id);
3210
+ if (prog) {
3211
+ prog = bpf_prog_inc_not_zero(prog);
3212
+ if (IS_ERR(prog)) {
3213
+ (*id)++;
3214
+ goto again;
3215
+ }
3216
+ }
3217
+ spin_unlock_bh(&prog_idr_lock);
3218
+
3219
+ return prog;
3220
+}
3221
+
18313222 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
3223
+
3224
+struct bpf_prog *bpf_prog_by_id(u32 id)
3225
+{
3226
+ struct bpf_prog *prog;
3227
+
3228
+ if (!id)
3229
+ return ERR_PTR(-ENOENT);
3230
+
3231
+ spin_lock_bh(&prog_idr_lock);
3232
+ prog = idr_find(&prog_idr, id);
3233
+ if (prog)
3234
+ prog = bpf_prog_inc_not_zero(prog);
3235
+ else
3236
+ prog = ERR_PTR(-ENOENT);
3237
+ spin_unlock_bh(&prog_idr_lock);
3238
+ return prog;
3239
+}
18323240
18333241 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
18343242 {
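
bpf_prog_by_id() factors the ID-to-program lookup out of BPF_PROG_GET_FD_BY_ID, and the *_get_curr_or_next() helpers serve iteration. A hedged userspace sketch of walking loaded programs by ID, assuming the standard BPF_PROG_GET_NEXT_ID command (not part of this hunk) and CAP_SYS_ADMIN:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static void sketch_walk_prog_ids(void)
{
	union bpf_attr attr;
	__u32 id = 0;
	int fd;

	for (;;) {
		memset(&attr, 0, sizeof(attr));
		attr.start_id = id;
		if (syscall(__NR_bpf, BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)))
			break;		/* -ENOENT once the last ID is reached */
		id = attr.next_id;

		memset(&attr, 0, sizeof(attr));
		attr.prog_id = id;
		fd = syscall(__NR_bpf, BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
		if (fd < 0)
			continue;	/* the program may have been unloaded meanwhile */
		/* ... inspect the program through the fd ... */
		close(fd);
	}
}
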
....@@ -1842,14 +3250,7 @@
18423250 if (!capable(CAP_SYS_ADMIN))
18433251 return -EPERM;
18443252
1845
- spin_lock_bh(&prog_idr_lock);
1846
- prog = idr_find(&prog_idr, id);
1847
- if (prog)
1848
- prog = bpf_prog_inc_not_zero(prog);
1849
- else
1850
- prog = ERR_PTR(-ENOENT);
1851
- spin_unlock_bh(&prog_idr_lock);
1852
-
3253
+ prog = bpf_prog_by_id(id);
18533254 if (IS_ERR(prog))
18543255 return PTR_ERR(prog);
18553256
....@@ -1883,7 +3284,7 @@
18833284 spin_lock_bh(&map_idr_lock);
18843285 map = idr_find(&map_idr, id);
18853286 if (map)
1886
- map = bpf_map_inc_not_zero(map, true);
3287
+ map = __bpf_map_inc_not_zero(map, true);
18873288 else
18883289 map = ERR_PTR(-ENOENT);
18893290 spin_unlock_bh(&map_idr_lock);
....@@ -1899,14 +3300,31 @@
18993300 }
19003301
19013302 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
1902
- unsigned long addr)
3303
+ unsigned long addr, u32 *off,
3304
+ u32 *type)
19033305 {
3306
+ const struct bpf_map *map;
19043307 int i;
19053308
1906
- for (i = 0; i < prog->aux->used_map_cnt; i++)
1907
- if (prog->aux->used_maps[i] == (void *)addr)
1908
- return prog->aux->used_maps[i];
1909
- return NULL;
3309
+ mutex_lock(&prog->aux->used_maps_mutex);
3310
+ for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
3311
+ map = prog->aux->used_maps[i];
3312
+ if (map == (void *)addr) {
3313
+ *type = BPF_PSEUDO_MAP_FD;
3314
+ goto out;
3315
+ }
3316
+ if (!map->ops->map_direct_value_meta)
3317
+ continue;
3318
+ if (!map->ops->map_direct_value_meta(map, addr, off)) {
3319
+ *type = BPF_PSEUDO_MAP_VALUE;
3320
+ goto out;
3321
+ }
3322
+ }
3323
+ map = NULL;
3324
+
3325
+out:
3326
+ mutex_unlock(&prog->aux->used_maps_mutex);
3327
+ return map;
19103328 }
19113329
19123330 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
....@@ -1914,7 +3332,9 @@
19143332 {
19153333 const struct bpf_map *map;
19163334 struct bpf_insn *insns;
3335
+ u32 off, type;
19173336 u64 imm;
3337
+ u8 code;
19183338 int i;
19193339
19203340 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
....@@ -1923,41 +3343,71 @@
19233343 return insns;
19243344
19253345 for (i = 0; i < prog->len; i++) {
1926
- if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) {
3346
+ code = insns[i].code;
3347
+
3348
+ if (code == (BPF_JMP | BPF_TAIL_CALL)) {
19273349 insns[i].code = BPF_JMP | BPF_CALL;
19283350 insns[i].imm = BPF_FUNC_tail_call;
19293351 /* fall-through */
19303352 }
1931
- if (insns[i].code == (BPF_JMP | BPF_CALL) ||
1932
- insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) {
1933
- if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS))
3353
+ if (code == (BPF_JMP | BPF_CALL) ||
3354
+ code == (BPF_JMP | BPF_CALL_ARGS)) {
3355
+ if (code == (BPF_JMP | BPF_CALL_ARGS))
19343356 insns[i].code = BPF_JMP | BPF_CALL;
19353357 if (!bpf_dump_raw_ok(f_cred))
19363358 insns[i].imm = 0;
19373359 continue;
19383360 }
1939
-
1940
- if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW))
1941
- continue;
1942
-
1943
- imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
1944
- map = bpf_map_from_imm(prog, imm);
1945
- if (map) {
1946
- insns[i].src_reg = BPF_PSEUDO_MAP_FD;
1947
- insns[i].imm = map->id;
1948
- insns[i + 1].imm = 0;
3361
+ if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) {
3362
+ insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM;
19493363 continue;
19503364 }
19513365
1952
- if (!bpf_dump_raw_ok(f_cred) &&
1953
- imm == (unsigned long)prog->aux) {
1954
- insns[i].imm = 0;
1955
- insns[i + 1].imm = 0;
3366
+ if (code != (BPF_LD | BPF_IMM | BPF_DW))
3367
+ continue;
3368
+
3369
+ imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
3370
+ map = bpf_map_from_imm(prog, imm, &off, &type);
3371
+ if (map) {
3372
+ insns[i].src_reg = type;
3373
+ insns[i].imm = map->id;
3374
+ insns[i + 1].imm = off;
19563375 continue;
19573376 }
19583377 }
19593378
19603379 return insns;
3380
+}
3381
+
3382
+static int set_info_rec_size(struct bpf_prog_info *info)
3383
+{
3384
+ /*
3385
+ * Ensure info.*_rec_size is the same as kernel expected size
3386
+ *
3387
+ * or
3388
+ *
3389
+ * Only allow zero *_rec_size if both _rec_size and _cnt are
3390
+ * zero. In this case, the kernel will set the expected
3391
+ * _rec_size back to the info.
3392
+ */
3393
+
3394
+ if ((info->nr_func_info || info->func_info_rec_size) &&
3395
+ info->func_info_rec_size != sizeof(struct bpf_func_info))
3396
+ return -EINVAL;
3397
+
3398
+ if ((info->nr_line_info || info->line_info_rec_size) &&
3399
+ info->line_info_rec_size != sizeof(struct bpf_line_info))
3400
+ return -EINVAL;
3401
+
3402
+ if ((info->nr_jited_line_info || info->jited_line_info_rec_size) &&
3403
+ info->jited_line_info_rec_size != sizeof(__u64))
3404
+ return -EINVAL;
3405
+
3406
+ info->func_info_rec_size = sizeof(struct bpf_func_info);
3407
+ info->line_info_rec_size = sizeof(struct bpf_line_info);
3408
+ info->jited_line_info_rec_size = sizeof(__u64);
3409
+
3410
+ return 0;
19613411 }
19623412
19633413 static int bpf_prog_get_info_by_fd(struct file *file,
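
set_info_rec_size() pins the *_rec_size fields to the kernel's record sizes, so callers either pass the matching size or leave both the size and the count at zero and read the values back. A hedged userspace sketch of the usual two-call pattern for func_info (helper name and error handling are illustrative):

#include <linux/bpf.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sketch_fetch_func_info(int prog_fd)
{
	struct bpf_prog_info info;
	union bpf_attr attr;
	void *buf;
	int err;

	/* first call: sizes and counts left at zero, the kernel fills them in */
	memset(&info, 0, sizeof(info));
	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = prog_fd;
	attr.info.info_len = sizeof(info);
	attr.info.info = (unsigned long)&info;
	err = syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
	if (err || !info.nr_func_info)
		return err;

	buf = calloc(info.nr_func_info, info.func_info_rec_size);
	if (!buf)
		return -1;

	/* second call: nr_func_info and func_info_rec_size kept as reported */
	info.func_info = (unsigned long)buf;
	err = syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
	/* ... consume buf on success ... */
	free(buf);
	return err;
}
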
....@@ -1968,6 +3418,7 @@
19683418 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
19693419 struct bpf_prog_info info;
19703420 u32 info_len = attr->info.info_len;
3421
+ struct bpf_prog_stats stats;
19713422 char __user *uinsns;
19723423 u32 ulen;
19733424 int err;
....@@ -1991,6 +3442,7 @@
19913442 memcpy(info.tag, prog->tag, sizeof(prog->tag));
19923443 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
19933444
3445
+ mutex_lock(&prog->aux->used_maps_mutex);
19943446 ulen = info.nr_map_ids;
19953447 info.nr_map_ids = prog->aux->used_map_cnt;
19963448 ulen = min_t(u32, info.nr_map_ids, ulen);
....@@ -2000,15 +3452,29 @@
20003452
20013453 for (i = 0; i < ulen; i++)
20023454 if (put_user(prog->aux->used_maps[i]->id,
2003
- &user_map_ids[i]))
3455
+ &user_map_ids[i])) {
3456
+ mutex_unlock(&prog->aux->used_maps_mutex);
20043457 return -EFAULT;
3458
+ }
20053459 }
3460
+ mutex_unlock(&prog->aux->used_maps_mutex);
20063461
2007
- if (!capable(CAP_SYS_ADMIN)) {
3462
+ err = set_info_rec_size(&info);
3463
+ if (err)
3464
+ return err;
3465
+
3466
+ bpf_prog_get_stats(prog, &stats);
3467
+ info.run_time_ns = stats.nsecs;
3468
+ info.run_cnt = stats.cnt;
3469
+
3470
+ if (!bpf_capable()) {
20083471 info.jited_prog_len = 0;
20093472 info.xlated_prog_len = 0;
20103473 info.nr_jited_ksyms = 0;
20113474 info.nr_jited_func_lens = 0;
3475
+ info.nr_func_info = 0;
3476
+ info.nr_line_info = 0;
3477
+ info.nr_jited_line_info = 0;
20123478 goto done;
20133479 }
20143480
....@@ -2089,11 +3555,11 @@
20893555 }
20903556
20913557 ulen = info.nr_jited_ksyms;
2092
- info.nr_jited_ksyms = prog->aux->func_cnt;
2093
- if (info.nr_jited_ksyms && ulen) {
3558
+ info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
3559
+ if (ulen) {
20943560 if (bpf_dump_raw_ok(file->f_cred)) {
3561
+ unsigned long ksym_addr;
20953562 u64 __user *user_ksyms;
2096
- ulong ksym_addr;
20973563 u32 i;
20983564
20993565 /* copy the address of the kernel symbol
....@@ -2101,10 +3567,17 @@
21013567 */
21023568 ulen = min_t(u32, info.nr_jited_ksyms, ulen);
21033569 user_ksyms = u64_to_user_ptr(info.jited_ksyms);
2104
- for (i = 0; i < ulen; i++) {
2105
- ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
2106
- ksym_addr &= PAGE_MASK;
2107
- if (put_user((u64) ksym_addr, &user_ksyms[i]))
3570
+ if (prog->aux->func_cnt) {
3571
+ for (i = 0; i < ulen; i++) {
3572
+ ksym_addr = (unsigned long)
3573
+ prog->aux->func[i]->bpf_func;
3574
+ if (put_user((u64) ksym_addr,
3575
+ &user_ksyms[i]))
3576
+ return -EFAULT;
3577
+ }
3578
+ } else {
3579
+ ksym_addr = (unsigned long) prog->bpf_func;
3580
+ if (put_user((u64) ksym_addr, &user_ksyms[0]))
21083581 return -EFAULT;
21093582 }
21103583 } else {
....@@ -2113,8 +3586,8 @@
21133586 }
21143587
21153588 ulen = info.nr_jited_func_lens;
2116
- info.nr_jited_func_lens = prog->aux->func_cnt;
2117
- if (info.nr_jited_func_lens && ulen) {
3589
+ info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
3590
+ if (ulen) {
21183591 if (bpf_dump_raw_ok(file->f_cred)) {
21193592 u32 __user *user_lens;
21203593 u32 func_len, i;
....@@ -2122,13 +3595,91 @@
21223595 /* copy the JITed image lengths for each function */
21233596 ulen = min_t(u32, info.nr_jited_func_lens, ulen);
21243597 user_lens = u64_to_user_ptr(info.jited_func_lens);
2125
- for (i = 0; i < ulen; i++) {
2126
- func_len = prog->aux->func[i]->jited_len;
2127
- if (put_user(func_len, &user_lens[i]))
3598
+ if (prog->aux->func_cnt) {
3599
+ for (i = 0; i < ulen; i++) {
3600
+ func_len =
3601
+ prog->aux->func[i]->jited_len;
3602
+ if (put_user(func_len, &user_lens[i]))
3603
+ return -EFAULT;
3604
+ }
3605
+ } else {
3606
+ func_len = prog->jited_len;
3607
+ if (put_user(func_len, &user_lens[0]))
21283608 return -EFAULT;
21293609 }
21303610 } else {
21313611 info.jited_func_lens = 0;
3612
+ }
3613
+ }
3614
+
3615
+ if (prog->aux->btf)
3616
+ info.btf_id = btf_id(prog->aux->btf);
3617
+
3618
+ ulen = info.nr_func_info;
3619
+ info.nr_func_info = prog->aux->func_info_cnt;
3620
+ if (info.nr_func_info && ulen) {
3621
+ char __user *user_finfo;
3622
+
3623
+ user_finfo = u64_to_user_ptr(info.func_info);
3624
+ ulen = min_t(u32, info.nr_func_info, ulen);
3625
+ if (copy_to_user(user_finfo, prog->aux->func_info,
3626
+ info.func_info_rec_size * ulen))
3627
+ return -EFAULT;
3628
+ }
3629
+
3630
+ ulen = info.nr_line_info;
3631
+ info.nr_line_info = prog->aux->nr_linfo;
3632
+ if (info.nr_line_info && ulen) {
3633
+ __u8 __user *user_linfo;
3634
+
3635
+ user_linfo = u64_to_user_ptr(info.line_info);
3636
+ ulen = min_t(u32, info.nr_line_info, ulen);
3637
+ if (copy_to_user(user_linfo, prog->aux->linfo,
3638
+ info.line_info_rec_size * ulen))
3639
+ return -EFAULT;
3640
+ }
3641
+
3642
+ ulen = info.nr_jited_line_info;
3643
+ if (prog->aux->jited_linfo)
3644
+ info.nr_jited_line_info = prog->aux->nr_linfo;
3645
+ else
3646
+ info.nr_jited_line_info = 0;
3647
+ if (info.nr_jited_line_info && ulen) {
3648
+ if (bpf_dump_raw_ok(file->f_cred)) {
3649
+ __u64 __user *user_linfo;
3650
+ u32 i;
3651
+
3652
+ user_linfo = u64_to_user_ptr(info.jited_line_info);
3653
+ ulen = min_t(u32, info.nr_jited_line_info, ulen);
3654
+ for (i = 0; i < ulen; i++) {
3655
+ if (put_user((__u64)(long)prog->aux->jited_linfo[i],
3656
+ &user_linfo[i]))
3657
+ return -EFAULT;
3658
+ }
3659
+ } else {
3660
+ info.jited_line_info = 0;
3661
+ }
3662
+ }
3663
+
3664
+ ulen = info.nr_prog_tags;
3665
+ info.nr_prog_tags = prog->aux->func_cnt ? : 1;
3666
+ if (ulen) {
3667
+ __u8 __user (*user_prog_tags)[BPF_TAG_SIZE];
3668
+ u32 i;
3669
+
3670
+ user_prog_tags = u64_to_user_ptr(info.prog_tags);
3671
+ ulen = min_t(u32, info.nr_prog_tags, ulen);
3672
+ if (prog->aux->func_cnt) {
3673
+ for (i = 0; i < ulen; i++) {
3674
+ if (copy_to_user(user_prog_tags[i],
3675
+ prog->aux->func[i]->tag,
3676
+ BPF_TAG_SIZE))
3677
+ return -EFAULT;
3678
+ }
3679
+ } else {
3680
+ if (copy_to_user(user_prog_tags[0],
3681
+ prog->tag, BPF_TAG_SIZE))
3682
+ return -EFAULT;
21323683 }
21333684 }
21343685
....@@ -2169,6 +3720,7 @@
21693720 info.btf_key_type_id = map->btf_key_type_id;
21703721 info.btf_value_type_id = map->btf_value_type_id;
21713722 }
3723
+ info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
21723724
21733725 if (bpf_map_is_dev_bound(map)) {
21743726 err = bpf_map_offload_info_fill(&info, map);
....@@ -2199,6 +3751,43 @@
21993751 return btf_get_info_by_fd(btf, attr, uattr);
22003752 }
22013753
3754
+static int bpf_link_get_info_by_fd(struct file *file,
3755
+ struct bpf_link *link,
3756
+ const union bpf_attr *attr,
3757
+ union bpf_attr __user *uattr)
3758
+{
3759
+ struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3760
+ struct bpf_link_info info;
3761
+ u32 info_len = attr->info.info_len;
3762
+ int err;
3763
+
3764
+ err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
3765
+ if (err)
3766
+ return err;
3767
+ info_len = min_t(u32, sizeof(info), info_len);
3768
+
3769
+ memset(&info, 0, sizeof(info));
3770
+ if (copy_from_user(&info, uinfo, info_len))
3771
+ return -EFAULT;
3772
+
3773
+ info.type = link->type;
3774
+ info.id = link->id;
3775
+ info.prog_id = link->prog->aux->id;
3776
+
3777
+ if (link->ops->fill_link_info) {
3778
+ err = link->ops->fill_link_info(link, &info);
3779
+ if (err)
3780
+ return err;
3781
+ }
3782
+
3783
+ if (copy_to_user(uinfo, &info, info_len) ||
3784
+ put_user(info_len, &uattr->info.info_len))
3785
+ return -EFAULT;
3786
+
3787
+ return 0;
3788
+}
3789
+
3790
+
22023791 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
22033792
22043793 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
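
bpf_link_get_info_by_fd() lets BPF_OBJ_GET_INFO_BY_FD work on link FDs as well, reporting the link type, id and owning program id plus whatever fill_link_info adds. A hedged userspace sketch (helper name invented):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int sketch_get_link_info(int link_fd, struct bpf_link_info *info)
{
	union bpf_attr attr;

	memset(info, 0, sizeof(*info));
	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = link_fd;	/* same command as for prog/map/btf fds */
	attr.info.info_len = sizeof(*info);
	attr.info.info = (unsigned long)info;

	/* on success info->type, info->id and info->prog_id are populated */
	return syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
}
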
....@@ -2223,6 +3812,9 @@
22233812 uattr);
22243813 else if (f.file->f_op == &btf_fops)
22253814 err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
3815
+ else if (f.file->f_op == &bpf_link_fops)
3816
+ err = bpf_link_get_info_by_fd(f.file, f.file->private_data,
3817
+ attr, uattr);
22263818 else
22273819 err = -EINVAL;
22283820
....@@ -2237,7 +3829,7 @@
22373829 if (CHECK_ATTR(BPF_BTF_LOAD))
22383830 return -EINVAL;
22393831
2240
- if (!capable(CAP_SYS_ADMIN))
3832
+ if (!bpf_capable())
22413833 return -EPERM;
22423834
22433835 return btf_new_fd(attr);
....@@ -2325,7 +3917,9 @@
23253917 if (attr->task_fd_query.flags != 0)
23263918 return -EINVAL;
23273919
3920
+ rcu_read_lock();
23283921 task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
3922
+ rcu_read_unlock();
23293923 if (!task)
23303924 return -ENOENT;
23313925
....@@ -2347,15 +3941,21 @@
23473941 if (err)
23483942 goto out;
23493943
2350
- if (file->f_op == &bpf_raw_tp_fops) {
2351
- struct bpf_raw_tracepoint *raw_tp = file->private_data;
2352
- struct bpf_raw_event_map *btp = raw_tp->btp;
3944
+ if (file->f_op == &bpf_link_fops) {
3945
+ struct bpf_link *link = file->private_data;
23533946
2354
- err = bpf_task_fd_query_copy(attr, uattr,
2355
- raw_tp->prog->aux->id,
2356
- BPF_FD_TYPE_RAW_TRACEPOINT,
2357
- btp->tp->name, 0, 0);
2358
- goto put_file;
3947
+ if (link->ops == &bpf_raw_tp_link_lops) {
3948
+ struct bpf_raw_tp_link *raw_tp =
3949
+ container_of(link, struct bpf_raw_tp_link, link);
3950
+ struct bpf_raw_event_map *btp = raw_tp->btp;
3951
+
3952
+ err = bpf_task_fd_query_copy(attr, uattr,
3953
+ raw_tp->link.prog->aux->id,
3954
+ BPF_FD_TYPE_RAW_TRACEPOINT,
3955
+ btp->tp->name, 0, 0);
3956
+ goto put_file;
3957
+ }
3958
+ goto out_not_supp;
23593959 }
23603960
23613961 event = perf_get_event(file);
....@@ -2375,6 +3975,7 @@
23753975 goto put_file;
23763976 }
23773977
3978
+out_not_supp:
23783979 err = -ENOTSUPP;
23793980 put_file:
23803981 fput(file);
....@@ -2382,12 +3983,411 @@
23823983 return err;
23833984 }
23843985
3986
+#define BPF_MAP_BATCH_LAST_FIELD batch.flags
3987
+
3988
+#define BPF_DO_BATCH(fn) \
3989
+ do { \
3990
+ if (!fn) { \
3991
+ err = -ENOTSUPP; \
3992
+ goto err_put; \
3993
+ } \
3994
+ err = fn(map, attr, uattr); \
3995
+ } while (0)
3996
+
3997
+static int bpf_map_do_batch(const union bpf_attr *attr,
3998
+ union bpf_attr __user *uattr,
3999
+ int cmd)
4000
+{
4001
+ bool has_read = cmd == BPF_MAP_LOOKUP_BATCH ||
4002
+ cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
4003
+ bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
4004
+ struct bpf_map *map;
4005
+ int err, ufd;
4006
+ struct fd f;
4007
+
4008
+ if (CHECK_ATTR(BPF_MAP_BATCH))
4009
+ return -EINVAL;
4010
+
4011
+ ufd = attr->batch.map_fd;
4012
+ f = fdget(ufd);
4013
+ map = __bpf_map_get(f);
4014
+ if (IS_ERR(map))
4015
+ return PTR_ERR(map);
4016
+ if (has_write)
4017
+ bpf_map_write_active_inc(map);
4018
+ if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
4019
+ err = -EPERM;
4020
+ goto err_put;
4021
+ }
4022
+ if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
4023
+ err = -EPERM;
4024
+ goto err_put;
4025
+ }
4026
+
4027
+ if (cmd == BPF_MAP_LOOKUP_BATCH)
4028
+ BPF_DO_BATCH(map->ops->map_lookup_batch);
4029
+ else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
4030
+ BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
4031
+ else if (cmd == BPF_MAP_UPDATE_BATCH)
4032
+ BPF_DO_BATCH(map->ops->map_update_batch);
4033
+ else
4034
+ BPF_DO_BATCH(map->ops->map_delete_batch);
4035
+err_put:
4036
+ if (has_write)
4037
+ bpf_map_write_active_dec(map);
4038
+ fdput(f);
4039
+ return err;
4040
+}
4041
+
4042
+static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
4043
+{
4044
+ if (attr->link_create.attach_type != prog->expected_attach_type)
4045
+ return -EINVAL;
4046
+
4047
+ if (prog->expected_attach_type == BPF_TRACE_ITER)
4048
+ return bpf_iter_link_attach(attr, prog);
4049
+ else if (prog->type == BPF_PROG_TYPE_EXT)
4050
+ return bpf_tracing_prog_attach(prog,
4051
+ attr->link_create.target_fd,
4052
+ attr->link_create.target_btf_id);
4053
+ return -EINVAL;
4054
+}
4055
+
4056
+#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
4057
+static int link_create(union bpf_attr *attr)
4058
+{
4059
+ enum bpf_prog_type ptype;
4060
+ struct bpf_prog *prog;
4061
+ int ret;
4062
+
4063
+ if (CHECK_ATTR(BPF_LINK_CREATE))
4064
+ return -EINVAL;
4065
+
4066
+ prog = bpf_prog_get(attr->link_create.prog_fd);
4067
+ if (IS_ERR(prog))
4068
+ return PTR_ERR(prog);
4069
+
4070
+ ret = bpf_prog_attach_check_attach_type(prog,
4071
+ attr->link_create.attach_type);
4072
+ if (ret)
4073
+ goto out;
4074
+
4075
+ if (prog->type == BPF_PROG_TYPE_EXT) {
4076
+ ret = tracing_bpf_link_attach(attr, prog);
4077
+ goto out;
4078
+ }
4079
+
4080
+ ptype = attach_type_to_prog_type(attr->link_create.attach_type);
4081
+ if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
4082
+ ret = -EINVAL;
4083
+ goto out;
4084
+ }
4085
+
4086
+ switch (ptype) {
4087
+ case BPF_PROG_TYPE_CGROUP_SKB:
4088
+ case BPF_PROG_TYPE_CGROUP_SOCK:
4089
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
4090
+ case BPF_PROG_TYPE_SOCK_OPS:
4091
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
4092
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
4093
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4094
+ ret = cgroup_bpf_link_attach(attr, prog);
4095
+ break;
4096
+ case BPF_PROG_TYPE_TRACING:
4097
+ ret = tracing_bpf_link_attach(attr, prog);
4098
+ break;
4099
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
4100
+ case BPF_PROG_TYPE_SK_LOOKUP:
4101
+ ret = netns_bpf_link_create(attr, prog);
4102
+ break;
4103
+#ifdef CONFIG_NET
4104
+ case BPF_PROG_TYPE_XDP:
4105
+ ret = bpf_xdp_link_attach(attr, prog);
4106
+ break;
4107
+#endif
4108
+ default:
4109
+ ret = -EINVAL;
4110
+ }
4111
+
4112
+out:
4113
+ if (ret < 0)
4114
+ bpf_prog_put(prog);
4115
+ return ret;
4116
+}
4117
+
4118
+#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
4119
+
4120
+static int link_update(union bpf_attr *attr)
4121
+{
4122
+ struct bpf_prog *old_prog = NULL, *new_prog;
4123
+ struct bpf_link *link;
4124
+ u32 flags;
4125
+ int ret;
4126
+
4127
+ if (CHECK_ATTR(BPF_LINK_UPDATE))
4128
+ return -EINVAL;
4129
+
4130
+ flags = attr->link_update.flags;
4131
+ if (flags & ~BPF_F_REPLACE)
4132
+ return -EINVAL;
4133
+
4134
+ link = bpf_link_get_from_fd(attr->link_update.link_fd);
4135
+ if (IS_ERR(link))
4136
+ return PTR_ERR(link);
4137
+
4138
+ new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
4139
+ if (IS_ERR(new_prog)) {
4140
+ ret = PTR_ERR(new_prog);
4141
+ goto out_put_link;
4142
+ }
4143
+
4144
+ if (flags & BPF_F_REPLACE) {
4145
+ old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
4146
+ if (IS_ERR(old_prog)) {
4147
+ ret = PTR_ERR(old_prog);
4148
+ old_prog = NULL;
4149
+ goto out_put_progs;
4150
+ }
4151
+ } else if (attr->link_update.old_prog_fd) {
4152
+ ret = -EINVAL;
4153
+ goto out_put_progs;
4154
+ }
4155
+
4156
+ if (link->ops->update_prog)
4157
+ ret = link->ops->update_prog(link, new_prog, old_prog);
4158
+ else
4159
+ ret = -EINVAL;
4160
+
4161
+out_put_progs:
4162
+ if (old_prog)
4163
+ bpf_prog_put(old_prog);
4164
+ if (ret)
4165
+ bpf_prog_put(new_prog);
4166
+out_put_link:
4167
+ bpf_link_put(link);
4168
+ return ret;
4169
+}
4170
+
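+/* BPF_LINK_DETACH: force-detach a link from its hook without destroying the
+ * link object; links that do not implement ->detach return -EOPNOTSUPP.
+ */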
4171
+#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd
4172
+
4173
+static int link_detach(union bpf_attr *attr)
4174
+{
4175
+ struct bpf_link *link;
4176
+ int ret;
4177
+
4178
+ if (CHECK_ATTR(BPF_LINK_DETACH))
4179
+ return -EINVAL;
4180
+
4181
+ link = bpf_link_get_from_fd(attr->link_detach.link_fd);
4182
+ if (IS_ERR(link))
4183
+ return PTR_ERR(link);
4184
+
4185
+ if (link->ops->detach)
4186
+ ret = link->ops->detach(link);
4187
+ else
4188
+ ret = -EOPNOTSUPP;
4189
+
4190
+ bpf_link_put(link);
4191
+ return ret;
4192
+}
4193
+
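+/* Take a reference only if the link is still alive (refcnt != 0). */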
4194
+static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
4195
+{
4196
+ return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
4197
+}
4198
+
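+/* Look up a link by ID under link_idr_lock. An entry whose ID is still 0 is
+ * not fully published yet and is reported as -EAGAIN rather than -ENOENT.
+ */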
4199
+struct bpf_link *bpf_link_by_id(u32 id)
4200
+{
4201
+ struct bpf_link *link;
4202
+
4203
+ if (!id)
4204
+ return ERR_PTR(-ENOENT);
4205
+
4206
+ spin_lock_bh(&link_idr_lock);
4207
+ /* before link is "settled", ID is 0, pretend it doesn't exist yet */
4208
+ link = idr_find(&link_idr, id);
4209
+ if (link) {
4210
+ if (link->id)
4211
+ link = bpf_link_inc_not_zero(link);
4212
+ else
4213
+ link = ERR_PTR(-EAGAIN);
4214
+ } else {
4215
+ link = ERR_PTR(-ENOENT);
4216
+ }
4217
+ spin_unlock_bh(&link_idr_lock);
4218
+ return link;
4219
+}
4220
+
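+/* BPF_LINK_GET_FD_BY_ID: CAP_SYS_ADMIN only; open a new fd referring to the
+ * link with the given ID.
+ */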
4221
+#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
4222
+
4223
+static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
4224
+{
4225
+ struct bpf_link *link;
4226
+ u32 id = attr->link_id;
4227
+ int fd;
4228
+
4229
+ if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
4230
+ return -EINVAL;
4231
+
4232
+ if (!capable(CAP_SYS_ADMIN))
4233
+ return -EPERM;
4234
+
4235
+ link = bpf_link_by_id(id);
4236
+ if (IS_ERR(link))
4237
+ return PTR_ERR(link);
4238
+
4239
+ fd = bpf_link_new_fd(link);
4240
+ if (fd < 0)
4241
+ bpf_link_put(link);
4242
+
4243
+ return fd;
4244
+}
4245
+
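+/* BPF_ENABLE_STATS(BPF_STATS_RUN_TIME) returns an anonymous fd; run-time
+ * statistics collection (bpf_stats_enabled_key) stays enabled until the last
+ * such fd is released. bpf_stats_enabled_mutex serializes the key updates.
+ */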
4246
+DEFINE_MUTEX(bpf_stats_enabled_mutex);
4247
+
4248
+static int bpf_stats_release(struct inode *inode, struct file *file)
4249
+{
4250
+ mutex_lock(&bpf_stats_enabled_mutex);
4251
+ static_key_slow_dec(&bpf_stats_enabled_key.key);
4252
+ mutex_unlock(&bpf_stats_enabled_mutex);
4253
+ return 0;
4254
+}
4255
+
4256
+static const struct file_operations bpf_stats_fops = {
4257
+ .release = bpf_stats_release,
4258
+};
4259
+
4260
+static int bpf_enable_runtime_stats(void)
4261
+{
4262
+ int fd;
4263
+
4264
+ mutex_lock(&bpf_stats_enabled_mutex);
4265
+
4266
+ /* Set a very high limit to avoid overflow */
4267
+ if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
4268
+ mutex_unlock(&bpf_stats_enabled_mutex);
4269
+ return -EBUSY;
4270
+ }
4271
+
4272
+ fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
4273
+ if (fd >= 0)
4274
+ static_key_slow_inc(&bpf_stats_enabled_key.key);
4275
+
4276
+ mutex_unlock(&bpf_stats_enabled_mutex);
4277
+ return fd;
4278
+}
4279
+
4280
+#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
4281
+
4282
+static int bpf_enable_stats(union bpf_attr *attr)
4283
+{
4284
+
4285
+ if (CHECK_ATTR(BPF_ENABLE_STATS))
4286
+ return -EINVAL;
4287
+
4288
+ if (!capable(CAP_SYS_ADMIN))
4289
+ return -EPERM;
4290
+
4291
+ switch (attr->enable_stats.type) {
4292
+ case BPF_STATS_RUN_TIME:
4293
+ return bpf_enable_runtime_stats();
4294
+ default:
4295
+ break;
4296
+ }
4297
+ return -EINVAL;
4298
+}
4299
+
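+/* BPF_ITER_CREATE: given a bpf_iter link fd, create a new iterator fd that
+ * user space reads to run the iterator program. No flags are accepted.
+ */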
4300
+#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
4301
+
4302
+static int bpf_iter_create(union bpf_attr *attr)
4303
+{
4304
+ struct bpf_link *link;
4305
+ int err;
4306
+
4307
+ if (CHECK_ATTR(BPF_ITER_CREATE))
4308
+ return -EINVAL;
4309
+
4310
+ if (attr->iter_create.flags)
4311
+ return -EINVAL;
4312
+
4313
+ link = bpf_link_get_from_fd(attr->iter_create.link_fd);
4314
+ if (IS_ERR(link))
4315
+ return PTR_ERR(link);
4316
+
4317
+ err = bpf_iter_new_fd(link);
4318
+ bpf_link_put(link);
4319
+
4320
+ return err;
4321
+}
4322
+
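+/* BPF_PROG_BIND_MAP: record an additional map in the program's used_maps[]
+ * array so the map stays alive for the lifetime of the program even if the
+ * program text never references it. Binding the same map twice is a no-op.
+ */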
4323
+#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
4324
+
4325
+static int bpf_prog_bind_map(union bpf_attr *attr)
4326
+{
4327
+ struct bpf_prog *prog;
4328
+ struct bpf_map *map;
4329
+ struct bpf_map **used_maps_old, **used_maps_new;
4330
+ int i, ret = 0;
4331
+
4332
+ if (CHECK_ATTR(BPF_PROG_BIND_MAP))
4333
+ return -EINVAL;
4334
+
4335
+ if (attr->prog_bind_map.flags)
4336
+ return -EINVAL;
4337
+
4338
+ prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
4339
+ if (IS_ERR(prog))
4340
+ return PTR_ERR(prog);
4341
+
4342
+ map = bpf_map_get(attr->prog_bind_map.map_fd);
4343
+ if (IS_ERR(map)) {
4344
+ ret = PTR_ERR(map);
4345
+ goto out_prog_put;
4346
+ }
4347
+
4348
+ mutex_lock(&prog->aux->used_maps_mutex);
4349
+
4350
+ used_maps_old = prog->aux->used_maps;
4351
+
4352
+ for (i = 0; i < prog->aux->used_map_cnt; i++)
4353
+ if (used_maps_old[i] == map) {
4354
+ bpf_map_put(map);
4355
+ goto out_unlock;
4356
+ }
4357
+
4358
+ used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
4359
+ sizeof(used_maps_new[0]),
4360
+ GFP_KERNEL);
4361
+ if (!used_maps_new) {
4362
+ ret = -ENOMEM;
4363
+ goto out_unlock;
4364
+ }
4365
+
4366
+ memcpy(used_maps_new, used_maps_old,
4367
+ sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
4368
+ used_maps_new[prog->aux->used_map_cnt] = map;
4369
+
4370
+ prog->aux->used_map_cnt++;
4371
+ prog->aux->used_maps = used_maps_new;
4372
+
4373
+ kfree(used_maps_old);
4374
+
4375
+out_unlock:
4376
+ mutex_unlock(&prog->aux->used_maps_mutex);
4377
+
4378
+ if (ret)
4379
+ bpf_map_put(map);
4380
+out_prog_put:
4381
+ bpf_prog_put(prog);
4382
+ return ret;
4383
+}
4384
+
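+/* Main bpf(2) entry point: validate and copy in the (possibly shorter or
+ * longer than expected) bpf_attr, give the LSM a chance to reject the
+ * command, then dispatch on cmd.
+ */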
23854385 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
23864386 {
23874387 union bpf_attr attr;
23884388 int err;
23894389
2390
- if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
4390
+ if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
23914391 return -EPERM;
23924392
23934393 err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
....@@ -2399,6 +4399,8 @@
23994399 memset(&attr, 0, sizeof(attr));
24004400 if (copy_from_user(&attr, uattr, size) != 0)
24014401 return -EFAULT;
4402
+
4403
+ trace_android_vh_check_bpf_syscall(cmd, &attr, size);
24024404
24034405 err = security_bpf(cmd, &attr, size);
24044406 if (err < 0)
....@@ -2420,8 +4422,11 @@
24204422 case BPF_MAP_GET_NEXT_KEY:
24214423 err = map_get_next_key(&attr);
24224424 break;
4425
+ case BPF_MAP_FREEZE:
4426
+ err = map_freeze(&attr);
4427
+ break;
24234428 case BPF_PROG_LOAD:
2424
- err = bpf_prog_load(&attr);
4429
+ err = bpf_prog_load(&attr, uattr);
24254430 break;
24264431 case BPF_OBJ_PIN:
24274432 err = bpf_obj_pin(&attr);
....@@ -2449,6 +4454,10 @@
24494454 err = bpf_obj_get_next_id(&attr, uattr,
24504455 &map_idr, &map_idr_lock);
24514456 break;
4457
+ case BPF_BTF_GET_NEXT_ID:
4458
+ err = bpf_obj_get_next_id(&attr, uattr,
4459
+ &btf_idr, &btf_idr_lock);
4460
+ break;
24524461 case BPF_PROG_GET_FD_BY_ID:
24534462 err = bpf_prog_get_fd_by_id(&attr);
24544463 break;
....@@ -2470,6 +4479,47 @@
24704479 case BPF_TASK_FD_QUERY:
24714480 err = bpf_task_fd_query(&attr, uattr);
24724481 break;
4482
+ case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
4483
+ err = map_lookup_and_delete_elem(&attr);
4484
+ break;
4485
+ case BPF_MAP_LOOKUP_BATCH:
4486
+ err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH);
4487
+ break;
4488
+ case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
4489
+ err = bpf_map_do_batch(&attr, uattr,
4490
+ BPF_MAP_LOOKUP_AND_DELETE_BATCH);
4491
+ break;
4492
+ case BPF_MAP_UPDATE_BATCH:
4493
+ err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
4494
+ break;
4495
+ case BPF_MAP_DELETE_BATCH:
4496
+ err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
4497
+ break;
4498
+ case BPF_LINK_CREATE:
4499
+ err = link_create(&attr);
4500
+ break;
4501
+ case BPF_LINK_UPDATE:
4502
+ err = link_update(&attr);
4503
+ break;
4504
+ case BPF_LINK_GET_FD_BY_ID:
4505
+ err = bpf_link_get_fd_by_id(&attr);
4506
+ break;
4507
+ case BPF_LINK_GET_NEXT_ID:
4508
+ err = bpf_obj_get_next_id(&attr, uattr,
4509
+ &link_idr, &link_idr_lock);
4510
+ break;
4511
+ case BPF_ENABLE_STATS:
4512
+ err = bpf_enable_stats(&attr);
4513
+ break;
4514
+ case BPF_ITER_CREATE:
4515
+ err = bpf_iter_create(&attr);
4516
+ break;
4517
+ case BPF_LINK_DETACH:
4518
+ err = link_detach(&attr);
4519
+ break;
4520
+ case BPF_PROG_BIND_MAP:
4521
+ err = bpf_prog_bind_map(&attr);
4522
+ break;
24734523 default:
24744524 err = -EINVAL;
24754525 break;