hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
--- a/kernel/include/linux/bpf-cgroup.h
+++ b/kernel/include/linux/bpf-cgroup.h
@@ -2,9 +2,11 @@
 #ifndef _BPF_CGROUP_H
 #define _BPF_CGROUP_H

+#include <linux/bpf.h>
 #include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
+#include <linux/percpu-refcount.h>
 #include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>

@@ -16,34 +18,62 @@
 struct bpf_prog;
 struct bpf_sock_ops_kern;
 struct bpf_cgroup_storage;
+struct ctl_table;
+struct ctl_table_header;
+struct task_struct;

 #ifdef CONFIG_CGROUP_BPF

 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)

-DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+#define BPF_CGROUP_STORAGE_NEST_MAX 8
+
+struct bpf_cgroup_storage_info {
+	struct task_struct *task;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+};
+
+/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
+ * to use bpf cgroup storage simultaneously.
+ */
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+		bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
+
+#define for_each_cgroup_storage_type(stype) \
+	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)

 struct bpf_cgroup_storage_map;

 struct bpf_storage_buffer {
 	struct rcu_head rcu;
-	char data[0];
+	char data[];
 };

 struct bpf_cgroup_storage {
-	struct bpf_storage_buffer *buf;
+	union {
+		struct bpf_storage_buffer *buf;
+		void __percpu *percpu_buf;
+	};
 	struct bpf_cgroup_storage_map *map;
 	struct bpf_cgroup_storage_key key;
-	struct list_head list;
+	struct list_head list_map;
+	struct list_head list_cg;
 	struct rb_node node;
 	struct rcu_head rcu;
+};
+
+struct bpf_cgroup_link {
+	struct bpf_link link;
+	struct cgroup *cgroup;
+	enum bpf_attach_type type;
 };

 struct bpf_prog_list {
 	struct list_head node;
 	struct bpf_prog *prog;
-	struct bpf_cgroup_storage *storage;
+	struct bpf_cgroup_link *link;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };

 struct bpf_prog_array;
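
Note on the new per-cpu bookkeeping above: bpf_cgroup_storage_info is keyed by
task, so a runner claims a free slot for 'current' before invoking a program
and the storage helpers later locate that slot by matching the task pointer.
A minimal consumer-side sketch (a paraphrase of what a
bpf_get_local_storage()-style helper is expected to do; the function name
example_find_current_storage is made up for illustration):

static struct bpf_cgroup_storage *
example_find_current_storage(enum bpf_cgroup_storage_type stype)
{
	int i;

	/* Scan this cpu's nesting slots for the one claimed by this task. */
	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
		if (this_cpu_read(bpf_cgroup_storage_info[i].task) != current)
			continue;
		return this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
	}
	return NULL;	/* no slot claimed by this task on this cpu */
}
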
@@ -60,25 +90,39 @@
 	struct list_head progs[MAX_BPF_ATTACH_TYPE];
 	u32 flags[MAX_BPF_ATTACH_TYPE];

+	/* list of cgroup shared storages */
+	struct list_head storages;
+
 	/* temp storage for effective prog array used by prog_attach/detach */
-	struct bpf_prog_array __rcu *inactive;
+	struct bpf_prog_array *inactive;
+
+	/* reference counter used to detach bpf programs after cgroup removal */
+	struct percpu_ref refcnt;
+
+	/* cgroup_bpf is released using a work queue */
+	struct work_struct release_work;
 };

-void cgroup_bpf_put(struct cgroup *cgrp);
 int cgroup_bpf_inherit(struct cgroup *cgrp);
+void cgroup_bpf_offline(struct cgroup *cgrp);

-int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+int __cgroup_bpf_attach(struct cgroup *cgrp,
+			struct bpf_prog *prog, struct bpf_prog *replace_prog,
+			struct bpf_cgroup_link *link,
 			enum bpf_attach_type type, u32 flags);
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags);
+			struct bpf_cgroup_link *link,
+			enum bpf_attach_type type);
 int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 		       union bpf_attr __user *uattr);

 /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
-int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
+int cgroup_bpf_attach(struct cgroup *cgrp,
+		      struct bpf_prog *prog, struct bpf_prog *replace_prog,
+		      struct bpf_cgroup_link *link, enum bpf_attach_type type,
+		      u32 flags);
 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
+		      enum bpf_attach_type type);
 int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 		     union bpf_attr __user *uattr);

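
The attach paths above now distinguish prog-based attachment (optionally
replacing an existing program) from link-based attachment. A hedged sketch of
how a caller might use the extended cgroup_bpf_attach() signature;
example_attach(), the attach type, and the flag handling are illustrative
assumptions rather than code from this patch:

static int example_attach(struct cgroup *cgrp, struct bpf_prog *prog,
			  struct bpf_prog *old_prog,
			  struct bpf_cgroup_link *link)
{
	if (link)
		/* link-based (BPF_LINK_CREATE-style) path: no prog here */
		return cgroup_bpf_attach(cgrp, NULL, NULL, link,
					 link->type, BPF_F_ALLOW_MULTI);

	/* prog-based path; BPF_F_REPLACE swaps old_prog in place */
	return cgroup_bpf_attach(cgrp, prog, old_prog, NULL,
				 BPF_CGROUP_INET_EGRESS,
				 old_prog ? BPF_F_ALLOW_MULTI | BPF_F_REPLACE
					  : BPF_F_ALLOW_MULTI);
}
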
@@ -101,25 +145,81 @@
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum bpf_attach_type type);

-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
+int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
+				   struct ctl_table *table, int write,
+				   char **buf, size_t *pcount, loff_t *ppos,
+				   enum bpf_attach_type type);
+
+int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
+				       int *optname, char __user *optval,
+				       int *optlen, char **kernel_optval);
+int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
+				       int optname, char __user *optval,
+				       int __user *optlen, int max_optlen,
+				       int retval);
+
+static inline enum bpf_cgroup_storage_type cgroup_storage_type(
+	struct bpf_map *map)
 {
-	struct bpf_storage_buffer *buf;
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+		return BPF_CGROUP_STORAGE_PERCPU;

-	if (!storage)
-		return;
-
-	buf = READ_ONCE(storage->buf);
-	this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
+	return BPF_CGROUP_STORAGE_SHARED;
 }

-struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
+static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
+					 *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+{
+	enum bpf_cgroup_storage_type stype;
+	int i, err = 0;
+
+	preempt_disable();
+	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
+			continue;
+
+		this_cpu_write(bpf_cgroup_storage_info[i].task, current);
+		for_each_cgroup_storage_type(stype)
+			this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
+				       storage[stype]);
+		goto out;
+	}
+	err = -EBUSY;
+	WARN_ON_ONCE(1);
+
+out:
+	preempt_enable();
+	return err;
+}
+
+static inline void bpf_cgroup_storage_unset(void)
+{
+	int i;
+
+	for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) {
+		if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+			continue;
+
+		this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
+		return;
+	}
+}
+
+struct bpf_cgroup_storage *
+cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
+		      void *key, bool locked);
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
+						    enum bpf_cgroup_storage_type stype);
 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
 			     struct cgroup *cgroup,
 			     enum bpf_attach_type type);
 void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
-int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
-void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
+int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);
+
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
+				     void *value, u64 flags);

 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
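
bpf_cgroup_storage_set() claims one of the per-cpu nesting slots for the
current task and bpf_cgroup_storage_unset() releases it; the pair is meant to
bracket each program invocation (in-tree this happens inside the
program-array run macros). A minimal sketch, where run_one_cgroup_prog() and
the direct runner call are illustrative assumptions:

static int run_one_cgroup_prog(const struct bpf_prog *prog,
			       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
			       void *ctx)
{
	int ret;

	/* Claim a slot so bpf_get_local_storage() can find this task's
	 * storage pointers while the program runs.
	 */
	if (unlikely(bpf_cgroup_storage_set(storage)))
		return -EBUSY;	/* all BPF_CGROUP_STORAGE_NEST_MAX slots busy */

	ret = bpf_prog_run(prog, ctx);	/* runner name varies by kernel version */

	bpf_cgroup_storage_unset();	/* release the slot claimed above */
	return ret;
}
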
@@ -155,6 +255,9 @@

 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
 	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
+
+#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) \
+	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_RELEASE)

 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
 	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
@@ -216,6 +319,31 @@
 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
 	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)

+/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
+ * fullsock and its parent fullsock cannot be traced by
+ * sk_to_full_sk().
+ *
+ * e.g. sock_ops->sk is a request_sock and it is under syncookie mode.
+ * Its listener-sk is not attached to the rsk_listener.
+ * In this case, the caller holds the listener-sk (unlocked),
+ * set its sock_ops->sk to req_sk, and call this SOCK_OPS"_SK" with
+ * the listener-sk such that the cgroup-bpf-progs of the
+ * listener-sk will be run.
+ *
+ * Regardless of syncookie mode or not,
+ * calling bpf_setsockopt on listener-sk will not make sense anyway,
+ * so passing 'sock_ops->sk == req_sk' to the bpf prog is appropriate here.
+ */
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \
+({ \
+	int __ret = 0; \
+	if (cgroup_bpf_enabled) \
+		__ret = __cgroup_bpf_run_filter_sock_ops(sk, \
+							 sock_ops, \
+							 BPF_CGROUP_SOCK_OPS); \
+	__ret; \
+})
+
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
 ({ \
 	int __ret = 0; \
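
A short sketch of the usage pattern described in the comment above: when
sock_ops->sk is still a request_sock (the syncookie case), the caller passes
the listener explicitly via the _SK variant; otherwise the plain macro derives
the fullsock itself. The wrapper function below is illustrative only:

static int example_run_sock_ops(struct bpf_sock_ops_kern *sock_ops,
				struct sock *sk)
{
	if (sk_fullsock(sock_ops->sk))
		return BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops);

	/* sock_ops->sk is a request_sock under syncookies: run the
	 * (unlocked) listener sk's cgroup-bpf programs explicitly.
	 */
	return BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk);
}
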
@@ -239,18 +367,63 @@
 	\
 	__ret; \
 })
+
+
+#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \
+({ \
+	int __ret = 0; \
+	if (cgroup_bpf_enabled) \
+		__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
+						       buf, count, pos, \
+						       BPF_CGROUP_SYSCTL); \
+	__ret; \
+})
+
+#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
+				       kernel_optval) \
+({ \
+	int __ret = 0; \
+	if (cgroup_bpf_enabled) \
+		__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
+							   optname, optval, \
+							   optlen, \
+							   kernel_optval); \
+	__ret; \
+})
+
+#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
+({ \
+	int __ret = 0; \
+	if (cgroup_bpf_enabled) \
+		get_user(__ret, optlen); \
+	__ret; \
+})
+
+#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \
+				       max_optlen, retval) \
+({ \
+	int __ret = retval; \
+	if (cgroup_bpf_enabled) \
+		__ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \
+							   optname, optval, \
+							   optlen, max_optlen, \
+							   retval); \
+	__ret; \
+})
+
 int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 			   enum bpf_prog_type ptype, struct bpf_prog *prog);
 int cgroup_bpf_prog_detach(const union bpf_attr *attr,
 			   enum bpf_prog_type ptype);
+int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int cgroup_bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr);
 #else

 struct bpf_prog;
 struct cgroup_bpf {};
-static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
+static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}

 static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 					 enum bpf_prog_type ptype,
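
The sockopt hooks added in the hunk above wrap the generic
setsockopt/getsockopt paths (the in-tree caller is net/socket.c). A hedged
sketch of a setsockopt-side caller; example_setsockopt() and its abbreviated
error handling are illustrative. The understood return convention: negative
denies the call, positive means the program handled the option and the kernel
handler is bypassed, zero means proceed (with kernel_optval pointing at a
program-rewritten buffer when non-NULL):

static int example_setsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int optlen)
{
	char *kernel_optval = NULL;
	int err;

	err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
					     optval, &optlen, &kernel_optval);
	if (err < 0)		/* denied by a cgroup program */
		return err;
	if (err > 0)		/* program consumed the option; skip handler */
		return 0;

	/* err == 0: hand level/optname/optlen to the protocol handler; if
	 * kernel_optval is non-NULL, use (and later free) that buffer
	 * instead of the user pointer.
	 */
	return 0;
}
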
@@ -265,27 +438,43 @@
 	return -EINVAL;
 }

+static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
+					 struct bpf_prog *prog)
+{
+	return -EINVAL;
+}
+
 static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 					union bpf_attr __user *uattr)
 {
 	return -EINVAL;
 }

-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
-static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
+static inline int bpf_cgroup_storage_set(
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
+static inline void bpf_cgroup_storage_unset(void) {}
+static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
 					    struct bpf_map *map) { return 0; }
-static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
-					      struct bpf_map *map) {}
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
-	struct bpf_prog *prog) { return 0; }
+	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; }
 static inline void bpf_cgroup_storage_free(
 	struct bpf_cgroup_storage *storage) {}
+static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
+						 void *value) {
+	return 0;
+}
+static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
+						   void *key, void *value, u64 flags) {
+	return 0;
+}

 #define cgroup_bpf_enabled (0)
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
@@ -300,6 +489,14 @@
 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
+#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
+				       optlen, max_optlen, retval) ({ retval; })
+#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
+				       kernel_optval) ({ 0; })
+
+#define for_each_cgroup_storage_type(stype) for (; false; )

 #endif /* CONFIG_CGROUP_BPF */
