.. | ..
| 1 | +// SPDX-License-Identifier: GPL-2.0-only
1 | 2 | /*
2 | 3 | * Functions to manage eBPF programs attached to cgroups
3 | 4 | *
4 | 5 | * Copyright (c) 2016 Daniel Mack
5 | | - *
6 | | - * This file is subject to the terms and conditions of version 2 of the GNU
7 | | - * General Public License. See the file COPYING in the main directory of the
8 | | - * Linux distribution for more details.
9 | 6 | */
10 | 7 |
11 | 8 | #include <linux/kernel.h>
12 | 9 | #include <linux/atomic.h>
13 | 10 | #include <linux/cgroup.h>
| 11 | +#include <linux/filter.h>
14 | 12 | #include <linux/slab.h>
| 13 | +#include <linux/sysctl.h>
| 14 | +#include <linux/string.h>
15 | 15 | #include <linux/bpf.h>
16 | 16 | #include <linux/bpf-cgroup.h>
17 | 17 | #include <net/sock.h>
| 18 | +#include <net/bpf_sk_storage.h>
| 19 | +
| 20 | +#include "../cgroup/cgroup-internal.h"
18 | 21 |
19 | 22 | DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
20 | 23 | EXPORT_SYMBOL(cgroup_bpf_enabled_key);
21 | 24 |
22 | | -/**
23 | | - * cgroup_bpf_put() - put references of all bpf programs
24 | | - * @cgrp: the cgroup to modify
25 | | - */
26 | | -void cgroup_bpf_put(struct cgroup *cgrp)
| 25 | +void cgroup_bpf_offline(struct cgroup *cgrp)
27 | 26 | {
| 27 | + cgroup_get(cgrp);
| 28 | + percpu_ref_kill(&cgrp->bpf.refcnt);
| 29 | +}
| 30 | +
| 31 | +static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
| 32 | +{
| 33 | + enum bpf_cgroup_storage_type stype;
| 34 | +
| 35 | + for_each_cgroup_storage_type(stype)
| 36 | + bpf_cgroup_storage_free(storages[stype]);
| 37 | +}
| 38 | +
| 39 | +static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
| 40 | + struct bpf_cgroup_storage *new_storages[],
| 41 | + enum bpf_attach_type type,
| 42 | + struct bpf_prog *prog,
| 43 | + struct cgroup *cgrp)
| 44 | +{
| 45 | + enum bpf_cgroup_storage_type stype;
| 46 | + struct bpf_cgroup_storage_key key;
| 47 | + struct bpf_map *map;
| 48 | +
| 49 | + key.cgroup_inode_id = cgroup_id(cgrp);
| 50 | + key.attach_type = type;
| 51 | +
| 52 | + for_each_cgroup_storage_type(stype) {
| 53 | + map = prog->aux->cgroup_storage[stype];
| 54 | + if (!map)
| 55 | + continue;
| 56 | +
| 57 | + storages[stype] = cgroup_storage_lookup((void *)map, &key, false);
| 58 | + if (storages[stype])
| 59 | + continue;
| 60 | +
| 61 | + storages[stype] = bpf_cgroup_storage_alloc(prog, stype);
| 62 | + if (IS_ERR(storages[stype])) {
| 63 | + bpf_cgroup_storages_free(new_storages);
| 64 | + return -ENOMEM;
| 65 | + }
| 66 | +
| 67 | + new_storages[stype] = storages[stype];
| 68 | + }
| 69 | +
| 70 | + return 0;
| 71 | +}
| 72 | +
| 73 | +static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
| 74 | + struct bpf_cgroup_storage *src[])
| 75 | +{
| 76 | + enum bpf_cgroup_storage_type stype;
| 77 | +
| 78 | + for_each_cgroup_storage_type(stype)
| 79 | + dst[stype] = src[stype];
| 80 | +}
| 81 | +
| 82 | +static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
| 83 | + struct cgroup *cgrp,
| 84 | + enum bpf_attach_type attach_type)
| 85 | +{
| 86 | + enum bpf_cgroup_storage_type stype;
| 87 | +
| 88 | + for_each_cgroup_storage_type(stype)
| 89 | + bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
| 90 | +}
| 91 | +
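Note: all four storages helpers fan out over every cgroup storage type. For context, the iterator is the existing for_each_cgroup_storage_type() macro from include/linux/bpf.h, quoted below for reference (it is not part of this diff); the types it walks are BPF_CGROUP_STORAGE_SHARED and BPF_CGROUP_STORAGE_PERCPU.

    /* Quoted from include/linux/bpf.h for reference; not part of this diff. */
    #define for_each_cgroup_storage_type(stype) \
            for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
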
---|
| 92 | +/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
| 93 | + * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
| 94 | + * doesn't free link memory, which will eventually be done by bpf_link's
| 95 | + * release() callback, when its last FD is closed.
| 96 | + */
| 97 | +static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
| 98 | +{
| 99 | + cgroup_put(link->cgroup);
| 100 | + link->cgroup = NULL;
| 101 | +}
| 102 | +
| 103 | +/**
| 104 | + * cgroup_bpf_release() - put references of all bpf programs and
| 105 | + * release all cgroup bpf data
| 106 | + * @work: work structure embedded into the cgroup to modify
| 107 | + */
| 108 | +static void cgroup_bpf_release(struct work_struct *work)
| 109 | +{
| 110 | + struct cgroup *p, *cgrp = container_of(work, struct cgroup,
| 111 | + bpf.release_work);
| 112 | + struct bpf_prog_array *old_array;
| 113 | + struct list_head *storages = &cgrp->bpf.storages;
| 114 | + struct bpf_cgroup_storage *storage, *stmp;
| 115 | +
28 | 116 | unsigned int type;
| 117 | +
| 118 | + mutex_lock(&cgroup_mutex);
29 | 119 |
30 | 120 | for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
31 | 121 | struct list_head *progs = &cgrp->bpf.progs[type];
32 | | - struct bpf_prog_list *pl, *tmp;
| 122 | + struct bpf_prog_list *pl, *pltmp;
33 | 123 |
34 | | - list_for_each_entry_safe(pl, tmp, progs, node) {
| 124 | + list_for_each_entry_safe(pl, pltmp, progs, node) {
35 | 125 | list_del(&pl->node);
36 | | - bpf_prog_put(pl->prog);
37 | | - bpf_cgroup_storage_unlink(pl->storage);
38 | | - bpf_cgroup_storage_free(pl->storage);
| 126 | + if (pl->prog)
| 127 | + bpf_prog_put(pl->prog);
| 128 | + if (pl->link)
| 129 | + bpf_cgroup_link_auto_detach(pl->link);
39 | 130 | kfree(pl);
40 | 131 | static_branch_dec(&cgroup_bpf_enabled_key);
41 | 132 | }
42 | | - bpf_prog_array_free(cgrp->bpf.effective[type]);
| 133 | + old_array = rcu_dereference_protected(
| 134 | + cgrp->bpf.effective[type],
| 135 | + lockdep_is_held(&cgroup_mutex));
| 136 | + bpf_prog_array_free(old_array);
43 | 137 | }
| 138 | +
| 139 | + list_for_each_entry_safe(storage, stmp, storages, list_cg) {
| 140 | + bpf_cgroup_storage_unlink(storage);
| 141 | + bpf_cgroup_storage_free(storage);
| 142 | + }
| 143 | +
| 144 | + mutex_unlock(&cgroup_mutex);
| 145 | +
| 146 | + for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
| 147 | + cgroup_bpf_put(p);
| 148 | +
| 149 | + percpu_ref_exit(&cgrp->bpf.refcnt);
| 150 | + cgroup_put(cgrp);
| 151 | +}
| 152 | +
| 153 | +/**
| 154 | + * cgroup_bpf_release_fn() - callback used to schedule releasing
| 155 | + * of bpf cgroup data
| 156 | + * @ref: percpu ref counter structure
| 157 | + */
| 158 | +static void cgroup_bpf_release_fn(struct percpu_ref *ref)
| 159 | +{
| 160 | + struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
| 161 | +
| 162 | + INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
| 163 | + queue_work(system_wq, &cgrp->bpf.release_work);
| 164 | +}
| 165 | +
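Note: cgroup_bpf_release_fn() runs from the percpu_ref's internal RCU callback, which cannot sleep, so the real teardown is punted to a workqueue. A minimal sketch of that lifecycle pattern, with illustrative names ("obj" is not from this patch); it is paired with percpu_ref_init() at setup time and percpu_ref_kill() at offline time, exactly as cgroup_bpf_inherit()/cgroup_bpf_offline() do here:

    #include <linux/percpu-refcount.h>
    #include <linux/workqueue.h>
    #include <linux/slab.h>

    struct obj {
            struct percpu_ref refcnt;
            struct work_struct release_work;
    };

    static void obj_release_workfn(struct work_struct *work)
    {
            struct obj *o = container_of(work, struct obj, release_work);

            /* may sleep here: take mutexes, free heavyweight state, ... */
            percpu_ref_exit(&o->refcnt);
            kfree(o);
    }

    /* release callback: atomic context, so only schedule the work */
    static void obj_release_fn(struct percpu_ref *ref)
    {
            struct obj *o = container_of(ref, struct obj, refcnt);

            INIT_WORK(&o->release_work, obj_release_workfn);
            queue_work(system_wq, &o->release_work);
    }
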
---|
| 166 | +/* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
| 167 | + * link or direct prog.
| 168 | + */
| 169 | +static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
| 170 | +{
| 171 | + if (pl->prog)
| 172 | + return pl->prog;
| 173 | + if (pl->link)
| 174 | + return pl->link->link.prog;
| 175 | + return NULL;
44 | 176 | }
45 | 177 |
46 | 178 | /* count number of elements in the list.
.. | ..
52 | 184 | u32 cnt = 0;
53 | 185 |
54 | 186 | list_for_each_entry(pl, head, node) {
55 | | - if (!pl->prog)
| 187 | + if (!prog_list_prog(pl))
56 | 188 | continue;
57 | 189 | cnt++;
58 | 190 | }
.. | ..
64 | 196 | * if parent has overridable or multi-prog, allow attaching
65 | 197 | */
66 | 198 | static bool hierarchy_allows_attach(struct cgroup *cgrp,
67 | | - enum bpf_attach_type type,
68 | | - u32 new_flags)
| 199 | + enum bpf_attach_type type)
69 | 200 | {
70 | 201 | struct cgroup *p;
71 | 202 |
.. | ..
95 | 226 | */
96 | 227 | static int compute_effective_progs(struct cgroup *cgrp,
97 | 228 | enum bpf_attach_type type,
98 | | - struct bpf_prog_array __rcu **array)
| 229 | + struct bpf_prog_array **array)
99 | 230 | {
| 231 | + struct bpf_prog_array_item *item;
100 | 232 | struct bpf_prog_array *progs;
101 | 233 | struct bpf_prog_list *pl;
102 | 234 | struct cgroup *p = cgrp;
.. | ..
121 | 253 | continue;
122 | 254 |
123 | 255 | list_for_each_entry(pl, &p->bpf.progs[type], node) {
124 | | - if (!pl->prog)
| 256 | + if (!prog_list_prog(pl))
125 | 257 | continue;
126 | 258 |
127 | | - progs->items[cnt].prog = pl->prog;
128 | | - progs->items[cnt].cgroup_storage = pl->storage;
| 259 | + item = &progs->items[cnt];
| 260 | + item->prog = prog_list_prog(pl);
| 261 | + bpf_cgroup_storages_assign(item->cgroup_storage,
| 262 | + pl->storage);
129 | 263 | cnt++;
130 | 264 | }
131 | 265 | } while ((p = cgroup_parent(p)));
132 | 266 |
133 | | - rcu_assign_pointer(*array, progs);
| 267 | + *array = progs;
134 | 268 | return 0;
135 | 269 | }
136 | 270 |
137 | 271 | static void activate_effective_progs(struct cgroup *cgrp,
138 | 272 | enum bpf_attach_type type,
139 | | - struct bpf_prog_array __rcu *array)
| 273 | + struct bpf_prog_array *old_array)
140 | 274 | {
141 | | - struct bpf_prog_array __rcu *old_array;
142 | | -
143 | | - old_array = xchg(&cgrp->bpf.effective[type], array);
| 275 | + old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array,
| 276 | + lockdep_is_held(&cgroup_mutex));
144 | 277 | /* free prog array after grace period, since __cgroup_bpf_run_*()
145 | 278 | * might be still walking the array
146 | 279 | */
.. | ..
157 | 290 | * that array below is variable length
158 | 291 | */
159 | 292 | #define NR ARRAY_SIZE(cgrp->bpf.effective)
160 | | - struct bpf_prog_array __rcu *arrays[NR] = {};
161 | | - int i;
| 293 | + struct bpf_prog_array *arrays[NR] = {};
| 294 | + struct cgroup *p;
| 295 | + int ret, i;
| 296 | +
| 297 | + ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
| 298 | + GFP_KERNEL);
| 299 | + if (ret)
| 300 | + return ret;
| 301 | +
| 302 | + for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
| 303 | + cgroup_bpf_get(p);
162 | 304 |
163 | 305 | for (i = 0; i < NR; i++)
164 | 306 | INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
| 307 | +
| 308 | + INIT_LIST_HEAD(&cgrp->bpf.storages);
165 | 309 |
166 | 310 | for (i = 0; i < NR; i++)
167 | 311 | if (compute_effective_progs(cgrp, i, &arrays[i]))
.. | ..
174 | 318 | cleanup:
175 | 319 | for (i = 0; i < NR; i++)
176 | 320 | bpf_prog_array_free(arrays[i]);
| 321 | +
| 322 | + for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
| 323 | + cgroup_bpf_put(p);
| 324 | +
| 325 | + percpu_ref_exit(&cgrp->bpf.refcnt);
| 326 | +
177 | 327 | return -ENOMEM;
178 | 328 | }
179 | 329 |
.. | ..
187 | 337 | css_for_each_descendant_pre(css, &cgrp->self) {
188 | 338 | struct cgroup *desc = container_of(css, struct cgroup, self);
189 | 339 |
| 340 | + if (percpu_ref_is_zero(&desc->bpf.refcnt))
| 341 | + continue;
| 342 | +
190 | 343 | err = compute_effective_progs(desc, type, &desc->bpf.inactive);
191 | 344 | if (err)
192 | 345 | goto cleanup;
.. | ..
195 | 348 | /* all allocations were successful. Activate all prog arrays */
196 | 349 | css_for_each_descendant_pre(css, &cgrp->self) {
197 | 350 | struct cgroup *desc = container_of(css, struct cgroup, self);
| 351 | +
| 352 | + if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
| 353 | + if (unlikely(desc->bpf.inactive)) {
| 354 | + bpf_prog_array_free(desc->bpf.inactive);
| 355 | + desc->bpf.inactive = NULL;
| 356 | + }
| 357 | + continue;
| 358 | + }
198 | 359 |
199 | 360 | activate_effective_progs(desc, type, desc->bpf.inactive);
200 | 361 | desc->bpf.inactive = NULL;
.. | ..
218 | 379 |
219 | 380 | #define BPF_CGROUP_MAX_PROGS 64
220 | 381 |
| 382 | +static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
| 383 | + struct bpf_prog *prog,
| 384 | + struct bpf_cgroup_link *link,
| 385 | + struct bpf_prog *replace_prog,
| 386 | + bool allow_multi)
| 387 | +{
| 388 | + struct bpf_prog_list *pl;
| 389 | +
| 390 | + /* single-attach case */
| 391 | + if (!allow_multi) {
| 392 | + if (list_empty(progs))
| 393 | + return NULL;
| 394 | + return list_first_entry(progs, typeof(*pl), node);
| 395 | + }
| 396 | +
| 397 | + list_for_each_entry(pl, progs, node) {
| 398 | + if (prog && pl->prog == prog && prog != replace_prog)
| 399 | + /* disallow attaching the same prog twice */
| 400 | + return ERR_PTR(-EINVAL);
| 401 | + if (link && pl->link == link)
| 402 | + /* disallow attaching the same link twice */
| 403 | + return ERR_PTR(-EINVAL);
| 404 | + }
| 405 | +
| 406 | + /* direct prog multi-attach w/ replacement case */
| 407 | + if (replace_prog) {
| 408 | + list_for_each_entry(pl, progs, node) {
| 409 | + if (pl->prog == replace_prog)
| 410 | + /* a match found */
| 411 | + return pl;
| 412 | + }
| 413 | + /* prog to replace not found for cgroup */
| 414 | + return ERR_PTR(-ENOENT);
| 415 | + }
| 416 | +
| 417 | + return NULL;
| 418 | +}
| 419 | +
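Note: from userspace, the cases find_attach_entry() distinguishes map onto the attach flags. A hedged libbpf sketch (the FDs are placeholders assumed to be valid):

    #include <bpf/bpf.h>
    #include <linux/bpf.h>

    void attach_modes(int cgroup_fd, int prog_fd, int prog2_fd)
    {
            /* exclusive attach: silently replaces the single existing entry */
            bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);

            /* multi attach: appends; attaching the same prog FD twice
             * fails with -EINVAL per the check above
             */
            bpf_prog_attach(prog2_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS,
                            BPF_F_ALLOW_MULTI);
    }
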
---|
221 | 420 | /**
222 | | - * __cgroup_bpf_attach() - Attach the program to a cgroup, and
| 421 | + * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
223 | 422 | * propagate the change to descendants
224 | 423 | * @cgrp: The cgroup which descendants to traverse
225 | 424 | * @prog: A program to attach
| 425 | + * @link: A link to attach
| 426 | + * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
226 | 427 | * @type: Type of attach operation
| 428 | + * @flags: Option flags
227 | 429 | *
| 430 | + * Exactly one of @prog or @link can be non-null.
228 | 431 | * Must be called with cgroup_mutex held.
229 | 432 | */
230 | | -int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
| 433 | +int __cgroup_bpf_attach(struct cgroup *cgrp,
| 434 | + struct bpf_prog *prog, struct bpf_prog *replace_prog,
| 435 | + struct bpf_cgroup_link *link,
231 | 436 | enum bpf_attach_type type, u32 flags)
232 | 437 | {
| 438 | + u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
233 | 439 | struct list_head *progs = &cgrp->bpf.progs[type];
234 | 440 | struct bpf_prog *old_prog = NULL;
235 | | - struct bpf_cgroup_storage *storage, *old_storage = NULL;
| 441 | + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
| 442 | + struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
236 | 443 | struct bpf_prog_list *pl;
237 | | - bool pl_was_allocated;
238 | 444 | int err;
239 | 445 |
240 | | - if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
| 446 | + if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
| 447 | + ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
241 | 448 | /* invalid combination */
242 | 449 | return -EINVAL;
| 450 | + if (link && (prog || replace_prog))
| 451 | + /* only either link or prog/replace_prog can be specified */
| 452 | + return -EINVAL;
| 453 | + if (!!replace_prog != !!(flags & BPF_F_REPLACE))
| 454 | + /* replace_prog implies BPF_F_REPLACE, and vice versa */
| 455 | + return -EINVAL;
243 | 456 |
244 | | - if (!hierarchy_allows_attach(cgrp, type, flags))
| 457 | + if (!hierarchy_allows_attach(cgrp, type))
245 | 458 | return -EPERM;
246 | 459 |
247 | | - if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
| 460 | + if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
248 | 461 | /* Disallow attaching non-overridable on top
249 | 462 | * of existing overridable in this cgroup.
250 | 463 | * Disallow attaching multi-prog if overridable or none
.. | ..
254 | 467 | if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
255 | 468 | return -E2BIG;
256 | 469 |
257 | | - storage = bpf_cgroup_storage_alloc(prog);
258 | | - if (IS_ERR(storage))
| 470 | + pl = find_attach_entry(progs, prog, link, replace_prog,
| 471 | + flags & BPF_F_ALLOW_MULTI);
| 472 | + if (IS_ERR(pl))
| 473 | + return PTR_ERR(pl);
| 474 | +
| 475 | + if (bpf_cgroup_storages_alloc(storage, new_storage, type,
| 476 | + prog ? : link->link.prog, cgrp))
259 | 477 | return -ENOMEM;
260 | 478 |
261 | | - if (flags & BPF_F_ALLOW_MULTI) {
262 | | - list_for_each_entry(pl, progs, node) {
263 | | - if (pl->prog == prog) {
264 | | - /* disallow attaching the same prog twice */
265 | | - bpf_cgroup_storage_free(storage);
266 | | - return -EINVAL;
267 | | - }
268 | | - }
269 | | -
| 479 | + if (pl) {
| 480 | + old_prog = pl->prog;
| 481 | + } else {
270 | 482 | pl = kmalloc(sizeof(*pl), GFP_KERNEL);
271 | 483 | if (!pl) {
272 | | - bpf_cgroup_storage_free(storage);
| 484 | + bpf_cgroup_storages_free(new_storage);
273 | 485 | return -ENOMEM;
274 | 486 | }
275 | | -
276 | | - pl_was_allocated = true;
277 | | - pl->prog = prog;
278 | | - pl->storage = storage;
279 | 487 | list_add_tail(&pl->node, progs);
280 | | - } else {
281 | | - if (list_empty(progs)) {
282 | | - pl = kmalloc(sizeof(*pl), GFP_KERNEL);
283 | | - if (!pl) {
284 | | - bpf_cgroup_storage_free(storage);
285 | | - return -ENOMEM;
286 | | - }
287 | | - pl_was_allocated = true;
288 | | - list_add_tail(&pl->node, progs);
289 | | - } else {
290 | | - pl = list_first_entry(progs, typeof(*pl), node);
291 | | - old_prog = pl->prog;
292 | | - old_storage = pl->storage;
293 | | - bpf_cgroup_storage_unlink(old_storage);
294 | | - pl_was_allocated = false;
295 | | - }
296 | | - pl->prog = prog;
297 | | - pl->storage = storage;
298 | 488 | }
299 | 489 |
300 | | - cgrp->bpf.flags[type] = flags;
| 490 | + pl->prog = prog;
| 491 | + pl->link = link;
| 492 | + bpf_cgroup_storages_assign(pl->storage, storage);
| 493 | + cgrp->bpf.flags[type] = saved_flags;
301 | 494 |
302 | 495 | err = update_effective_progs(cgrp, type);
303 | 496 | if (err)
304 | 497 | goto cleanup;
305 | 498 |
306 | | - static_branch_inc(&cgroup_bpf_enabled_key);
307 | | - if (old_storage)
308 | | - bpf_cgroup_storage_free(old_storage);
309 | | - if (old_prog) {
| 499 | + if (old_prog)
310 | 500 | bpf_prog_put(old_prog);
311 | | - static_branch_dec(&cgroup_bpf_enabled_key);
312 | | - }
313 | | - bpf_cgroup_storage_link(storage, cgrp, type);
| 501 | + else
| 502 | + static_branch_inc(&cgroup_bpf_enabled_key);
| 503 | + bpf_cgroup_storages_link(new_storage, cgrp, type);
314 | 504 | return 0;
315 | 505 |
316 | 506 | cleanup:
317 | | - /* and cleanup the prog list */
318 | | - pl->prog = old_prog;
319 | | - bpf_cgroup_storage_free(pl->storage);
320 | | - pl->storage = old_storage;
321 | | - bpf_cgroup_storage_link(old_storage, cgrp, type);
322 | | - if (pl_was_allocated) {
| 507 | + if (old_prog) {
| 508 | + pl->prog = old_prog;
| 509 | + pl->link = NULL;
| 510 | + }
| 511 | + bpf_cgroup_storages_free(new_storage);
| 512 | + if (!old_prog) {
323 | 513 | list_del(&pl->node);
324 | 514 | kfree(pl);
325 | 515 | }
326 | 516 | return err;
327 | 517 | }
328 | 518 |
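Note: the flag validation above means BPF_F_REPLACE is only accepted together with BPF_F_ALLOW_MULTI and a replace_bpf_fd. A hedged sketch of the raw syscall usage (FD values are placeholders):

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    /* Atomically swap old_prog_fd for new_prog_fd in its multi-attach slot. */
    int replace_cgroup_prog(int cg_fd, int old_prog_fd, int new_prog_fd)
    {
            union bpf_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.target_fd = cg_fd;
            attr.attach_bpf_fd = new_prog_fd;
            attr.replace_bpf_fd = old_prog_fd;
            attr.attach_type = BPF_CGROUP_INET_EGRESS;
            attr.attach_flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;

            return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
    }
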
---|
| 519 | +/* Swap updated BPF program for given link in effective program arrays across
| 520 | + * all descendant cgroups. This function is guaranteed to succeed.
| 521 | + */
| 522 | +static void replace_effective_prog(struct cgroup *cgrp,
| 523 | + enum bpf_attach_type type,
| 524 | + struct bpf_cgroup_link *link)
| 525 | +{
| 526 | + struct bpf_prog_array_item *item;
| 527 | + struct cgroup_subsys_state *css;
| 528 | + struct bpf_prog_array *progs;
| 529 | + struct bpf_prog_list *pl;
| 530 | + struct list_head *head;
| 531 | + struct cgroup *cg;
| 532 | + int pos;
| 533 | +
| 534 | + css_for_each_descendant_pre(css, &cgrp->self) {
| 535 | + struct cgroup *desc = container_of(css, struct cgroup, self);
| 536 | +
| 537 | + if (percpu_ref_is_zero(&desc->bpf.refcnt))
| 538 | + continue;
| 539 | +
| 540 | + /* find position of link in effective progs array */
| 541 | + for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
| 542 | + if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))
| 543 | + continue;
| 544 | +
| 545 | + head = &cg->bpf.progs[type];
| 546 | + list_for_each_entry(pl, head, node) {
| 547 | + if (!prog_list_prog(pl))
| 548 | + continue;
| 549 | + if (pl->link == link)
| 550 | + goto found;
| 551 | + pos++;
| 552 | + }
| 553 | + }
| 554 | +found:
| 555 | + BUG_ON(!cg);
| 556 | + progs = rcu_dereference_protected(
| 557 | + desc->bpf.effective[type],
| 558 | + lockdep_is_held(&cgroup_mutex));
| 559 | + item = &progs->items[pos];
| 560 | + WRITE_ONCE(item->prog, link->link.prog);
| 561 | + }
| 562 | +}
| 563 | +
329 | 564 | /**
330 | | - * __cgroup_bpf_detach() - Detach the program from a cgroup, and
331 | | - * propagate the change to descendants
| 565 | + * __cgroup_bpf_replace() - Replace link's program and propagate the change
| 566 | + * to descendants
332 | 567 | * @cgrp: The cgroup which descendants to traverse
333 | | - * @prog: A program to detach or NULL
334 | | - * @type: Type of detach operation
| 568 | + * @link: A link for which to replace BPF program
| 569 | + * @new_prog: Updated program to use for the link
335 | 570 | *
336 | 571 | * Must be called with cgroup_mutex held.
337 | 572 | */
| 573 | +static int __cgroup_bpf_replace(struct cgroup *cgrp,
| 574 | + struct bpf_cgroup_link *link,
| 575 | + struct bpf_prog *new_prog)
| 576 | +{
| 577 | + struct list_head *progs = &cgrp->bpf.progs[link->type];
| 578 | + struct bpf_prog *old_prog;
| 579 | + struct bpf_prog_list *pl;
| 580 | + bool found = false;
| 581 | +
| 582 | + if (link->link.prog->type != new_prog->type)
| 583 | + return -EINVAL;
| 584 | +
| 585 | + list_for_each_entry(pl, progs, node) {
| 586 | + if (pl->link == link) {
| 587 | + found = true;
| 588 | + break;
| 589 | + }
| 590 | + }
| 591 | + if (!found)
| 592 | + return -ENOENT;
| 593 | +
| 594 | + old_prog = xchg(&link->link.prog, new_prog);
| 595 | + replace_effective_prog(cgrp, link->type, link);
| 596 | + bpf_prog_put(old_prog);
| 597 | + return 0;
| 598 | +}
| 599 | +
| 600 | +static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
| 601 | + struct bpf_prog *old_prog)
| 602 | +{
| 603 | + struct bpf_cgroup_link *cg_link;
| 604 | + int ret;
| 605 | +
| 606 | + cg_link = container_of(link, struct bpf_cgroup_link, link);
| 607 | +
| 608 | + mutex_lock(&cgroup_mutex);
| 609 | + /* link might have been auto-released by dying cgroup, so fail */
| 610 | + if (!cg_link->cgroup) {
| 611 | + ret = -ENOLINK;
| 612 | + goto out_unlock;
| 613 | + }
| 614 | + if (old_prog && link->prog != old_prog) {
| 615 | + ret = -EPERM;
| 616 | + goto out_unlock;
| 617 | + }
| 618 | + ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
| 619 | +out_unlock:
| 620 | + mutex_unlock(&cgroup_mutex);
| 621 | + return ret;
| 622 | +}
| 623 | +
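Note: cgroup_bpf_replace() is wired up as the link's update_prog callback further below, i.e. it backs BPF_LINK_UPDATE for cgroup links. A hedged libbpf sketch (FDs are placeholders); passing old_prog_fd with BPF_F_REPLACE triggers the -EPERM check above when the expected old program no longer matches:

    #include <bpf/bpf.h>
    #include <linux/bpf.h>

    int update_cgroup_link(int link_fd, int new_prog_fd, int old_prog_fd)
    {
            DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts,
                    .old_prog_fd = old_prog_fd,
                    .flags = BPF_F_REPLACE);

            return bpf_link_update(link_fd, new_prog_fd, &opts);
    }
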
---|
| 624 | +static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
| 625 | + struct bpf_prog *prog,
| 626 | + struct bpf_cgroup_link *link,
| 627 | + bool allow_multi)
| 628 | +{
| 629 | + struct bpf_prog_list *pl;
| 630 | +
| 631 | + if (!allow_multi) {
| 632 | + if (list_empty(progs))
| 633 | + /* report error when trying to detach and nothing is attached */
| 634 | + return ERR_PTR(-ENOENT);
| 635 | +
| 636 | + /* to maintain backward compatibility NONE and OVERRIDE cgroups
| 637 | + * allow detaching with invalid FD (prog==NULL) in legacy mode
| 638 | + */
| 639 | + return list_first_entry(progs, typeof(*pl), node);
| 640 | + }
| 641 | +
| 642 | + if (!prog && !link)
| 643 | + /* to detach MULTI prog the user has to specify valid FD
| 644 | + * of the program or link to be detached
| 645 | + */
| 646 | + return ERR_PTR(-EINVAL);
| 647 | +
| 648 | + /* find the prog or link and detach it */
| 649 | + list_for_each_entry(pl, progs, node) {
| 650 | + if (pl->prog == prog && pl->link == link)
| 651 | + return pl;
| 652 | + }
| 653 | + return ERR_PTR(-ENOENT);
| 654 | +}
| 655 | +
| 656 | +/**
| 657 | + * purge_effective_progs() - After compute_effective_progs fails to alloc new
| 658 | + * cgrp->bpf.inactive table we can recover by
| 659 | + * recomputing the array in place.
| 660 | + *
| 661 | + * @cgrp: The cgroup which descendants to traverse
| 662 | + * @prog: A program to detach or NULL
| 663 | + * @link: A link to detach or NULL
| 664 | + * @type: Type of detach operation
| 665 | + */
| 666 | +static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
| 667 | + struct bpf_cgroup_link *link,
| 668 | + enum bpf_attach_type type)
| 669 | +{
| 670 | + struct cgroup_subsys_state *css;
| 671 | + struct bpf_prog_array *progs;
| 672 | + struct bpf_prog_list *pl;
| 673 | + struct list_head *head;
| 674 | + struct cgroup *cg;
| 675 | + int pos;
| 676 | +
| 677 | + /* recompute effective prog array in place */
| 678 | + css_for_each_descendant_pre(css, &cgrp->self) {
| 679 | + struct cgroup *desc = container_of(css, struct cgroup, self);
| 680 | +
| 681 | + if (percpu_ref_is_zero(&desc->bpf.refcnt))
| 682 | + continue;
| 683 | +
| 684 | + /* find position of link or prog in effective progs array */
| 685 | + for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
| 686 | + if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))
| 687 | + continue;
| 688 | +
| 689 | + head = &cg->bpf.progs[type];
| 690 | + list_for_each_entry(pl, head, node) {
| 691 | + if (!prog_list_prog(pl))
| 692 | + continue;
| 693 | + if (pl->prog == prog && pl->link == link)
| 694 | + goto found;
| 695 | + pos++;
| 696 | + }
| 697 | + }
| 698 | +
| 699 | + /* no link or prog match, skip the cgroup of this layer */
| 700 | + continue;
| 701 | +found:
| 702 | + progs = rcu_dereference_protected(
| 703 | + desc->bpf.effective[type],
| 704 | + lockdep_is_held(&cgroup_mutex));
| 705 | +
| 706 | + /* Remove the program from the array */
| 707 | + WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
| 708 | + "Failed to purge a prog from array at index %d", pos);
| 709 | + }
| 710 | +}
| 711 | +
| 712 | +/**
| 713 | + * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
| 714 | + * propagate the change to descendants
| 715 | + * @cgrp: The cgroup which descendants to traverse
| 716 | + * @prog: A program to detach or NULL
| 717 | + * @link: A link to detach or NULL
| 718 | + * @type: Type of detach operation
| 719 | + *
| 720 | + * At most one of @prog or @link can be non-NULL.
| 721 | + * Must be called with cgroup_mutex held.
| 722 | + */
338 | 723 | int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
339 | | - enum bpf_attach_type type, u32 unused_flags)
| 724 | + struct bpf_cgroup_link *link, enum bpf_attach_type type)
340 | 725 | {
341 | 726 | struct list_head *progs = &cgrp->bpf.progs[type];
342 | 727 | u32 flags = cgrp->bpf.flags[type];
343 | | - struct bpf_prog *old_prog = NULL;
344 | 728 | struct bpf_prog_list *pl;
345 | | - int err;
| 729 | + struct bpf_prog *old_prog;
346 | 730 |
347 | | - if (flags & BPF_F_ALLOW_MULTI) {
348 | | - if (!prog)
349 | | - /* to detach MULTI prog the user has to specify valid FD
350 | | - * of the program to be detached
351 | | - */
352 | | - return -EINVAL;
353 | | - } else {
354 | | - if (list_empty(progs))
355 | | - /* report error when trying to detach and nothing is attached */
356 | | - return -ENOENT;
| 731 | + if (prog && link)
| 732 | + /* only one of prog or link can be specified */
| 733 | + return -EINVAL;
| 734 | +
| 735 | + pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI);
| 736 | + if (IS_ERR(pl))
| 737 | + return PTR_ERR(pl);
| 738 | +
| 739 | + /* mark it deleted, so it's ignored while recomputing effective */
| 740 | + old_prog = pl->prog;
| 741 | + pl->prog = NULL;
| 742 | + pl->link = NULL;
| 743 | +
| 744 | + if (update_effective_progs(cgrp, type)) {
| 745 | + /* if update effective array failed replace the prog with a dummy prog */
| 746 | + pl->prog = old_prog;
| 747 | + pl->link = link;
| 748 | + purge_effective_progs(cgrp, old_prog, link, type);
357 | 749 | }
358 | | -
359 | | - if (flags & BPF_F_ALLOW_MULTI) {
360 | | - /* find the prog and detach it */
361 | | - list_for_each_entry(pl, progs, node) {
362 | | - if (pl->prog != prog)
363 | | - continue;
364 | | - old_prog = prog;
365 | | - /* mark it deleted, so it's ignored while
366 | | - * recomputing effective
367 | | - */
368 | | - pl->prog = NULL;
369 | | - break;
370 | | - }
371 | | - if (!old_prog)
372 | | - return -ENOENT;
373 | | - } else {
374 | | - /* to maintain backward compatibility NONE and OVERRIDE cgroups
375 | | - * allow detaching with invalid FD (prog==NULL)
376 | | - */
377 | | - pl = list_first_entry(progs, typeof(*pl), node);
378 | | - old_prog = pl->prog;
379 | | - pl->prog = NULL;
380 | | - }
381 | | -
382 | | - err = update_effective_progs(cgrp, type);
383 | | - if (err)
384 | | - goto cleanup;
385 | 750 |
386 | 751 | /* now can actually delete it from this cgroup list */
387 | 752 | list_del(&pl->node);
388 | | - bpf_cgroup_storage_unlink(pl->storage);
389 | | - bpf_cgroup_storage_free(pl->storage);
390 | 753 | kfree(pl);
391 | 754 | if (list_empty(progs))
392 | 755 | /* last program was detached, reset flags to zero */
393 | 756 | cgrp->bpf.flags[type] = 0;
394 | | -
395 | | - bpf_prog_put(old_prog);
| 757 | + if (old_prog)
| 758 | + bpf_prog_put(old_prog);
396 | 759 | static_branch_dec(&cgroup_bpf_enabled_key);
397 | 760 | return 0;
398 | | -
399 | | -cleanup:
400 | | - /* and restore back old_prog */
401 | | - pl->prog = old_prog;
402 | | - return err;
403 | 761 | }
404 | 762 |
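Note: the multi vs. legacy detach rules surface in userspace through the prog FD argument. A hedged libbpf sketch (FDs are placeholders):

    #include <bpf/bpf.h>
    #include <linux/bpf.h>

    void detach_modes(int cgroup_fd, int prog_fd)
    {
            /* multi-attached cgroup: a specific prog FD is required,
             * otherwise find_detach_entry() returns -EINVAL
             */
            bpf_prog_detach2(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS);

            /* legacy NONE/OVERRIDE cgroup: the prog FD may be omitted and
             * the single attached program is detached
             */
            bpf_prog_detach(cgroup_fd, BPF_CGROUP_INET_EGRESS);
    }
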
---|
405 | 763 | /* Must be called with cgroup_mutex held to avoid races. */
.. | ..
410 | 768 | enum bpf_attach_type type = attr->query.attach_type;
411 | 769 | struct list_head *progs = &cgrp->bpf.progs[type];
412 | 770 | u32 flags = cgrp->bpf.flags[type];
| 771 | + struct bpf_prog_array *effective;
| 772 | + struct bpf_prog *prog;
413 | 773 | int cnt, ret = 0, i;
414 | 774 |
| 775 | + effective = rcu_dereference_protected(cgrp->bpf.effective[type],
| 776 | + lockdep_is_held(&cgroup_mutex));
| 777 | +
415 | 778 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
416 | | - cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
| 779 | + cnt = bpf_prog_array_length(effective);
417 | 780 | else
418 | 781 | cnt = prog_list_length(progs);
419 | 782 |
.. | ..
430 | 793 | }
431 | 794 |
432 | 795 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
433 | | - return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
434 | | - prog_ids, cnt);
| 796 | + return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
435 | 797 | } else {
436 | 798 | struct bpf_prog_list *pl;
437 | 799 | u32 id;
438 | 800 |
439 | 801 | i = 0;
440 | 802 | list_for_each_entry(pl, progs, node) {
441 | | - id = pl->prog->aux->id;
| 803 | + prog = prog_list_prog(pl);
| 804 | + id = prog->aux->id;
442 | 805 | if (copy_to_user(prog_ids + i, &id, sizeof(id)))
443 | 806 | return -EFAULT;
444 | 807 | if (++i == cnt)
.. | ..
451 | 814 | int cgroup_bpf_prog_attach(const union bpf_attr *attr,
452 | 815 | enum bpf_prog_type ptype, struct bpf_prog *prog)
453 | 816 | {
| 817 | + struct bpf_prog *replace_prog = NULL;
454 | 818 | struct cgroup *cgrp;
455 | 819 | int ret;
456 | 820 |
.. | ..
458 | 822 | if (IS_ERR(cgrp))
459 | 823 | return PTR_ERR(cgrp);
460 | 824 |
461 | | - ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
462 | | - attr->attach_flags);
| 825 | + if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
| 826 | + (attr->attach_flags & BPF_F_REPLACE)) {
| 827 | + replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
| 828 | + if (IS_ERR(replace_prog)) {
| 829 | + cgroup_put(cgrp);
| 830 | + return PTR_ERR(replace_prog);
| 831 | + }
| 832 | + }
| 833 | +
| 834 | + ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
| 835 | + attr->attach_type, attr->attach_flags);
| 836 | +
| 837 | + if (replace_prog)
| 838 | + bpf_prog_put(replace_prog);
463 | 839 | cgroup_put(cgrp);
464 | 840 | return ret;
465 | 841 | }
.. | ..
478 | 854 | if (IS_ERR(prog))
479 | 855 | prog = NULL;
480 | 856 |
481 | | - ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
| 857 | + ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
482 | 858 | if (prog)
483 | 859 | bpf_prog_put(prog);
484 | 860 |
485 | 861 | cgroup_put(cgrp);
486 | 862 | return ret;
| 863 | +}
| 864 | +
| 865 | +static void bpf_cgroup_link_release(struct bpf_link *link)
| 866 | +{
| 867 | + struct bpf_cgroup_link *cg_link =
| 868 | + container_of(link, struct bpf_cgroup_link, link);
| 869 | + struct cgroup *cg;
| 870 | +
| 871 | + /* link might have been auto-detached by dying cgroup already,
| 872 | + * in that case our work is done here
| 873 | + */
| 874 | + if (!cg_link->cgroup)
| 875 | + return;
| 876 | +
| 877 | + mutex_lock(&cgroup_mutex);
| 878 | +
| 879 | + /* re-check cgroup under lock again */
| 880 | + if (!cg_link->cgroup) {
| 881 | + mutex_unlock(&cgroup_mutex);
| 882 | + return;
| 883 | + }
| 884 | +
| 885 | + WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
| 886 | + cg_link->type));
| 887 | +
| 888 | + cg = cg_link->cgroup;
| 889 | + cg_link->cgroup = NULL;
| 890 | +
| 891 | + mutex_unlock(&cgroup_mutex);
| 892 | +
| 893 | + cgroup_put(cg);
| 894 | +}
| 895 | +
| 896 | +static void bpf_cgroup_link_dealloc(struct bpf_link *link)
| 897 | +{
| 898 | + struct bpf_cgroup_link *cg_link =
| 899 | + container_of(link, struct bpf_cgroup_link, link);
| 900 | +
| 901 | + kfree(cg_link);
| 902 | +}
| 903 | +
| 904 | +static int bpf_cgroup_link_detach(struct bpf_link *link)
| 905 | +{
| 906 | + bpf_cgroup_link_release(link);
| 907 | +
| 908 | + return 0;
| 909 | +}
| 910 | +
| 911 | +static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
| 912 | + struct seq_file *seq)
| 913 | +{
| 914 | + struct bpf_cgroup_link *cg_link =
| 915 | + container_of(link, struct bpf_cgroup_link, link);
| 916 | + u64 cg_id = 0;
| 917 | +
| 918 | + mutex_lock(&cgroup_mutex);
| 919 | + if (cg_link->cgroup)
| 920 | + cg_id = cgroup_id(cg_link->cgroup);
| 921 | + mutex_unlock(&cgroup_mutex);
| 922 | +
| 923 | + seq_printf(seq,
| 924 | + "cgroup_id:\t%llu\n"
| 925 | + "attach_type:\t%d\n",
| 926 | + cg_id,
| 927 | + cg_link->type);
| 928 | +}
| 929 | +
| 930 | +static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
| 931 | + struct bpf_link_info *info)
| 932 | +{
| 933 | + struct bpf_cgroup_link *cg_link =
| 934 | + container_of(link, struct bpf_cgroup_link, link);
| 935 | + u64 cg_id = 0;
| 936 | +
| 937 | + mutex_lock(&cgroup_mutex);
| 938 | + if (cg_link->cgroup)
| 939 | + cg_id = cgroup_id(cg_link->cgroup);
| 940 | + mutex_unlock(&cgroup_mutex);
| 941 | +
| 942 | + info->cgroup.cgroup_id = cg_id;
| 943 | + info->cgroup.attach_type = cg_link->type;
| 944 | + return 0;
| 945 | +}
| 946 | +
| 947 | +static const struct bpf_link_ops bpf_cgroup_link_lops = {
| 948 | + .release = bpf_cgroup_link_release,
| 949 | + .dealloc = bpf_cgroup_link_dealloc,
| 950 | + .detach = bpf_cgroup_link_detach,
| 951 | + .update_prog = cgroup_bpf_replace,
| 952 | + .show_fdinfo = bpf_cgroup_link_show_fdinfo,
| 953 | + .fill_link_info = bpf_cgroup_link_fill_link_info,
| 954 | +};
| 955 | +
| 956 | +int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
| 957 | +{
| 958 | + struct bpf_link_primer link_primer;
| 959 | + struct bpf_cgroup_link *link;
| 960 | + struct cgroup *cgrp;
| 961 | + int err;
| 962 | +
| 963 | + if (attr->link_create.flags)
| 964 | + return -EINVAL;
| 965 | +
| 966 | + cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
| 967 | + if (IS_ERR(cgrp))
| 968 | + return PTR_ERR(cgrp);
| 969 | +
| 970 | + link = kzalloc(sizeof(*link), GFP_USER);
| 971 | + if (!link) {
| 972 | + err = -ENOMEM;
| 973 | + goto out_put_cgroup;
| 974 | + }
| 975 | + bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops,
| 976 | + prog);
| 977 | + link->cgroup = cgrp;
| 978 | + link->type = attr->link_create.attach_type;
| 979 | +
| 980 | + err = bpf_link_prime(&link->link, &link_primer);
| 981 | + if (err) {
| 982 | + kfree(link);
| 983 | + goto out_put_cgroup;
| 984 | + }
| 985 | +
| 986 | + err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
| 987 | + BPF_F_ALLOW_MULTI);
| 988 | + if (err) {
| 989 | + bpf_link_cleanup(&link_primer);
| 990 | + goto out_put_cgroup;
| 991 | + }
| 992 | +
| 993 | + return bpf_link_settle(&link_primer);
| 994 | +
| 995 | +out_put_cgroup:
| 996 | + cgroup_put(cgrp);
| 997 | + return err;
487 | 998 | }
488 | 999 |
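Note: a cgroup bpf_link ties the attachment lifetime to the link FD rather than to the program FD. A hedged libbpf sketch (FDs are placeholders):

    #include <bpf/bpf.h>
    #include <linux/bpf.h>

    int attach_cgroup_link(int cgroup_fd, int prog_fd)
    {
            /* links always attach in BPF_F_ALLOW_MULTI mode, as seen above;
             * closing the last reference to the returned FD detaches the prog
             */
            return bpf_link_create(prog_fd, cgroup_fd,
                                   BPF_CGROUP_INET_EGRESS, NULL);
    }
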
---|
489 | 1000 | int cgroup_bpf_prog_query(const union bpf_attr *attr,
.. | ..
514 | 1025 | * The program type passed in via @type must be suitable for network
515 | 1026 | * filtering. No further check is performed to assert that.
516 | 1027 | *
517 | | - * This function will return %-EPERM if any if an attached program was found
518 | | - * and if it returned != 1 during execution. In all other cases, 0 is returned.
| 1028 | + * For egress packets, this function can return:
| 1029 | + * NET_XMIT_SUCCESS (0) - continue with packet output
| 1030 | + * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr
| 1031 | + * NET_XMIT_CN (2) - continue with packet output and notify TCP
| 1032 | + * to call cwr
| 1033 | + * -EPERM - drop packet
| 1034 | + *
| 1035 | + * For ingress packets, this function will return -EPERM if any
| 1036 | + * attached program was found and if it returned != 1 during execution.
| 1037 | + * Otherwise 0 is returned.
519 | 1038 | */
520 | 1039 | int __cgroup_bpf_run_filter_skb(struct sock *sk,
521 | 1040 | struct sk_buff *skb,
.. | ..
523 | 1042 | {
524 | 1043 | unsigned int offset = skb->data - skb_network_header(skb);
525 | 1044 | struct sock *save_sk;
| 1045 | + void *saved_data_end;
526 | 1046 | struct cgroup *cgrp;
527 | 1047 | int ret;
528 | 1048 |
.. | ..
536 | 1056 | save_sk = skb->sk;
537 | 1057 | skb->sk = sk;
538 | 1058 | __skb_push(skb, offset);
539 | | - ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
540 | | - bpf_prog_run_save_cb);
| 1059 | +
| 1060 | + /* compute pointers for the bpf prog */
| 1061 | + bpf_compute_and_save_data_end(skb, &saved_data_end);
| 1062 | +
| 1063 | + if (type == BPF_CGROUP_INET_EGRESS) {
| 1064 | + ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
| 1065 | + cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
| 1066 | + } else {
| 1067 | + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
| 1068 | + __bpf_prog_run_save_cb);
| 1069 | + ret = (ret == 1 ? 0 : -EPERM);
| 1070 | + }
| 1071 | + bpf_restore_data_end(skb, saved_data_end);
541 | 1072 | __skb_pull(skb, offset);
542 | 1073 | skb->sk = save_sk;
543 | | - return ret == 1 ? 0 : -EPERM;
| 1074 | +
| 1075 | + return ret;
544 | 1076 | }
545 | 1077 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
546 | 1078 |
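Note: with the extended egress semantics, a program can keep a packet flowing while still asking TCP to back off. A hedged BPF-side sketch, assuming this series' return-code convention (bit 0 = keep the packet, bit 1 = congestion notification):

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("cgroup_skb/egress")
    int egress_with_cn(struct __sk_buff *skb)
    {
            if (skb->len > 1400)
                    return 3;       /* keep packet + CN -> NET_XMIT_CN */
            return 1;               /* plain allow -> NET_XMIT_SUCCESS */
    }

    char _license[] SEC("license") = "GPL";
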
---|
.. | ..
661 | 1193 |
662 | 1194 | return !allow;
663 | 1195 | }
664 | | -EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
665 | 1196 |
666 | 1197 | static const struct bpf_func_proto *
667 | | -cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
| 1198 | +cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
668 | 1199 | {
669 | 1200 | switch (func_id) {
670 | | - case BPF_FUNC_map_lookup_elem:
671 | | - return &bpf_map_lookup_elem_proto;
672 | | - case BPF_FUNC_map_update_elem:
673 | | - return &bpf_map_update_elem_proto;
674 | | - case BPF_FUNC_map_delete_elem:
675 | | - return &bpf_map_delete_elem_proto;
676 | 1201 | case BPF_FUNC_get_current_uid_gid:
677 | 1202 | return &bpf_get_current_uid_gid_proto;
678 | 1203 | case BPF_FUNC_get_local_storage:
679 | 1204 | return &bpf_get_local_storage_proto;
680 | | - case BPF_FUNC_trace_printk:
681 | | - if (capable(CAP_SYS_ADMIN))
682 | | - return bpf_get_trace_printk_proto();
| 1205 | + case BPF_FUNC_get_current_cgroup_id:
| 1206 | + return &bpf_get_current_cgroup_id_proto;
| 1207 | + case BPF_FUNC_perf_event_output:
| 1208 | + return &bpf_event_output_data_proto;
683 | 1209 | default:
684 | | - return NULL;
| 1210 | + return bpf_base_func_proto(func_id);
685 | 1211 | }
| 1212 | +}
| 1213 | +
| 1214 | +static const struct bpf_func_proto *
| 1215 | +cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
| 1216 | +{
| 1217 | + return cgroup_base_func_proto(func_id, prog);
686 | 1218 | }
687 | 1219 |
688 | 1220 | static bool cgroup_dev_is_valid_access(int off, int size,
.. | ..
722 | 1254 | .get_func_proto = cgroup_dev_func_proto,
723 | 1255 | .is_valid_access = cgroup_dev_is_valid_access,
724 | 1256 | };
| 1257 | +
| 1258 | +/**
| 1259 | + * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
| 1260 | + *
| 1261 | + * @head: sysctl table header
| 1262 | + * @table: sysctl table
| 1263 | + * @write: sysctl is being read (= 0) or written (= 1)
| 1264 | + * @buf: pointer to buffer (in and out)
| 1265 | + * @pcount: value-result argument: value is size of buffer pointed to by @buf,
| 1266 | + * result is size of the new value if the program set one, initial
| 1267 | + * value otherwise
| 1268 | + * @ppos: value-result argument: value is position at which read from or write
| 1269 | + * to sysctl is happening, result is new position if program overrode it,
| 1270 | + * initial value otherwise
| 1271 | + * @type: type of program to be executed
| 1272 | + *
| 1273 | + * Program is run when sysctl is being accessed, either read or written, and
| 1274 | + * can allow or deny such access.
| 1275 | + *
| 1276 | + * This function will return %-EPERM if an attached program is found and
| 1277 | + * returned value != 1 during execution. In all other cases 0 is returned.
| 1278 | + */
| 1279 | +int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
| 1280 | + struct ctl_table *table, int write,
| 1281 | + char **buf, size_t *pcount, loff_t *ppos,
| 1282 | + enum bpf_attach_type type)
| 1283 | +{
| 1284 | + struct bpf_sysctl_kern ctx = {
| 1285 | + .head = head,
| 1286 | + .table = table,
| 1287 | + .write = write,
| 1288 | + .ppos = ppos,
| 1289 | + .cur_val = NULL,
| 1290 | + .cur_len = PAGE_SIZE,
| 1291 | + .new_val = NULL,
| 1292 | + .new_len = 0,
| 1293 | + .new_updated = 0,
| 1294 | + };
| 1295 | + struct cgroup *cgrp;
| 1296 | + loff_t pos = 0;
| 1297 | + int ret;
| 1298 | +
| 1299 | + ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
| 1300 | + if (!ctx.cur_val ||
| 1301 | + table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) {
| 1302 | + /* Let BPF program decide how to proceed. */
| 1303 | + ctx.cur_len = 0;
| 1304 | + }
| 1305 | +
| 1306 | + if (write && *buf && *pcount) {
| 1307 | + /* BPF program should be able to override new value with a
| 1308 | + * buffer bigger than provided by user.
| 1309 | + */
| 1310 | + ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
| 1311 | + ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
| 1312 | + if (ctx.new_val) {
| 1313 | + memcpy(ctx.new_val, *buf, ctx.new_len);
| 1314 | + } else {
| 1315 | + /* Let BPF program decide how to proceed. */
| 1316 | + ctx.new_len = 0;
| 1317 | + }
| 1318 | + }
| 1319 | +
| 1320 | + rcu_read_lock();
| 1321 | + cgrp = task_dfl_cgroup(current);
| 1322 | + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
| 1323 | + rcu_read_unlock();
| 1324 | +
| 1325 | + kfree(ctx.cur_val);
| 1326 | +
| 1327 | + if (ret == 1 && ctx.new_updated) {
| 1328 | + kfree(*buf);
| 1329 | + *buf = ctx.new_val;
| 1330 | + *pcount = ctx.new_len;
| 1331 | + } else {
| 1332 | + kfree(ctx.new_val);
| 1333 | + }
| 1334 | +
| 1335 | + return ret == 1 ? 0 : -EPERM;
| 1336 | +}
| 1337 | +
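Note: returning 0 from the program maps to the -EPERM documented above. A hedged BPF-side sketch that makes every sysctl read-only for tasks in the attached cgroup; helpers such as bpf_sysctl_get_name() (whose kernel side begins at the end of this hunk) allow finer-grained policies:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("cgroup/sysctl")
    int sysctl_read_only(struct bpf_sysctl *ctx)
    {
            return ctx->write ? 0 : 1;
    }

    char _license[] SEC("license") = "GPL";
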
---|
| 1338 | +#ifdef CONFIG_NET
| 1339 | +static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
| 1340 | + enum bpf_attach_type attach_type)
| 1341 | +{
| 1342 | + struct bpf_prog_array *prog_array;
| 1343 | + bool empty;
| 1344 | +
| 1345 | + rcu_read_lock();
| 1346 | + prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);
| 1347 | + empty = bpf_prog_array_is_empty(prog_array);
| 1348 | + rcu_read_unlock();
| 1349 | +
| 1350 | + return empty;
| 1351 | +}
| 1352 | +
| 1353 | +static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
| 1354 | +{
| 1355 | + if (unlikely(max_optlen < 0))
| 1356 | + return -EINVAL;
| 1357 | +
| 1358 | + if (unlikely(max_optlen > PAGE_SIZE)) {
| 1359 | + /* We don't expose optvals that are greater than PAGE_SIZE
| 1360 | + * to the BPF program.
| 1361 | + */
| 1362 | + max_optlen = PAGE_SIZE;
| 1363 | + }
| 1364 | +
| 1365 | + ctx->optval = kzalloc(max_optlen, GFP_USER);
| 1366 | + if (!ctx->optval)
| 1367 | + return -ENOMEM;
| 1368 | +
| 1369 | + ctx->optval_end = ctx->optval + max_optlen;
| 1370 | +
| 1371 | + return max_optlen;
| 1372 | +}
| 1373 | +
| 1374 | +static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
| 1375 | +{
| 1376 | + kfree(ctx->optval);
| 1377 | +}
| 1378 | +
| 1379 | +int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
| 1380 | + int *optname, char __user *optval,
| 1381 | + int *optlen, char **kernel_optval)
| 1382 | +{
| 1383 | + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
| 1384 | + struct bpf_sockopt_kern ctx = {
| 1385 | + .sk = sk,
| 1386 | + .level = *level,
| 1387 | + .optname = *optname,
| 1388 | + };
| 1389 | + int ret, max_optlen;
| 1390 | +
| 1391 | + /* Opportunistic check to see whether we have any BPF program
| 1392 | + * attached to the hook so we don't waste time allocating
| 1393 | + * memory and locking the socket.
| 1394 | + */
| 1395 | + if (!cgroup_bpf_enabled ||
| 1396 | + __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
| 1397 | + return 0;
| 1398 | +
| 1399 | + /* Allocate a bit more than the initial user buffer for
| 1400 | + * BPF program. The canonical use case is overriding
| 1401 | + * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
| 1402 | + */
| 1403 | + max_optlen = max_t(int, 16, *optlen);
| 1404 | +
| 1405 | + max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
| 1406 | + if (max_optlen < 0)
| 1407 | + return max_optlen;
| 1408 | +
| 1409 | + ctx.optlen = *optlen;
| 1410 | +
| 1411 | + if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
| 1412 | + ret = -EFAULT;
| 1413 | + goto out;
| 1414 | + }
| 1415 | +
| 1416 | + lock_sock(sk);
| 1417 | + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
| 1418 | + &ctx, BPF_PROG_RUN);
| 1419 | + release_sock(sk);
| 1420 | +
| 1421 | + if (!ret) {
| 1422 | + ret = -EPERM;
| 1423 | + goto out;
| 1424 | + }
| 1425 | +
| 1426 | + if (ctx.optlen == -1) {
| 1427 | + /* optlen set to -1, bypass kernel */
| 1428 | + ret = 1;
| 1429 | + } else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
| 1430 | + /* optlen is out of bounds */
| 1431 | + ret = -EFAULT;
| 1432 | + } else {
| 1433 | + /* optlen within bounds, run kernel handler */
| 1434 | + ret = 0;
| 1435 | +
| 1436 | + /* export any potential modifications */
| 1437 | + *level = ctx.level;
| 1438 | + *optname = ctx.optname;
| 1439 | +
| 1440 | + /* optlen == 0 from BPF indicates that we should
| 1441 | + * use original userspace data.
| 1442 | + */
| 1443 | + if (ctx.optlen != 0) {
| 1444 | + *optlen = ctx.optlen;
| 1445 | + *kernel_optval = ctx.optval;
| 1446 | + /* export and don't free sockopt buf */
| 1447 | + return 0;
| 1448 | + }
| 1449 | + }
| 1450 | +
| 1451 | +out:
| 1452 | + sockopt_free_buf(&ctx);
| 1453 | + return ret;
| 1454 | +}
| 1455 | +
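Note: the "canonical use case" named in the comment above — rewriting TCP_CONGESTION(nv) to TCP_CONGESTION(cubic) — looks roughly like this on the BPF side. A hedged sketch; the bounds check against optval_end is what the verifier demands before touching the buffer:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    #define SOL_TCP         6
    #define TCP_CONGESTION  13

    SEC("cgroup/setsockopt")
    int force_cubic(struct bpf_sockopt *ctx)
    {
            char *optval = ctx->optval;

            if (ctx->level != SOL_TCP || ctx->optname != TCP_CONGESTION)
                    return 1;       /* run kernel handler unchanged */

            if (optval + 5 > (char *)ctx->optval_end)
                    return 1;       /* verifier-mandated bounds check */

            if (ctx->optlen == 2 && optval[0] == 'n' && optval[1] == 'v') {
                    __builtin_memcpy(optval, "cubic", 5);
                    ctx->optlen = 5;        /* kernel handler sees "cubic" */
            }
            return 1;
    }

    char _license[] SEC("license") = "GPL";
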
---|
| 1456 | +int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, |
---|
| 1457 | + int optname, char __user *optval, |
---|
| 1458 | + int __user *optlen, int max_optlen, |
---|
| 1459 | + int retval) |
---|
| 1460 | +{ |
---|
| 1461 | + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); |
---|
| 1462 | + struct bpf_sockopt_kern ctx = { |
---|
| 1463 | + .sk = sk, |
---|
| 1464 | + .level = level, |
---|
| 1465 | + .optname = optname, |
---|
| 1466 | + .retval = retval, |
---|
| 1467 | + }; |
---|
| 1468 | + int ret; |
---|
| 1469 | + |
---|
| 1470 | + /* Opportunistic check to see whether we have any BPF program |
---|
| 1471 | + * attached to the hook so we don't waste time allocating |
---|
| 1472 | + * memory and locking the socket. |
---|
| 1473 | + */ |
---|
| 1474 | + if (!cgroup_bpf_enabled || |
---|
| 1475 | + __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT)) |
---|
| 1476 | + return retval; |
---|
| 1477 | + |
---|
| 1478 | + ctx.optlen = max_optlen; |
---|
| 1479 | + |
---|
| 1480 | + max_optlen = sockopt_alloc_buf(&ctx, max_optlen); |
---|
| 1481 | + if (max_optlen < 0) |
---|
| 1482 | + return max_optlen; |
---|
| 1483 | + |
---|
| 1484 | + if (!retval) { |
---|
| 1485 | + /* If kernel getsockopt finished successfully, |
---|
| 1486 | + * copy whatever was returned to the user back |
---|
| 1487 | + * into our temporary buffer. Set optlen to the |
---|
| 1488 | + * one that kernel returned as well to let |
---|
| 1489 | + * BPF programs inspect the value. |
---|
| 1490 | + */ |
---|
| 1491 | + |
---|
| 1492 | + if (get_user(ctx.optlen, optlen)) { |
---|
| 1493 | + ret = -EFAULT; |
---|
| 1494 | + goto out; |
---|
| 1495 | + } |
---|
| 1496 | + |
---|
| 1497 | + if (ctx.optlen < 0) { |
---|
| 1498 | + ret = -EFAULT; |
---|
| 1499 | + goto out; |
---|
| 1500 | + } |
---|
| 1501 | + |
---|
| 1502 | + if (copy_from_user(ctx.optval, optval, |
---|
| 1503 | + min(ctx.optlen, max_optlen)) != 0) { |
---|
| 1504 | + ret = -EFAULT; |
---|
| 1505 | + goto out; |
---|
| 1506 | + } |
---|
| 1507 | + } |
---|
| 1508 | + |
---|
| 1509 | + lock_sock(sk); |
---|
| 1510 | + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT], |
---|
| 1511 | + &ctx, BPF_PROG_RUN); |
---|
| 1512 | + release_sock(sk); |
---|
| 1513 | + |
---|
| 1514 | + if (!ret) { |
---|
| 1515 | + ret = -EPERM; |
---|
| 1516 | + goto out; |
---|
| 1517 | + } |
---|
| 1518 | + |
---|
| 1519 | + if (optval && (ctx.optlen > max_optlen || ctx.optlen < 0)) { |
---|
| 1520 | + ret = -EFAULT; |
---|
| 1521 | + goto out; |
---|
| 1522 | + } |
---|
| 1523 | + |
---|
| 1524 | + /* BPF programs are only allowed to set retval to 0, not to
---|
| 1525 | + * some arbitrary value.
---|
| 1526 | + */ |
---|
| 1527 | + if (ctx.retval != 0 && ctx.retval != retval) { |
---|
| 1528 | + ret = -EFAULT; |
---|
| 1529 | + goto out; |
---|
| 1530 | + } |
---|
| 1531 | + |
---|
| 1532 | + if (ctx.optlen != 0) { |
---|
| 1533 | + if (optval && copy_to_user(optval, ctx.optval, ctx.optlen)) { |
---|
| 1534 | + ret = -EFAULT; |
---|
| 1535 | + goto out; |
---|
| 1536 | + } |
---|
| 1537 | + if (put_user(ctx.optlen, optlen)) { |
---|
| 1538 | + ret = -EFAULT; |
---|
| 1539 | + goto out; |
---|
| 1540 | + } |
---|
| 1541 | + } |
---|
| 1542 | + |
---|
| 1543 | + ret = ctx.retval; |
---|
| 1544 | + |
---|
| 1545 | +out: |
---|
| 1546 | + sockopt_free_buf(&ctx); |
---|
| 1547 | + return ret; |
---|
| 1548 | +} |
---|
| 1549 | +#endif |
---|
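
The getsockopt side runs after the kernel handler, so a program sees both the output buffer and the kernel's retval. A hedged sketch follows (SOL_CUSTOM is an illustrative constant borrowed from the selftests) that synthesizes a value for an option the kernel rejected; note it may only set retval to 0, per the check above.

```c
/* Hedged sketch of a cgroup getsockopt program; names are illustrative. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define SOL_CUSTOM 0xdeadbeef	/* illustrative, as in the BPF selftests */

SEC("cgroup/getsockopt")
int synth_getsockopt(struct bpf_sockopt *ctx)
{
	__u8 *optval = ctx->optval;

	if (ctx->level != SOL_CUSTOM)
		return 1;	/* pass every other option through untouched */

	if (optval + 1 > (__u8 *)ctx->optval_end)
		return 0;	/* bounds check failed: surfaces as -EPERM */

	optval[0] = 42;		/* synthesize a one-byte result */
	ctx->optlen = 1;
	ctx->retval = 0;	/* 0 is the only retval a program may set */

	return 1;
}

char _license[] SEC("license") = "GPL";
```
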
| 1550 | + |
---|
| 1551 | +static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, |
---|
| 1552 | + size_t *lenp) |
---|
| 1553 | +{ |
---|
| 1554 | + ssize_t tmp_ret = 0, ret; |
---|
| 1555 | + |
---|
| 1556 | + if (dir->header.parent) { |
---|
| 1557 | + tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp); |
---|
| 1558 | + if (tmp_ret < 0) |
---|
| 1559 | + return tmp_ret; |
---|
| 1560 | + } |
---|
| 1561 | + |
---|
| 1562 | + ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp); |
---|
| 1563 | + if (ret < 0) |
---|
| 1564 | + return ret; |
---|
| 1565 | + *bufp += ret; |
---|
| 1566 | + *lenp -= ret; |
---|
| 1567 | + ret += tmp_ret; |
---|
| 1568 | + |
---|
| 1569 | + /* Avoid leading slash. */ |
---|
| 1570 | + if (!ret) |
---|
| 1571 | + return ret; |
---|
| 1572 | + |
---|
| 1573 | + tmp_ret = strscpy(*bufp, "/", *lenp); |
---|
| 1574 | + if (tmp_ret < 0) |
---|
| 1575 | + return tmp_ret; |
---|
| 1576 | + *bufp += tmp_ret; |
---|
| 1577 | + *lenp -= tmp_ret; |
---|
| 1578 | + |
---|
| 1579 | + return ret + tmp_ret; |
---|
| 1580 | +} |
---|
| 1581 | + |
---|
| 1582 | +BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf, |
---|
| 1583 | + size_t, buf_len, u64, flags) |
---|
| 1584 | +{ |
---|
| 1585 | + ssize_t tmp_ret = 0, ret; |
---|
| 1586 | + |
---|
| 1587 | + if (!buf) |
---|
| 1588 | + return -EINVAL; |
---|
| 1589 | + |
---|
| 1590 | + if (!(flags & BPF_F_SYSCTL_BASE_NAME)) { |
---|
| 1591 | + if (!ctx->head) |
---|
| 1592 | + return -EINVAL; |
---|
| 1593 | + tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len); |
---|
| 1594 | + if (tmp_ret < 0) |
---|
| 1595 | + return tmp_ret; |
---|
| 1596 | + } |
---|
| 1597 | + |
---|
| 1598 | + ret = strscpy(buf, ctx->table->procname, buf_len); |
---|
| 1599 | + |
---|
| 1600 | + return ret < 0 ? ret : tmp_ret + ret; |
---|
| 1601 | +} |
---|
| 1602 | + |
---|
| 1603 | +static const struct bpf_func_proto bpf_sysctl_get_name_proto = { |
---|
| 1604 | + .func = bpf_sysctl_get_name, |
---|
| 1605 | + .gpl_only = false, |
---|
| 1606 | + .ret_type = RET_INTEGER, |
---|
| 1607 | + .arg1_type = ARG_PTR_TO_CTX, |
---|
| 1608 | + .arg2_type = ARG_PTR_TO_MEM, |
---|
| 1609 | + .arg3_type = ARG_CONST_SIZE, |
---|
| 1610 | + .arg4_type = ARG_ANYTHING, |
---|
| 1611 | +}; |
---|
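
A hedged sketch of a consumer (the program name is illustrative): with flags == 0 the helper assembles the full path by recursing through sysctl_cpy_dir() above, e.g. "net/ipv4/tcp_mem", while BPF_F_SYSCTL_BASE_NAME yields only the leaf name.

```c
/* Hedged sketch of a cgroup sysctl program using bpf_sysctl_get_name(). */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sysctl")
int sysctl_name_filter(struct bpf_sysctl *ctx)
{
	char name[64];
	long ret;

	/* Without BPF_F_SYSCTL_BASE_NAME this returns the full path,
	 * e.g. "net/ipv4/tcp_mem"; on truncation it returns -E2BIG. */
	ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
	if (ret < 0)
		return 0;	/* reject: read()/write() fails with -EPERM */

	return 1;	/* allow */
}

char _license[] SEC("license") = "GPL";
```
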
| 1612 | + |
---|
| 1613 | +static int copy_sysctl_value(char *dst, size_t dst_len, char *src, |
---|
| 1614 | + size_t src_len) |
---|
| 1615 | +{ |
---|
| 1616 | + if (!dst) |
---|
| 1617 | + return -EINVAL; |
---|
| 1618 | + |
---|
| 1619 | + if (!dst_len) |
---|
| 1620 | + return -E2BIG; |
---|
| 1621 | + |
---|
| 1622 | + if (!src || !src_len) { |
---|
| 1623 | + memset(dst, 0, dst_len); |
---|
| 1624 | + return -EINVAL; |
---|
| 1625 | + } |
---|
| 1626 | + |
---|
| 1627 | + memcpy(dst, src, min(dst_len, src_len)); |
---|
| 1628 | + |
---|
| 1629 | + if (dst_len > src_len) { |
---|
| 1630 | + memset(dst + src_len, '\0', dst_len - src_len); |
---|
| 1631 | + return src_len; |
---|
| 1632 | + } |
---|
| 1633 | + |
---|
| 1634 | + dst[dst_len - 1] = '\0'; |
---|
| 1635 | + |
---|
| 1636 | + return -E2BIG; |
---|
| 1637 | +} |
---|
| 1638 | + |
---|
| 1639 | +BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx, |
---|
| 1640 | + char *, buf, size_t, buf_len) |
---|
| 1641 | +{ |
---|
| 1642 | + return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len); |
---|
| 1643 | +} |
---|
| 1644 | + |
---|
| 1645 | +static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = { |
---|
| 1646 | + .func = bpf_sysctl_get_current_value, |
---|
| 1647 | + .gpl_only = false, |
---|
| 1648 | + .ret_type = RET_INTEGER, |
---|
| 1649 | + .arg1_type = ARG_PTR_TO_CTX, |
---|
| 1650 | + .arg2_type = ARG_PTR_TO_UNINIT_MEM, |
---|
| 1651 | + .arg3_type = ARG_CONST_SIZE, |
---|
| 1652 | +}; |
---|
| 1653 | + |
---|
| 1654 | +BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf, |
---|
| 1655 | + size_t, buf_len) |
---|
| 1656 | +{ |
---|
| 1657 | + if (!ctx->write) { |
---|
| 1658 | + if (buf && buf_len) |
---|
| 1659 | + memset(buf, '\0', buf_len); |
---|
| 1660 | + return -EINVAL; |
---|
| 1661 | + } |
---|
| 1662 | + return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len); |
---|
| 1663 | +} |
---|
| 1664 | + |
---|
| 1665 | +static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = { |
---|
| 1666 | + .func = bpf_sysctl_get_new_value, |
---|
| 1667 | + .gpl_only = false, |
---|
| 1668 | + .ret_type = RET_INTEGER, |
---|
| 1669 | + .arg1_type = ARG_PTR_TO_CTX, |
---|
| 1670 | + .arg2_type = ARG_PTR_TO_UNINIT_MEM, |
---|
| 1671 | + .arg3_type = ARG_CONST_SIZE, |
---|
| 1672 | +}; |
---|
| 1673 | + |
---|
| 1674 | +BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx, |
---|
| 1675 | + const char *, buf, size_t, buf_len) |
---|
| 1676 | +{ |
---|
| 1677 | + if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len) |
---|
| 1678 | + return -EINVAL; |
---|
| 1679 | + |
---|
| 1680 | + if (buf_len > PAGE_SIZE - 1) |
---|
| 1681 | + return -E2BIG; |
---|
| 1682 | + |
---|
| 1683 | + memcpy(ctx->new_val, buf, buf_len); |
---|
| 1684 | + ctx->new_len = buf_len; |
---|
| 1685 | + ctx->new_updated = 1; |
---|
| 1686 | + |
---|
| 1687 | + return 0; |
---|
| 1688 | +} |
---|
| 1689 | + |
---|
| 1690 | +static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = { |
---|
| 1691 | + .func = bpf_sysctl_set_new_value, |
---|
| 1692 | + .gpl_only = false, |
---|
| 1693 | + .ret_type = RET_INTEGER, |
---|
| 1694 | + .arg1_type = ARG_PTR_TO_CTX, |
---|
| 1695 | + .arg2_type = ARG_PTR_TO_MEM, |
---|
| 1696 | + .arg3_type = ARG_CONST_SIZE, |
---|
| 1697 | +}; |
---|
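
Putting the value helpers together, a hedged sketch that clamps a sysctl write (the limit and names are illustrative): it reads the pending value with bpf_sysctl_get_new_value(), parses it with bpf_strtoul() (exposed through sysctl_func_proto() just below), and rewrites it with bpf_sysctl_set_new_value(). Returning 0 makes the write fail with -EPERM; returning 1 lets it proceed.

```c
/* Hedged sketch of clamping a sysctl write from BPF. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sysctl")
int clamp_sysctl_write(struct bpf_sysctl *ctx)
{
	unsigned long val;
	char buf[16];
	long len;

	if (!ctx->write)
		return 1;	/* reads pass through untouched */

	/* Fails with -E2BIG if the pending value doesn't fit. */
	len = bpf_sysctl_get_new_value(ctx, buf, sizeof(buf));
	if (len < 0)
		return 0;

	/* buf is zero-padded by the kernel (see copy_sysctl_value()
	 * above), so a constant length is safe here. */
	if (bpf_strtoul(buf, sizeof(buf), 0, &val) < 0)
		return 0;

	if (val > 4096) {
		const char capped[] = "4096";

		/* Illustrative clamp: replace the pending value. */
		if (bpf_sysctl_set_new_value(ctx, capped, sizeof(capped) - 1))
			return 0;
	}

	return 1;	/* allow the (possibly rewritten) write */
}

char _license[] SEC("license") = "GPL";
```
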
| 1698 | + |
---|
| 1699 | +static const struct bpf_func_proto * |
---|
| 1700 | +sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
---|
| 1701 | +{ |
---|
| 1702 | + switch (func_id) { |
---|
| 1703 | + case BPF_FUNC_strtol: |
---|
| 1704 | + return &bpf_strtol_proto; |
---|
| 1705 | + case BPF_FUNC_strtoul: |
---|
| 1706 | + return &bpf_strtoul_proto; |
---|
| 1707 | + case BPF_FUNC_sysctl_get_name: |
---|
| 1708 | + return &bpf_sysctl_get_name_proto; |
---|
| 1709 | + case BPF_FUNC_sysctl_get_current_value: |
---|
| 1710 | + return &bpf_sysctl_get_current_value_proto; |
---|
| 1711 | + case BPF_FUNC_sysctl_get_new_value: |
---|
| 1712 | + return &bpf_sysctl_get_new_value_proto; |
---|
| 1713 | + case BPF_FUNC_sysctl_set_new_value: |
---|
| 1714 | + return &bpf_sysctl_set_new_value_proto; |
---|
| 1715 | + default: |
---|
| 1716 | + return cgroup_base_func_proto(func_id, prog); |
---|
| 1717 | + } |
---|
| 1718 | +} |
---|
| 1719 | + |
---|
| 1720 | +static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type, |
---|
| 1721 | + const struct bpf_prog *prog, |
---|
| 1722 | + struct bpf_insn_access_aux *info) |
---|
| 1723 | +{ |
---|
| 1724 | + const int size_default = sizeof(__u32); |
---|
| 1725 | + |
---|
| 1726 | + if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size) |
---|
| 1727 | + return false; |
---|
| 1728 | + |
---|
| 1729 | + switch (off) { |
---|
| 1730 | + case bpf_ctx_range(struct bpf_sysctl, write): |
---|
| 1731 | + if (type != BPF_READ) |
---|
| 1732 | + return false; |
---|
| 1733 | + bpf_ctx_record_field_size(info, size_default); |
---|
| 1734 | + return bpf_ctx_narrow_access_ok(off, size, size_default); |
---|
| 1735 | + case bpf_ctx_range(struct bpf_sysctl, file_pos): |
---|
| 1736 | + if (type == BPF_READ) { |
---|
| 1737 | + bpf_ctx_record_field_size(info, size_default); |
---|
| 1738 | + return bpf_ctx_narrow_access_ok(off, size, size_default); |
---|
| 1739 | + } else { |
---|
| 1740 | + return size == size_default; |
---|
| 1741 | + } |
---|
| 1742 | + default: |
---|
| 1743 | + return false; |
---|
| 1744 | + } |
---|
| 1745 | +} |
---|
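
Seen from the program side, the rules encoded above come down to this (a hedged sketch; the function name is illustrative): 'write' is read-only and may be loaded narrowly, while 'file_pos' may be read narrowly but only stored as a full 32-bit value.

```c
/* Hedged sketch of the permitted context accesses in a sysctl program. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sysctl")
int ctx_access_demo(struct bpf_sysctl *ctx)
{
	/* BPF_READ of 'write': loads, including narrow ones, are fine. */
	if (ctx->write)
		/* BPF_WRITE to 'file_pos': must be a full __u32 store;
		 * this rewinds the write offset to the start. */
		ctx->file_pos = 0;

	return 1;
}

char _license[] SEC("license") = "GPL";
```
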
| 1746 | + |
---|
| 1747 | +static u32 sysctl_convert_ctx_access(enum bpf_access_type type, |
---|
| 1748 | + const struct bpf_insn *si, |
---|
| 1749 | + struct bpf_insn *insn_buf, |
---|
| 1750 | + struct bpf_prog *prog, u32 *target_size) |
---|
| 1751 | +{ |
---|
| 1752 | + struct bpf_insn *insn = insn_buf; |
---|
| 1753 | + u32 read_size; |
---|
| 1754 | + |
---|
| 1755 | + switch (si->off) { |
---|
| 1756 | + case offsetof(struct bpf_sysctl, write): |
---|
| 1757 | + *insn++ = BPF_LDX_MEM( |
---|
| 1758 | + BPF_SIZE(si->code), si->dst_reg, si->src_reg, |
---|
| 1759 | + bpf_target_off(struct bpf_sysctl_kern, write, |
---|
| 1760 | + sizeof_field(struct bpf_sysctl_kern, |
---|
| 1761 | + write), |
---|
| 1762 | + target_size)); |
---|
| 1763 | + break; |
---|
| 1764 | + case offsetof(struct bpf_sysctl, file_pos): |
---|
| 1765 | + /* ppos is a pointer, so it must be accessed via indirect
---|
| 1766 | + * loads and stores. For stores, an additional temporary
---|
| 1767 | + * register is used, since neither src_reg nor dst_reg may be
---|
| 1768 | + * clobbered.
---|
| 1769 | + */ |
---|
| 1770 | + if (type == BPF_WRITE) { |
---|
| 1771 | + int treg = BPF_REG_9; |
---|
| 1772 | + |
---|
| 1773 | + if (si->src_reg == treg || si->dst_reg == treg) |
---|
| 1774 | + --treg; |
---|
| 1775 | + if (si->src_reg == treg || si->dst_reg == treg) |
---|
| 1776 | + --treg; |
---|
| 1777 | + *insn++ = BPF_STX_MEM( |
---|
| 1778 | + BPF_DW, si->dst_reg, treg, |
---|
| 1779 | + offsetof(struct bpf_sysctl_kern, tmp_reg)); |
---|
| 1780 | + *insn++ = BPF_LDX_MEM( |
---|
| 1781 | + BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), |
---|
| 1782 | + treg, si->dst_reg, |
---|
| 1783 | + offsetof(struct bpf_sysctl_kern, ppos)); |
---|
| 1784 | + *insn++ = BPF_STX_MEM( |
---|
| 1785 | + BPF_SIZEOF(u32), treg, si->src_reg, |
---|
| 1786 | + bpf_ctx_narrow_access_offset( |
---|
| 1787 | + 0, sizeof(u32), sizeof(loff_t))); |
---|
| 1788 | + *insn++ = BPF_LDX_MEM( |
---|
| 1789 | + BPF_DW, treg, si->dst_reg, |
---|
| 1790 | + offsetof(struct bpf_sysctl_kern, tmp_reg)); |
---|
| 1791 | + } else { |
---|
| 1792 | + *insn++ = BPF_LDX_MEM( |
---|
| 1793 | + BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), |
---|
| 1794 | + si->dst_reg, si->src_reg, |
---|
| 1795 | + offsetof(struct bpf_sysctl_kern, ppos)); |
---|
| 1796 | + read_size = bpf_size_to_bytes(BPF_SIZE(si->code)); |
---|
| 1797 | + *insn++ = BPF_LDX_MEM( |
---|
| 1798 | + BPF_SIZE(si->code), si->dst_reg, si->dst_reg, |
---|
| 1799 | + bpf_ctx_narrow_access_offset( |
---|
| 1800 | + 0, read_size, sizeof(loff_t))); |
---|
| 1801 | + } |
---|
| 1802 | + *target_size = sizeof(u32); |
---|
| 1803 | + break; |
---|
| 1804 | + } |
---|
| 1805 | + |
---|
| 1806 | + return insn - insn_buf; |
---|
| 1807 | +} |
---|
| 1808 | + |
---|
| 1809 | +const struct bpf_verifier_ops cg_sysctl_verifier_ops = { |
---|
| 1810 | + .get_func_proto = sysctl_func_proto, |
---|
| 1811 | + .is_valid_access = sysctl_is_valid_access, |
---|
| 1812 | + .convert_ctx_access = sysctl_convert_ctx_access, |
---|
| 1813 | +}; |
---|
| 1814 | + |
---|
| 1815 | +const struct bpf_prog_ops cg_sysctl_prog_ops = { |
---|
| 1816 | +}; |
---|
| 1817 | + |
---|
| 1818 | +static const struct bpf_func_proto * |
---|
| 1819 | +cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
---|
| 1820 | +{ |
---|
| 1821 | + switch (func_id) { |
---|
| 1822 | +#ifdef CONFIG_NET |
---|
| 1823 | + case BPF_FUNC_sk_storage_get: |
---|
| 1824 | + return &bpf_sk_storage_get_proto; |
---|
| 1825 | + case BPF_FUNC_sk_storage_delete: |
---|
| 1826 | + return &bpf_sk_storage_delete_proto; |
---|
| 1827 | +#endif |
---|
| 1828 | +#ifdef CONFIG_INET |
---|
| 1829 | + case BPF_FUNC_tcp_sock: |
---|
| 1830 | + return &bpf_tcp_sock_proto; |
---|
| 1831 | +#endif |
---|
| 1832 | + default: |
---|
| 1833 | + return cgroup_base_func_proto(func_id, prog); |
---|
| 1834 | + } |
---|
| 1835 | +} |
---|
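
A hedged sketch of what the sk_storage helpers buy a sockopt program (the map and names are illustrative): per-socket state that persists across calls, here a simple invocation counter keyed off ctx->sk.

```c
/* Hedged sketch of per-socket state in a sockopt program. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, int);
} sockopt_cnt SEC(".maps");

SEC("cgroup/setsockopt")
int count_setsockopt(struct bpf_sockopt *ctx)
{
	int *cnt;

	/* Per-socket scratch space, created on first use. */
	cnt = bpf_sk_storage_get(&sockopt_cnt, ctx->sk, NULL,
				 BPF_SK_STORAGE_GET_F_CREATE);
	if (cnt)
		(*cnt)++;

	return 1;	/* never interfere with the option itself */
}

char _license[] SEC("license") = "GPL";
```
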
| 1836 | + |
---|
| 1837 | +static bool cg_sockopt_is_valid_access(int off, int size, |
---|
| 1838 | + enum bpf_access_type type, |
---|
| 1839 | + const struct bpf_prog *prog, |
---|
| 1840 | + struct bpf_insn_access_aux *info) |
---|
| 1841 | +{ |
---|
| 1842 | + const int size_default = sizeof(__u32); |
---|
| 1843 | + |
---|
| 1844 | + if (off < 0 || off >= sizeof(struct bpf_sockopt)) |
---|
| 1845 | + return false; |
---|
| 1846 | + |
---|
| 1847 | + if (off % size != 0) |
---|
| 1848 | + return false; |
---|
| 1849 | + |
---|
| 1850 | + if (type == BPF_WRITE) { |
---|
| 1851 | + switch (off) { |
---|
| 1852 | + case offsetof(struct bpf_sockopt, retval): |
---|
| 1853 | + if (size != size_default) |
---|
| 1854 | + return false; |
---|
| 1855 | + return prog->expected_attach_type == |
---|
| 1856 | + BPF_CGROUP_GETSOCKOPT; |
---|
| 1857 | + case offsetof(struct bpf_sockopt, optname): |
---|
| 1858 | + fallthrough; |
---|
| 1859 | + case offsetof(struct bpf_sockopt, level): |
---|
| 1860 | + if (size != size_default) |
---|
| 1861 | + return false; |
---|
| 1862 | + return prog->expected_attach_type == |
---|
| 1863 | + BPF_CGROUP_SETSOCKOPT; |
---|
| 1864 | + case offsetof(struct bpf_sockopt, optlen): |
---|
| 1865 | + return size == size_default; |
---|
| 1866 | + default: |
---|
| 1867 | + return false; |
---|
| 1868 | + } |
---|
| 1869 | + } |
---|
| 1870 | + |
---|
| 1871 | + switch (off) { |
---|
| 1872 | + case offsetof(struct bpf_sockopt, sk): |
---|
| 1873 | + if (size != sizeof(__u64)) |
---|
| 1874 | + return false; |
---|
| 1875 | + info->reg_type = PTR_TO_SOCKET; |
---|
| 1876 | + break; |
---|
| 1877 | + case offsetof(struct bpf_sockopt, optval): |
---|
| 1878 | + if (size != sizeof(__u64)) |
---|
| 1879 | + return false; |
---|
| 1880 | + info->reg_type = PTR_TO_PACKET; |
---|
| 1881 | + break; |
---|
| 1882 | + case offsetof(struct bpf_sockopt, optval_end): |
---|
| 1883 | + if (size != sizeof(__u64)) |
---|
| 1884 | + return false; |
---|
| 1885 | + info->reg_type = PTR_TO_PACKET_END; |
---|
| 1886 | + break; |
---|
| 1887 | + case offsetof(struct bpf_sockopt, retval): |
---|
| 1888 | + if (size != size_default) |
---|
| 1889 | + return false; |
---|
| 1890 | + return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT; |
---|
| 1891 | + default: |
---|
| 1892 | + if (size != size_default) |
---|
| 1893 | + return false; |
---|
| 1894 | + break; |
---|
| 1895 | + } |
---|
| 1896 | + return true; |
---|
| 1897 | +} |
---|
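
Because optval and optval_end are classified as PTR_TO_PACKET/PTR_TO_PACKET_END above, the verifier enforces the same range-check discipline as for skb data pointers. A hedged fragment showing the idiom (the function name is illustrative):

```c
/* Hedged sketch of the mandatory optval bounds-check idiom. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/setsockopt")
int bounds_demo(struct bpf_sockopt *ctx)
{
	__u8 *p = ctx->optval;

	/* Any access not dominated by a comparison against
	 * optval_end is rejected at load time. */
	if (p + 4 > (__u8 *)ctx->optval_end)
		return 1;	/* too short for us: leave it alone */

	*(__u32 *)p = 0;	/* provably in bounds after the check */
	return 1;
}

char _license[] SEC("license") = "GPL";
```
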
| 1898 | + |
---|
| 1899 | +#define CG_SOCKOPT_ACCESS_FIELD(T, F) \ |
---|
| 1900 | + T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \ |
---|
| 1901 | + si->dst_reg, si->src_reg, \ |
---|
| 1902 | + offsetof(struct bpf_sockopt_kern, F)) |
---|
| 1903 | + |
---|
| 1904 | +static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, |
---|
| 1905 | + const struct bpf_insn *si, |
---|
| 1906 | + struct bpf_insn *insn_buf, |
---|
| 1907 | + struct bpf_prog *prog, |
---|
| 1908 | + u32 *target_size) |
---|
| 1909 | +{ |
---|
| 1910 | + struct bpf_insn *insn = insn_buf; |
---|
| 1911 | + |
---|
| 1912 | + switch (si->off) { |
---|
| 1913 | + case offsetof(struct bpf_sockopt, sk): |
---|
| 1914 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk); |
---|
| 1915 | + break; |
---|
| 1916 | + case offsetof(struct bpf_sockopt, level): |
---|
| 1917 | + if (type == BPF_WRITE) |
---|
| 1918 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level); |
---|
| 1919 | + else |
---|
| 1920 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level); |
---|
| 1921 | + break; |
---|
| 1922 | + case offsetof(struct bpf_sockopt, optname): |
---|
| 1923 | + if (type == BPF_WRITE) |
---|
| 1924 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname); |
---|
| 1925 | + else |
---|
| 1926 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname); |
---|
| 1927 | + break; |
---|
| 1928 | + case offsetof(struct bpf_sockopt, optlen): |
---|
| 1929 | + if (type == BPF_WRITE) |
---|
| 1930 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen); |
---|
| 1931 | + else |
---|
| 1932 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); |
---|
| 1933 | + break; |
---|
| 1934 | + case offsetof(struct bpf_sockopt, retval): |
---|
| 1935 | + if (type == BPF_WRITE) |
---|
| 1936 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval); |
---|
| 1937 | + else |
---|
| 1938 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval); |
---|
| 1939 | + break; |
---|
| 1940 | + case offsetof(struct bpf_sockopt, optval): |
---|
| 1941 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval); |
---|
| 1942 | + break; |
---|
| 1943 | + case offsetof(struct bpf_sockopt, optval_end): |
---|
| 1944 | + *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end); |
---|
| 1945 | + break; |
---|
| 1946 | + } |
---|
| 1947 | + |
---|
| 1948 | + return insn - insn_buf; |
---|
| 1949 | +} |
---|
| 1950 | + |
---|
| 1951 | +static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf, |
---|
| 1952 | + bool direct_write, |
---|
| 1953 | + const struct bpf_prog *prog) |
---|
| 1954 | +{ |
---|
| 1955 | + /* Nothing to do for the sockopt argument: the buffer is
---|
| 1956 | + * kzalloc()'ed, so it is already zero-initialized. */
---|
| 1957 | + return 0; |
---|
| 1958 | +} |
---|
| 1959 | + |
---|
| 1960 | +const struct bpf_verifier_ops cg_sockopt_verifier_ops = { |
---|
| 1961 | + .get_func_proto = cg_sockopt_func_proto, |
---|
| 1962 | + .is_valid_access = cg_sockopt_is_valid_access, |
---|
| 1963 | + .convert_ctx_access = cg_sockopt_convert_ctx_access, |
---|
| 1964 | + .gen_prologue = cg_sockopt_get_prologue, |
---|
| 1965 | +}; |
---|
| 1966 | + |
---|
| 1967 | +const struct bpf_prog_ops cg_sockopt_prog_ops = { |
---|
| 1968 | +}; |
---|
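
Finally, a hedged userspace sketch of wiring one of the earlier program sketches to a cgroup via libbpf (libbpf 1.x error conventions assumed; "override_cc" is the illustrative program from the setsockopt sketch, and error-path cleanup is abbreviated):

```c
/* Hedged userspace sketch: attach a sockopt program to a cgroup. */
#include <fcntl.h>
#include <bpf/libbpf.h>

int attach_to_cgroup(const char *obj_path, const char *cgroup_path)
{
	struct bpf_object *obj = bpf_object__open_file(obj_path, NULL);
	struct bpf_program *prog;
	int cg_fd;

	if (!obj || bpf_object__load(obj))
		return -1;

	/* SEC("cgroup/setsockopt") maps to BPF_CGROUP_SETSOCKOPT. */
	prog = bpf_object__find_program_by_name(obj, "override_cc");
	cg_fd = open(cgroup_path, O_RDONLY);
	if (!prog || cg_fd < 0)
		return -1;

	/* The returned link keeps the attachment alive; NULL on error. */
	return bpf_program__attach_cgroup(prog, cg_fd) ? 0 : -1;
}
```
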