| .. | .. | 
|---|
 | 1 | +// SPDX-License-Identifier: GPL-2.0-only  | 
|---|
| 1 | 2 |  /* | 
|---|
| 2 | 3 |   * Functions to manage eBPF programs attached to cgroups | 
|---|
| 3 | 4 |   * | 
|---|
| 4 | 5 |   * Copyright (c) 2016 Daniel Mack | 
|---|
| 5 |  | - *  | 
|---|
| 6 |  | - * This file is subject to the terms and conditions of version 2 of the GNU  | 
|---|
| 7 |  | - * General Public License.  See the file COPYING in the main directory of the  | 
|---|
| 8 |  | - * Linux distribution for more details.  | 
|---|
| 9 | 6 |   */ | 
|---|
| 10 | 7 |   | 
|---|
| 11 | 8 |  #include <linux/kernel.h> | 
|---|
| 12 | 9 |  #include <linux/atomic.h> | 
|---|
| 13 | 10 |  #include <linux/cgroup.h> | 
|---|
 | 11 | +#include <linux/filter.h>  | 
|---|
| 14 | 12 |  #include <linux/slab.h> | 
|---|
 | 13 | +#include <linux/sysctl.h>  | 
|---|
 | 14 | +#include <linux/string.h>  | 
|---|
| 15 | 15 |  #include <linux/bpf.h> | 
|---|
| 16 | 16 |  #include <linux/bpf-cgroup.h> | 
|---|
| 17 | 17 |  #include <net/sock.h> | 
|---|
 | 18 | +#include <net/bpf_sk_storage.h>  | 
|---|
 | 19 | +  | 
|---|
 | 20 | +#include "../cgroup/cgroup-internal.h"  | 
|---|
| 18 | 21 |   | 
|---|
| 19 | 22 |  DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); | 
|---|
| 20 | 23 |  EXPORT_SYMBOL(cgroup_bpf_enabled_key); | 
|---|
| 21 | 24 |   | 
|---|
| 22 |  | -/**  | 
|---|
| 23 |  | - * cgroup_bpf_put() - put references of all bpf programs  | 
|---|
| 24 |  | - * @cgrp: the cgroup to modify  | 
|---|
| 25 |  | - */  | 
|---|
| 26 |  | -void cgroup_bpf_put(struct cgroup *cgrp)  | 
|---|
 | 25 | +void cgroup_bpf_offline(struct cgroup *cgrp)  | 
|---|
| 27 | 26 |  { | 
|---|
 | 27 | +	cgroup_get(cgrp);  | 
|---|
 | 28 | +	percpu_ref_kill(&cgrp->bpf.refcnt);  | 
|---|
 | 29 | +}  | 
|---|
 | 30 | +  | 
|---|
 | 31 | +static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])  | 
|---|
 | 32 | +{  | 
|---|
 | 33 | +	enum bpf_cgroup_storage_type stype;  | 
|---|
 | 34 | +  | 
|---|
 | 35 | +	for_each_cgroup_storage_type(stype)  | 
|---|
 | 36 | +		bpf_cgroup_storage_free(storages[stype]);  | 
|---|
 | 37 | +}  | 
|---|
 | 38 | +  | 
|---|
 | 39 | +static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],  | 
|---|
 | 40 | +				     struct bpf_cgroup_storage *new_storages[],  | 
|---|
 | 41 | +				     enum bpf_attach_type type,  | 
|---|
 | 42 | +				     struct bpf_prog *prog,  | 
|---|
 | 43 | +				     struct cgroup *cgrp)  | 
|---|
 | 44 | +{  | 
|---|
 | 45 | +	enum bpf_cgroup_storage_type stype;  | 
|---|
 | 46 | +	struct bpf_cgroup_storage_key key;  | 
|---|
 | 47 | +	struct bpf_map *map;  | 
|---|
 | 48 | +  | 
|---|
 | 49 | +	key.cgroup_inode_id = cgroup_id(cgrp);  | 
|---|
 | 50 | +	key.attach_type = type;  | 
|---|
 | 51 | +  | 
|---|
 | 52 | +	for_each_cgroup_storage_type(stype) {  | 
|---|
 | 53 | +		map = prog->aux->cgroup_storage[stype];  | 
|---|
 | 54 | +		if (!map)  | 
|---|
 | 55 | +			continue;  | 
|---|
 | 56 | +  | 
|---|
 | 57 | +		storages[stype] = cgroup_storage_lookup((void *)map, &key, false);  | 
|---|
 | 58 | +		if (storages[stype])  | 
|---|
 | 59 | +			continue;  | 
|---|
 | 60 | +  | 
|---|
 | 61 | +		storages[stype] = bpf_cgroup_storage_alloc(prog, stype);  | 
|---|
 | 62 | +		if (IS_ERR(storages[stype])) {  | 
|---|
 | 63 | +			bpf_cgroup_storages_free(new_storages);  | 
|---|
 | 64 | +			return -ENOMEM;  | 
|---|
 | 65 | +		}  | 
|---|
 | 66 | +  | 
|---|
 | 67 | +		new_storages[stype] = storages[stype];  | 
|---|
 | 68 | +	}  | 
|---|
 | 69 | +  | 
|---|
 | 70 | +	return 0;  | 
|---|
 | 71 | +}  | 
|---|
 | 72 | +  | 
|---|
 | 73 | +static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],  | 
|---|
 | 74 | +				       struct bpf_cgroup_storage *src[])  | 
|---|
 | 75 | +{  | 
|---|
 | 76 | +	enum bpf_cgroup_storage_type stype;  | 
|---|
 | 77 | +  | 
|---|
 | 78 | +	for_each_cgroup_storage_type(stype)  | 
|---|
 | 79 | +		dst[stype] = src[stype];  | 
|---|
 | 80 | +}  | 
|---|
 | 81 | +  | 
|---|
 | 82 | +static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],  | 
|---|
 | 83 | +				     struct cgroup *cgrp,  | 
|---|
 | 84 | +				     enum bpf_attach_type attach_type)  | 
|---|
 | 85 | +{  | 
|---|
 | 86 | +	enum bpf_cgroup_storage_type stype;  | 
|---|
 | 87 | +  | 
|---|
 | 88 | +	for_each_cgroup_storage_type(stype)  | 
|---|
 | 89 | +		bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);  | 
|---|
 | 90 | +}  | 
|---|
 | 91 | +  | 
|---|
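The storage helpers above manage per-(cgroup, attach type) local storage on behalf of attached programs: bpf_cgroup_storages_alloc() reuses an existing storage via cgroup_storage_lookup() before allocating a fresh one, and bpf_cgroup_storages_link() ties the result to the cgroup. On the BPF side a program reaches this storage through the bpf_get_local_storage() helper. A minimal sketch, assuming libbpf conventions; the map name and counting policy are illustrative:

```c
// SPDX-License-Identifier: GPL-2.0-only
/* Sketch of a program using cgroup local storage; map name and the
 * counting policy are illustrative.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, __u64);
} pkt_cnt SEC(".maps");

SEC("cgroup_skb/ingress")
int count_pkts(struct __sk_buff *skb)
{
	__u64 *cnt;

	/* returns this program's storage slot for the current cgroup;
	 * the kernel allocated (or reused) it at attach time, which is
	 * what the helpers above implement
	 */
	cnt = bpf_get_local_storage(&pkt_cnt, 0);
	__sync_fetch_and_add(cnt, 1);
	return 1;	/* allow */
}

char _license[] SEC("license") = "GPL";
```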
 | 92 | +/* Called when bpf_cgroup_link is auto-detached from dying cgroup.  | 
|---|
 | 93 | + * It drops the cgroup refcount and marks bpf_link as defunct. It  | 
|---|
 | 94 | + * doesn't free link memory, which will eventually be done by bpf_link's  | 
|---|
 | 95 | + * release() callback, when its last FD is closed.  | 
|---|
 | 96 | + */  | 
|---|
 | 97 | +static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)  | 
|---|
 | 98 | +{  | 
|---|
 | 99 | +	cgroup_put(link->cgroup);  | 
|---|
 | 100 | +	link->cgroup = NULL;  | 
|---|
 | 101 | +}  | 
|---|
 | 102 | +  | 
|---|
 | 103 | +/**  | 
|---|
 | 104 | + * cgroup_bpf_release() - put references of all bpf programs and  | 
|---|
 | 105 | + *                        release all cgroup bpf data  | 
|---|
 | 106 | + * @work: work structure embedded into the cgroup to modify  | 
|---|
 | 107 | + */  | 
|---|
 | 108 | +static void cgroup_bpf_release(struct work_struct *work)  | 
|---|
 | 109 | +{  | 
|---|
 | 110 | +	struct cgroup *p, *cgrp = container_of(work, struct cgroup,  | 
|---|
 | 111 | +					       bpf.release_work);  | 
|---|
 | 112 | +	struct bpf_prog_array *old_array;  | 
|---|
 | 113 | +	struct list_head *storages = &cgrp->bpf.storages;  | 
|---|
 | 114 | +	struct bpf_cgroup_storage *storage, *stmp;  | 
|---|
 | 115 | +  | 
|---|
| 28 | 116 |  	unsigned int type; | 
|---|
 | 117 | +  | 
|---|
 | 118 | +	mutex_lock(&cgroup_mutex);  | 
|---|
| 29 | 119 |   | 
|---|
| 30 | 120 |  	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { | 
|---|
| 31 | 121 |  		struct list_head *progs = &cgrp->bpf.progs[type]; | 
|---|
| 32 |  | -		struct bpf_prog_list *pl, *tmp;  | 
|---|
 | 122 | +		struct bpf_prog_list *pl, *pltmp;  | 
|---|
| 33 | 123 |   | 
|---|
| 34 |  | -		list_for_each_entry_safe(pl, tmp, progs, node) {  | 
|---|
 | 124 | +		list_for_each_entry_safe(pl, pltmp, progs, node) {  | 
|---|
| 35 | 125 |  			list_del(&pl->node); | 
|---|
| 36 |  | -			bpf_prog_put(pl->prog);  | 
|---|
| 37 |  | -			bpf_cgroup_storage_unlink(pl->storage);  | 
|---|
| 38 |  | -			bpf_cgroup_storage_free(pl->storage);  | 
|---|
 | 126 | +			if (pl->prog)  | 
|---|
 | 127 | +				bpf_prog_put(pl->prog);  | 
|---|
 | 128 | +			if (pl->link)  | 
|---|
 | 129 | +				bpf_cgroup_link_auto_detach(pl->link);  | 
|---|
| 39 | 130 |  			kfree(pl); | 
|---|
| 40 | 131 |  			static_branch_dec(&cgroup_bpf_enabled_key); | 
|---|
| 41 | 132 |  		} | 
|---|
| 42 |  | -		bpf_prog_array_free(cgrp->bpf.effective[type]);  | 
|---|
 | 133 | +		old_array = rcu_dereference_protected(  | 
|---|
 | 134 | +				cgrp->bpf.effective[type],  | 
|---|
 | 135 | +				lockdep_is_held(&cgroup_mutex));  | 
|---|
 | 136 | +		bpf_prog_array_free(old_array);  | 
|---|
| 43 | 137 |  	} | 
|---|
 | 138 | +  | 
|---|
 | 139 | +	list_for_each_entry_safe(storage, stmp, storages, list_cg) {  | 
|---|
 | 140 | +		bpf_cgroup_storage_unlink(storage);  | 
|---|
 | 141 | +		bpf_cgroup_storage_free(storage);  | 
|---|
 | 142 | +	}  | 
|---|
 | 143 | +  | 
|---|
 | 144 | +	mutex_unlock(&cgroup_mutex);  | 
|---|
 | 145 | +  | 
|---|
 | 146 | +	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))  | 
|---|
 | 147 | +		cgroup_bpf_put(p);  | 
|---|
 | 148 | +  | 
|---|
 | 149 | +	percpu_ref_exit(&cgrp->bpf.refcnt);  | 
|---|
 | 150 | +	cgroup_put(cgrp);  | 
|---|
 | 151 | +}  | 
|---|
 | 152 | +  | 
|---|
 | 153 | +/**  | 
|---|
 | 154 | + * cgroup_bpf_release_fn() - callback used to schedule releasing  | 
|---|
 | 155 | + *                           of bpf cgroup data  | 
|---|
 | 156 | + * @ref: percpu ref counter structure  | 
|---|
 | 157 | + */  | 
|---|
 | 158 | +static void cgroup_bpf_release_fn(struct percpu_ref *ref)  | 
|---|
 | 159 | +{  | 
|---|
 | 160 | +	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);  | 
|---|
 | 161 | +  | 
|---|
 | 162 | +	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);  | 
|---|
 | 163 | +	queue_work(system_wq, &cgrp->bpf.release_work);  | 
|---|
 | 164 | +}  | 
|---|
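cgroup_bpf_offline() kills cgrp->bpf.refcnt, and once the counter hits zero cgroup_bpf_release_fn() runs from RCU callback context, so the heavy teardown in cgroup_bpf_release() is deferred to a workqueue. The same percpu_ref plus deferred-release pattern, reduced to a self-contained sketch; struct my_obj and everything around it is hypothetical, only the percpu_ref/workqueue APIs are real:

```c
#include <linux/percpu-refcount.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct my_obj {
	struct percpu_ref refcnt;
	struct work_struct release_work;
};

static void my_obj_release(struct work_struct *work)
{
	struct my_obj *obj = container_of(work, struct my_obj, release_work);

	/* process context: heavy teardown and sleeping are fine here */
	percpu_ref_exit(&obj->refcnt);
	kfree(obj);
}

static void my_obj_release_fn(struct percpu_ref *ref)
{
	struct my_obj *obj = container_of(ref, struct my_obj, refcnt);

	/* runs from RCU callback context, so defer the real work */
	INIT_WORK(&obj->release_work, my_obj_release);
	queue_work(system_wq, &obj->release_work);
}

static int my_obj_init(struct my_obj *obj)
{
	/* flags == 0: counter starts live, in fast percpu mode */
	return percpu_ref_init(&obj->refcnt, my_obj_release_fn, 0, GFP_KERNEL);
}
```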
 | 165 | +  | 
|---|
 | 166 | +/* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through  | 
|---|
 | 167 | + * link or direct prog.  | 
|---|
 | 168 | + */  | 
|---|
 | 169 | +static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)  | 
|---|
 | 170 | +{  | 
|---|
 | 171 | +	if (pl->prog)  | 
|---|
 | 172 | +		return pl->prog;  | 
|---|
 | 173 | +	if (pl->link)  | 
|---|
 | 174 | +		return pl->link->link.prog;  | 
|---|
 | 175 | +	return NULL;  | 
|---|
| 44 | 176 |  } | 
|---|
| 45 | 177 |   | 
|---|
| 46 | 178 |  /* count number of elements in the list. | 
|---|
| .. | .. | 
|---|
| 52 | 184 |  	u32 cnt = 0; | 
|---|
| 53 | 185 |   | 
|---|
| 54 | 186 |  	list_for_each_entry(pl, head, node) { | 
|---|
| 55 |  | -		if (!pl->prog)  | 
|---|
 | 187 | +		if (!prog_list_prog(pl))  | 
|---|
| 56 | 188 |  			continue; | 
|---|
| 57 | 189 |  		cnt++; | 
|---|
| 58 | 190 |  	} | 
|---|
| .. | .. | 
|---|
| 64 | 196 |   * if parent has overridable or multi-prog, allow attaching | 
|---|
| 65 | 197 |   */ | 
|---|
| 66 | 198 |  static bool hierarchy_allows_attach(struct cgroup *cgrp, | 
|---|
| 67 |  | -				    enum bpf_attach_type type,  | 
|---|
| 68 |  | -				    u32 new_flags)  | 
|---|
 | 199 | +				    enum bpf_attach_type type)  | 
|---|
| 69 | 200 |  { | 
|---|
| 70 | 201 |  	struct cgroup *p; | 
|---|
| 71 | 202 |   | 
|---|
| .. | .. | 
|---|
| 95 | 226 |   */ | 
|---|
| 96 | 227 |  static int compute_effective_progs(struct cgroup *cgrp, | 
|---|
| 97 | 228 |  				   enum bpf_attach_type type, | 
|---|
| 98 |  | -				   struct bpf_prog_array __rcu **array)  | 
|---|
 | 229 | +				   struct bpf_prog_array **array)  | 
|---|
| 99 | 230 |  { | 
|---|
 | 231 | +	struct bpf_prog_array_item *item;  | 
|---|
| 100 | 232 |  	struct bpf_prog_array *progs; | 
|---|
| 101 | 233 |  	struct bpf_prog_list *pl; | 
|---|
| 102 | 234 |  	struct cgroup *p = cgrp; | 
|---|
| .. | .. | 
|---|
| 121 | 253 |  			continue; | 
|---|
| 122 | 254 |   | 
|---|
| 123 | 255 |  		list_for_each_entry(pl, &p->bpf.progs[type], node) { | 
|---|
| 124 |  | -			if (!pl->prog)  | 
|---|
 | 256 | +			if (!prog_list_prog(pl))  | 
|---|
| 125 | 257 |  				continue; | 
|---|
| 126 | 258 |   | 
|---|
| 127 |  | -			progs->items[cnt].prog = pl->prog;  | 
|---|
| 128 |  | -			progs->items[cnt].cgroup_storage = pl->storage;  | 
|---|
 | 259 | +			item = &progs->items[cnt];  | 
|---|
 | 260 | +			item->prog = prog_list_prog(pl);  | 
|---|
 | 261 | +			bpf_cgroup_storages_assign(item->cgroup_storage,  | 
|---|
 | 262 | +						   pl->storage);  | 
|---|
| 129 | 263 |  			cnt++; | 
|---|
| 130 | 264 |  		} | 
|---|
| 131 | 265 |  	} while ((p = cgroup_parent(p))); | 
|---|
| 132 | 266 |   | 
|---|
| 133 |  | -	rcu_assign_pointer(*array, progs);  | 
|---|
 | 267 | +	*array = progs;  | 
|---|
| 134 | 268 |  	return 0; | 
|---|
| 135 | 269 |  } | 
|---|
| 136 | 270 |   | 
|---|
| 137 | 271 |  static void activate_effective_progs(struct cgroup *cgrp, | 
|---|
| 138 | 272 |  				     enum bpf_attach_type type, | 
|---|
| 139 |  | -				     struct bpf_prog_array __rcu *array)  | 
|---|
 | 273 | +				     struct bpf_prog_array *old_array)  | 
|---|
| 140 | 274 |  { | 
|---|
| 141 |  | -	struct bpf_prog_array __rcu *old_array;  | 
|---|
| 142 |  | -  | 
|---|
| 143 |  | -	old_array = xchg(&cgrp->bpf.effective[type], array);  | 
|---|
 | 275 | +	old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array,  | 
|---|
 | 276 | +					lockdep_is_held(&cgroup_mutex));  | 
|---|
| 144 | 277 |  	/* free prog array after grace period, since __cgroup_bpf_run_*() | 
|---|
| 145 | 278 |  	 * might be still walking the array | 
|---|
| 146 | 279 |  	 */ | 
|---|
| .. | .. | 
|---|
| 157 | 290 |   * that array below is variable length | 
|---|
| 158 | 291 |   */ | 
|---|
| 159 | 292 |  #define	NR ARRAY_SIZE(cgrp->bpf.effective) | 
|---|
| 160 |  | -	struct bpf_prog_array __rcu *arrays[NR] = {};  | 
|---|
| 161 |  | -	int i;  | 
|---|
 | 293 | +	struct bpf_prog_array *arrays[NR] = {};  | 
|---|
 | 294 | +	struct cgroup *p;  | 
|---|
 | 295 | +	int ret, i;  | 
|---|
 | 296 | +  | 
|---|
 | 297 | +	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,  | 
|---|
 | 298 | +			      GFP_KERNEL);  | 
|---|
 | 299 | +	if (ret)  | 
|---|
 | 300 | +		return ret;  | 
|---|
 | 301 | +  | 
|---|
 | 302 | +	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))  | 
|---|
 | 303 | +		cgroup_bpf_get(p);  | 
|---|
| 162 | 304 |   | 
|---|
| 163 | 305 |  	for (i = 0; i < NR; i++) | 
|---|
| 164 | 306 |  		INIT_LIST_HEAD(&cgrp->bpf.progs[i]); | 
|---|
 | 307 | +  | 
|---|
 | 308 | +	INIT_LIST_HEAD(&cgrp->bpf.storages);  | 
|---|
| 165 | 309 |   | 
|---|
| 166 | 310 |  	for (i = 0; i < NR; i++) | 
|---|
| 167 | 311 |  		if (compute_effective_progs(cgrp, i, &arrays[i])) | 
|---|
| .. | .. | 
|---|
| 174 | 318 |  cleanup: | 
|---|
| 175 | 319 |  	for (i = 0; i < NR; i++) | 
|---|
| 176 | 320 |  		bpf_prog_array_free(arrays[i]); | 
|---|
 | 321 | +  | 
|---|
 | 322 | +	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))  | 
|---|
 | 323 | +		cgroup_bpf_put(p);  | 
|---|
 | 324 | +  | 
|---|
 | 325 | +	percpu_ref_exit(&cgrp->bpf.refcnt);  | 
|---|
 | 326 | +  | 
|---|
| 177 | 327 |  	return -ENOMEM; | 
|---|
| 178 | 328 |  } | 
|---|
| 179 | 329 |   | 
|---|
| .. | .. | 
|---|
| 187 | 337 |  	css_for_each_descendant_pre(css, &cgrp->self) { | 
|---|
| 188 | 338 |  		struct cgroup *desc = container_of(css, struct cgroup, self); | 
|---|
| 189 | 339 |   | 
|---|
 | 340 | +		if (percpu_ref_is_zero(&desc->bpf.refcnt))  | 
|---|
 | 341 | +			continue;  | 
|---|
 | 342 | +  | 
|---|
| 190 | 343 |  		err = compute_effective_progs(desc, type, &desc->bpf.inactive); | 
|---|
| 191 | 344 |  		if (err) | 
|---|
| 192 | 345 |  			goto cleanup; | 
|---|
| .. | .. | 
|---|
| 195 | 348 |  	/* all allocations were successful. Activate all prog arrays */ | 
|---|
| 196 | 349 |  	css_for_each_descendant_pre(css, &cgrp->self) { | 
|---|
| 197 | 350 |  		struct cgroup *desc = container_of(css, struct cgroup, self); | 
|---|
 | 351 | +  | 
|---|
 | 352 | +		if (percpu_ref_is_zero(&desc->bpf.refcnt)) {  | 
|---|
 | 353 | +			if (unlikely(desc->bpf.inactive)) {  | 
|---|
 | 354 | +				bpf_prog_array_free(desc->bpf.inactive);  | 
|---|
 | 355 | +				desc->bpf.inactive = NULL;  | 
|---|
 | 356 | +			}  | 
|---|
 | 357 | +			continue;  | 
|---|
 | 358 | +		}  | 
|---|
| 198 | 359 |   | 
|---|
| 199 | 360 |  		activate_effective_progs(desc, type, desc->bpf.inactive); | 
|---|
| 200 | 361 |  		desc->bpf.inactive = NULL; | 
|---|
| .. | .. | 
|---|
| 218 | 379 |   | 
|---|
| 219 | 380 |  #define BPF_CGROUP_MAX_PROGS 64 | 
|---|
| 220 | 381 |   | 
|---|
 | 382 | +static struct bpf_prog_list *find_attach_entry(struct list_head *progs,  | 
|---|
 | 383 | +					       struct bpf_prog *prog,  | 
|---|
 | 384 | +					       struct bpf_cgroup_link *link,  | 
|---|
 | 385 | +					       struct bpf_prog *replace_prog,  | 
|---|
 | 386 | +					       bool allow_multi)  | 
|---|
 | 387 | +{  | 
|---|
 | 388 | +	struct bpf_prog_list *pl;  | 
|---|
 | 389 | +  | 
|---|
 | 390 | +	/* single-attach case */  | 
|---|
 | 391 | +	if (!allow_multi) {  | 
|---|
 | 392 | +		if (list_empty(progs))  | 
|---|
 | 393 | +			return NULL;  | 
|---|
 | 394 | +		return list_first_entry(progs, typeof(*pl), node);  | 
|---|
 | 395 | +	}  | 
|---|
 | 396 | +  | 
|---|
 | 397 | +	list_for_each_entry(pl, progs, node) {  | 
|---|
 | 398 | +		if (prog && pl->prog == prog && prog != replace_prog)  | 
|---|
 | 399 | +			/* disallow attaching the same prog twice */  | 
|---|
 | 400 | +			return ERR_PTR(-EINVAL);  | 
|---|
 | 401 | +		if (link && pl->link == link)  | 
|---|
 | 402 | +			/* disallow attaching the same link twice */  | 
|---|
 | 403 | +			return ERR_PTR(-EINVAL);  | 
|---|
 | 404 | +	}  | 
|---|
 | 405 | +  | 
|---|
 | 406 | +	/* direct prog multi-attach w/ replacement case */  | 
|---|
 | 407 | +	if (replace_prog) {  | 
|---|
 | 408 | +		list_for_each_entry(pl, progs, node) {  | 
|---|
 | 409 | +			if (pl->prog == replace_prog)  | 
|---|
 | 410 | +				/* a match found */  | 
|---|
 | 411 | +				return pl;  | 
|---|
 | 412 | +		}  | 
|---|
 | 413 | +		/* prog to replace not found for cgroup */  | 
|---|
 | 414 | +		return ERR_PTR(-ENOENT);  | 
|---|
 | 415 | +	}  | 
|---|
 | 416 | +  | 
|---|
 | 417 | +	return NULL;  | 
|---|
 | 418 | +}  | 
|---|
 | 419 | +  | 
|---|
| 221 | 420 |  /** | 
|---|
| 222 |  | - * __cgroup_bpf_attach() - Attach the program to a cgroup, and  | 
|---|
 | 421 | + * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and  | 
|---|
| 223 | 422 |   *                         propagate the change to descendants | 
|---|
| 224 | 423 |   * @cgrp: The cgroup which descendants to traverse | 
|---|
| 225 | 424 |   * @prog: A program to attach | 
|---|
 | 425 | + * @link: A link to attach  | 
|---|
 | 426 | + * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set  | 
|---|
| 226 | 427 |   * @type: Type of attach operation | 
|---|
 | 428 | + * @flags: Option flags  | 
|---|
| 227 | 429 |   * | 
|---|
 | 430 | + * Exactly one of @prog or @link can be non-null.  | 
|---|
| 228 | 431 |   * Must be called with cgroup_mutex held. | 
|---|
| 229 | 432 |   */ | 
|---|
| 230 |  | -int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,  | 
|---|
 | 433 | +int __cgroup_bpf_attach(struct cgroup *cgrp,  | 
|---|
 | 434 | +			struct bpf_prog *prog, struct bpf_prog *replace_prog,  | 
|---|
 | 435 | +			struct bpf_cgroup_link *link,  | 
|---|
| 231 | 436 |  			enum bpf_attach_type type, u32 flags) | 
|---|
| 232 | 437 |  { | 
|---|
 | 438 | +	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));  | 
|---|
| 233 | 439 |  	struct list_head *progs = &cgrp->bpf.progs[type]; | 
|---|
| 234 | 440 |  	struct bpf_prog *old_prog = NULL; | 
|---|
| 235 |  | -	struct bpf_cgroup_storage *storage, *old_storage = NULL;  | 
|---|
 | 441 | +	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};  | 
|---|
 | 442 | +	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};  | 
|---|
| 236 | 443 |  	struct bpf_prog_list *pl; | 
|---|
| 237 |  | -	bool pl_was_allocated;  | 
|---|
| 238 | 444 |  	int err; | 
|---|
| 239 | 445 |   | 
|---|
| 240 |  | -	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))  | 
|---|
 | 446 | +	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||  | 
|---|
 | 447 | +	    ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))  | 
|---|
| 241 | 448 |  		/* invalid combination */ | 
|---|
| 242 | 449 |  		return -EINVAL; | 
|---|
 | 450 | +	if (link && (prog || replace_prog))  | 
|---|
 | 451 | +		/* only either link or prog/replace_prog can be specified */  | 
|---|
 | 452 | +		return -EINVAL;  | 
|---|
 | 453 | +	if (!!replace_prog != !!(flags & BPF_F_REPLACE))  | 
|---|
 | 454 | +		/* replace_prog implies BPF_F_REPLACE, and vice versa */  | 
|---|
 | 455 | +		return -EINVAL;  | 
|---|
| 243 | 456 |   | 
|---|
| 244 |  | -	if (!hierarchy_allows_attach(cgrp, type, flags))  | 
|---|
 | 457 | +	if (!hierarchy_allows_attach(cgrp, type))  | 
|---|
| 245 | 458 |  		return -EPERM; | 
|---|
| 246 | 459 |   | 
|---|
| 247 |  | -	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)  | 
|---|
 | 460 | +	if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)  | 
|---|
| 248 | 461 |  		/* Disallow attaching non-overridable on top | 
|---|
| 249 | 462 |  		 * of existing overridable in this cgroup. | 
|---|
| 250 | 463 |  		 * Disallow attaching multi-prog if overridable or none | 
|---|
| .. | .. | 
|---|
| 254 | 467 |  	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) | 
|---|
| 255 | 468 |  		return -E2BIG; | 
|---|
| 256 | 469 |   | 
|---|
| 257 |  | -	storage = bpf_cgroup_storage_alloc(prog);  | 
|---|
| 258 |  | -	if (IS_ERR(storage))  | 
|---|
 | 470 | +	pl = find_attach_entry(progs, prog, link, replace_prog,  | 
|---|
 | 471 | +			       flags & BPF_F_ALLOW_MULTI);  | 
|---|
 | 472 | +	if (IS_ERR(pl))  | 
|---|
 | 473 | +		return PTR_ERR(pl);  | 
|---|
 | 474 | +  | 
|---|
 | 475 | +	if (bpf_cgroup_storages_alloc(storage, new_storage, type,  | 
|---|
 | 476 | +				      prog ? : link->link.prog, cgrp))  | 
|---|
| 259 | 477 |  		return -ENOMEM; | 
|---|
| 260 | 478 |   | 
|---|
| 261 |  | -	if (flags & BPF_F_ALLOW_MULTI) {  | 
|---|
| 262 |  | -		list_for_each_entry(pl, progs, node) {  | 
|---|
| 263 |  | -			if (pl->prog == prog) {  | 
|---|
| 264 |  | -				/* disallow attaching the same prog twice */  | 
|---|
| 265 |  | -				bpf_cgroup_storage_free(storage);  | 
|---|
| 266 |  | -				return -EINVAL;  | 
|---|
| 267 |  | -			}  | 
|---|
| 268 |  | -		}  | 
|---|
| 269 |  | -  | 
|---|
 | 479 | +	if (pl) {  | 
|---|
 | 480 | +		old_prog = pl->prog;  | 
|---|
 | 481 | +	} else {  | 
|---|
| 270 | 482 |  		pl = kmalloc(sizeof(*pl), GFP_KERNEL); | 
|---|
| 271 | 483 |  		if (!pl) { | 
|---|
| 272 |  | -			bpf_cgroup_storage_free(storage);  | 
|---|
 | 484 | +			bpf_cgroup_storages_free(new_storage);  | 
|---|
| 273 | 485 |  			return -ENOMEM; | 
|---|
| 274 | 486 |  		} | 
|---|
| 275 |  | -  | 
|---|
| 276 |  | -		pl_was_allocated = true;  | 
|---|
| 277 |  | -		pl->prog = prog;  | 
|---|
| 278 |  | -		pl->storage = storage;  | 
|---|
| 279 | 487 |  		list_add_tail(&pl->node, progs); | 
|---|
| 280 |  | -	} else {  | 
|---|
| 281 |  | -		if (list_empty(progs)) {  | 
|---|
| 282 |  | -			pl = kmalloc(sizeof(*pl), GFP_KERNEL);  | 
|---|
| 283 |  | -			if (!pl) {  | 
|---|
| 284 |  | -				bpf_cgroup_storage_free(storage);  | 
|---|
| 285 |  | -				return -ENOMEM;  | 
|---|
| 286 |  | -			}  | 
|---|
| 287 |  | -			pl_was_allocated = true;  | 
|---|
| 288 |  | -			list_add_tail(&pl->node, progs);  | 
|---|
| 289 |  | -		} else {  | 
|---|
| 290 |  | -			pl = list_first_entry(progs, typeof(*pl), node);  | 
|---|
| 291 |  | -			old_prog = pl->prog;  | 
|---|
| 292 |  | -			old_storage = pl->storage;  | 
|---|
| 293 |  | -			bpf_cgroup_storage_unlink(old_storage);  | 
|---|
| 294 |  | -			pl_was_allocated = false;  | 
|---|
| 295 |  | -		}  | 
|---|
| 296 |  | -		pl->prog = prog;  | 
|---|
| 297 |  | -		pl->storage = storage;  | 
|---|
| 298 | 488 |  	} | 
|---|
| 299 | 489 |   | 
|---|
| 300 |  | -	cgrp->bpf.flags[type] = flags;  | 
|---|
 | 490 | +	pl->prog = prog;  | 
|---|
 | 491 | +	pl->link = link;  | 
|---|
 | 492 | +	bpf_cgroup_storages_assign(pl->storage, storage);  | 
|---|
 | 493 | +	cgrp->bpf.flags[type] = saved_flags;  | 
|---|
| 301 | 494 |   | 
|---|
| 302 | 495 |  	err = update_effective_progs(cgrp, type); | 
|---|
| 303 | 496 |  	if (err) | 
|---|
| 304 | 497 |  		goto cleanup; | 
|---|
| 305 | 498 |   | 
|---|
| 306 |  | -	static_branch_inc(&cgroup_bpf_enabled_key);  | 
|---|
| 307 |  | -	if (old_storage)  | 
|---|
| 308 |  | -		bpf_cgroup_storage_free(old_storage);  | 
|---|
| 309 |  | -	if (old_prog) {  | 
|---|
 | 499 | +	if (old_prog)  | 
|---|
| 310 | 500 |  		bpf_prog_put(old_prog); | 
|---|
| 311 |  | -		static_branch_dec(&cgroup_bpf_enabled_key);  | 
|---|
| 312 |  | -	}  | 
|---|
| 313 |  | -	bpf_cgroup_storage_link(storage, cgrp, type);  | 
|---|
 | 501 | +	else  | 
|---|
 | 502 | +		static_branch_inc(&cgroup_bpf_enabled_key);  | 
|---|
 | 503 | +	bpf_cgroup_storages_link(new_storage, cgrp, type);  | 
|---|
| 314 | 504 |  	return 0; | 
|---|
| 315 | 505 |   | 
|---|
| 316 | 506 |  cleanup: | 
|---|
| 317 |  | -	/* and cleanup the prog list */  | 
|---|
| 318 |  | -	pl->prog = old_prog;  | 
|---|
| 319 |  | -	bpf_cgroup_storage_free(pl->storage);  | 
|---|
| 320 |  | -	pl->storage = old_storage;  | 
|---|
| 321 |  | -	bpf_cgroup_storage_link(old_storage, cgrp, type);  | 
|---|
| 322 |  | -	if (pl_was_allocated) {  | 
|---|
 | 507 | +	if (old_prog) {  | 
|---|
 | 508 | +		pl->prog = old_prog;  | 
|---|
 | 509 | +		pl->link = NULL;  | 
|---|
 | 510 | +	}  | 
|---|
 | 511 | +	bpf_cgroup_storages_free(new_storage);  | 
|---|
 | 512 | +	if (!old_prog) {  | 
|---|
| 323 | 513 |  		list_del(&pl->node); | 
|---|
| 324 | 514 |  		kfree(pl); | 
|---|
| 325 | 515 |  	} | 
|---|
| 326 | 516 |  	return err; | 
|---|
| 327 | 517 |  } | 
|---|
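From user space the BPF_F_REPLACE path is reached through the BPF_PROG_ATTACH command, with the program being replaced passed as replace_bpf_fd. A hedged sketch using the raw bpf(2) syscall; the three file descriptors are assumed to come from earlier open()/BPF_PROG_LOAD steps:

```c
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int attach_replace(int cgroup_fd, int new_prog_fd, int old_prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd      = cgroup_fd;
	attr.attach_bpf_fd  = new_prog_fd;
	attr.replace_bpf_fd = old_prog_fd;
	attr.attach_type    = BPF_CGROUP_INET_EGRESS;
	/* BPF_F_REPLACE is only valid together with BPF_F_ALLOW_MULTI,
	 * as enforced at the top of __cgroup_bpf_attach()
	 */
	attr.attach_flags   = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}
```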
| 328 | 518 |   | 
|---|
 | 519 | +/* Swap updated BPF program for given link in effective program arrays across  | 
|---|
 | 520 | + * all descendant cgroups. This function is guaranteed to succeed.  | 
|---|
 | 521 | + */  | 
|---|
 | 522 | +static void replace_effective_prog(struct cgroup *cgrp,  | 
|---|
 | 523 | +				   enum bpf_attach_type type,  | 
|---|
 | 524 | +				   struct bpf_cgroup_link *link)  | 
|---|
 | 525 | +{  | 
|---|
 | 526 | +	struct bpf_prog_array_item *item;  | 
|---|
 | 527 | +	struct cgroup_subsys_state *css;  | 
|---|
 | 528 | +	struct bpf_prog_array *progs;  | 
|---|
 | 529 | +	struct bpf_prog_list *pl;  | 
|---|
 | 530 | +	struct list_head *head;  | 
|---|
 | 531 | +	struct cgroup *cg;  | 
|---|
 | 532 | +	int pos;  | 
|---|
 | 533 | +  | 
|---|
 | 534 | +	css_for_each_descendant_pre(css, &cgrp->self) {  | 
|---|
 | 535 | +		struct cgroup *desc = container_of(css, struct cgroup, self);  | 
|---|
 | 536 | +  | 
|---|
 | 537 | +		if (percpu_ref_is_zero(&desc->bpf.refcnt))  | 
|---|
 | 538 | +			continue;  | 
|---|
 | 539 | +  | 
|---|
 | 540 | +		/* find position of link in effective progs array */  | 
|---|
 | 541 | +		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {  | 
|---|
 | 542 | +			if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))  | 
|---|
 | 543 | +				continue;  | 
|---|
 | 544 | +  | 
|---|
 | 545 | +			head = &cg->bpf.progs[type];  | 
|---|
 | 546 | +			list_for_each_entry(pl, head, node) {  | 
|---|
 | 547 | +				if (!prog_list_prog(pl))  | 
|---|
 | 548 | +					continue;  | 
|---|
 | 549 | +				if (pl->link == link)  | 
|---|
 | 550 | +					goto found;  | 
|---|
 | 551 | +				pos++;  | 
|---|
 | 552 | +			}  | 
|---|
 | 553 | +		}  | 
|---|
 | 554 | +found:  | 
|---|
 | 555 | +		BUG_ON(!cg);  | 
|---|
 | 556 | +		progs = rcu_dereference_protected(  | 
|---|
 | 557 | +				desc->bpf.effective[type],  | 
|---|
 | 558 | +				lockdep_is_held(&cgroup_mutex));  | 
|---|
 | 559 | +		item = &progs->items[pos];  | 
|---|
 | 560 | +		WRITE_ONCE(item->prog, link->link.prog);  | 
|---|
 | 561 | +	}  | 
|---|
 | 562 | +}  | 
|---|
 | 563 | +  | 
|---|
| 329 | 564 |  /** | 
|---|
| 330 |  | - * __cgroup_bpf_detach() - Detach the program from a cgroup, and  | 
|---|
| 331 |  | - *                         propagate the change to descendants  | 
|---|
 | 565 | + * __cgroup_bpf_replace() - Replace link's program and propagate the change  | 
|---|
 | 566 | + *                          to descendants  | 
|---|
| 332 | 567 |   * @cgrp: The cgroup which descendants to traverse | 
|---|
| 333 |  | - * @prog: A program to detach or NULL  | 
|---|
| 334 |  | - * @type: Type of detach operation  | 
|---|
 | 568 | + * @link: A link for which to replace BPF program  | 
|---|
 | 569 | + * @type: Type of attach operation  | 
|---|
| 335 | 570 |   * | 
|---|
| 336 | 571 |   * Must be called with cgroup_mutex held. | 
|---|
| 337 | 572 |   */ | 
|---|
 | 573 | +static int __cgroup_bpf_replace(struct cgroup *cgrp,  | 
|---|
 | 574 | +				struct bpf_cgroup_link *link,  | 
|---|
 | 575 | +				struct bpf_prog *new_prog)  | 
|---|
 | 576 | +{  | 
|---|
 | 577 | +	struct list_head *progs = &cgrp->bpf.progs[link->type];  | 
|---|
 | 578 | +	struct bpf_prog *old_prog;  | 
|---|
 | 579 | +	struct bpf_prog_list *pl;  | 
|---|
 | 580 | +	bool found = false;  | 
|---|
 | 581 | +  | 
|---|
 | 582 | +	if (link->link.prog->type != new_prog->type)  | 
|---|
 | 583 | +		return -EINVAL;  | 
|---|
 | 584 | +  | 
|---|
 | 585 | +	list_for_each_entry(pl, progs, node) {  | 
|---|
 | 586 | +		if (pl->link == link) {  | 
|---|
 | 587 | +			found = true;  | 
|---|
 | 588 | +			break;  | 
|---|
 | 589 | +		}  | 
|---|
 | 590 | +	}  | 
|---|
 | 591 | +	if (!found)  | 
|---|
 | 592 | +		return -ENOENT;  | 
|---|
 | 593 | +  | 
|---|
 | 594 | +	old_prog = xchg(&link->link.prog, new_prog);  | 
|---|
 | 595 | +	replace_effective_prog(cgrp, link->type, link);  | 
|---|
 | 596 | +	bpf_prog_put(old_prog);  | 
|---|
 | 597 | +	return 0;  | 
|---|
 | 598 | +}  | 
|---|
 | 599 | +  | 
|---|
 | 600 | +static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,  | 
|---|
 | 601 | +			      struct bpf_prog *old_prog)  | 
|---|
 | 602 | +{  | 
|---|
 | 603 | +	struct bpf_cgroup_link *cg_link;  | 
|---|
 | 604 | +	int ret;  | 
|---|
 | 605 | +  | 
|---|
 | 606 | +	cg_link = container_of(link, struct bpf_cgroup_link, link);  | 
|---|
 | 607 | +  | 
|---|
 | 608 | +	mutex_lock(&cgroup_mutex);  | 
|---|
 | 609 | +	/* link might have been auto-released by dying cgroup, so fail */  | 
|---|
 | 610 | +	if (!cg_link->cgroup) {  | 
|---|
 | 611 | +		ret = -ENOLINK;  | 
|---|
 | 612 | +		goto out_unlock;  | 
|---|
 | 613 | +	}  | 
|---|
 | 614 | +	if (old_prog && link->prog != old_prog) {  | 
|---|
 | 615 | +		ret = -EPERM;  | 
|---|
 | 616 | +		goto out_unlock;  | 
|---|
 | 617 | +	}  | 
|---|
 | 618 | +	ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);  | 
|---|
 | 619 | +out_unlock:  | 
|---|
 | 620 | +	mutex_unlock(&cgroup_mutex);  | 
|---|
 | 621 | +	return ret;  | 
|---|
 | 622 | +}  | 
|---|
 | 623 | +  | 
|---|
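cgroup_bpf_replace() is wired up below as the link's update_prog callback and is driven by the BPF_LINK_UPDATE command; supplying old_prog_fd together with BPF_F_REPLACE makes the update conditional, matching the link->prog != old_prog check above. A minimal sketch, with the fds assumed to exist:

```c
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int link_update(int link_fd, int new_prog_fd, int old_prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_update.link_fd     = link_fd;
	attr.link_update.new_prog_fd = new_prog_fd;
	if (old_prog_fd >= 0) {
		/* kernel refuses the update unless the link still runs
		 * old_prog_fd
		 */
		attr.link_update.old_prog_fd = old_prog_fd;
		attr.link_update.flags       = BPF_F_REPLACE;
	}

	return syscall(__NR_bpf, BPF_LINK_UPDATE, &attr, sizeof(attr));
}
```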
 | 624 | +static struct bpf_prog_list *find_detach_entry(struct list_head *progs,  | 
|---|
 | 625 | +					       struct bpf_prog *prog,  | 
|---|
 | 626 | +					       struct bpf_cgroup_link *link,  | 
|---|
 | 627 | +					       bool allow_multi)  | 
|---|
 | 628 | +{  | 
|---|
 | 629 | +	struct bpf_prog_list *pl;  | 
|---|
 | 630 | +  | 
|---|
 | 631 | +	if (!allow_multi) {  | 
|---|
 | 632 | +		if (list_empty(progs))  | 
|---|
 | 633 | +			/* report error when trying to detach and nothing is attached */  | 
|---|
 | 634 | +			return ERR_PTR(-ENOENT);  | 
|---|
 | 635 | +  | 
|---|
 | 636 | +		/* to maintain backward compatibility NONE and OVERRIDE cgroups  | 
|---|
 | 637 | +		 * allow detaching with invalid FD (prog==NULL) in legacy mode  | 
|---|
 | 638 | +		 */  | 
|---|
 | 639 | +		return list_first_entry(progs, typeof(*pl), node);  | 
|---|
 | 640 | +	}  | 
|---|
 | 641 | +  | 
|---|
 | 642 | +	if (!prog && !link)  | 
|---|
 | 643 | +		/* to detach MULTI prog the user has to specify valid FD  | 
|---|
 | 644 | +		 * of the program or link to be detached  | 
|---|
 | 645 | +		 */  | 
|---|
 | 646 | +		return ERR_PTR(-EINVAL);  | 
|---|
 | 647 | +  | 
|---|
 | 648 | +	/* find the prog or link and detach it */  | 
|---|
 | 649 | +	list_for_each_entry(pl, progs, node) {  | 
|---|
 | 650 | +		if (pl->prog == prog && pl->link == link)  | 
|---|
 | 651 | +			return pl;  | 
|---|
 | 652 | +	}  | 
|---|
 | 653 | +	return ERR_PTR(-ENOENT);  | 
|---|
 | 654 | +}  | 
|---|
 | 655 | +  | 
|---|
 | 656 | +/**  | 
|---|
 | 657 | + * purge_effective_progs() - After compute_effective_progs fails to alloc new  | 
|---|
 | 658 | + *			     cgrp->bpf.inactive table we can recover by  | 
|---|
 | 659 | + *			     recomputing the array in place.  | 
|---|
 | 660 | + *  | 
|---|
 | 661 | + * @cgrp: The cgroup which descendants to traverse  | 
|---|
 | 662 | + * @prog: A program to detach or NULL  | 
|---|
 | 663 | + * @link: A link to detach or NULL  | 
|---|
 | 664 | + * @type: Type of detach operation  | 
|---|
 | 665 | + */  | 
|---|
 | 666 | +static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,  | 
|---|
 | 667 | +				  struct bpf_cgroup_link *link,  | 
|---|
 | 668 | +				  enum bpf_attach_type type)  | 
|---|
 | 669 | +{  | 
|---|
 | 670 | +	struct cgroup_subsys_state *css;  | 
|---|
 | 671 | +	struct bpf_prog_array *progs;  | 
|---|
 | 672 | +	struct bpf_prog_list *pl;  | 
|---|
 | 673 | +	struct list_head *head;  | 
|---|
 | 674 | +	struct cgroup *cg;  | 
|---|
 | 675 | +	int pos;  | 
|---|
 | 676 | +  | 
|---|
 | 677 | +	/* recompute effective prog array in place */  | 
|---|
 | 678 | +	css_for_each_descendant_pre(css, &cgrp->self) {  | 
|---|
 | 679 | +		struct cgroup *desc = container_of(css, struct cgroup, self);  | 
|---|
 | 680 | +  | 
|---|
 | 681 | +		if (percpu_ref_is_zero(&desc->bpf.refcnt))  | 
|---|
 | 682 | +			continue;  | 
|---|
 | 683 | +  | 
|---|
 | 684 | +		/* find position of link or prog in effective progs array */  | 
|---|
 | 685 | +		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {  | 
|---|
 | 686 | +			if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))  | 
|---|
 | 687 | +				continue;  | 
|---|
 | 688 | +  | 
|---|
 | 689 | +			head = &cg->bpf.progs[type];  | 
|---|
 | 690 | +			list_for_each_entry(pl, head, node) {  | 
|---|
 | 691 | +				if (!prog_list_prog(pl))  | 
|---|
 | 692 | +					continue;  | 
|---|
 | 693 | +				if (pl->prog == prog && pl->link == link)  | 
|---|
 | 694 | +					goto found;  | 
|---|
 | 695 | +				pos++;  | 
|---|
 | 696 | +			}  | 
|---|
 | 697 | +		}  | 
|---|
 | 698 | +  | 
|---|
 | 699 | +		/* no link or prog match, skip the cgroup of this layer */  | 
|---|
 | 700 | +		continue;  | 
|---|
 | 701 | +found:  | 
|---|
 | 702 | +		progs = rcu_dereference_protected(  | 
|---|
 | 703 | +				desc->bpf.effective[type],  | 
|---|
 | 704 | +				lockdep_is_held(&cgroup_mutex));  | 
|---|
 | 705 | +  | 
|---|
 | 706 | +		/* Remove the program from the array */  | 
|---|
 | 707 | +		WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),  | 
|---|
 | 708 | +			  "Failed to purge a prog from array at index %d", pos);  | 
|---|
 | 709 | +	}  | 
|---|
 | 710 | +}  | 
|---|
 | 711 | +  | 
|---|
 | 712 | +/**  | 
|---|
 | 713 | + * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and  | 
|---|
 | 714 | + *                         propagate the change to descendants  | 
|---|
 | 715 | + * @cgrp: The cgroup which descendants to traverse  | 
|---|
 | 716 | + * @prog: A program to detach or NULL  | 
|---|
 | 717 | + * @link: A link to detach or NULL  | 
|---|
 | 718 | + * @type: Type of detach operation  | 
|---|
 | 719 | + *  | 
|---|
 | 720 | + * At most one of @prog or @link can be non-NULL.  | 
|---|
 | 721 | + * Must be called with cgroup_mutex held.  | 
|---|
 | 722 | + */  | 
|---|
| 338 | 723 |  int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, | 
|---|
| 339 |  | -			enum bpf_attach_type type, u32 unused_flags)  | 
|---|
 | 724 | +			struct bpf_cgroup_link *link, enum bpf_attach_type type)  | 
|---|
| 340 | 725 |  { | 
|---|
| 341 | 726 |  	struct list_head *progs = &cgrp->bpf.progs[type]; | 
|---|
| 342 | 727 |  	u32 flags = cgrp->bpf.flags[type]; | 
|---|
| 343 |  | -	struct bpf_prog *old_prog = NULL;  | 
|---|
| 344 | 728 |  	struct bpf_prog_list *pl; | 
|---|
| 345 |  | -	int err;  | 
|---|
 | 729 | +	struct bpf_prog *old_prog;  | 
|---|
| 346 | 730 |   | 
|---|
| 347 |  | -	if (flags & BPF_F_ALLOW_MULTI) {  | 
|---|
| 348 |  | -		if (!prog)  | 
|---|
| 349 |  | -			/* to detach MULTI prog the user has to specify valid FD  | 
|---|
| 350 |  | -			 * of the program to be detached  | 
|---|
| 351 |  | -			 */  | 
|---|
| 352 |  | -			return -EINVAL;  | 
|---|
| 353 |  | -	} else {  | 
|---|
| 354 |  | -		if (list_empty(progs))  | 
|---|
| 355 |  | -			/* report error when trying to detach and nothing is attached */  | 
|---|
| 356 |  | -			return -ENOENT;  | 
|---|
 | 731 | +	if (prog && link)  | 
|---|
 | 732 | +		/* only one of prog or link can be specified */  | 
|---|
 | 733 | +		return -EINVAL;  | 
|---|
 | 734 | +  | 
|---|
 | 735 | +	pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI);  | 
|---|
 | 736 | +	if (IS_ERR(pl))  | 
|---|
 | 737 | +		return PTR_ERR(pl);  | 
|---|
 | 738 | +  | 
|---|
 | 739 | +	/* mark it deleted, so it's ignored while recomputing effective */  | 
|---|
 | 740 | +	old_prog = pl->prog;  | 
|---|
 | 741 | +	pl->prog = NULL;  | 
|---|
 | 742 | +	pl->link = NULL;  | 
|---|
 | 743 | +  | 
|---|
 | 744 | +	if (update_effective_progs(cgrp, type)) {  | 
|---|
 | 745 | +		/* if update of effective array failed, replace the prog with a dummy prog */  | 
|---|
 | 746 | +		pl->prog = old_prog;  | 
|---|
 | 747 | +		pl->link = link;  | 
|---|
 | 748 | +		purge_effective_progs(cgrp, old_prog, link, type);  | 
|---|
| 357 | 749 |  	} | 
|---|
| 358 |  | -  | 
|---|
| 359 |  | -	if (flags & BPF_F_ALLOW_MULTI) {  | 
|---|
| 360 |  | -		/* find the prog and detach it */  | 
|---|
| 361 |  | -		list_for_each_entry(pl, progs, node) {  | 
|---|
| 362 |  | -			if (pl->prog != prog)  | 
|---|
| 363 |  | -				continue;  | 
|---|
| 364 |  | -			old_prog = prog;  | 
|---|
| 365 |  | -			/* mark it deleted, so it's ignored while  | 
|---|
| 366 |  | -			 * recomputing effective  | 
|---|
| 367 |  | -			 */  | 
|---|
| 368 |  | -			pl->prog = NULL;  | 
|---|
| 369 |  | -			break;  | 
|---|
| 370 |  | -		}  | 
|---|
| 371 |  | -		if (!old_prog)  | 
|---|
| 372 |  | -			return -ENOENT;  | 
|---|
| 373 |  | -	} else {  | 
|---|
| 374 |  | -		/* to maintain backward compatibility NONE and OVERRIDE cgroups  | 
|---|
| 375 |  | -		 * allow detaching with invalid FD (prog==NULL)  | 
|---|
| 376 |  | -		 */  | 
|---|
| 377 |  | -		pl = list_first_entry(progs, typeof(*pl), node);  | 
|---|
| 378 |  | -		old_prog = pl->prog;  | 
|---|
| 379 |  | -		pl->prog = NULL;  | 
|---|
| 380 |  | -	}  | 
|---|
| 381 |  | -  | 
|---|
| 382 |  | -	err = update_effective_progs(cgrp, type);  | 
|---|
| 383 |  | -	if (err)  | 
|---|
| 384 |  | -		goto cleanup;  | 
|---|
| 385 | 750 |   | 
|---|
| 386 | 751 |  	/* now can actually delete it from this cgroup list */ | 
|---|
| 387 | 752 |  	list_del(&pl->node); | 
|---|
| 388 |  | -	bpf_cgroup_storage_unlink(pl->storage);  | 
|---|
| 389 |  | -	bpf_cgroup_storage_free(pl->storage);  | 
|---|
| 390 | 753 |  	kfree(pl); | 
|---|
| 391 | 754 |  	if (list_empty(progs)) | 
|---|
| 392 | 755 |  		/* last program was detached, reset flags to zero */ | 
|---|
| 393 | 756 |  		cgrp->bpf.flags[type] = 0; | 
|---|
| 394 |  | -  | 
|---|
| 395 |  | -	bpf_prog_put(old_prog);  | 
|---|
 | 757 | +	if (old_prog)  | 
|---|
 | 758 | +		bpf_prog_put(old_prog);  | 
|---|
| 396 | 759 |  	static_branch_dec(&cgroup_bpf_enabled_key); | 
|---|
| 397 | 760 |  	return 0; | 
|---|
| 398 |  | -  | 
|---|
| 399 |  | -cleanup:  | 
|---|
| 400 |  | -	/* and restore back old_prog */  | 
|---|
| 401 |  | -	pl->prog = old_prog;  | 
|---|
| 402 |  | -	return err;  | 
|---|
| 403 | 761 |  } | 
|---|
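The legacy detach path into this function goes through BPF_PROG_DETACH. As find_detach_entry() documents, multi-attach cgroups require a valid program fd, while NONE/OVERRIDE cgroups tolerate an invalid one (prog == NULL). A sketch, with the fds assumed:

```c
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int cgroup_detach(int cgroup_fd, int prog_fd,
			 enum bpf_attach_type type)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd     = cgroup_fd;
	attr.attach_bpf_fd = prog_fd; /* may be invalid for NONE/OVERRIDE */
	attr.attach_type   = type;

	return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));
}
```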
| 404 | 762 |   | 
|---|
| 405 | 763 |  /* Must be called with cgroup_mutex held to avoid races. */ | 
|---|
| .. | .. | 
|---|
| 410 | 768 |  	enum bpf_attach_type type = attr->query.attach_type; | 
|---|
| 411 | 769 |  	struct list_head *progs = &cgrp->bpf.progs[type]; | 
|---|
| 412 | 770 |  	u32 flags = cgrp->bpf.flags[type]; | 
|---|
 | 771 | +	struct bpf_prog_array *effective;  | 
|---|
 | 772 | +	struct bpf_prog *prog;  | 
|---|
| 413 | 773 |  	int cnt, ret = 0, i; | 
|---|
| 414 | 774 |   | 
|---|
 | 775 | +	effective = rcu_dereference_protected(cgrp->bpf.effective[type],  | 
|---|
 | 776 | +					      lockdep_is_held(&cgroup_mutex));  | 
|---|
 | 777 | +  | 
|---|
| 415 | 778 |  	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) | 
|---|
| 416 |  | -		cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);  | 
|---|
 | 779 | +		cnt = bpf_prog_array_length(effective);  | 
|---|
| 417 | 780 |  	else | 
|---|
| 418 | 781 |  		cnt = prog_list_length(progs); | 
|---|
| 419 | 782 |   | 
|---|
| .. | .. | 
|---|
| 430 | 793 |  	} | 
|---|
| 431 | 794 |   | 
|---|
| 432 | 795 |  	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { | 
|---|
| 433 |  | -		return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],  | 
|---|
| 434 |  | -						   prog_ids, cnt);  | 
|---|
 | 796 | +		return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);  | 
|---|
| 435 | 797 |  	} else { | 
|---|
| 436 | 798 |  		struct bpf_prog_list *pl; | 
|---|
| 437 | 799 |  		u32 id; | 
|---|
| 438 | 800 |   | 
|---|
| 439 | 801 |  		i = 0; | 
|---|
| 440 | 802 |  		list_for_each_entry(pl, progs, node) { | 
|---|
| 441 |  | -			id = pl->prog->aux->id;  | 
|---|
 | 803 | +			prog = prog_list_prog(pl);  | 
|---|
 | 804 | +			id = prog->aux->id;  | 
|---|
| 442 | 805 |  			if (copy_to_user(prog_ids + i, &id, sizeof(id))) | 
|---|
| 443 | 806 |  				return -EFAULT; | 
|---|
| 444 | 807 |  			if (++i == cnt) | 
|---|
| .. | .. | 
|---|
| 451 | 814 |  int cgroup_bpf_prog_attach(const union bpf_attr *attr, | 
|---|
| 452 | 815 |  			   enum bpf_prog_type ptype, struct bpf_prog *prog) | 
|---|
| 453 | 816 |  { | 
|---|
 | 817 | +	struct bpf_prog *replace_prog = NULL;  | 
|---|
| 454 | 818 |  	struct cgroup *cgrp; | 
|---|
| 455 | 819 |  	int ret; | 
|---|
| 456 | 820 |   | 
|---|
| .. | .. | 
|---|
| 458 | 822 |  	if (IS_ERR(cgrp)) | 
|---|
| 459 | 823 |  		return PTR_ERR(cgrp); | 
|---|
| 460 | 824 |   | 
|---|
| 461 |  | -	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,  | 
|---|
| 462 |  | -				attr->attach_flags);  | 
|---|
 | 825 | +	if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&  | 
|---|
 | 826 | +	    (attr->attach_flags & BPF_F_REPLACE)) {  | 
|---|
 | 827 | +		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);  | 
|---|
 | 828 | +		if (IS_ERR(replace_prog)) {  | 
|---|
 | 829 | +			cgroup_put(cgrp);  | 
|---|
 | 830 | +			return PTR_ERR(replace_prog);  | 
|---|
 | 831 | +		}  | 
|---|
 | 832 | +	}  | 
|---|
 | 833 | +  | 
|---|
 | 834 | +	ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,  | 
|---|
 | 835 | +				attr->attach_type, attr->attach_flags);  | 
|---|
 | 836 | +  | 
|---|
 | 837 | +	if (replace_prog)  | 
|---|
 | 838 | +		bpf_prog_put(replace_prog);  | 
|---|
| 463 | 839 |  	cgroup_put(cgrp); | 
|---|
| 464 | 840 |  	return ret; | 
|---|
| 465 | 841 |  } | 
|---|
| .. | .. | 
|---|
| 478 | 854 |  	if (IS_ERR(prog)) | 
|---|
| 479 | 855 |  		prog = NULL; | 
|---|
| 480 | 856 |   | 
|---|
| 481 |  | -	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);  | 
|---|
 | 857 | +	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);  | 
|---|
| 482 | 858 |  	if (prog) | 
|---|
| 483 | 859 |  		bpf_prog_put(prog); | 
|---|
| 484 | 860 |   | 
|---|
| 485 | 861 |  	cgroup_put(cgrp); | 
|---|
| 486 | 862 |  	return ret; | 
|---|
 | 863 | +}  | 
|---|
 | 864 | +  | 
|---|
 | 865 | +static void bpf_cgroup_link_release(struct bpf_link *link)  | 
|---|
 | 866 | +{  | 
|---|
 | 867 | +	struct bpf_cgroup_link *cg_link =  | 
|---|
 | 868 | +		container_of(link, struct bpf_cgroup_link, link);  | 
|---|
 | 869 | +	struct cgroup *cg;  | 
|---|
 | 870 | +  | 
|---|
 | 871 | +	/* link might have been auto-detached by dying cgroup already,  | 
|---|
 | 872 | +	 * in that case our work is done here  | 
|---|
 | 873 | +	 */  | 
|---|
 | 874 | +	if (!cg_link->cgroup)  | 
|---|
 | 875 | +		return;  | 
|---|
 | 876 | +  | 
|---|
 | 877 | +	mutex_lock(&cgroup_mutex);  | 
|---|
 | 878 | +  | 
|---|
 | 879 | +	/* re-check cgroup under lock again */  | 
|---|
 | 880 | +	if (!cg_link->cgroup) {  | 
|---|
 | 881 | +		mutex_unlock(&cgroup_mutex);  | 
|---|
 | 882 | +		return;  | 
|---|
 | 883 | +	}  | 
|---|
 | 884 | +  | 
|---|
 | 885 | +	WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,  | 
|---|
 | 886 | +				    cg_link->type));  | 
|---|
 | 887 | +  | 
|---|
 | 888 | +	cg = cg_link->cgroup;  | 
|---|
 | 889 | +	cg_link->cgroup = NULL;  | 
|---|
 | 890 | +  | 
|---|
 | 891 | +	mutex_unlock(&cgroup_mutex);  | 
|---|
 | 892 | +  | 
|---|
 | 893 | +	cgroup_put(cg);  | 
|---|
 | 894 | +}  | 
|---|
 | 895 | +  | 
|---|
 | 896 | +static void bpf_cgroup_link_dealloc(struct bpf_link *link)  | 
|---|
 | 897 | +{  | 
|---|
 | 898 | +	struct bpf_cgroup_link *cg_link =  | 
|---|
 | 899 | +		container_of(link, struct bpf_cgroup_link, link);  | 
|---|
 | 900 | +  | 
|---|
 | 901 | +	kfree(cg_link);  | 
|---|
 | 902 | +}  | 
|---|
 | 903 | +  | 
|---|
 | 904 | +static int bpf_cgroup_link_detach(struct bpf_link *link)  | 
|---|
 | 905 | +{  | 
|---|
 | 906 | +	bpf_cgroup_link_release(link);  | 
|---|
 | 907 | +  | 
|---|
 | 908 | +	return 0;  | 
|---|
 | 909 | +}  | 
|---|
 | 910 | +  | 
|---|
 | 911 | +static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,  | 
|---|
 | 912 | +					struct seq_file *seq)  | 
|---|
 | 913 | +{  | 
|---|
 | 914 | +	struct bpf_cgroup_link *cg_link =  | 
|---|
 | 915 | +		container_of(link, struct bpf_cgroup_link, link);  | 
|---|
 | 916 | +	u64 cg_id = 0;  | 
|---|
 | 917 | +  | 
|---|
 | 918 | +	mutex_lock(&cgroup_mutex);  | 
|---|
 | 919 | +	if (cg_link->cgroup)  | 
|---|
 | 920 | +		cg_id = cgroup_id(cg_link->cgroup);  | 
|---|
 | 921 | +	mutex_unlock(&cgroup_mutex);  | 
|---|
 | 922 | +  | 
|---|
 | 923 | +	seq_printf(seq,  | 
|---|
 | 924 | +		   "cgroup_id:\t%llu\n"  | 
|---|
 | 925 | +		   "attach_type:\t%d\n",  | 
|---|
 | 926 | +		   cg_id,  | 
|---|
 | 927 | +		   cg_link->type);  | 
|---|
 | 928 | +}  | 
|---|
 | 929 | +  | 
|---|
 | 930 | +static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,  | 
|---|
 | 931 | +					  struct bpf_link_info *info)  | 
|---|
 | 932 | +{  | 
|---|
 | 933 | +	struct bpf_cgroup_link *cg_link =  | 
|---|
 | 934 | +		container_of(link, struct bpf_cgroup_link, link);  | 
|---|
 | 935 | +	u64 cg_id = 0;  | 
|---|
 | 936 | +  | 
|---|
 | 937 | +	mutex_lock(&cgroup_mutex);  | 
|---|
 | 938 | +	if (cg_link->cgroup)  | 
|---|
 | 939 | +		cg_id = cgroup_id(cg_link->cgroup);  | 
|---|
 | 940 | +	mutex_unlock(&cgroup_mutex);  | 
|---|
 | 941 | +  | 
|---|
 | 942 | +	info->cgroup.cgroup_id = cg_id;  | 
|---|
 | 943 | +	info->cgroup.attach_type = cg_link->type;  | 
|---|
 | 944 | +	return 0;  | 
|---|
 | 945 | +}  | 
|---|
 | 946 | +  | 
|---|
 | 947 | +static const struct bpf_link_ops bpf_cgroup_link_lops = {  | 
|---|
 | 948 | +	.release = bpf_cgroup_link_release,  | 
|---|
 | 949 | +	.dealloc = bpf_cgroup_link_dealloc,  | 
|---|
 | 950 | +	.detach = bpf_cgroup_link_detach,  | 
|---|
 | 951 | +	.update_prog = cgroup_bpf_replace,  | 
|---|
 | 952 | +	.show_fdinfo = bpf_cgroup_link_show_fdinfo,  | 
|---|
 | 953 | +	.fill_link_info = bpf_cgroup_link_fill_link_info,  | 
|---|
 | 954 | +};  | 
|---|
 | 955 | +  | 
|---|
 | 956 | +int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)  | 
|---|
 | 957 | +{  | 
|---|
 | 958 | +	struct bpf_link_primer link_primer;  | 
|---|
 | 959 | +	struct bpf_cgroup_link *link;  | 
|---|
 | 960 | +	struct cgroup *cgrp;  | 
|---|
 | 961 | +	int err;  | 
|---|
 | 962 | +  | 
|---|
 | 963 | +	if (attr->link_create.flags)  | 
|---|
 | 964 | +		return -EINVAL;  | 
|---|
 | 965 | +  | 
|---|
 | 966 | +	cgrp = cgroup_get_from_fd(attr->link_create.target_fd);  | 
|---|
 | 967 | +	if (IS_ERR(cgrp))  | 
|---|
 | 968 | +		return PTR_ERR(cgrp);  | 
|---|
 | 969 | +  | 
|---|
 | 970 | +	link = kzalloc(sizeof(*link), GFP_USER);  | 
|---|
 | 971 | +	if (!link) {  | 
|---|
 | 972 | +		err = -ENOMEM;  | 
|---|
 | 973 | +		goto out_put_cgroup;  | 
|---|
 | 974 | +	}  | 
|---|
 | 975 | +	bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops,  | 
|---|
 | 976 | +		      prog);  | 
|---|
 | 977 | +	link->cgroup = cgrp;  | 
|---|
 | 978 | +	link->type = attr->link_create.attach_type;  | 
|---|
 | 979 | +  | 
|---|
 | 980 | +	err = bpf_link_prime(&link->link, &link_primer);  | 
|---|
 | 981 | +	if (err) {  | 
|---|
 | 982 | +		kfree(link);  | 
|---|
 | 983 | +		goto out_put_cgroup;  | 
|---|
 | 984 | +	}  | 
|---|
 | 985 | +  | 
|---|
 | 986 | +	err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,  | 
|---|
 | 987 | +				BPF_F_ALLOW_MULTI);  | 
|---|
 | 988 | +	if (err) {  | 
|---|
 | 989 | +		bpf_link_cleanup(&link_primer);  | 
|---|
 | 990 | +		goto out_put_cgroup;  | 
|---|
 | 991 | +	}  | 
|---|
 | 992 | +  | 
|---|
 | 993 | +	return bpf_link_settle(&link_primer);  | 
|---|
 | 994 | +  | 
|---|
 | 995 | +out_put_cgroup:  | 
|---|
 | 996 | +	cgroup_put(cgrp);  | 
|---|
 | 997 | +	return err;  | 
|---|
| 487 | 998 |  } | 
|---|
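cgroup_bpf_link_attach() is invoked for the BPF_LINK_CREATE command; note that links are always attached with BPF_F_ALLOW_MULTI, so they coexist with other links and with multi-attached programs. A hedged user-space sketch, fds assumed:

```c
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int cgroup_link_create(int cgroup_fd, int prog_fd,
			      enum bpf_attach_type type)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd     = prog_fd;
	attr.link_create.target_fd   = cgroup_fd;
	attr.link_create.attach_type = type;
	attr.link_create.flags       = 0; /* must be 0, checked above */

	/* on success this returns a new link fd; the attachment lives
	 * until the last fd to the link is closed or the cgroup dies
	 */
	return syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
}
```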
| 488 | 999 |   | 
|---|
| 489 | 1000 |  int cgroup_bpf_prog_query(const union bpf_attr *attr, | 
|---|
| .. | .. | 
|---|
| 514 | 1025 |   * The program type passed in via @type must be suitable for network | 
|---|
| 515 | 1026 |   * filtering. No further check is performed to assert that. | 
|---|
| 516 | 1027 |   * | 
|---|
| 517 |  | - * This function will return %-EPERM if any if an attached program was found  | 
|---|
| 518 |  | - * and if it returned != 1 during execution. In all other cases, 0 is returned.  | 
|---|
 | 1028 | + * For egress packets, this function can return:  | 
|---|
 | 1029 | + *   NET_XMIT_SUCCESS    (0)	- continue with packet output  | 
|---|
 | 1030 | + *   NET_XMIT_DROP       (1)	- drop packet and notify TCP to call cwr  | 
|---|
 | 1031 | + *   NET_XMIT_CN         (2)	- continue with packet output and notify TCP  | 
|---|
 | 1032 | + *				  to call cwr  | 
|---|
 | 1033 | + *   -EPERM			- drop packet  | 
|---|
 | 1034 | + *  | 
|---|
 | 1035 | + * For ingress packets, this function will return -EPERM if any  | 
|---|
 | 1036 | + * attached program was found and if it returned != 1 during execution.  | 
|---|
 | 1037 | + * Otherwise 0 is returned.  | 
|---|
| 519 | 1038 |   */ | 
|---|
| 520 | 1039 |  int __cgroup_bpf_run_filter_skb(struct sock *sk, | 
|---|
| 521 | 1040 |  				struct sk_buff *skb, | 
|---|
| .. | .. | 
|---|
| 523 | 1042 |  { | 
|---|
| 524 | 1043 |  	unsigned int offset = skb->data - skb_network_header(skb); | 
|---|
| 525 | 1044 |  	struct sock *save_sk; | 
|---|
 | 1045 | +	void *saved_data_end;  | 
|---|
| 526 | 1046 |  	struct cgroup *cgrp; | 
|---|
| 527 | 1047 |  	int ret; | 
|---|
| 528 | 1048 |   | 
|---|
| .. | .. | 
|---|
| 536 | 1056 |  	save_sk = skb->sk; | 
|---|
| 537 | 1057 |  	skb->sk = sk; | 
|---|
| 538 | 1058 |  	__skb_push(skb, offset); | 
|---|
| 539 |  | -	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,  | 
|---|
| 540 |  | -				 bpf_prog_run_save_cb);  | 
|---|
 | 1059 | +  | 
|---|
 | 1060 | +	/* compute pointers for the bpf prog */  | 
|---|
 | 1061 | +	bpf_compute_and_save_data_end(skb, &saved_data_end);  | 
|---|
 | 1062 | +  | 
|---|
 | 1063 | +	if (type == BPF_CGROUP_INET_EGRESS) {  | 
|---|
 | 1064 | +		ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(  | 
|---|
 | 1065 | +			cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);  | 
|---|
 | 1066 | +	} else {  | 
|---|
 | 1067 | +		ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,  | 
|---|
 | 1068 | +					  __bpf_prog_run_save_cb);  | 
|---|
 | 1069 | +		ret = (ret == 1 ? 0 : -EPERM);  | 
|---|
 | 1070 | +	}  | 
|---|
 | 1071 | +	bpf_restore_data_end(skb, saved_data_end);  | 
|---|
| 541 | 1072 |  	__skb_pull(skb, offset); | 
|---|
| 542 | 1073 |  	skb->sk = save_sk; | 
|---|
| 543 |  | -	return ret == 1 ? 0 : -EPERM;  | 
|---|
 | 1074 | +  | 
|---|
 | 1075 | +	return ret;  | 
|---|
| 544 | 1076 |  } | 
|---|
| 545 | 1077 |  EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); | 
|---|
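On the program side, a filter attached at these hooks receives a __sk_buff and returns 1 to allow or 0 to deny; the egress-specific mapping of return codes is described in the comment above __cgroup_bpf_run_filter_skb(). A minimal sketch of such a filter; the length threshold is an illustrative policy, not anything the kernel requires:

```c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/egress")
int egress_filter(struct __sk_buff *skb)
{
	if (skb->len < 20)	/* hypothetical minimum packet size */
		return 0;	/* deny: kernel maps this to a drop */
	return 1;		/* allow */
}

char _license[] SEC("license") = "GPL";
```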
| 546 | 1078 |   | 
|---|
| .. | .. | 
|---|
| 661 | 1193 |   | 
|---|
| 662 | 1194 |  	return !allow; | 
|---|
| 663 | 1195 |  } | 
|---|
| 664 |  | -EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);  | 
|---|
| 665 | 1196 |   | 
|---|
| 666 | 1197 |  static const struct bpf_func_proto * | 
|---|
| 667 |  | -cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)  | 
|---|
 | 1198 | +cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)  | 
|---|
| 668 | 1199 |  { | 
|---|
| 669 | 1200 |  	switch (func_id) { | 
|---|
| 670 |  | -	case BPF_FUNC_map_lookup_elem:  | 
|---|
| 671 |  | -		return &bpf_map_lookup_elem_proto;  | 
|---|
| 672 |  | -	case BPF_FUNC_map_update_elem:  | 
|---|
| 673 |  | -		return &bpf_map_update_elem_proto;  | 
|---|
| 674 |  | -	case BPF_FUNC_map_delete_elem:  | 
|---|
| 675 |  | -		return &bpf_map_delete_elem_proto;  | 
|---|
| 676 | 1201 |  	case BPF_FUNC_get_current_uid_gid: | 
|---|
| 677 | 1202 |  		return &bpf_get_current_uid_gid_proto; | 
|---|
| 678 | 1203 |  	case BPF_FUNC_get_local_storage: | 
|---|
| 679 | 1204 |  		return &bpf_get_local_storage_proto; | 
|---|
| 680 |  | -	case BPF_FUNC_trace_printk:  | 
|---|
| 681 |  | -		if (capable(CAP_SYS_ADMIN))  | 
|---|
| 682 |  | -			return bpf_get_trace_printk_proto();  | 
|---|
 | 1205 | +	case BPF_FUNC_get_current_cgroup_id:  | 
|---|
 | 1206 | +		return &bpf_get_current_cgroup_id_proto;  | 
|---|
 | 1207 | +	case BPF_FUNC_perf_event_output:  | 
|---|
 | 1208 | +		return &bpf_event_output_data_proto;  | 
|---|
| 683 | 1209 |  	default: | 
|---|
| 684 |  | -		return NULL;  | 
|---|
 | 1210 | +		return bpf_base_func_proto(func_id);  | 
|---|
| 685 | 1211 |  	} | 
|---|
 | 1212 | +}  | 
|---|
 | 1213 | +  | 
|---|
 | 1214 | +static const struct bpf_func_proto *  | 
|---|
 | 1215 | +cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)  | 
|---|
 | 1216 | +{  | 
|---|
 | 1217 | +	return cgroup_base_func_proto(func_id, prog);  | 
|---|
| 686 | 1218 |  } | 
|---|
| 687 | 1219 |   | 
|---|
| 688 | 1220 |  static bool cgroup_dev_is_valid_access(int off, int size, | 
|---|
| .. | .. | 
|---|
| 722 | 1254 |  	.get_func_proto		= cgroup_dev_func_proto, | 
|---|
| 723 | 1255 |  	.is_valid_access	= cgroup_dev_is_valid_access, | 
|---|
| 724 | 1256 |  }; | 
|---|
 | 1257 | +  | 
|---|
 | 1258 | +/**  | 
|---|
 | 1259 | + * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl  | 
|---|
 | 1260 | + *  | 
|---|
 | 1261 | + * @head: sysctl table header  | 
|---|
 | 1262 | + * @table: sysctl table  | 
|---|
 | 1263 | + * @write: sysctl is being read (= 0) or written (= 1)  | 
|---|
 | 1264 | + * @buf: pointer to buffer (in and out)  | 
|---|
 | 1265 | + * @pcount: value-result argument: value is size of buffer pointed to by @buf,  | 
|---|
 | 1266 | + *	result is size of @new_buf if program set new value, initial value  | 
|---|
 | 1267 | + *	otherwise  | 
|---|
 | 1268 | + * @ppos: value-result argument: value is position at which read from or write  | 
|---|
 | 1269 | + *	to sysctl is happening, result is new position if program overrode it,  | 
|---|
 | 1270 | + *	initial value otherwise  | 
|---|
 | 1271 | + * @type: type of program to be executed  | 
|---|
 | 1272 | + *  | 
|---|
 | 1273 | + * Program is run when sysctl is being accessed, either read or written, and  | 
|---|
 | 1274 | + * can allow or deny such access.  | 
|---|
 | 1275 | + *  | 
|---|
 | 1276 | + * This function will return %-EPERM if an attached program is found and  | 
|---|
 | 1277 | + * returned value != 1 during execution. In all other cases 0 is returned.  | 
|---|
 | 1278 | + */  | 
|---|
 | 1279 | +int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,  | 
|---|
 | 1280 | +				   struct ctl_table *table, int write,  | 
|---|
 | 1281 | +				   char **buf, size_t *pcount, loff_t *ppos,  | 
|---|
 | 1282 | +				   enum bpf_attach_type type)  | 
|---|
 | 1283 | +{  | 
|---|
 | 1284 | +	struct bpf_sysctl_kern ctx = {  | 
|---|
 | 1285 | +		.head = head,  | 
|---|
 | 1286 | +		.table = table,  | 
|---|
 | 1287 | +		.write = write,  | 
|---|
 | 1288 | +		.ppos = ppos,  | 
|---|
 | 1289 | +		.cur_val = NULL,  | 
|---|
 | 1290 | +		.cur_len = PAGE_SIZE,  | 
|---|
 | 1291 | +		.new_val = NULL,  | 
|---|
 | 1292 | +		.new_len = 0,  | 
|---|
 | 1293 | +		.new_updated = 0,  | 
|---|
 | 1294 | +	};  | 
|---|
 | 1295 | +	struct cgroup *cgrp;  | 
|---|
 | 1296 | +	loff_t pos = 0;  | 
|---|
 | 1297 | +	int ret;  | 
|---|
 | 1298 | +  | 
|---|
 | 1299 | +	ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);  | 
|---|
 | 1300 | +	if (!ctx.cur_val ||  | 
|---|
 | 1301 | +	    table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) {  | 
|---|
 | 1302 | +		/* Let BPF program decide how to proceed. */  | 
|---|
 | 1303 | +		ctx.cur_len = 0;  | 
|---|
 | 1304 | +	}  | 
|---|
 | 1305 | +  | 
|---|
 | 1306 | +	if (write && *buf && *pcount) {  | 
|---|
 | 1307 | +		/* BPF program should be able to override new value with a  | 
|---|
 | 1308 | +		 * buffer bigger than provided by user.  | 
|---|
 | 1309 | +		 */  | 
|---|
 | 1310 | +		ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);  | 
|---|
 | 1311 | +		ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);  | 
|---|
 | 1312 | +		if (ctx.new_val) {  | 
|---|
 | 1313 | +			memcpy(ctx.new_val, *buf, ctx.new_len);  | 
|---|
 | 1314 | +		} else {  | 
|---|
 | 1315 | +			/* Let BPF program decide how to proceed. */  | 
|---|
 | 1316 | +			ctx.new_len = 0;  | 
|---|
 | 1317 | +		}  | 
|---|
 | 1318 | +	}  | 
|---|
 | 1319 | +  | 
|---|
 | 1320 | +	rcu_read_lock();  | 
|---|
 | 1321 | +	cgrp = task_dfl_cgroup(current);  | 
|---|
 | 1322 | +	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);  | 
|---|
 | 1323 | +	rcu_read_unlock();  | 
|---|
 | 1324 | +  | 
|---|
 | 1325 | +	kfree(ctx.cur_val);  | 
|---|
 | 1326 | +  | 
|---|
 | 1327 | +	if (ret == 1 && ctx.new_updated) {  | 
|---|
 | 1328 | +		kfree(*buf);  | 
|---|
 | 1329 | +		*buf = ctx.new_val;  | 
|---|
 | 1330 | +		*pcount = ctx.new_len;  | 
|---|
 | 1331 | +	} else {  | 
|---|
 | 1332 | +		kfree(ctx.new_val);  | 
|---|
 | 1333 | +	}  | 
|---|
 | 1334 | +  | 
|---|
 | 1335 | +	return ret == 1 ? 0 : -EPERM;  | 
|---|
 | 1336 | +}  | 
|---|
 | 1337 | +  | 
|---|
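To make the contract above concrete, here is a minimal sketch of a BPF_PROG_TYPE_CGROUP_SYSCTL program that __cgroup_bpf_run_filter_sysctl() would run; the guarded sysctl name is an arbitrary example:

```c
/* Deny writes to net/core/somaxconn from this cgroup; returning 1 allows
 * the access, returning 0 makes it fail with -EPERM.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sysctl")
int sysctl_guard(struct bpf_sysctl *ctx)
{
	char name[64];
	const char target[] = "net/core/somaxconn";

	if (bpf_sysctl_get_name(ctx, name, sizeof(name), 0) < 0)
		return 0;	/* could not resolve the name: deny */

	if (ctx->write && !__builtin_memcmp(name, target, sizeof(target)))
		return 0;	/* deny the write */

	return 1;		/* allow */
}

char _license[] SEC("license") = "GPL";
```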
 | 1338 | +#ifdef CONFIG_NET  | 
|---|
 | 1339 | +static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,  | 
|---|
 | 1340 | +					     enum bpf_attach_type attach_type)  | 
|---|
 | 1341 | +{  | 
|---|
 | 1342 | +	struct bpf_prog_array *prog_array;  | 
|---|
 | 1343 | +	bool empty;  | 
|---|
 | 1344 | +  | 
|---|
 | 1345 | +	rcu_read_lock();  | 
|---|
 | 1346 | +	prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);  | 
|---|
 | 1347 | +	empty = bpf_prog_array_is_empty(prog_array);  | 
|---|
 | 1348 | +	rcu_read_unlock();  | 
|---|
 | 1349 | +  | 
|---|
 | 1350 | +	return empty;  | 
|---|
 | 1351 | +}  | 
|---|
 | 1352 | +  | 
|---|
 | 1353 | +static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)  | 
|---|
 | 1354 | +{  | 
|---|
 | 1355 | +	if (unlikely(max_optlen < 0))  | 
|---|
 | 1356 | +		return -EINVAL;  | 
|---|
 | 1357 | +  | 
|---|
 | 1358 | +	if (unlikely(max_optlen > PAGE_SIZE)) {  | 
|---|
 | 1359 | +		/* We don't expose optvals that are greater than PAGE_SIZE  | 
|---|
 | 1360 | +		 * to the BPF program.  | 
|---|
 | 1361 | +		 */  | 
|---|
 | 1362 | +		max_optlen = PAGE_SIZE;  | 
|---|
 | 1363 | +	}  | 
|---|
 | 1364 | +  | 
|---|
 | 1365 | +	ctx->optval = kzalloc(max_optlen, GFP_USER);  | 
|---|
 | 1366 | +	if (!ctx->optval)  | 
|---|
 | 1367 | +		return -ENOMEM;  | 
|---|
 | 1368 | +  | 
|---|
 | 1369 | +	ctx->optval_end = ctx->optval + max_optlen;  | 
|---|
 | 1370 | +  | 
|---|
 | 1371 | +	return max_optlen;  | 
|---|
 | 1372 | +}  | 
|---|
 | 1373 | +  | 
|---|
 | 1374 | +static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)  | 
|---|
 | 1375 | +{  | 
|---|
 | 1376 | +	kfree(ctx->optval);  | 
|---|
 | 1377 | +}  | 
|---|
 | 1378 | +  | 
|---|
 | 1379 | +int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,  | 
|---|
 | 1380 | +				       int *optname, char __user *optval,  | 
|---|
 | 1381 | +				       int *optlen, char **kernel_optval)  | 
|---|
 | 1382 | +{  | 
|---|
 | 1383 | +	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);  | 
|---|
 | 1384 | +	struct bpf_sockopt_kern ctx = {  | 
|---|
 | 1385 | +		.sk = sk,  | 
|---|
 | 1386 | +		.level = *level,  | 
|---|
 | 1387 | +		.optname = *optname,  | 
|---|
 | 1388 | +	};  | 
|---|
 | 1389 | +	int ret, max_optlen;  | 
|---|
 | 1390 | +  | 
|---|
 | 1391 | +	/* Opportunistic check to see whether we have any BPF program  | 
|---|
 | 1392 | +	 * attached to the hook so we don't waste time allocating  | 
|---|
 | 1393 | +	 * memory and locking the socket.  | 
|---|
 | 1394 | +	 */  | 
|---|
 | 1395 | +	if (!cgroup_bpf_enabled ||  | 
|---|
 | 1396 | +	    __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))  | 
|---|
 | 1397 | +		return 0;  | 
|---|
 | 1398 | +  | 
|---|
 | 1399 | +	/* Allocate a bit more than the initial user buffer for the  | 
|---|
 | 1400 | +	 * BPF program. The canonical use case is overriding  | 
|---|
 | 1401 | +	 * TCP_CONGESTION(nv) with TCP_CONGESTION(cubic).  | 
|---|
 | 1402 | +	 */  | 
|---|
 | 1403 | +	max_optlen = max_t(int, 16, *optlen);  | 
|---|
 | 1404 | +  | 
|---|
 | 1405 | +	max_optlen = sockopt_alloc_buf(&ctx, max_optlen);  | 
|---|
 | 1406 | +	if (max_optlen < 0)  | 
|---|
 | 1407 | +		return max_optlen;  | 
|---|
 | 1408 | +  | 
|---|
 | 1409 | +	ctx.optlen = *optlen;  | 
|---|
 | 1410 | +  | 
|---|
 | 1411 | +	if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {  | 
|---|
 | 1412 | +		ret = -EFAULT;  | 
|---|
 | 1413 | +		goto out;  | 
|---|
 | 1414 | +	}  | 
|---|
 | 1415 | +  | 
|---|
 | 1416 | +	lock_sock(sk);  | 
|---|
 | 1417 | +	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],  | 
|---|
 | 1418 | +				 &ctx, BPF_PROG_RUN);  | 
|---|
 | 1419 | +	release_sock(sk);  | 
|---|
 | 1420 | +  | 
|---|
 | 1421 | +	if (!ret) {  | 
|---|
 | 1422 | +		ret = -EPERM;  | 
|---|
 | 1423 | +		goto out;  | 
|---|
 | 1424 | +	}  | 
|---|
 | 1425 | +  | 
|---|
 | 1426 | +	if (ctx.optlen == -1) {  | 
|---|
 | 1427 | +		/* optlen set to -1, bypass kernel */  | 
|---|
 | 1428 | +		ret = 1;  | 
|---|
 | 1429 | +	} else if (ctx.optlen > max_optlen || ctx.optlen < -1) {  | 
|---|
 | 1430 | +		/* optlen is out of bounds */  | 
|---|
 | 1431 | +		ret = -EFAULT;  | 
|---|
 | 1432 | +	} else {  | 
|---|
 | 1433 | +		/* optlen within bounds, run kernel handler */  | 
|---|
 | 1434 | +		ret = 0;  | 
|---|
 | 1435 | +  | 
|---|
 | 1436 | +		/* export any potential modifications */  | 
|---|
 | 1437 | +		*level = ctx.level;  | 
|---|
 | 1438 | +		*optname = ctx.optname;  | 
|---|
 | 1439 | +  | 
|---|
 | 1440 | +		/* optlen == 0 from BPF indicates that we should  | 
|---|
 | 1441 | +		 * use the original userspace data.  | 
|---|
 | 1442 | +		 */  | 
|---|
 | 1443 | +		if (ctx.optlen != 0) {  | 
|---|
 | 1444 | +			*optlen = ctx.optlen;  | 
|---|
 | 1445 | +			*kernel_optval = ctx.optval;  | 
|---|
 | 1446 | +			/* export and don't free sockopt buf */  | 
|---|
 | 1447 | +			return 0;  | 
|---|
 | 1448 | +		}  | 
|---|
 | 1449 | +	}  | 
|---|
 | 1450 | +  | 
|---|
 | 1451 | +out:  | 
|---|
 | 1452 | +	sockopt_free_buf(&ctx);  | 
|---|
 | 1453 | +	return ret;  | 
|---|
 | 1454 | +}  | 
|---|
 | 1455 | +  | 
|---|
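A sketch of a BPF_CGROUP_SETSOCKOPT program exercising this path, matching the TCP_CONGESTION example in the comment above (illustrative; the option constants are spelled out instead of pulled from uapi headers):

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define SOL_TCP		6	/* IPPROTO_TCP */
#define TCP_CONGESTION	13

SEC("cgroup/setsockopt")
int force_cubic(struct bpf_sockopt *ctx)
{
	char cubic[] = "cubic";

	if (ctx->level != SOL_TCP || ctx->optname != TCP_CONGESTION)
		return 1;	/* not interesting: run the kernel handler */

	/* optval/optval_end are PTR_TO_PACKET{,_END}: bounds-check first. */
	if (ctx->optval + sizeof(cubic) > ctx->optval_end)
		return 0;	/* buffer too small to rewrite: deny */

	__builtin_memcpy(ctx->optval, cubic, sizeof(cubic));
	ctx->optlen = sizeof(cubic);
	return 1;		/* let the kernel apply the rewritten value */
}

char _license[] SEC("license") = "GPL";
```

Returning 1 with ctx->optlen left in bounds runs the kernel handler on the (possibly rewritten) buffer; setting ctx->optlen to -1 bypasses the kernel handler entirely, as handled above.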
 | 1456 | +int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,  | 
|---|
 | 1457 | +				       int optname, char __user *optval,  | 
|---|
 | 1458 | +				       int __user *optlen, int max_optlen,  | 
|---|
 | 1459 | +				       int retval)  | 
|---|
 | 1460 | +{  | 
|---|
 | 1461 | +	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);  | 
|---|
 | 1462 | +	struct bpf_sockopt_kern ctx = {  | 
|---|
 | 1463 | +		.sk = sk,  | 
|---|
 | 1464 | +		.level = level,  | 
|---|
 | 1465 | +		.optname = optname,  | 
|---|
 | 1466 | +		.retval = retval,  | 
|---|
 | 1467 | +	};  | 
|---|
 | 1468 | +	int ret;  | 
|---|
 | 1469 | +  | 
|---|
 | 1470 | +	/* Opportunistic check to see whether we have any BPF program  | 
|---|
 | 1471 | +	 * attached to the hook so we don't waste time allocating  | 
|---|
 | 1472 | +	 * memory and locking the socket.  | 
|---|
 | 1473 | +	 */  | 
|---|
 | 1474 | +	if (!cgroup_bpf_enabled ||  | 
|---|
 | 1475 | +	    __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))  | 
|---|
 | 1476 | +		return retval;  | 
|---|
 | 1477 | +  | 
|---|
 | 1478 | +	ctx.optlen = max_optlen;  | 
|---|
 | 1479 | +  | 
|---|
 | 1480 | +	max_optlen = sockopt_alloc_buf(&ctx, max_optlen);  | 
|---|
 | 1481 | +	if (max_optlen < 0)  | 
|---|
 | 1482 | +		return max_optlen;  | 
|---|
 | 1483 | +  | 
|---|
 | 1484 | +	if (!retval) {  | 
|---|
 | 1485 | +		/* If kernel getsockopt finished successfully,  | 
|---|
 | 1486 | +		 * copy whatever was returned to the user back  | 
|---|
 | 1487 | +		 * into our temporary buffer. Also set optlen to the  | 
|---|
 | 1488 | +		 * value the kernel returned so that BPF programs  | 
|---|
 | 1489 | +		 * can inspect it.  | 
|---|
 | 1490 | +		 */  | 
|---|
 | 1491 | +  | 
|---|
 | 1492 | +		if (get_user(ctx.optlen, optlen)) {  | 
|---|
 | 1493 | +			ret = -EFAULT;  | 
|---|
 | 1494 | +			goto out;  | 
|---|
 | 1495 | +		}  | 
|---|
 | 1496 | +  | 
|---|
 | 1497 | +		if (ctx.optlen < 0) {  | 
|---|
 | 1498 | +			ret = -EFAULT;  | 
|---|
 | 1499 | +			goto out;  | 
|---|
 | 1500 | +		}  | 
|---|
 | 1501 | +  | 
|---|
 | 1502 | +		if (copy_from_user(ctx.optval, optval,  | 
|---|
 | 1503 | +				   min(ctx.optlen, max_optlen)) != 0) {  | 
|---|
 | 1504 | +			ret = -EFAULT;  | 
|---|
 | 1505 | +			goto out;  | 
|---|
 | 1506 | +		}  | 
|---|
 | 1507 | +	}  | 
|---|
 | 1508 | +  | 
|---|
 | 1509 | +	lock_sock(sk);  | 
|---|
 | 1510 | +	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],  | 
|---|
 | 1511 | +				 &ctx, BPF_PROG_RUN);  | 
|---|
 | 1512 | +	release_sock(sk);  | 
|---|
 | 1513 | +  | 
|---|
 | 1514 | +	if (!ret) {  | 
|---|
 | 1515 | +		ret = -EPERM;  | 
|---|
 | 1516 | +		goto out;  | 
|---|
 | 1517 | +	}  | 
|---|
 | 1518 | +  | 
|---|
 | 1519 | +	if (ctx.optlen > max_optlen || ctx.optlen < 0) {  | 
|---|
 | 1520 | +		ret = -EFAULT;  | 
|---|
 | 1521 | +		goto out;  | 
|---|
 | 1522 | +	}  | 
|---|
 | 1523 | +  | 
|---|
 | 1524 | +	/* BPF programs are only allowed to set retval to 0, not to  | 
|---|
 | 1525 | +	 * some arbitrary value.  | 
|---|
 | 1526 | +	 */  | 
|---|
 | 1527 | +	if (ctx.retval != 0 && ctx.retval != retval) {  | 
|---|
 | 1528 | +		ret = -EFAULT;  | 
|---|
 | 1529 | +		goto out;  | 
|---|
 | 1530 | +	}  | 
|---|
 | 1531 | +  | 
|---|
 | 1532 | +	if (ctx.optlen != 0) {  | 
|---|
 | 1533 | +		if (copy_to_user(optval, ctx.optval, ctx.optlen) ||  | 
|---|
 | 1534 | +		    put_user(ctx.optlen, optlen)) {  | 
|---|
 | 1535 | +			ret = -EFAULT;  | 
|---|
 | 1536 | +			goto out;  | 
|---|
 | 1537 | +		}  | 
|---|
 | 1538 | +	}  | 
|---|
 | 1539 | +  | 
|---|
 | 1540 | +	ret = ctx.retval;  | 
|---|
 | 1541 | +  | 
|---|
 | 1542 | +out:  | 
|---|
 | 1543 | +	sockopt_free_buf(&ctx);  | 
|---|
 | 1544 | +	return ret;  | 
|---|
 | 1545 | +}  | 
|---|
 | 1546 | +#endif  | 
|---|
 | 1547 | +  | 
|---|
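On the getsockopt side, a program may inspect or replace the value the kernel handler produced, and may clear an error, but may only reset retval to 0, as enforced above. A sketch with a made-up option level (MY_SOL_CUSTOM is hypothetical):

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define MY_SOL_CUSTOM	0x4242	/* hypothetical level, for illustration */

SEC("cgroup/getsockopt")
int getsockopt_hook(struct bpf_sockopt *ctx)
{
	__u8 *optval = ctx->optval;

	if (ctx->level != MY_SOL_CUSTOM)
		return 1;	/* pass the kernel's result through */

	if (optval + 1 > (__u8 *)ctx->optval_end)
		return 0;	/* bounds check failed: -EPERM */

	optval[0] = 0x55;	/* synthesize a one-byte answer */
	ctx->optlen = 1;
	ctx->retval = 0;	/* override the kernel's error; 0 only */
	return 1;
}

char _license[] SEC("license") = "GPL";
```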
 | 1548 | +static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,  | 
|---|
 | 1549 | +			      size_t *lenp)  | 
|---|
 | 1550 | +{  | 
|---|
 | 1551 | +	ssize_t tmp_ret = 0, ret;  | 
|---|
 | 1552 | +  | 
|---|
 | 1553 | +	if (dir->header.parent) {  | 
|---|
 | 1554 | +		tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);  | 
|---|
 | 1555 | +		if (tmp_ret < 0)  | 
|---|
 | 1556 | +			return tmp_ret;  | 
|---|
 | 1557 | +	}  | 
|---|
 | 1558 | +  | 
|---|
 | 1559 | +	ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);  | 
|---|
 | 1560 | +	if (ret < 0)  | 
|---|
 | 1561 | +		return ret;  | 
|---|
 | 1562 | +	*bufp += ret;  | 
|---|
 | 1563 | +	*lenp -= ret;  | 
|---|
 | 1564 | +	ret += tmp_ret;  | 
|---|
 | 1565 | +  | 
|---|
 | 1566 | +	/* Avoid leading slash. */  | 
|---|
 | 1567 | +	if (!ret)  | 
|---|
 | 1568 | +		return ret;  | 
|---|
 | 1569 | +  | 
|---|
 | 1570 | +	tmp_ret = strscpy(*bufp, "/", *lenp);  | 
|---|
 | 1571 | +	if (tmp_ret < 0)  | 
|---|
 | 1572 | +		return tmp_ret;  | 
|---|
 | 1573 | +	*bufp += tmp_ret;  | 
|---|
 | 1574 | +	*lenp -= tmp_ret;  | 
|---|
 | 1575 | +  | 
|---|
 | 1576 | +	return ret + tmp_ret;  | 
|---|
 | 1577 | +}  | 
|---|
 | 1578 | +  | 
|---|
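sysctl_cpy_dir() builds the directory prefix by recursing to the root first, so path components come out outermost-first, each followed by a '/', and the empty root name avoids a leading slash. A small userspace sketch of the same recursion, with simplified stand-ins for ctl_dir and strscpy():

```c
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

struct dir { struct dir *parent; const char *name; };	/* ctl_dir stand-in */

static ssize_t cpy_dir(const struct dir *d, char **bufp, size_t *lenp)
{
	ssize_t tmp = 0, ret;

	if (d->parent && (tmp = cpy_dir(d->parent, bufp, lenp)) < 0)
		return tmp;

	ret = snprintf(*bufp, *lenp, "%s", d->name);	/* strscpy() stand-in */
	*bufp += ret;
	*lenp -= ret;
	if (!(ret += tmp))
		return ret;		/* root name is "": avoid leading '/' */

	*(*bufp)++ = '/';
	(*lenp)--;
	return ret + 1;
}

int main(void)
{
	struct dir root = { NULL, "" };
	struct dir net = { &root, "net" };
	struct dir ipv4 = { &net, "ipv4" };
	char buf[64] = "", *p = buf;
	size_t len = sizeof(buf);

	cpy_dir(&ipv4, &p, &len);
	snprintf(p, len, "tcp_mem");	/* what bpf_sysctl_get_name() appends */
	puts(buf);			/* prints "net/ipv4/tcp_mem" */
	return 0;
}
```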
 | 1579 | +BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,  | 
|---|
 | 1580 | +	   size_t, buf_len, u64, flags)  | 
|---|
 | 1581 | +{  | 
|---|
 | 1582 | +	ssize_t tmp_ret = 0, ret;  | 
|---|
 | 1583 | +  | 
|---|
 | 1584 | +	if (!buf)  | 
|---|
 | 1585 | +		return -EINVAL;  | 
|---|
 | 1586 | +  | 
|---|
 | 1587 | +	if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {  | 
|---|
 | 1588 | +		if (!ctx->head)  | 
|---|
 | 1589 | +			return -EINVAL;  | 
|---|
 | 1590 | +		tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);  | 
|---|
 | 1591 | +		if (tmp_ret < 0)  | 
|---|
 | 1592 | +			return tmp_ret;  | 
|---|
 | 1593 | +	}  | 
|---|
 | 1594 | +  | 
|---|
 | 1595 | +	ret = strscpy(buf, ctx->table->procname, buf_len);  | 
|---|
 | 1596 | +  | 
|---|
 | 1597 | +	return ret < 0 ? ret : tmp_ret + ret;  | 
|---|
 | 1598 | +}  | 
|---|
 | 1599 | +  | 
|---|
 | 1600 | +static const struct bpf_func_proto bpf_sysctl_get_name_proto = {  | 
|---|
 | 1601 | +	.func		= bpf_sysctl_get_name,  | 
|---|
 | 1602 | +	.gpl_only	= false,  | 
|---|
 | 1603 | +	.ret_type	= RET_INTEGER,  | 
|---|
 | 1604 | +	.arg1_type	= ARG_PTR_TO_CTX,  | 
|---|
 | 1605 | +	.arg2_type	= ARG_PTR_TO_MEM,  | 
|---|
 | 1606 | +	.arg3_type	= ARG_CONST_SIZE,  | 
|---|
 | 1607 | +	.arg4_type	= ARG_ANYTHING,  | 
|---|
 | 1608 | +};  | 
|---|
 | 1609 | +  | 
|---|
 | 1610 | +static int copy_sysctl_value(char *dst, size_t dst_len, char *src,  | 
|---|
 | 1611 | +			     size_t src_len)  | 
|---|
 | 1612 | +{  | 
|---|
 | 1613 | +	if (!dst)  | 
|---|
 | 1614 | +		return -EINVAL;  | 
|---|
 | 1615 | +  | 
|---|
 | 1616 | +	if (!dst_len)  | 
|---|
 | 1617 | +		return -E2BIG;  | 
|---|
 | 1618 | +  | 
|---|
 | 1619 | +	if (!src || !src_len) {  | 
|---|
 | 1620 | +		memset(dst, 0, dst_len);  | 
|---|
 | 1621 | +		return -EINVAL;  | 
|---|
 | 1622 | +	}  | 
|---|
 | 1623 | +  | 
|---|
 | 1624 | +	memcpy(dst, src, min(dst_len, src_len));  | 
|---|
 | 1625 | +  | 
|---|
 | 1626 | +	if (dst_len > src_len) {  | 
|---|
 | 1627 | +		memset(dst + src_len, '\0', dst_len - src_len);  | 
|---|
 | 1628 | +		return src_len;  | 
|---|
 | 1629 | +	}  | 
|---|
 | 1630 | +  | 
|---|
 | 1631 | +	dst[dst_len - 1] = '\0';  | 
|---|
 | 1632 | +  | 
|---|
 | 1633 | +	return -E2BIG;  | 
|---|
 | 1634 | +}  | 
|---|
 | 1635 | +  | 
|---|
 | 1636 | +BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,  | 
|---|
 | 1637 | +	   char *, buf, size_t, buf_len)  | 
|---|
 | 1638 | +{  | 
|---|
 | 1639 | +	return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);  | 
|---|
 | 1640 | +}  | 
|---|
 | 1641 | +  | 
|---|
 | 1642 | +static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {  | 
|---|
 | 1643 | +	.func		= bpf_sysctl_get_current_value,  | 
|---|
 | 1644 | +	.gpl_only	= false,  | 
|---|
 | 1645 | +	.ret_type	= RET_INTEGER,  | 
|---|
 | 1646 | +	.arg1_type	= ARG_PTR_TO_CTX,  | 
|---|
 | 1647 | +	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,  | 
|---|
 | 1648 | +	.arg3_type	= ARG_CONST_SIZE,  | 
|---|
 | 1649 | +};  | 
|---|
 | 1650 | +  | 
|---|
 | 1651 | +BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,  | 
|---|
 | 1652 | +	   size_t, buf_len)  | 
|---|
 | 1653 | +{  | 
|---|
 | 1654 | +	if (!ctx->write) {  | 
|---|
 | 1655 | +		if (buf && buf_len)  | 
|---|
 | 1656 | +			memset(buf, '\0', buf_len);  | 
|---|
 | 1657 | +		return -EINVAL;  | 
|---|
 | 1658 | +	}  | 
|---|
 | 1659 | +	return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);  | 
|---|
 | 1660 | +}  | 
|---|
 | 1661 | +  | 
|---|
 | 1662 | +static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {  | 
|---|
 | 1663 | +	.func		= bpf_sysctl_get_new_value,  | 
|---|
 | 1664 | +	.gpl_only	= false,  | 
|---|
 | 1665 | +	.ret_type	= RET_INTEGER,  | 
|---|
 | 1666 | +	.arg1_type	= ARG_PTR_TO_CTX,  | 
|---|
 | 1667 | +	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,  | 
|---|
 | 1668 | +	.arg3_type	= ARG_CONST_SIZE,  | 
|---|
 | 1669 | +};  | 
|---|
 | 1670 | +  | 
|---|
 | 1671 | +BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,  | 
|---|
 | 1672 | +	   const char *, buf, size_t, buf_len)  | 
|---|
 | 1673 | +{  | 
|---|
 | 1674 | +	if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)  | 
|---|
 | 1675 | +		return -EINVAL;  | 
|---|
 | 1676 | +  | 
|---|
 | 1677 | +	if (buf_len > PAGE_SIZE - 1)  | 
|---|
 | 1678 | +		return -E2BIG;  | 
|---|
 | 1679 | +  | 
|---|
 | 1680 | +	memcpy(ctx->new_val, buf, buf_len);  | 
|---|
 | 1681 | +	ctx->new_len = buf_len;  | 
|---|
 | 1682 | +	ctx->new_updated = 1;  | 
|---|
 | 1683 | +  | 
|---|
 | 1684 | +	return 0;  | 
|---|
 | 1685 | +}  | 
|---|
 | 1686 | +  | 
|---|
 | 1687 | +static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {  | 
|---|
 | 1688 | +	.func		= bpf_sysctl_set_new_value,  | 
|---|
 | 1689 | +	.gpl_only	= false,  | 
|---|
 | 1690 | +	.ret_type	= RET_INTEGER,  | 
|---|
 | 1691 | +	.arg1_type	= ARG_PTR_TO_CTX,  | 
|---|
 | 1692 | +	.arg2_type	= ARG_PTR_TO_MEM,  | 
|---|
 | 1693 | +	.arg3_type	= ARG_CONST_SIZE,  | 
|---|
 | 1694 | +};  | 
|---|
 | 1695 | +  | 
|---|
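Taken together, these helpers let a program rewrite a value on its way into the kernel. A sketch that parses the written value with bpf_strtoul() and clamps it by installing a replacement through bpf_sysctl_set_new_value() (the limit of 128 is an arbitrary example policy):

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sysctl")
int clamp_write(struct bpf_sysctl *ctx)
{
	char val[16], clamp[] = "128";
	unsigned long parsed;
	int len;

	if (!ctx->write)
		return 1;	/* reads pass through */

	len = bpf_sysctl_get_new_value(ctx, val, sizeof(val));
	if (len <= 0 || len >= (int)sizeof(val))
		return 1;	/* nothing usable: let the kernel decide */

	if (bpf_strtoul(val, len, 0, &parsed) < 0)
		return 0;	/* unparsable input: deny the write */

	if (parsed > 128 &&
	    bpf_sysctl_set_new_value(ctx, clamp, sizeof(clamp) - 1))
		return 0;	/* could not install the replacement */

	return 1;
}

char _license[] SEC("license") = "GPL";
```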
 | 1696 | +static const struct bpf_func_proto *  | 
|---|
 | 1697 | +sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)  | 
|---|
 | 1698 | +{  | 
|---|
 | 1699 | +	switch (func_id) {  | 
|---|
 | 1700 | +	case BPF_FUNC_strtol:  | 
|---|
 | 1701 | +		return &bpf_strtol_proto;  | 
|---|
 | 1702 | +	case BPF_FUNC_strtoul:  | 
|---|
 | 1703 | +		return &bpf_strtoul_proto;  | 
|---|
 | 1704 | +	case BPF_FUNC_sysctl_get_name:  | 
|---|
 | 1705 | +		return &bpf_sysctl_get_name_proto;  | 
|---|
 | 1706 | +	case BPF_FUNC_sysctl_get_current_value:  | 
|---|
 | 1707 | +		return &bpf_sysctl_get_current_value_proto;  | 
|---|
 | 1708 | +	case BPF_FUNC_sysctl_get_new_value:  | 
|---|
 | 1709 | +		return &bpf_sysctl_get_new_value_proto;  | 
|---|
 | 1710 | +	case BPF_FUNC_sysctl_set_new_value:  | 
|---|
 | 1711 | +		return &bpf_sysctl_set_new_value_proto;  | 
|---|
 | 1712 | +	default:  | 
|---|
 | 1713 | +		return cgroup_base_func_proto(func_id, prog);  | 
|---|
 | 1714 | +	}  | 
|---|
 | 1715 | +}  | 
|---|
 | 1716 | +  | 
|---|
 | 1717 | +static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,  | 
|---|
 | 1718 | +				   const struct bpf_prog *prog,  | 
|---|
 | 1719 | +				   struct bpf_insn_access_aux *info)  | 
|---|
 | 1720 | +{  | 
|---|
 | 1721 | +	const int size_default = sizeof(__u32);  | 
|---|
 | 1722 | +  | 
|---|
 | 1723 | +	if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)  | 
|---|
 | 1724 | +		return false;  | 
|---|
 | 1725 | +  | 
|---|
 | 1726 | +	switch (off) {  | 
|---|
 | 1727 | +	case bpf_ctx_range(struct bpf_sysctl, write):  | 
|---|
 | 1728 | +		if (type != BPF_READ)  | 
|---|
 | 1729 | +			return false;  | 
|---|
 | 1730 | +		bpf_ctx_record_field_size(info, size_default);  | 
|---|
 | 1731 | +		return bpf_ctx_narrow_access_ok(off, size, size_default);  | 
|---|
 | 1732 | +	case bpf_ctx_range(struct bpf_sysctl, file_pos):  | 
|---|
 | 1733 | +		if (type == BPF_READ) {  | 
|---|
 | 1734 | +			bpf_ctx_record_field_size(info, size_default);  | 
|---|
 | 1735 | +			return bpf_ctx_narrow_access_ok(off, size, size_default);  | 
|---|
 | 1736 | +		} else {  | 
|---|
 | 1737 | +			return size == size_default;  | 
|---|
 | 1738 | +		}  | 
|---|
 | 1739 | +	default:  | 
|---|
 | 1740 | +		return false;  | 
|---|
 | 1741 | +	}  | 
|---|
 | 1742 | +}  | 
|---|
 | 1743 | +  | 
|---|
 | 1744 | +static u32 sysctl_convert_ctx_access(enum bpf_access_type type,  | 
|---|
 | 1745 | +				     const struct bpf_insn *si,  | 
|---|
 | 1746 | +				     struct bpf_insn *insn_buf,  | 
|---|
 | 1747 | +				     struct bpf_prog *prog, u32 *target_size)  | 
|---|
 | 1748 | +{  | 
|---|
 | 1749 | +	struct bpf_insn *insn = insn_buf;  | 
|---|
 | 1750 | +	u32 read_size;  | 
|---|
 | 1751 | +  | 
|---|
 | 1752 | +	switch (si->off) {  | 
|---|
 | 1753 | +	case offsetof(struct bpf_sysctl, write):  | 
|---|
 | 1754 | +		*insn++ = BPF_LDX_MEM(  | 
|---|
 | 1755 | +			BPF_SIZE(si->code), si->dst_reg, si->src_reg,  | 
|---|
 | 1756 | +			bpf_target_off(struct bpf_sysctl_kern, write,  | 
|---|
 | 1757 | +				       sizeof_field(struct bpf_sysctl_kern,  | 
|---|
 | 1758 | +						    write),  | 
|---|
 | 1759 | +				       target_size));  | 
|---|
 | 1760 | +		break;  | 
|---|
 | 1761 | +	case offsetof(struct bpf_sysctl, file_pos):  | 
|---|
 | 1762 | +		/* ppos is a pointer so it must be accessed via indirect  | 
|---|
 | 1763 | +		 * loads and stores. For stores an additional temporary  | 
|---|
 | 1764 | +		 * register is used as well, since neither src_reg nor  | 
|---|
 | 1765 | +		 * dst_reg may be clobbered.  | 
|---|
 | 1766 | +		 */  | 
|---|
 | 1767 | +		if (type == BPF_WRITE) {  | 
|---|
 | 1768 | +			int treg = BPF_REG_9;  | 
|---|
 | 1769 | +  | 
|---|
 | 1770 | +			if (si->src_reg == treg || si->dst_reg == treg)  | 
|---|
 | 1771 | +				--treg;  | 
|---|
 | 1772 | +			if (si->src_reg == treg || si->dst_reg == treg)  | 
|---|
 | 1773 | +				--treg;  | 
|---|
 | 1774 | +			*insn++ = BPF_STX_MEM(  | 
|---|
 | 1775 | +				BPF_DW, si->dst_reg, treg,  | 
|---|
 | 1776 | +				offsetof(struct bpf_sysctl_kern, tmp_reg));  | 
|---|
 | 1777 | +			*insn++ = BPF_LDX_MEM(  | 
|---|
 | 1778 | +				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),  | 
|---|
 | 1779 | +				treg, si->dst_reg,  | 
|---|
 | 1780 | +				offsetof(struct bpf_sysctl_kern, ppos));  | 
|---|
 | 1781 | +			*insn++ = BPF_STX_MEM(  | 
|---|
 | 1782 | +				BPF_SIZEOF(u32), treg, si->src_reg,  | 
|---|
 | 1783 | +				bpf_ctx_narrow_access_offset(  | 
|---|
 | 1784 | +					0, sizeof(u32), sizeof(loff_t)));  | 
|---|
 | 1785 | +			*insn++ = BPF_LDX_MEM(  | 
|---|
 | 1786 | +				BPF_DW, treg, si->dst_reg,  | 
|---|
 | 1787 | +				offsetof(struct bpf_sysctl_kern, tmp_reg));  | 
|---|
 | 1788 | +		} else {  | 
|---|
 | 1789 | +			*insn++ = BPF_LDX_MEM(  | 
|---|
 | 1790 | +				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),  | 
|---|
 | 1791 | +				si->dst_reg, si->src_reg,  | 
|---|
 | 1792 | +				offsetof(struct bpf_sysctl_kern, ppos));  | 
|---|
 | 1793 | +			read_size = bpf_size_to_bytes(BPF_SIZE(si->code));  | 
|---|
 | 1794 | +			*insn++ = BPF_LDX_MEM(  | 
|---|
 | 1795 | +				BPF_SIZE(si->code), si->dst_reg, si->dst_reg,  | 
|---|
 | 1796 | +				bpf_ctx_narrow_access_offset(  | 
|---|
 | 1797 | +					0, read_size, sizeof(loff_t)));  | 
|---|
 | 1798 | +		}  | 
|---|
 | 1799 | +		*target_size = sizeof(u32);  | 
|---|
 | 1800 | +		break;  | 
|---|
 | 1801 | +	}  | 
|---|
 | 1802 | +  | 
|---|
 | 1803 | +	return insn - insn_buf;  | 
|---|
 | 1804 | +}  | 
|---|
 | 1805 | +  | 
|---|
 | 1806 | +const struct bpf_verifier_ops cg_sysctl_verifier_ops = {  | 
|---|
 | 1807 | +	.get_func_proto		= sysctl_func_proto,  | 
|---|
 | 1808 | +	.is_valid_access	= sysctl_is_valid_access,  | 
|---|
 | 1809 | +	.convert_ctx_access	= sysctl_convert_ctx_access,  | 
|---|
 | 1810 | +};  | 
|---|
 | 1811 | +  | 
|---|
 | 1812 | +const struct bpf_prog_ops cg_sysctl_prog_ops = {  | 
|---|
 | 1813 | +};  | 
|---|
 | 1814 | +  | 
|---|
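The file_pos handling above is what makes plain field access work from BPF: loads and stores on ctx->file_pos are rewritten into indirect accesses through the kernel's ppos pointer, with the temporary-register dance for stores. From the program's point of view it is just a field:

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sysctl")
int rewind_pos(struct bpf_sysctl *ctx)
{
	/* Both the load and the store go through the ppos rewrite above. */
	if (ctx->file_pos != 0)
		ctx->file_pos = 0;	/* force the access to start at offset 0 */
	return 1;
}

char _license[] SEC("license") = "GPL";
```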
 | 1815 | +static const struct bpf_func_proto *  | 
|---|
 | 1816 | +cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)  | 
|---|
 | 1817 | +{  | 
|---|
 | 1818 | +	switch (func_id) {  | 
|---|
 | 1819 | +#ifdef CONFIG_NET  | 
|---|
 | 1820 | +	case BPF_FUNC_sk_storage_get:  | 
|---|
 | 1821 | +		return &bpf_sk_storage_get_proto;  | 
|---|
 | 1822 | +	case BPF_FUNC_sk_storage_delete:  | 
|---|
 | 1823 | +		return &bpf_sk_storage_delete_proto;  | 
|---|
 | 1824 | +#endif  | 
|---|
 | 1825 | +#ifdef CONFIG_INET  | 
|---|
 | 1826 | +	case BPF_FUNC_tcp_sock:  | 
|---|
 | 1827 | +		return &bpf_tcp_sock_proto;  | 
|---|
 | 1828 | +#endif  | 
|---|
 | 1829 | +	default:  | 
|---|
 | 1830 | +		return cgroup_base_func_proto(func_id, prog);  | 
|---|
 | 1831 | +	}  | 
|---|
 | 1832 | +}  | 
|---|
 | 1833 | +  | 
|---|
 | 1834 | +static bool cg_sockopt_is_valid_access(int off, int size,  | 
|---|
 | 1835 | +				       enum bpf_access_type type,  | 
|---|
 | 1836 | +				       const struct bpf_prog *prog,  | 
|---|
 | 1837 | +				       struct bpf_insn_access_aux *info)  | 
|---|
 | 1838 | +{  | 
|---|
 | 1839 | +	const int size_default = sizeof(__u32);  | 
|---|
 | 1840 | +  | 
|---|
 | 1841 | +	if (off < 0 || off >= sizeof(struct bpf_sockopt))  | 
|---|
 | 1842 | +		return false;  | 
|---|
 | 1843 | +  | 
|---|
 | 1844 | +	if (off % size != 0)  | 
|---|
 | 1845 | +		return false;  | 
|---|
 | 1846 | +  | 
|---|
 | 1847 | +	if (type == BPF_WRITE) {  | 
|---|
 | 1848 | +		switch (off) {  | 
|---|
 | 1849 | +		case offsetof(struct bpf_sockopt, retval):  | 
|---|
 | 1850 | +			if (size != size_default)  | 
|---|
 | 1851 | +				return false;  | 
|---|
 | 1852 | +			return prog->expected_attach_type ==  | 
|---|
 | 1853 | +				BPF_CGROUP_GETSOCKOPT;  | 
|---|
 | 1854 | +		case offsetof(struct bpf_sockopt, optname):  | 
|---|
 | 1855 | +			fallthrough;  | 
|---|
 | 1856 | +		case offsetof(struct bpf_sockopt, level):  | 
|---|
 | 1857 | +			if (size != size_default)  | 
|---|
 | 1858 | +				return false;  | 
|---|
 | 1859 | +			return prog->expected_attach_type ==  | 
|---|
 | 1860 | +				BPF_CGROUP_SETSOCKOPT;  | 
|---|
 | 1861 | +		case offsetof(struct bpf_sockopt, optlen):  | 
|---|
 | 1862 | +			return size == size_default;  | 
|---|
 | 1863 | +		default:  | 
|---|
 | 1864 | +			return false;  | 
|---|
 | 1865 | +		}  | 
|---|
 | 1866 | +	}  | 
|---|
 | 1867 | +  | 
|---|
 | 1868 | +	switch (off) {  | 
|---|
 | 1869 | +	case offsetof(struct bpf_sockopt, sk):  | 
|---|
 | 1870 | +		if (size != sizeof(__u64))  | 
|---|
 | 1871 | +			return false;  | 
|---|
 | 1872 | +		info->reg_type = PTR_TO_SOCKET;  | 
|---|
 | 1873 | +		break;  | 
|---|
 | 1874 | +	case offsetof(struct bpf_sockopt, optval):  | 
|---|
 | 1875 | +		if (size != sizeof(__u64))  | 
|---|
 | 1876 | +			return false;  | 
|---|
 | 1877 | +		info->reg_type = PTR_TO_PACKET;  | 
|---|
 | 1878 | +		break;  | 
|---|
 | 1879 | +	case offsetof(struct bpf_sockopt, optval_end):  | 
|---|
 | 1880 | +		if (size != sizeof(__u64))  | 
|---|
 | 1881 | +			return false;  | 
|---|
 | 1882 | +		info->reg_type = PTR_TO_PACKET_END;  | 
|---|
 | 1883 | +		break;  | 
|---|
 | 1884 | +	case offsetof(struct bpf_sockopt, retval):  | 
|---|
 | 1885 | +		if (size != size_default)  | 
|---|
 | 1886 | +			return false;  | 
|---|
 | 1887 | +		return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;  | 
|---|
 | 1888 | +	default:  | 
|---|
 | 1889 | +		if (size != size_default)  | 
|---|
 | 1890 | +			return false;  | 
|---|
 | 1891 | +		break;  | 
|---|
 | 1892 | +	}  | 
|---|
 | 1893 | +	return true;  | 
|---|
 | 1894 | +}  | 
|---|
 | 1895 | +  | 
|---|
 | 1896 | +#define CG_SOCKOPT_ACCESS_FIELD(T, F)					\  | 
|---|
 | 1897 | +	T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F),			\  | 
|---|
 | 1898 | +	  si->dst_reg, si->src_reg,					\  | 
|---|
 | 1899 | +	  offsetof(struct bpf_sockopt_kern, F))  | 
|---|
 | 1900 | +  | 
|---|
 | 1901 | +static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,  | 
|---|
 | 1902 | +					 const struct bpf_insn *si,  | 
|---|
 | 1903 | +					 struct bpf_insn *insn_buf,  | 
|---|
 | 1904 | +					 struct bpf_prog *prog,  | 
|---|
 | 1905 | +					 u32 *target_size)  | 
|---|
 | 1906 | +{  | 
|---|
 | 1907 | +	struct bpf_insn *insn = insn_buf;  | 
|---|
 | 1908 | +  | 
|---|
 | 1909 | +	switch (si->off) {  | 
|---|
 | 1910 | +	case offsetof(struct bpf_sockopt, sk):  | 
|---|
 | 1911 | +		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk);  | 
|---|
 | 1912 | +		break;  | 
|---|
 | 1913 | +	case offsetof(struct bpf_sockopt, level):  | 
|---|
 | 1914 | +		if (type == BPF_WRITE)  | 
|---|
 | 1915 | +			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level);  | 
|---|
 | 1916 | +		else  | 
|---|
 | 1917 | +			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level);  | 
|---|
 | 1918 | +		break;  | 
|---|
 | 1919 | +	case offsetof(struct bpf_sockopt, optname):  | 
|---|
 | 1920 | +		if (type == BPF_WRITE)  | 
|---|
 | 1921 | +			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname);  | 
|---|
 | 1922 | +		else  | 
|---|
 | 1923 | +			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname);  | 
|---|
 | 1924 | +		break;  | 
|---|
 | 1925 | +	case offsetof(struct bpf_sockopt, optlen):  | 
|---|
 | 1926 | +		if (type == BPF_WRITE)  | 
|---|
 | 1927 | +			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen);  | 
|---|
 | 1928 | +		else  | 
|---|
 | 1929 | +			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);  | 
|---|
 | 1930 | +		break;  | 
|---|
 | 1931 | +	case offsetof(struct bpf_sockopt, retval):  | 
|---|
 | 1932 | +		if (type == BPF_WRITE)  | 
|---|
 | 1933 | +			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);  | 
|---|
 | 1934 | +		else  | 
|---|
 | 1935 | +			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);  | 
|---|
 | 1936 | +		break;  | 
|---|
 | 1937 | +	case offsetof(struct bpf_sockopt, optval):  | 
|---|
 | 1938 | +		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);  | 
|---|
 | 1939 | +		break;  | 
|---|
 | 1940 | +	case offsetof(struct bpf_sockopt, optval_end):  | 
|---|
 | 1941 | +		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end);  | 
|---|
 | 1942 | +		break;  | 
|---|
 | 1943 | +	}  | 
|---|
 | 1944 | +  | 
|---|
 | 1945 | +	return insn - insn_buf;  | 
|---|
 | 1946 | +}  | 
|---|
 | 1947 | +  | 
|---|
 | 1948 | +static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,  | 
|---|
 | 1949 | +				   bool direct_write,  | 
|---|
 | 1950 | +				   const struct bpf_prog *prog)  | 
|---|
 | 1951 | +{  | 
|---|
 | 1952 | +	/* Nothing to do for the sockopt argument: the data is  | 
|---|
 | 1953 | +	 * allocated with kzalloc() and thus already zeroed. */  | 
|---|
 | 1954 | +	return 0;  | 
|---|
 | 1955 | +}  | 
|---|
 | 1956 | +  | 
|---|
 | 1957 | +const struct bpf_verifier_ops cg_sockopt_verifier_ops = {  | 
|---|
 | 1958 | +	.get_func_proto		= cg_sockopt_func_proto,  | 
|---|
 | 1959 | +	.is_valid_access	= cg_sockopt_is_valid_access,  | 
|---|
 | 1960 | +	.convert_ctx_access	= cg_sockopt_convert_ctx_access,  | 
|---|
 | 1961 | +	.gen_prologue		= cg_sockopt_get_prologue,  | 
|---|
 | 1962 | +};  | 
|---|
 | 1963 | +  | 
|---|
 | 1964 | +const struct bpf_prog_ops cg_sockopt_prog_ops = {  | 
|---|
 | 1965 | +};  | 
|---|
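Finally, a userspace sketch of wiring one of the programs above to a cgroup with libbpf; the object path, cgroup path, and program name are placeholders:

```c
#include <fcntl.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	int cg_fd, err;

	obj = bpf_object__open_file("sockopt_hook.o", NULL);	/* placeholder */
	if (!obj || bpf_object__load(obj))
		return 1;

	prog = bpf_object__find_program_by_name(obj, "getsockopt_hook");
	if (!prog)
		return 1;

	cg_fd = open("/sys/fs/cgroup/mygroup", O_RDONLY);	/* placeholder */
	if (cg_fd < 0)
		return 1;

	/* BPF_F_ALLOW_MULTI keeps other programs on this hook attached. */
	err = bpf_prog_attach(bpf_program__fd(prog), cg_fd,
			      BPF_CGROUP_GETSOCKOPT, BPF_F_ALLOW_MULTI);
	close(cg_fd);
	return err ? 1 : 0;
}
```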