 //SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf-cgroup.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/bug.h>
 #include <linux/filter.h>
 #include <linux/mm.h>
 #include <linux/rbtree.h>
 #include <linux/slab.h>
-
-DEFINE_PER_CPU(void*, bpf_cgroup_storage);
+#include <uapi/linux/btf.h>
 
 #ifdef CONFIG_CGROUP_BPF
 
+DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
+	       bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
+
+#include "../cgroup/cgroup-internal.h"
+
 #define LOCAL_STORAGE_CREATE_FLAG_MASK					\
-	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
 
 struct bpf_cgroup_storage_map {
 	struct bpf_map map;
 
 	spinlock_t lock;
-	struct bpf_prog *prog;
 	struct rb_root root;
 	struct list_head list;
 };
@@ ... @@
 	return container_of(map, struct bpf_cgroup_storage_map, map);
 }
 
-static int bpf_cgroup_storage_key_cmp(
-	const struct bpf_cgroup_storage_key *key1,
-	const struct bpf_cgroup_storage_key *key2)
+static bool attach_type_isolated(const struct bpf_map *map)
 {
-	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
-		return -1;
-	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
-		return 1;
-	else if (key1->attach_type < key2->attach_type)
-		return -1;
-	else if (key1->attach_type > key2->attach_type)
-		return 1;
+	return map->key_size == sizeof(struct bpf_cgroup_storage_key);
+}
+
+static int bpf_cgroup_storage_key_cmp(const struct bpf_cgroup_storage_map *map,
+				      const void *_key1, const void *_key2)
+{
+	if (attach_type_isolated(&map->map)) {
+		const struct bpf_cgroup_storage_key *key1 = _key1;
+		const struct bpf_cgroup_storage_key *key2 = _key2;
+
+		if (key1->cgroup_inode_id < key2->cgroup_inode_id)
+			return -1;
+		else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
+			return 1;
+		else if (key1->attach_type < key2->attach_type)
+			return -1;
+		else if (key1->attach_type > key2->attach_type)
+			return 1;
+	} else {
+		const __u64 *cgroup_inode_id1 = _key1;
+		const __u64 *cgroup_inode_id2 = _key2;
+
+		if (*cgroup_inode_id1 < *cgroup_inode_id2)
+			return -1;
+		else if (*cgroup_inode_id1 > *cgroup_inode_id2)
+			return 1;
+	}
 	return 0;
 }
 
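The comparator now handles two key layouts: the attach-type-aware `struct bpf_cgroup_storage_key` from the UAPI, and a bare `__u64` cgroup ID for maps created with `key_size == sizeof(__u64)`. A minimal sketch of how userspace would build each key form (the field values are illustrative, not from the patch):

```c
#include <linux/bpf.h>	/* struct bpf_cgroup_storage_key, attach types */

/* Attach-type-isolated map: the key pairs a cgroup ID with an attach type. */
struct bpf_cgroup_storage_key key = {
	.cgroup_inode_id = 12345,	/* hypothetical cgroup ID */
	.attach_type	 = BPF_CGROUP_INET_EGRESS,
};

/* Shared map: the key is just the cgroup ID. */
__u64 shared_key = 12345;		/* same hypothetical cgroup ID */
```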
-static struct bpf_cgroup_storage *cgroup_storage_lookup(
-	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
-	bool locked)
+struct bpf_cgroup_storage *
+cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
+		      void *key, bool locked)
 {
 	struct rb_root *root = &map->root;
 	struct rb_node *node;
@@ ... @@
 
 		storage = container_of(node, struct bpf_cgroup_storage, node);
 
-		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
+		switch (bpf_cgroup_storage_key_cmp(map, key, &storage->key)) {
 		case -1:
 			node = node->rb_left;
 			break;
@@ ... @@
 		this = container_of(*new, struct bpf_cgroup_storage, node);
 
 		parent = *new;
-		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
+		switch (bpf_cgroup_storage_key_cmp(map, &storage->key, &this->key)) {
 		case -1:
 			new = &((*new)->rb_left);
 			break;
@@ ... @@
 	return 0;
 }
 
-static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
+static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *key)
 {
 	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-	struct bpf_cgroup_storage_key *key = _key;
 	struct bpf_cgroup_storage *storage;
 
 	storage = cgroup_storage_lookup(map, key, false);
@@ ... @@
 	return &READ_ONCE(storage->buf)->data[0];
 }
 
-static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
+static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
 				      void *value, u64 flags)
 {
-	struct bpf_cgroup_storage_key *key = _key;
 	struct bpf_cgroup_storage *storage;
 	struct bpf_storage_buffer *new;
 
-	if (flags != BPF_ANY && flags != BPF_EXIST)
+	if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST)))
+		return -EINVAL;
+
+	if (unlikely((flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(map)))
 		return -EINVAL;
 
 	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
 					key, false);
 	if (!storage)
 		return -ENOENT;
+
+	if (flags & BPF_F_LOCK) {
+		copy_map_value_locked(map, storage->buf->data, value, false);
+		return 0;
+	}
 
 	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
 			   map->value_size,
@@ ... @@
 		return -ENOMEM;
 
 	memcpy(&new->data[0], value, map->value_size);
+	check_and_init_map_lock(map, new->data);
 
 	new = xchg(&storage->buf, new);
 	kfree_rcu(new, rcu);
@@ ... @@
 	return 0;
 }
 
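With this change an update from userspace can be made under the value's embedded spin lock rather than by swapping in a fresh buffer. A minimal sketch of the userspace side, assuming the map's BTF value type embeds a `struct bpf_spin_lock` (the value layout and fd are hypothetical):

```c
#include <linux/bpf.h>	/* BPF_F_LOCK, struct bpf_spin_lock */
#include <bpf/bpf.h>	/* bpf_map_update_elem() syscall wrapper */

struct storage_value {
	struct bpf_spin_lock lock;	/* must match the map's BTF value type */
	__u64 counter;
};

int update_locked(int map_fd, __u64 cgroup_id)
{
	struct storage_value val = { .counter = 42 };	/* illustrative payload */

	/* With BPF_F_LOCK the kernel copies the value in place via
	 * copy_map_value_locked() instead of xchg()ing a new buffer. */
	return bpf_map_update_elem(map_fd, &cgroup_id, &val, BPF_F_LOCK);
}
```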
-static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *key,
+				   void *value)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	struct bpf_cgroup_storage *storage;
+	int cpu, off = 0;
+	u32 size;
+
+	rcu_read_lock();
+	storage = cgroup_storage_lookup(map, key, false);
+	if (!storage) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
+
+	/* per_cpu areas are zero-filled and bpf programs can only
+	 * access 'value_size' of them, so copying rounded areas
+	 * will not leak any kernel data
+	 */
+	size = round_up(_map->value_size, 8);
+	for_each_possible_cpu(cpu) {
+		bpf_long_memcpy(value + off,
+				per_cpu_ptr(storage->percpu_buf, cpu), size);
+		off += size;
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *key,
+				     void *value, u64 map_flags)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	struct bpf_cgroup_storage *storage;
+	int cpu, off = 0;
+	u32 size;
+
+	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
+		return -EINVAL;
+
+	rcu_read_lock();
+	storage = cgroup_storage_lookup(map, key, false);
+	if (!storage) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
+
+	/* the user space will provide round_up(value_size, 8) bytes that
+	 * will be copied into per-cpu area. bpf programs can only access
+	 * value_size of it. During lookup the same extra bytes will be
+	 * returned or zeros which were zero-filled by percpu_alloc,
+	 * so no kernel data leaks possible
+	 */
+	size = round_up(_map->value_size, 8);
+	for_each_possible_cpu(cpu) {
+		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
+				value + off, size);
+		off += size;
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
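Userspace reaches these helpers through the ordinary lookup/update syscalls, but the value buffer must cover every possible CPU at an 8-byte-rounded stride. A hedged sketch of the read side (the fd and value size are illustrative):

```c
#include <stdlib.h>
#include <bpf/bpf.h>	/* bpf_map_lookup_elem() syscall wrapper */
#include <bpf/libbpf.h>	/* libbpf_num_possible_cpus() */

int read_percpu_storage(int map_fd, __u64 cgroup_id, __u32 value_size)
{
	int ncpus = libbpf_num_possible_cpus();
	size_t stride = (value_size + 7) & ~(size_t)7;	/* round_up(value_size, 8) */
	void *buf;
	int err;

	if (ncpus < 0)
		return ncpus;
	buf = calloc(ncpus, stride);
	if (!buf)
		return -1;

	/* The kernel fills one stride-sized slot per possible CPU. */
	err = bpf_map_lookup_elem(map_fd, &cgroup_id, buf);
	/* ... consume buf + cpu * stride for each cpu ... */
	free(buf);
	return err;
}
```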
+static int cgroup_storage_get_next_key(struct bpf_map *_map, void *key,
 				       void *_next_key)
 {
 	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-	struct bpf_cgroup_storage_key *key = _key;
-	struct bpf_cgroup_storage_key *next = _next_key;
 	struct bpf_cgroup_storage *storage;
 
 	spin_lock_bh(&map->lock);
@@ ... @@
 		if (!storage)
 			goto enoent;
 
-		storage = list_next_entry(storage, list);
+		storage = list_next_entry(storage, list_map);
 		if (!storage)
 			goto enoent;
 	} else {
 		storage = list_first_entry(&map->list,
-					 struct bpf_cgroup_storage, list);
+					 struct bpf_cgroup_storage, list_map);
 	}
 
 	spin_unlock_bh(&map->lock);
-	next->attach_type = storage->key.attach_type;
-	next->cgroup_inode_id = storage->key.cgroup_inode_id;
+
+	if (attach_type_isolated(&map->map)) {
+		struct bpf_cgroup_storage_key *next = _next_key;
+		*next = storage->key;
+	} else {
+		__u64 *next = _next_key;
+		*next = storage->key.cgroup_inode_id;
+	}
 	return 0;
 
 enoent:
@@ ... @@
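The same get_next_key callback backs map iteration from userspace; only the key width differs between the two layouts. A minimal sketch walking a shared (u64-keyed) map, with the fd assumed to exist:

```c
#include <bpf/bpf.h>	/* bpf_map_get_next_key() syscall wrapper */

void walk_cgroup_ids(int map_fd)
{
	__u64 key, next_key;
	__u64 *prev = NULL;	/* NULL retrieves the first key */

	while (!bpf_map_get_next_key(map_fd, prev, &next_key)) {
		/* ... look up or print next_key here ... */
		key = next_key;
		prev = &key;
	}
}
```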
 {
 	int numa_node = bpf_map_attr_numa_node(attr);
 	struct bpf_cgroup_storage_map *map;
+	struct bpf_map_memory mem;
+	int ret;
 
-	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
+	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
+	    attr->key_size != sizeof(__u64))
 		return ERR_PTR(-EINVAL);
 
 	if (attr->value_size == 0)
@@ ... @@
 	if (attr->value_size > PAGE_SIZE)
 		return ERR_PTR(-E2BIG);
 
-	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
-		/* reserved bits should not be used */
+	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
+	    !bpf_map_flags_access_ok(attr->map_flags))
 		return ERR_PTR(-EINVAL);
 
 	if (attr->max_entries)
 		/* max_entries is not used and enforced to be 0 */
 		return ERR_PTR(-EINVAL);
 
+	ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map));
+	if (ret < 0)
+		return ERR_PTR(ret);
+
 	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
 			   __GFP_ZERO | GFP_USER, numa_node);
-	if (!map)
+	if (!map) {
+		bpf_map_charge_finish(&mem);
 		return ERR_PTR(-ENOMEM);
+	}
 
-	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
-				  PAGE_SIZE) >> PAGE_SHIFT;
+	bpf_map_charge_move(&map->map.memory, &mem);
 
 	/* copy mandatory map attributes */
 	bpf_map_init_from_attr(&map->map, attr);
@@ ... @@
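map_alloc now admits either key width, so a shared storage map can be created with a plain u64 key. A hedged userspace sketch using libbpf's current low-level creation API (the map name and value size are illustrative):

```c
#include <bpf/bpf.h>	/* bpf_map_create(), modern libbpf */

int create_shared_cgroup_storage(void)
{
	/* key_size == sizeof(__u64) selects the shared layout checked
	 * above; max_entries must be 0 for this map type. */
	return bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, "cg_storage",
			      sizeof(__u64), sizeof(__u64), 0, NULL);
}
```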
 static void cgroup_storage_map_free(struct bpf_map *_map)
 {
 	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	struct list_head *storages = &map->list;
+	struct bpf_cgroup_storage *storage, *stmp;
+
+	mutex_lock(&cgroup_mutex);
+
+	list_for_each_entry_safe(storage, stmp, storages, list_map) {
+		bpf_cgroup_storage_unlink(storage);
+		bpf_cgroup_storage_free(storage);
+	}
+
+	mutex_unlock(&cgroup_mutex);
 
 	WARN_ON(!RB_EMPTY_ROOT(&map->root));
 	WARN_ON(!list_empty(&map->list));
@@ ... @@
 	return -EINVAL;
 }
 
+static int cgroup_storage_check_btf(const struct bpf_map *map,
+				    const struct btf *btf,
+				    const struct btf_type *key_type,
+				    const struct btf_type *value_type)
+{
+	if (attach_type_isolated(map)) {
+		struct btf_member *m;
+		u32 offset, size;
+
+		/* Key is expected to be of struct bpf_cgroup_storage_key type,
+		 * which is:
+		 * struct bpf_cgroup_storage_key {
+		 *	__u64	cgroup_inode_id;
+		 *	__u32	attach_type;
+		 * };
+		 */
+
+		/*
+		 * Key_type must be a structure with two fields.
+		 */
+		if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
+		    BTF_INFO_VLEN(key_type->info) != 2)
+			return -EINVAL;
+
+		/*
+		 * The first field must be a 64 bit integer at 0 offset.
+		 */
+		m = (struct btf_member *)(key_type + 1);
+		size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
+		if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
+			return -EINVAL;
+
+		/*
+		 * The second field must be a 32 bit integer at 64 bit offset.
+		 */
+		m++;
+		offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
+		size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
+		if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
+			return -EINVAL;
+	} else {
+		u32 int_data;
+
+		/*
+		 * Key is expected to be u64, which stores the cgroup_inode_id
+		 */
+
+		if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+			return -EINVAL;
+
+		int_data = *(u32 *)(key_type + 1);
+		if (BTF_INT_BITS(int_data) != 64 || BTF_INT_OFFSET(int_data))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
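On the BPF program side, the BTF-described key has to match one of these two shapes exactly. A hedged sketch of the corresponding BTF-defined map declarations as they might appear in BPF C (the section names and value types are illustrative):

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>	/* __uint, __type, SEC */

/* Attach-type-isolated storage: struct key, validated field by field above. */
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, __u64);
} cg_storage SEC(".maps");

/* Shared storage: a bare 64-bit integer key. */
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__type(key, __u64);
	__type(value, __u64);
} cg_storage_shared SEC(".maps");
```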
+static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
+					 struct seq_file *m)
+{
+	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
+	struct bpf_cgroup_storage *storage;
+	int cpu;
+
+	rcu_read_lock();
+	storage = cgroup_storage_lookup(map_to_storage(map), key, false);
+	if (!storage) {
+		rcu_read_unlock();
+		return;
+	}
+
+	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
+	stype = cgroup_storage_type(map);
+	if (stype == BPF_CGROUP_STORAGE_SHARED) {
+		seq_puts(m, ": ");
+		btf_type_seq_show(map->btf, map->btf_value_type_id,
+				  &READ_ONCE(storage->buf)->data[0], m);
+		seq_puts(m, "\n");
+	} else {
+		seq_puts(m, ": {\n");
+		for_each_possible_cpu(cpu) {
+			seq_printf(m, "\tcpu%d: ", cpu);
+			btf_type_seq_show(map->btf, map->btf_value_type_id,
+					  per_cpu_ptr(storage->percpu_buf, cpu),
+					  m);
+			seq_puts(m, "\n");
+		}
+		seq_puts(m, "}\n");
+	}
+	rcu_read_unlock();
+}
+
+static int cgroup_storage_map_btf_id;
 const struct bpf_map_ops cgroup_storage_map_ops = {
 	.map_alloc = cgroup_storage_map_alloc,
 	.map_free = cgroup_storage_map_free,
@@ ... @@
 	.map_lookup_elem = cgroup_storage_lookup_elem,
 	.map_update_elem = cgroup_storage_update_elem,
 	.map_delete_elem = cgroup_storage_delete_elem,
-	.map_check_btf = map_check_no_btf,
+	.map_check_btf = cgroup_storage_check_btf,
+	.map_seq_show_elem = cgroup_storage_seq_show_elem,
+	.map_btf_name = "bpf_cgroup_storage_map",
+	.map_btf_id = &cgroup_storage_map_btf_id,
 };
 
-int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
+int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
 {
-	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
-	int ret = -EBUSY;
+	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
 
-	spin_lock_bh(&map->lock);
+	if (aux->cgroup_storage[stype] &&
+	    aux->cgroup_storage[stype] != _map)
+		return -EBUSY;
 
-	if (map->prog && map->prog != prog)
-		goto unlock;
-	if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
-		goto unlock;
-
-	map->prog = prog;
-	prog->aux->cgroup_storage = _map;
-	ret = 0;
-unlock:
-	spin_unlock_bh(&map->lock);
-
-	return ret;
+	aux->cgroup_storage[stype] = _map;
+	return 0;
 }
 
-void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
+static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
 {
-	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	size_t size;
 
-	spin_lock_bh(&map->lock);
-	if (map->prog == prog) {
-		WARN_ON(prog->aux->cgroup_storage != _map);
-		map->prog = NULL;
-		prog->aux->cgroup_storage = NULL;
+	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
+		size = sizeof(struct bpf_storage_buffer) + map->value_size;
+		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
+				  PAGE_SIZE) >> PAGE_SHIFT;
+	} else {
+		size = map->value_size;
+		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
+				  PAGE_SIZE) >> PAGE_SHIFT;
 	}
-	spin_unlock_bh(&map->lock);
+
+	return size;
 }
 
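A worked example of this accounting, under the assumption of 4 KiB pages and 4 possible CPUs (both illustrative): a per-CPU map with value_size = 100 rounds each slot to 104 bytes, charges 104 * 4 = 416 bytes, and therefore one page of memlock. A self-contained sketch of the same arithmetic:

```c
#include <stdio.h>

/* Hedged stand-ins for kernel values: PAGE_SIZE and CPU count vary. */
#define PAGE_SZ	4096UL
#define NCPUS	4UL

static unsigned long round_up_ul(unsigned long x, unsigned long a)
{
	return (x + a - 1) / a * a;	/* mirrors the kernel's round_up() */
}

int main(void)
{
	unsigned long value_size = 100;	/* illustrative */
	unsigned long bytes = round_up_ul(value_size, 8) * NCPUS;
	unsigned long pages = round_up_ul(bytes, PAGE_SZ) / PAGE_SZ;

	printf("per-cpu charge: %lu bytes -> %lu page(s)\n", bytes, pages);
	return 0;	/* prints: per-cpu charge: 416 bytes -> 1 page(s) */
}
```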
-struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
+					enum bpf_cgroup_storage_type stype)
 {
 	struct bpf_cgroup_storage *storage;
 	struct bpf_map *map;
+	gfp_t flags;
+	size_t size;
 	u32 pages;
 
-	map = prog->aux->cgroup_storage;
+	map = prog->aux->cgroup_storage[stype];
 	if (!map)
 		return NULL;
 
-	pages = round_up(sizeof(struct bpf_cgroup_storage) +
-			 sizeof(struct bpf_storage_buffer) +
-			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
+	size = bpf_cgroup_storage_calculate_size(map, &pages);
+
 	if (bpf_map_charge_memlock(map, pages))
 		return ERR_PTR(-EPERM);
 
 	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
 			       __GFP_ZERO | GFP_USER, map->numa_node);
-	if (!storage) {
-		bpf_map_uncharge_memlock(map, pages);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!storage)
+		goto enomem;
 
-	storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
-				    map->value_size, __GFP_ZERO | GFP_USER,
-				    map->numa_node);
-	if (!storage->buf) {
-		bpf_map_uncharge_memlock(map, pages);
-		kfree(storage);
-		return ERR_PTR(-ENOMEM);
+	flags = __GFP_ZERO | GFP_USER;
+
+	if (stype == BPF_CGROUP_STORAGE_SHARED) {
+		storage->buf = kmalloc_node(size, flags, map->numa_node);
+		if (!storage->buf)
+			goto enomem;
+		check_and_init_map_lock(map, storage->buf->data);
+	} else {
+		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
+		if (!storage->percpu_buf)
+			goto enomem;
 	}
 
 	storage->map = (struct bpf_cgroup_storage_map *)map;
 
 	return storage;
+
+enomem:
+	bpf_map_uncharge_memlock(map, pages);
+	kfree(storage);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
+{
+	struct bpf_cgroup_storage *storage =
+		container_of(rcu, struct bpf_cgroup_storage, rcu);
+
+	kfree(storage->buf);
+	kfree(storage);
+}
+
+static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
+{
+	struct bpf_cgroup_storage *storage =
+		container_of(rcu, struct bpf_cgroup_storage, rcu);
+
+	free_percpu(storage->percpu_buf);
+	kfree(storage);
 }
 
 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
 {
-	u32 pages;
+	enum bpf_cgroup_storage_type stype;
 	struct bpf_map *map;
+	u32 pages;
 
 	if (!storage)
 		return;
 
 	map = &storage->map->map;
-	pages = round_up(sizeof(struct bpf_cgroup_storage) +
-			 sizeof(struct bpf_storage_buffer) +
-			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
+
+	bpf_cgroup_storage_calculate_size(map, &pages);
 	bpf_map_uncharge_memlock(map, pages);
 
-	kfree_rcu(storage->buf, rcu);
-	kfree_rcu(storage, rcu);
+	stype = cgroup_storage_type(map);
+	if (stype == BPF_CGROUP_STORAGE_SHARED)
+		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
+	else
+		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
 }
 
 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
@@ ... @@
 		return;
 
 	storage->key.attach_type = type;
-	storage->key.cgroup_inode_id = cgroup->kn->id.id;
+	storage->key.cgroup_inode_id = cgroup_id(cgroup);
 
 	map = storage->map;
 
 	spin_lock_bh(&map->lock);
 	WARN_ON(cgroup_storage_insert(map, storage));
-	list_add(&storage->list, &map->list);
+	list_add(&storage->list_map, &map->list);
+	list_add(&storage->list_cg, &cgroup->bpf.storages);
 	spin_unlock_bh(&map->lock);
 }
 
@@ ... @@
 	root = &map->root;
 	rb_erase(&storage->node, root);
 
-	list_del(&storage->list);
+	list_del(&storage->list_map);
+	list_del(&storage->list_cg);
 	spin_unlock_bh(&map->lock);
 }
 
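The storage key records the cgroup via cgroup_id(), the cgroup's kernfs/inode ID. Userspace can recover the same ID for a given cgroupfs directory from its file handle; a hedged sketch of that trick (the path is caller-supplied and error handling is minimal; the kernel's BPF selftests use a similar approach):

```c
#define _GNU_SOURCE
#include <fcntl.h>		/* name_to_handle_at(), struct file_handle */
#include <linux/types.h>	/* __u64 */
#include <string.h>

/* Returns the cgroup ID usable as a map key, or 0 on failure. */
static __u64 get_cgroup_id(const char *cgroup_path)
{
	char buf[sizeof(struct file_handle) + sizeof(__u64)];
	struct file_handle *fh = (struct file_handle *)buf;
	__u64 id = 0;
	int mount_id;

	memset(buf, 0, sizeof(buf));
	fh->handle_bytes = sizeof(__u64);
	if (name_to_handle_at(AT_FDCWD, cgroup_path, fh, &mount_id, 0))
		return 0;

	memcpy(&id, fh->f_handle, sizeof(id));
	return id;
}
```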
|---|