```diff
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Minimal file system backend for holding eBPF maps and programs,
  * used by bpf(2) object pinning.
@@ ... @@
  * Authors:
  *
  *	Daniel Borkmann <daniel@iogearbox.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
  */
 
 #include <linux/init.h>
@@ ... @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/kdev_t.h>
-#include <linux/parser.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
+#include "preload/bpf_preload.h"
 
 enum bpf_type {
 	BPF_TYPE_UNSPEC	= 0,
 	BPF_TYPE_PROG,
 	BPF_TYPE_MAP,
+	BPF_TYPE_LINK,
 };
 
 static void *bpf_any_get(void *raw, enum bpf_type type)
 {
 	switch (type) {
 	case BPF_TYPE_PROG:
-		raw = bpf_prog_inc(raw);
+		bpf_prog_inc(raw);
 		break;
 	case BPF_TYPE_MAP:
-		raw = bpf_map_inc(raw, true);
+		bpf_map_inc_with_uref(raw);
+		break;
+	case BPF_TYPE_LINK:
+		bpf_link_inc(raw);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ ... @@
 	case BPF_TYPE_MAP:
 		bpf_map_put_with_uref(raw);
 		break;
+	case BPF_TYPE_LINK:
+		bpf_link_put(raw);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		break;
@@ ... @@
 {
 	void *raw;
 
-	*type = BPF_TYPE_MAP;
 	raw = bpf_map_get_with_uref(ufd);
-	if (IS_ERR(raw)) {
-		*type = BPF_TYPE_PROG;
-		raw = bpf_prog_get(ufd);
+	if (!IS_ERR(raw)) {
+		*type = BPF_TYPE_MAP;
+		return raw;
 	}
 
-	return raw;
+	raw = bpf_prog_get(ufd);
+	if (!IS_ERR(raw)) {
+		*type = BPF_TYPE_PROG;
+		return raw;
+	}
+
+	raw = bpf_link_get_from_fd(ufd);
+	if (!IS_ERR(raw)) {
+		*type = BPF_TYPE_LINK;
+		return raw;
+	}
+
+	return ERR_PTR(-EINVAL);
 }
 
 static const struct inode_operations bpf_dir_iops;
 
 static const struct inode_operations bpf_prog_iops = { };
 static const struct inode_operations bpf_map_iops  = { };
+static const struct inode_operations bpf_link_iops  = { };
 
 static struct inode *bpf_get_inode(struct super_block *sb,
 				   const struct inode *dir,
@@ ... @@
 		*type = BPF_TYPE_PROG;
 	else if (inode->i_op == &bpf_map_iops)
 		*type = BPF_TYPE_MAP;
+	else if (inode->i_op == &bpf_link_iops)
+		*type = BPF_TYPE_LINK;
 	else
 		return -EACCES;
 
@@ ... @@
 			     &bpffs_map_fops : &bpffs_obj_fops);
 }
 
+static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
+{
+	struct bpf_link *link = arg;
+
+	return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
+			     bpf_link_is_iter(link) ?
+			     &bpf_iter_fops : &bpffs_obj_fops);
+}
+
 static struct dentry *
 bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
 {
 	/* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
-	 * extensions.
+	 * extensions. That allows popoulate_bpffs() create special files.
 	 */
-	if (strchr(dentry->d_name.name, '.'))
+	if ((dir->i_mode & S_IALLUGO) &&
+	    strchr(dentry->d_name.name, '.'))
 		return ERR_PTR(-EPERM);
 
 	return simple_lookup(dir, dentry, flags);
@@ ... @@
 	.unlink		= simple_unlink,
 };
 
-static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
+/* pin iterator link into bpffs */
+static int bpf_iter_link_pin_kernel(struct dentry *parent,
+				    const char *name, struct bpf_link *link)
+{
+	umode_t mode = S_IFREG | S_IRUSR;
+	struct dentry *dentry;
+	int ret;
+
+	inode_lock(parent->d_inode);
+	dentry = lookup_one_len(name, parent, strlen(name));
+	if (IS_ERR(dentry)) {
+		inode_unlock(parent->d_inode);
+		return PTR_ERR(dentry);
+	}
+	ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops,
+			    &bpf_iter_fops);
+	dput(dentry);
+	inode_unlock(parent->d_inode);
+	return ret;
+}
+
```
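The new `bpf_iter_link_pin_kernel()` helper lets the kernel itself pin an iterator link under the bpffs root, wired to `bpf_iter_fops` so that reading the resulting file runs the iterator program. As a rough userspace illustration (not part of the patch), consuming such a pinned iterator is an ordinary open/read loop; the `progs.debug` path below assumes the name used by the mainline preloader and is only an example:

```c
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	/* Example path: the in-tree preloader pins iterators as
	 * progs.debug and maps.debug; any pinned bpf_iter link works. */
	int fd = open("/sys/fs/bpf/progs.debug", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Each read() advances the iterator and streams its text output. */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
```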
|---|
```diff
+static int bpf_obj_do_pin(const char __user *pathname, void *raw,
 			  enum bpf_type type)
 {
 	struct dentry *dentry;
@@ ... @@
 	umode_t mode;
 	int ret;
 
-	dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0);
+	dentry = user_path_create(AT_FDCWD, pathname, &path, 0);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
@@ ... @@
 	case BPF_TYPE_MAP:
 		ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
 		break;
+	case BPF_TYPE_LINK:
+		ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
+		break;
 	default:
 		ret = -EPERM;
 	}
@@ ... @@
 
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
 {
-	struct filename *pname;
 	enum bpf_type type;
 	void *raw;
 	int ret;
 
-	pname = getname(pathname);
-	if (IS_ERR(pname))
-		return PTR_ERR(pname);
-
 	raw = bpf_fd_probe_obj(ufd, &type);
-	if (IS_ERR(raw)) {
-		ret = PTR_ERR(raw);
-		goto out;
-	}
+	if (IS_ERR(raw))
+		return PTR_ERR(raw);
 
-	ret = bpf_obj_do_pin(pname, raw, type);
+	ret = bpf_obj_do_pin(pathname, raw, type);
 	if (ret != 0)
 		bpf_any_put(raw, type);
-out:
-	putname(pname);
+
 	return ret;
 }
 
-static void *bpf_obj_do_get(const struct filename *pathname,
+static void *bpf_obj_do_get(const char __user *pathname,
 			    enum bpf_type *type, int flags)
 {
 	struct inode *inode;
```
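With `bpf_fd_probe_obj()` now trying map, then program, then link, the same `BPF_OBJ_PIN` command covers all three object types. A minimal userspace sketch of the call path into `bpf_obj_pin_user()`; the helper name is illustrative, the syscall and attribute fields are the standard bpf(2) uapi:

```c
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: pin any BPF fd (map, prog, or link) at path. */
static int bpf_obj_pin_fd(int fd, const char *path)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.bpf_fd = fd;
	attr.pathname = (uint64_t)(uintptr_t)path;

	/* Lands in bpf_obj_pin_user(); the kernel probes the fd as a map,
	 * then a program, then (after this patch) a link. */
	return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
}
```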
|---|
```diff
@@ ... @@
 	void *raw;
 	int ret;
 
-	ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path);
+	ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ ... @@
 int bpf_obj_get_user(const char __user *pathname, int flags)
 {
 	enum bpf_type type = BPF_TYPE_UNSPEC;
-	struct filename *pname;
-	int ret = -ENOENT;
 	int f_flags;
 	void *raw;
+	int ret;
 
 	f_flags = bpf_get_file_flag(flags);
 	if (f_flags < 0)
 		return f_flags;
 
-	pname = getname(pathname);
-	if (IS_ERR(pname))
-		return PTR_ERR(pname);
-
-	raw = bpf_obj_do_get(pname, &type, f_flags);
-	if (IS_ERR(raw)) {
-		ret = PTR_ERR(raw);
-		goto out;
-	}
+	raw = bpf_obj_do_get(pathname, &type, f_flags);
+	if (IS_ERR(raw))
+		return PTR_ERR(raw);
 
 	if (type == BPF_TYPE_PROG)
 		ret = bpf_prog_new_fd(raw);
 	else if (type == BPF_TYPE_MAP)
 		ret = bpf_map_new_fd(raw, f_flags);
+	else if (type == BPF_TYPE_LINK)
+		ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw);
 	else
-		goto out;
+		return -ENOENT;
 
 	if (ret < 0)
 		bpf_any_put(raw, type);
-out:
-	putname(pname);
 	return ret;
 }
 
```
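Note that `bpf_obj_get_user()` hands back a link fd only when the open flags resolve to `O_RDWR`, i.e. when `BPF_OBJ_GET` is issued with `file_flags` of 0; `BPF_F_RDONLY`/`BPF_F_WRONLY` still work for maps but are rejected for pinned links. A hedged userspace sketch (helper name illustrative):

```c
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Hypothetical helper: retrieve a new fd for an object pinned at path. */
static int bpf_obj_get_path(const char *path)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.pathname = (uint64_t)(uintptr_t)path;
	attr.file_flags = 0;	/* 0 -> O_RDWR; required for pinned links */

	return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}
```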
|---|
```diff
@@ ... @@
 		return ERR_PTR(ret);
 
 	if (inode->i_op == &bpf_map_iops)
+		return ERR_PTR(-EINVAL);
+	if (inode->i_op == &bpf_link_iops)
 		return ERR_PTR(-EINVAL);
 	if (inode->i_op != &bpf_prog_iops)
 		return ERR_PTR(-EACCES);
@@ ... @@
 	if (!bpf_prog_get_ok(prog, &type, false))
 		return ERR_PTR(-EINVAL);
 
-	return bpf_prog_inc(prog);
+	bpf_prog_inc(prog);
+	return prog;
 }
 
 struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
@@ ... @@
 	return 0;
 }
 
-static void bpf_destroy_inode_deferred(struct rcu_head *head)
+static void bpf_free_inode(struct inode *inode)
 {
-	struct inode *inode = container_of(head, struct inode, i_rcu);
 	enum bpf_type type;
 
 	if (S_ISLNK(inode->i_mode))
@@ ... @@
 	free_inode_nonrcu(inode);
 }
 
-static void bpf_destroy_inode(struct inode *inode)
-{
-	call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred);
-}
-
 static const struct super_operations bpf_super_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= generic_delete_inode,
 	.show_options	= bpf_show_options,
-	.destroy_inode	= bpf_destroy_inode,
+	.free_inode	= bpf_free_inode,
 };
 
 enum {
 	OPT_MODE,
-	OPT_ERR,
 };
 
-static const match_table_t bpf_mount_tokens = {
-	{ OPT_MODE, "mode=%o" },
-	{ OPT_ERR, NULL },
+static const struct fs_parameter_spec bpf_fs_parameters[] = {
+	fsparam_u32oct	("mode",			OPT_MODE),
+	{}
 };
 
 struct bpf_mount_opts {
 	umode_t mode;
 };
 
-static int bpf_parse_options(char *data, struct bpf_mount_opts *opts)
+static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
-	substring_t args[MAX_OPT_ARGS];
-	int option, token;
-	char *ptr;
+	struct bpf_mount_opts *opts = fc->fs_private;
+	struct fs_parse_result result;
+	int opt;
 
-	opts->mode = S_IRWXUGO;
-
-	while ((ptr = strsep(&data, ",")) != NULL) {
-		if (!*ptr)
-			continue;
-
-		token = match_token(ptr, bpf_mount_tokens, args);
-		switch (token) {
-		case OPT_MODE:
-			if (match_octal(&args[0], &option))
-				return -EINVAL;
-			opts->mode = option & S_IALLUGO;
-			break;
+	opt = fs_parse(fc, bpf_fs_parameters, param, &result);
+	if (opt < 0)
 		/* We might like to report bad mount options here, but
 		 * traditionally we've ignored all mount options, so we'd
 		 * better continue to ignore non-existing options for bpf.
 		 */
-		}
+		return opt == -ENOPARAM ? 0 : opt;
+
+	switch (opt) {
+	case OPT_MODE:
+		opts->mode = result.uint_32 & S_IALLUGO;
+		break;
 	}
 
 	return 0;
 }
 
```
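`bpf_parse_param()` keeps the old behaviour: only an octal `mode` option is honoured, and unrecognised options are silently ignored (`-ENOPARAM` is mapped to success). For illustration, a legacy mount(2) call that exercises this path could look like the sketch below; the mount point and mode value are arbitrary examples:

```c
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* The source string ("bpf") is conventional; only the fstype and
	 * the "mode" option matter to bpf_parse_param(). */
	if (mount("bpf", "/sys/fs/bpf", "bpf", 0, "mode=0700")) {
		perror("mount");
		return 1;
	}
	return 0;
}
```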
|---|
```diff
-static int bpf_fill_super(struct super_block *sb, void *data, int silent)
+struct bpf_preload_ops *bpf_preload_ops;
+EXPORT_SYMBOL_GPL(bpf_preload_ops);
+
+static bool bpf_preload_mod_get(void)
+{
+	/* If bpf_preload.ko wasn't loaded earlier then load it now.
+	 * When bpf_preload is built into vmlinux the module's __init
+	 * function will populate it.
+	 */
+	if (!bpf_preload_ops) {
+		request_module("bpf_preload");
+		if (!bpf_preload_ops)
+			return false;
+	}
+	/* And grab the reference, so the module doesn't disappear while the
+	 * kernel is interacting with the kernel module and its UMD.
+	 */
+	if (!try_module_get(bpf_preload_ops->owner)) {
+		pr_err("bpf_preload module get failed.\n");
+		return false;
+	}
+	return true;
+}
+
+static void bpf_preload_mod_put(void)
+{
+	if (bpf_preload_ops)
+		/* now user can "rmmod bpf_preload" if necessary */
+		module_put(bpf_preload_ops->owner);
+}
+
+static DEFINE_MUTEX(bpf_preload_lock);
+
+static int populate_bpffs(struct dentry *parent)
+{
+	struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {};
+	struct bpf_link *links[BPF_PRELOAD_LINKS] = {};
+	int err = 0, i;
+
+	/* grab the mutex to make sure the kernel interactions with bpf_preload
+	 * UMD are serialized
+	 */
+	mutex_lock(&bpf_preload_lock);
+
+	/* if bpf_preload.ko wasn't built into vmlinux then load it */
+	if (!bpf_preload_mod_get())
+		goto out;
+
+	if (!bpf_preload_ops->info.tgid) {
+		/* preload() will start UMD that will load BPF iterator programs */
+		err = bpf_preload_ops->preload(objs);
+		if (err)
+			goto out_put;
+		for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
+			links[i] = bpf_link_by_id(objs[i].link_id);
+			if (IS_ERR(links[i])) {
+				err = PTR_ERR(links[i]);
+				goto out_put;
+			}
+		}
+		for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
+			err = bpf_iter_link_pin_kernel(parent,
+						       objs[i].link_name, links[i]);
+			if (err)
+				goto out_put;
+			/* do not unlink successfully pinned links even
+			 * if later link fails to pin
+			 */
+			links[i] = NULL;
+		}
+		/* finish() will tell UMD process to exit */
+		err = bpf_preload_ops->finish();
+		if (err)
+			goto out_put;
+	}
+out_put:
+	bpf_preload_mod_put();
+out:
+	mutex_unlock(&bpf_preload_lock);
+	for (i = 0; i < BPF_PRELOAD_LINKS && err; i++)
+		if (!IS_ERR_OR_NULL(links[i]))
+			bpf_link_put(links[i]);
+	return err;
+}
+
```
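`populate_bpffs()` only consumes whatever module has published itself through the `bpf_preload_ops` pointer: the diff dereferences `info.tgid`, `preload()`, `finish()` and `owner`, and expects `objs[]` to come back filled with `link_name`/`link_id` pairs. A provider module would therefore look roughly like the sketch below; the `my_*` names are invented for illustration, and the real in-tree `bpf_preload` module additionally manages a user mode driver:

```c
#include <linux/module.h>
#include "preload/bpf_preload.h"	/* bpf_preload_ops, bpf_preload_info */

/* Hypothetical provider; all my_* identifiers are illustrative only. */
static int my_preload(struct bpf_preload_info *objs)
{
	/* Load the iterator programs (e.g. via a UMD) and report them back:
	 * objs[i].link_id and objs[i].link_name are what populate_bpffs()
	 * uses to look up and pin each link. */
	return 0;
}

static int my_finish(void)
{
	/* Tell the helper process to exit and release its resources. */
	return 0;
}

static struct bpf_preload_ops my_ops = {
	.preload = my_preload,
	.finish  = my_finish,
	.owner   = THIS_MODULE,
};

static int __init my_preload_init(void)
{
	/* populate_bpffs() finds the provider through this pointer. */
	bpf_preload_ops = &my_ops;
	return 0;
}

static void __exit my_preload_exit(void)
{
	bpf_preload_ops = NULL;
}

module_init(my_preload_init);
module_exit(my_preload_exit);
MODULE_LICENSE("GPL");
```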
|---|
```diff
+static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
 {
 	static const struct tree_descr bpf_rfiles[] = { { "" } };
-	struct bpf_mount_opts opts;
+	struct bpf_mount_opts *opts = fc->fs_private;
 	struct inode *inode;
 	int ret;
-
-	ret = bpf_parse_options(data, &opts);
-	if (ret)
-		return ret;
 
 	ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
 	if (ret)
@@ ... @@
 	inode = sb->s_root->d_inode;
 	inode->i_op = &bpf_dir_iops;
 	inode->i_mode &= ~S_IALLUGO;
-	inode->i_mode |= S_ISVTX | opts.mode;
-
+	populate_bpffs(sb->s_root);
+	inode->i_mode |= S_ISVTX | opts->mode;
 	return 0;
 }
 
-static struct dentry *bpf_mount(struct file_system_type *type, int flags,
-				const char *dev_name, void *data)
+static int bpf_get_tree(struct fs_context *fc)
 {
-	return mount_nodev(type, flags, data, bpf_fill_super);
+	return get_tree_nodev(fc, bpf_fill_super);
+}
+
+static void bpf_free_fc(struct fs_context *fc)
+{
+	kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations bpf_context_ops = {
+	.free		= bpf_free_fc,
+	.parse_param	= bpf_parse_param,
+	.get_tree	= bpf_get_tree,
+};
+
+/*
+ * Set up the filesystem mount context.
+ */
+static int bpf_init_fs_context(struct fs_context *fc)
+{
+	struct bpf_mount_opts *opts;
+
+	opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL);
+	if (!opts)
+		return -ENOMEM;
+
+	opts->mode = S_IRWXUGO;
+
+	fc->fs_private = opts;
+	fc->ops = &bpf_context_ops;
+	return 0;
 }
 
 static struct file_system_type bpf_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "bpf",
-	.mount		= bpf_mount,
+	.init_fs_context = bpf_init_fs_context,
+	.parameters	= bpf_fs_parameters,
 	.kill_sb	= kill_litter_super,
 };
 
```
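Because bpffs now registers `init_fs_context`/`parameters` instead of a `.mount` callback, it can also be driven through the new mount API, with `mode` passed via `fsconfig()`. The following userspace sketch assumes a libc that exposes the `SYS_fsopen` family of syscall numbers; the target path and mode are examples:

```c
#include <fcntl.h>		/* AT_FDCWD */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/mount.h>	/* FSCONFIG_*, MOVE_MOUNT_F_EMPTY_PATH */

int main(void)
{
	int fsfd, mfd;

	fsfd = syscall(SYS_fsopen, "bpf", 0);
	if (fsfd < 0) { perror("fsopen"); return 1; }

	/* "mode" is parsed by bpf_parse_param() as an octal u32. */
	syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "mode", "0700", 0);
	syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);

	mfd = syscall(SYS_fsmount, fsfd, 0, 0);
	if (mfd < 0) { perror("fsmount"); return 1; }

	if (syscall(SYS_move_mount, mfd, "", AT_FDCWD, "/sys/fs/bpf",
		    MOVE_MOUNT_F_EMPTY_PATH) < 0) {
		perror("move_mount");
		return 1;
	}
	return 0;
}
```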
|---|
```diff
@@ ... @@
 {
 	int ret;
 
+	mutex_init(&bpf_preload_lock);
+
 	ret = sysfs_create_mount_point(fs_kobj, "bpf");
 	if (ret)
 		return ret;
```
|---|