| .. | .. | 
|---|
|  | 1 | +// SPDX-License-Identifier: GPL-2.0-only | 
|---|
| 1 | 2 | /* | 
|---|
| 2 | 3 | * Minimal file system backend for holding eBPF maps and programs, | 
|---|
| 3 | 4 | * used by bpf(2) object pinning. | 
|---|
| .. | .. | 
|---|
| 5 | 6 | * Authors: | 
|---|
| 6 | 7 | * | 
|---|
| 7 | 8 | *	Daniel Borkmann <daniel@iogearbox.net> | 
|---|
| 8 |  | - * | 
|---|
| 9 |  | - * This program is free software; you can redistribute it and/or | 
|---|
| 10 |  | - * modify it under the terms of the GNU General Public License | 
|---|
| 11 |  | - * version 2 as published by the Free Software Foundation. | 
|---|
| 12 | 9 | */ | 
|---|
| 13 | 10 |  | 
|---|
| 14 | 11 | #include <linux/init.h> | 
|---|
| .. | .. | 
|---|
| 17 | 14 | #include <linux/mount.h> | 
|---|
| 18 | 15 | #include <linux/namei.h> | 
|---|
| 19 | 16 | #include <linux/fs.h> | 
|---|
|  | 17 | +#include <linux/fs_context.h> | 
|---|
|  | 18 | +#include <linux/fs_parser.h> | 
|---|
| 20 | 19 | #include <linux/kdev_t.h> | 
|---|
| 21 |  | -#include <linux/parser.h> | 
|---|
| 22 | 20 | #include <linux/filter.h> | 
|---|
| 23 | 21 | #include <linux/bpf.h> | 
|---|
| 24 | 22 | #include <linux/bpf_trace.h> | 
|---|
|  | 23 | +#include "preload/bpf_preload.h" | 
|---|
| 25 | 24 |  | 
|---|
| 26 | 25 | enum bpf_type { | 
|---|
| 27 | 26 | BPF_TYPE_UNSPEC	= 0, | 
|---|
| 28 | 27 | BPF_TYPE_PROG, | 
|---|
| 29 | 28 | BPF_TYPE_MAP, | 
|---|
|  | 29 | +	BPF_TYPE_LINK, | 
|---|
| 30 | 30 | }; | 
|---|
| 31 | 31 |  | 
|---|
| 32 | 32 | static void *bpf_any_get(void *raw, enum bpf_type type) | 
|---|
| 33 | 33 | { | 
|---|
| 34 | 34 | switch (type) { | 
|---|
| 35 | 35 | case BPF_TYPE_PROG: | 
|---|
| 36 |  | -		raw = bpf_prog_inc(raw); | 
|---|
|  | 36 | +		bpf_prog_inc(raw); | 
|---|
| 37 | 37 | break; | 
|---|
| 38 | 38 | case BPF_TYPE_MAP: | 
|---|
| 39 |  | -		raw = bpf_map_inc(raw, true); | 
|---|
|  | 39 | +		bpf_map_inc_with_uref(raw); | 
|---|
|  | 40 | +		break; | 
|---|
|  | 41 | +	case BPF_TYPE_LINK: | 
|---|
|  | 42 | +		bpf_link_inc(raw); | 
|---|
| 40 | 43 | break; | 
|---|
| 41 | 44 | default: | 
|---|
| 42 | 45 | WARN_ON_ONCE(1); | 
|---|
| .. | .. | 
|---|
| 55 | 58 | case BPF_TYPE_MAP: | 
|---|
| 56 | 59 | bpf_map_put_with_uref(raw); | 
|---|
| 57 | 60 | break; | 
|---|
|  | 61 | +	case BPF_TYPE_LINK: | 
|---|
|  | 62 | +		bpf_link_put(raw); | 
|---|
|  | 63 | +		break; | 
|---|
| 58 | 64 | default: | 
|---|
| 59 | 65 | WARN_ON_ONCE(1); | 
|---|
| 60 | 66 | break; | 
|---|
| .. | .. | 
|---|
| 65 | 71 | { | 
|---|
| 66 | 72 | void *raw; | 
|---|
| 67 | 73 |  | 
|---|
| 68 |  | -	*type = BPF_TYPE_MAP; | 
|---|
| 69 | 74 | raw = bpf_map_get_with_uref(ufd); | 
|---|
| 70 |  | -	if (IS_ERR(raw)) { | 
|---|
| 71 |  | -		*type = BPF_TYPE_PROG; | 
|---|
| 72 |  | -		raw = bpf_prog_get(ufd); | 
|---|
|  | 75 | +	if (!IS_ERR(raw)) { | 
|---|
|  | 76 | +		*type = BPF_TYPE_MAP; | 
|---|
|  | 77 | +		return raw; | 
|---|
| 73 | 78 | } | 
|---|
| 74 | 79 |  | 
|---|
| 75 |  | -	return raw; | 
|---|
|  | 80 | +	raw = bpf_prog_get(ufd); | 
|---|
|  | 81 | +	if (!IS_ERR(raw)) { | 
|---|
|  | 82 | +		*type = BPF_TYPE_PROG; | 
|---|
|  | 83 | +		return raw; | 
|---|
|  | 84 | +	} | 
|---|
|  | 85 | + | 
|---|
|  | 86 | +	raw = bpf_link_get_from_fd(ufd); | 
|---|
|  | 87 | +	if (!IS_ERR(raw)) { | 
|---|
|  | 88 | +		*type = BPF_TYPE_LINK; | 
|---|
|  | 89 | +		return raw; | 
|---|
|  | 90 | +	} | 
|---|
|  | 91 | + | 
|---|
|  | 92 | +	return ERR_PTR(-EINVAL); | 
|---|
| 76 | 93 | } | 
|---|
| 77 | 94 |  | 
|---|
| 78 | 95 | static const struct inode_operations bpf_dir_iops; | 
|---|
| 79 | 96 |  | 
|---|
| 80 | 97 | static const struct inode_operations bpf_prog_iops = { }; | 
|---|
| 81 | 98 | static const struct inode_operations bpf_map_iops  = { }; | 
|---|
|  | 99 | +static const struct inode_operations bpf_link_iops  = { }; | 
|---|
| 82 | 100 |  | 
|---|
| 83 | 101 | static struct inode *bpf_get_inode(struct super_block *sb, | 
|---|
| 84 | 102 | const struct inode *dir, | 
|---|
| .. | .. | 
|---|
| 116 | 134 | *type = BPF_TYPE_PROG; | 
|---|
| 117 | 135 | else if (inode->i_op == &bpf_map_iops) | 
|---|
| 118 | 136 | *type = BPF_TYPE_MAP; | 
|---|
|  | 137 | +	else if (inode->i_op == &bpf_link_iops) | 
|---|
|  | 138 | +		*type = BPF_TYPE_LINK; | 
|---|
| 119 | 139 | else | 
|---|
| 120 | 140 | return -EACCES; | 
|---|
| 121 | 141 |  | 
|---|
| .. | .. | 
|---|
| 339 | 359 | &bpffs_map_fops : &bpffs_obj_fops); | 
|---|
| 340 | 360 | } | 
|---|
| 341 | 361 |  | 
|---|
|  | 362 | +static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg) | 
|---|
|  | 363 | +{ | 
|---|
|  | 364 | +	struct bpf_link *link = arg; | 
|---|
|  | 365 | + | 
|---|
|  | 366 | +	return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops, | 
|---|
|  | 367 | +			     bpf_link_is_iter(link) ? | 
|---|
|  | 368 | +			     &bpf_iter_fops : &bpffs_obj_fops); | 
|---|
|  | 369 | +} | 
|---|
|  | 370 | + | 
|---|
| 342 | 371 | static struct dentry * | 
|---|
| 343 | 372 | bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) | 
|---|
| 344 | 373 | { | 
|---|
| 345 | 374 | /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future | 
|---|
| 346 |  | -	 * extensions. | 
|---|
|  | 375 | +	 * extensions. That allows populate_bpffs() to create special files. | 
|---|
| 347 | 376 | */ | 
|---|
| 348 |  | -	if (strchr(dentry->d_name.name, '.')) | 
|---|
|  | 377 | +	if ((dir->i_mode & S_IALLUGO) && | 
|---|
|  | 378 | +	    strchr(dentry->d_name.name, '.')) | 
|---|
| 349 | 379 | return ERR_PTR(-EPERM); | 
|---|
| 350 | 380 |  | 
|---|
| 351 | 381 | return simple_lookup(dir, dentry, flags); | 
|---|
| .. | .. | 
|---|
| 383 | 413 | .unlink		= simple_unlink, | 
|---|
| 384 | 414 | }; | 
|---|
| 385 | 415 |  | 
|---|
| 386 |  | -static int bpf_obj_do_pin(const struct filename *pathname, void *raw, | 
|---|
|  | 416 | +/* pin iterator link into bpffs */ | 
|---|
|  | 417 | +static int bpf_iter_link_pin_kernel(struct dentry *parent, | 
|---|
|  | 418 | +				    const char *name, struct bpf_link *link) | 
|---|
|  | 419 | +{ | 
|---|
|  | 420 | +	umode_t mode = S_IFREG | S_IRUSR; | 
|---|
|  | 421 | +	struct dentry *dentry; | 
|---|
|  | 422 | +	int ret; | 
|---|
|  | 423 | + | 
|---|
|  | 424 | +	inode_lock(parent->d_inode); | 
|---|
|  | 425 | +	dentry = lookup_one_len(name, parent, strlen(name)); | 
|---|
|  | 426 | +	if (IS_ERR(dentry)) { | 
|---|
|  | 427 | +		inode_unlock(parent->d_inode); | 
|---|
|  | 428 | +		return PTR_ERR(dentry); | 
|---|
|  | 429 | +	} | 
|---|
|  | 430 | +	ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, | 
|---|
|  | 431 | +			    &bpf_iter_fops); | 
|---|
|  | 432 | +	dput(dentry); | 
|---|
|  | 433 | +	inode_unlock(parent->d_inode); | 
|---|
|  | 434 | +	return ret; | 
|---|
|  | 435 | +} | 
|---|
|  | 436 | + | 
|---|
|  | 437 | +static int bpf_obj_do_pin(const char __user *pathname, void *raw, | 
|---|
| 387 | 438 | enum bpf_type type) | 
|---|
| 388 | 439 | { | 
|---|
| 389 | 440 | struct dentry *dentry; | 
|---|
| .. | .. | 
|---|
| 392 | 443 | umode_t mode; | 
|---|
| 393 | 444 | int ret; | 
|---|
| 394 | 445 |  | 
|---|
| 395 |  | -	dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); | 
|---|
|  | 446 | +	dentry = user_path_create(AT_FDCWD, pathname, &path, 0); | 
|---|
| 396 | 447 | if (IS_ERR(dentry)) | 
|---|
| 397 | 448 | return PTR_ERR(dentry); | 
|---|
| 398 | 449 |  | 
|---|
| .. | .. | 
|---|
| 415 | 466 | case BPF_TYPE_MAP: | 
|---|
| 416 | 467 | ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); | 
|---|
| 417 | 468 | break; | 
|---|
|  | 469 | +	case BPF_TYPE_LINK: | 
|---|
|  | 470 | +		ret = vfs_mkobj(dentry, mode, bpf_mklink, raw); | 
|---|
|  | 471 | +		break; | 
|---|
| 418 | 472 | default: | 
|---|
| 419 | 473 | ret = -EPERM; | 
|---|
| 420 | 474 | } | 
|---|
| .. | .. | 
|---|
| 425 | 479 |  | 
|---|
| 426 | 480 | int bpf_obj_pin_user(u32 ufd, const char __user *pathname) | 
|---|
| 427 | 481 | { | 
|---|
| 428 |  | -	struct filename *pname; | 
|---|
| 429 | 482 | enum bpf_type type; | 
|---|
| 430 | 483 | void *raw; | 
|---|
| 431 | 484 | int ret; | 
|---|
| 432 | 485 |  | 
|---|
| 433 |  | -	pname = getname(pathname); | 
|---|
| 434 |  | -	if (IS_ERR(pname)) | 
|---|
| 435 |  | -		return PTR_ERR(pname); | 
|---|
| 436 |  | - | 
|---|
| 437 | 486 | raw = bpf_fd_probe_obj(ufd, &type); | 
|---|
| 438 |  | -	if (IS_ERR(raw)) { | 
|---|
| 439 |  | -		ret = PTR_ERR(raw); | 
|---|
| 440 |  | -		goto out; | 
|---|
| 441 |  | -	} | 
|---|
|  | 487 | +	if (IS_ERR(raw)) | 
|---|
|  | 488 | +		return PTR_ERR(raw); | 
|---|
| 442 | 489 |  | 
|---|
| 443 |  | -	ret = bpf_obj_do_pin(pname, raw, type); | 
|---|
|  | 490 | +	ret = bpf_obj_do_pin(pathname, raw, type); | 
|---|
| 444 | 491 | if (ret != 0) | 
|---|
| 445 | 492 | bpf_any_put(raw, type); | 
|---|
| 446 |  | -out: | 
|---|
| 447 |  | -	putname(pname); | 
|---|
|  | 493 | + | 
|---|
| 448 | 494 | return ret; | 
|---|
| 449 | 495 | } | 
|---|
| 450 | 496 |  | 
|---|
| 451 |  | -static void *bpf_obj_do_get(const struct filename *pathname, | 
|---|
|  | 497 | +static void *bpf_obj_do_get(const char __user *pathname, | 
|---|
| 452 | 498 | enum bpf_type *type, int flags) | 
|---|
| 453 | 499 | { | 
|---|
| 454 | 500 | struct inode *inode; | 
|---|
| .. | .. | 
|---|
| 456 | 502 | void *raw; | 
|---|
| 457 | 503 | int ret; | 
|---|
| 458 | 504 |  | 
|---|
| 459 |  | -	ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); | 
|---|
|  | 505 | +	ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path); | 
|---|
| 460 | 506 | if (ret) | 
|---|
| 461 | 507 | return ERR_PTR(ret); | 
|---|
| 462 | 508 |  | 
|---|
| .. | .. | 
|---|
| 483 | 529 | int bpf_obj_get_user(const char __user *pathname, int flags) | 
|---|
| 484 | 530 | { | 
|---|
| 485 | 531 | enum bpf_type type = BPF_TYPE_UNSPEC; | 
|---|
| 486 |  | -	struct filename *pname; | 
|---|
| 487 |  | -	int ret = -ENOENT; | 
|---|
| 488 | 532 | int f_flags; | 
|---|
| 489 | 533 | void *raw; | 
|---|
|  | 534 | +	int ret; | 
|---|
| 490 | 535 |  | 
|---|
| 491 | 536 | f_flags = bpf_get_file_flag(flags); | 
|---|
| 492 | 537 | if (f_flags < 0) | 
|---|
| 493 | 538 | return f_flags; | 
|---|
| 494 | 539 |  | 
|---|
| 495 |  | -	pname = getname(pathname); | 
|---|
| 496 |  | -	if (IS_ERR(pname)) | 
|---|
| 497 |  | -		return PTR_ERR(pname); | 
|---|
| 498 |  | - | 
|---|
| 499 |  | -	raw = bpf_obj_do_get(pname, &type, f_flags); | 
|---|
| 500 |  | -	if (IS_ERR(raw)) { | 
|---|
| 501 |  | -		ret = PTR_ERR(raw); | 
|---|
| 502 |  | -		goto out; | 
|---|
| 503 |  | -	} | 
|---|
|  | 540 | +	raw = bpf_obj_do_get(pathname, &type, f_flags); | 
|---|
|  | 541 | +	if (IS_ERR(raw)) | 
|---|
|  | 542 | +		return PTR_ERR(raw); | 
|---|
| 504 | 543 |  | 
|---|
| 505 | 544 | if (type == BPF_TYPE_PROG) | 
|---|
| 506 | 545 | ret = bpf_prog_new_fd(raw); | 
|---|
| 507 | 546 | else if (type == BPF_TYPE_MAP) | 
|---|
| 508 | 547 | ret = bpf_map_new_fd(raw, f_flags); | 
|---|
|  | 548 | +	else if (type == BPF_TYPE_LINK) | 
|---|
|  | 549 | +		ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw); | 
|---|
| 509 | 550 | else | 
|---|
| 510 |  | -		goto out; | 
|---|
|  | 551 | +		return -ENOENT; | 
|---|
| 511 | 552 |  | 
|---|
| 512 | 553 | if (ret < 0) | 
|---|
| 513 | 554 | bpf_any_put(raw, type); | 
|---|
| 514 |  | -out: | 
|---|
| 515 |  | -	putname(pname); | 
|---|
| 516 | 555 | return ret; | 
|---|
| 517 | 556 | } | 
|---|
| 518 | 557 |  | 
|---|
| .. | .. | 
|---|
| 524 | 563 | return ERR_PTR(ret); | 
|---|
| 525 | 564 |  | 
|---|
| 526 | 565 | if (inode->i_op == &bpf_map_iops) | 
|---|
|  | 566 | +		return ERR_PTR(-EINVAL); | 
|---|
|  | 567 | +	if (inode->i_op == &bpf_link_iops) | 
|---|
| 527 | 568 | return ERR_PTR(-EINVAL); | 
|---|
| 528 | 569 | if (inode->i_op != &bpf_prog_iops) | 
|---|
| 529 | 570 | return ERR_PTR(-EACCES); | 
|---|
| .. | .. | 
|---|
| 537 | 578 | if (!bpf_prog_get_ok(prog, &type, false)) | 
|---|
| 538 | 579 | return ERR_PTR(-EINVAL); | 
|---|
| 539 | 580 |  | 
|---|
| 540 |  | -	return bpf_prog_inc(prog); | 
|---|
|  | 581 | +	bpf_prog_inc(prog); | 
|---|
|  | 582 | +	return prog; | 
|---|
| 541 | 583 | } | 
|---|
| 542 | 584 |  | 
|---|
| 543 | 585 | struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) | 
|---|
| .. | .. | 
|---|
| 567 | 609 | return 0; | 
|---|
| 568 | 610 | } | 
|---|
| 569 | 611 |  | 
|---|
| 570 |  | -static void bpf_destroy_inode_deferred(struct rcu_head *head) | 
|---|
|  | 612 | +static void bpf_free_inode(struct inode *inode) | 
|---|
| 571 | 613 | { | 
|---|
| 572 |  | -	struct inode *inode = container_of(head, struct inode, i_rcu); | 
|---|
| 573 | 614 | enum bpf_type type; | 
|---|
| 574 | 615 |  | 
|---|
| 575 | 616 | if (S_ISLNK(inode->i_mode)) | 
|---|
| .. | .. | 
|---|
| 579 | 620 | free_inode_nonrcu(inode); | 
|---|
| 580 | 621 | } | 
|---|
| 581 | 622 |  | 
|---|
| 582 |  | -static void bpf_destroy_inode(struct inode *inode) | 
|---|
| 583 |  | -{ | 
|---|
| 584 |  | -	call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred); | 
|---|
| 585 |  | -} | 
|---|
| 586 |  | - | 
|---|
| 587 | 623 | static const struct super_operations bpf_super_ops = { | 
|---|
| 588 | 624 | .statfs		= simple_statfs, | 
|---|
| 589 | 625 | .drop_inode	= generic_delete_inode, | 
|---|
| 590 | 626 | .show_options	= bpf_show_options, | 
|---|
| 591 |  | -	.destroy_inode	= bpf_destroy_inode, | 
|---|
|  | 627 | +	.free_inode	= bpf_free_inode, | 
|---|
| 592 | 628 | }; | 
|---|
| 593 | 629 |  | 
|---|
| 594 | 630 | enum { | 
|---|
| 595 | 631 | OPT_MODE, | 
|---|
| 596 |  | -	OPT_ERR, | 
|---|
| 597 | 632 | }; | 
|---|
| 598 | 633 |  | 
|---|
| 599 |  | -static const match_table_t bpf_mount_tokens = { | 
|---|
| 600 |  | -	{ OPT_MODE, "mode=%o" }, | 
|---|
| 601 |  | -	{ OPT_ERR, NULL }, | 
|---|
|  | 634 | +static const struct fs_parameter_spec bpf_fs_parameters[] = { | 
|---|
|  | 635 | +	fsparam_u32oct	("mode",			OPT_MODE), | 
|---|
|  | 636 | +	{} | 
|---|
| 602 | 637 | }; | 
|---|
| 603 | 638 |  | 
|---|
| 604 | 639 | struct bpf_mount_opts { | 
|---|
| 605 | 640 | umode_t mode; | 
|---|
| 606 | 641 | }; | 
|---|
| 607 | 642 |  | 
|---|
| 608 |  | -static int bpf_parse_options(char *data, struct bpf_mount_opts *opts) | 
|---|
|  | 643 | +static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) | 
|---|
| 609 | 644 | { | 
|---|
| 610 |  | -	substring_t args[MAX_OPT_ARGS]; | 
|---|
| 611 |  | -	int option, token; | 
|---|
| 612 |  | -	char *ptr; | 
|---|
|  | 645 | +	struct bpf_mount_opts *opts = fc->fs_private; | 
|---|
|  | 646 | +	struct fs_parse_result result; | 
|---|
|  | 647 | +	int opt; | 
|---|
| 613 | 648 |  | 
|---|
| 614 |  | -	opts->mode = S_IRWXUGO; | 
|---|
| 615 |  | - | 
|---|
| 616 |  | -	while ((ptr = strsep(&data, ",")) != NULL) { | 
|---|
| 617 |  | -		if (!*ptr) | 
|---|
| 618 |  | -			continue; | 
|---|
| 619 |  | - | 
|---|
| 620 |  | -		token = match_token(ptr, bpf_mount_tokens, args); | 
|---|
| 621 |  | -		switch (token) { | 
|---|
| 622 |  | -		case OPT_MODE: | 
|---|
| 623 |  | -			if (match_octal(&args[0], &option)) | 
|---|
| 624 |  | -				return -EINVAL; | 
|---|
| 625 |  | -			opts->mode = option & S_IALLUGO; | 
|---|
| 626 |  | -			break; | 
|---|
|  | 649 | +	opt = fs_parse(fc, bpf_fs_parameters, param, &result); | 
|---|
|  | 650 | +	if (opt < 0) | 
|---|
| 627 | 651 | /* We might like to report bad mount options here, but | 
|---|
| 628 | 652 | * traditionally we've ignored all mount options, so we'd | 
|---|
| 629 | 653 | * better continue to ignore non-existing options for bpf. | 
|---|
| 630 | 654 | */ | 
|---|
| 631 |  | -		} | 
|---|
|  | 655 | +		return opt == -ENOPARAM ? 0 : opt; | 
|---|
|  | 656 | + | 
|---|
|  | 657 | +	switch (opt) { | 
|---|
|  | 658 | +	case OPT_MODE: | 
|---|
|  | 659 | +		opts->mode = result.uint_32 & S_IALLUGO; | 
|---|
|  | 660 | +		break; | 
|---|
| 632 | 661 | } | 
|---|
| 633 | 662 |  | 
|---|
| 634 | 663 | return 0; | 
|---|
| 635 | 664 | } | 
|---|
| 636 | 665 |  | 
|---|
| 637 |  | -static int bpf_fill_super(struct super_block *sb, void *data, int silent) | 
|---|
|  | 666 | +struct bpf_preload_ops *bpf_preload_ops; | 
|---|
|  | 667 | +EXPORT_SYMBOL_GPL(bpf_preload_ops); | 
|---|
|  | 668 | + | 
|---|
|  | 669 | +static bool bpf_preload_mod_get(void) | 
|---|
|  | 670 | +{ | 
|---|
|  | 671 | +	/* If bpf_preload.ko wasn't loaded earlier then load it now. | 
|---|
|  | 672 | +	 * When bpf_preload is built into vmlinux the module's __init | 
|---|
|  | 673 | +	 * function will populate it. | 
|---|
|  | 674 | +	 */ | 
|---|
|  | 675 | +	if (!bpf_preload_ops) { | 
|---|
|  | 676 | +		request_module("bpf_preload"); | 
|---|
|  | 677 | +		if (!bpf_preload_ops) | 
|---|
|  | 678 | +			return false; | 
|---|
|  | 679 | +	} | 
|---|
|  | 680 | +	/* And grab the reference, so the module doesn't disappear while the | 
|---|
|  | 681 | +	 * kernel is interacting with the kernel module and its UMD. | 
|---|
|  | 682 | +	 */ | 
|---|
|  | 683 | +	if (!try_module_get(bpf_preload_ops->owner)) { | 
|---|
|  | 684 | +		pr_err("bpf_preload module get failed.\n"); | 
|---|
|  | 685 | +		return false; | 
|---|
|  | 686 | +	} | 
|---|
|  | 687 | +	return true; | 
|---|
|  | 688 | +} | 
|---|
|  | 689 | + | 
|---|
|  | 690 | +static void bpf_preload_mod_put(void) | 
|---|
|  | 691 | +{ | 
|---|
|  | 692 | +	if (bpf_preload_ops) | 
|---|
|  | 693 | +		/* now user can "rmmod bpf_preload" if necessary */ | 
|---|
|  | 694 | +		module_put(bpf_preload_ops->owner); | 
|---|
|  | 695 | +} | 
|---|
|  | 696 | + | 
|---|
|  | 697 | +static DEFINE_MUTEX(bpf_preload_lock); | 
|---|
|  | 698 | + | 
|---|
|  | 699 | +static int populate_bpffs(struct dentry *parent) | 
|---|
|  | 700 | +{ | 
|---|
|  | 701 | +	struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {}; | 
|---|
|  | 702 | +	struct bpf_link *links[BPF_PRELOAD_LINKS] = {}; | 
|---|
|  | 703 | +	int err = 0, i; | 
|---|
|  | 704 | + | 
|---|
|  | 705 | +	/* grab the mutex to make sure the kernel interactions with bpf_preload | 
|---|
|  | 706 | +	 * UMD are serialized | 
|---|
|  | 707 | +	 */ | 
|---|
|  | 708 | +	mutex_lock(&bpf_preload_lock); | 
|---|
|  | 709 | + | 
|---|
|  | 710 | +	/* if bpf_preload.ko wasn't built into vmlinux then load it */ | 
|---|
|  | 711 | +	if (!bpf_preload_mod_get()) | 
|---|
|  | 712 | +		goto out; | 
|---|
|  | 713 | + | 
|---|
|  | 714 | +	if (!bpf_preload_ops->info.tgid) { | 
|---|
|  | 715 | +		/* preload() will start UMD that will load BPF iterator programs */ | 
|---|
|  | 716 | +		err = bpf_preload_ops->preload(objs); | 
|---|
|  | 717 | +		if (err) | 
|---|
|  | 718 | +			goto out_put; | 
|---|
|  | 719 | +		for (i = 0; i < BPF_PRELOAD_LINKS; i++) { | 
|---|
|  | 720 | +			links[i] = bpf_link_by_id(objs[i].link_id); | 
|---|
|  | 721 | +			if (IS_ERR(links[i])) { | 
|---|
|  | 722 | +				err = PTR_ERR(links[i]); | 
|---|
|  | 723 | +				goto out_put; | 
|---|
|  | 724 | +			} | 
|---|
|  | 725 | +		} | 
|---|
|  | 726 | +		for (i = 0; i < BPF_PRELOAD_LINKS; i++) { | 
|---|
|  | 727 | +			err = bpf_iter_link_pin_kernel(parent, | 
|---|
|  | 728 | +						       objs[i].link_name, links[i]); | 
|---|
|  | 729 | +			if (err) | 
|---|
|  | 730 | +				goto out_put; | 
|---|
|  | 731 | +			/* do not unlink successfully pinned links even | 
|---|
|  | 732 | +			 * if later link fails to pin | 
|---|
|  | 733 | +			 */ | 
|---|
|  | 734 | +			links[i] = NULL; | 
|---|
|  | 735 | +		} | 
|---|
|  | 736 | +		/* finish() will tell UMD process to exit */ | 
|---|
|  | 737 | +		err = bpf_preload_ops->finish(); | 
|---|
|  | 738 | +		if (err) | 
|---|
|  | 739 | +			goto out_put; | 
|---|
|  | 740 | +	} | 
|---|
|  | 741 | +out_put: | 
|---|
|  | 742 | +	bpf_preload_mod_put(); | 
|---|
|  | 743 | +out: | 
|---|
|  | 744 | +	mutex_unlock(&bpf_preload_lock); | 
|---|
|  | 745 | +	for (i = 0; i < BPF_PRELOAD_LINKS && err; i++) | 
|---|
|  | 746 | +		if (!IS_ERR_OR_NULL(links[i])) | 
|---|
|  | 747 | +			bpf_link_put(links[i]); | 
|---|
|  | 748 | +	return err; | 
|---|
|  | 749 | +} | 
|---|
|  | 750 | + | 
|---|
|  | 751 | +static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) | 
|---|
| 638 | 752 | { | 
|---|
| 639 | 753 | static const struct tree_descr bpf_rfiles[] = { { "" } }; | 
|---|
| 640 |  | -	struct bpf_mount_opts opts; | 
|---|
|  | 754 | +	struct bpf_mount_opts *opts = fc->fs_private; | 
|---|
| 641 | 755 | struct inode *inode; | 
|---|
| 642 | 756 | int ret; | 
|---|
| 643 |  | - | 
|---|
| 644 |  | -	ret = bpf_parse_options(data, &opts); | 
|---|
| 645 |  | -	if (ret) | 
|---|
| 646 |  | -		return ret; | 
|---|
| 647 | 757 |  | 
|---|
| 648 | 758 | ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); | 
|---|
| 649 | 759 | if (ret) | 
|---|
| .. | .. | 
|---|
| 654 | 764 | inode = sb->s_root->d_inode; | 
|---|
| 655 | 765 | inode->i_op = &bpf_dir_iops; | 
|---|
| 656 | 766 | inode->i_mode &= ~S_IALLUGO; | 
|---|
| 657 |  | -	inode->i_mode |= S_ISVTX | opts.mode; | 
|---|
| 658 |  | - | 
|---|
|  | 767 | +	populate_bpffs(sb->s_root); | 
|---|
|  | 768 | +	inode->i_mode |= S_ISVTX | opts->mode; | 
|---|
| 659 | 769 | return 0; | 
|---|
| 660 | 770 | } | 
|---|
| 661 | 771 |  | 
|---|
| 662 |  | -static struct dentry *bpf_mount(struct file_system_type *type, int flags, | 
|---|
| 663 |  | -				const char *dev_name, void *data) | 
|---|
|  | 772 | +static int bpf_get_tree(struct fs_context *fc) | 
|---|
| 664 | 773 | { | 
|---|
| 665 |  | -	return mount_nodev(type, flags, data, bpf_fill_super); | 
|---|
|  | 774 | +	return get_tree_nodev(fc, bpf_fill_super); | 
|---|
|  | 775 | +} | 
|---|
|  | 776 | + | 
|---|
|  | 777 | +static void bpf_free_fc(struct fs_context *fc) | 
|---|
|  | 778 | +{ | 
|---|
|  | 779 | +	kfree(fc->fs_private); | 
|---|
|  | 780 | +} | 
|---|
|  | 781 | + | 
|---|
|  | 782 | +static const struct fs_context_operations bpf_context_ops = { | 
|---|
|  | 783 | +	.free		= bpf_free_fc, | 
|---|
|  | 784 | +	.parse_param	= bpf_parse_param, | 
|---|
|  | 785 | +	.get_tree	= bpf_get_tree, | 
|---|
|  | 786 | +}; | 
|---|
|  | 787 | + | 
|---|
|  | 788 | +/* | 
|---|
|  | 789 | + * Set up the filesystem mount context. | 
|---|
|  | 790 | + */ | 
|---|
|  | 791 | +static int bpf_init_fs_context(struct fs_context *fc) | 
|---|
|  | 792 | +{ | 
|---|
|  | 793 | +	struct bpf_mount_opts *opts; | 
|---|
|  | 794 | + | 
|---|
|  | 795 | +	opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL); | 
|---|
|  | 796 | +	if (!opts) | 
|---|
|  | 797 | +		return -ENOMEM; | 
|---|
|  | 798 | + | 
|---|
|  | 799 | +	opts->mode = S_IRWXUGO; | 
|---|
|  | 800 | + | 
|---|
|  | 801 | +	fc->fs_private = opts; | 
|---|
|  | 802 | +	fc->ops = &bpf_context_ops; | 
|---|
|  | 803 | +	return 0; | 
|---|
| 666 | 804 | } | 
|---|
| 667 | 805 |  | 
|---|
| 668 | 806 | static struct file_system_type bpf_fs_type = { | 
|---|
| 669 | 807 | .owner		= THIS_MODULE, | 
|---|
| 670 | 808 | .name		= "bpf", | 
|---|
| 671 |  | -	.mount		= bpf_mount, | 
|---|
|  | 809 | +	.init_fs_context = bpf_init_fs_context, | 
|---|
|  | 810 | +	.parameters	= bpf_fs_parameters, | 
|---|
| 672 | 811 | .kill_sb	= kill_litter_super, | 
|---|
| 673 | 812 | }; | 
|---|
| 674 | 813 |  | 
|---|
| .. | .. | 
|---|
| 676 | 815 | { | 
|---|
| 677 | 816 | int ret; | 
|---|
| 678 | 817 |  | 
|---|
|  | 818 | +	mutex_init(&bpf_preload_lock); | 
|---|
|  | 819 | + | 
|---|
| 679 | 820 | ret = sysfs_create_mount_point(fs_kobj, "bpf"); | 
|---|
| 680 | 821 | if (ret) | 
|---|
| 681 | 822 | return ret; | 
|---|