~hc/RK356X_SDK_RELEASE.git

..	..	@@ -1,3 +1,4 @@
	1	+// SPDX-License-Identifier: GPL-2.0-only
1	2	/*
2	3	* Minimal file system backend for holding eBPF maps and programs,
3	4	* used by bpf(2) object pinning.
..	..	@@ -5,10 +6,6 @@
5	6	* Authors:
6	7	*
7	8	* Daniel Borkmann <daniel@iogearbox.net>
8		- *
9		- * This program is free software; you can redistribute it and/or
10		- * modify it under the terms of the GNU General Public License
11		- * version 2 as published by the Free Software Foundation.
12	9	*/
13	10
14	11	#include <linux/init.h>
..	..	@@ -17,26 +14,32 @@
17	14	#include <linux/mount.h>
18	15	#include <linux/namei.h>
19	16	#include <linux/fs.h>
	17	+#include <linux/fs_context.h>
	18	+#include <linux/fs_parser.h>
20	19	#include <linux/kdev_t.h>
21		-#include <linux/parser.h>
22	20	#include <linux/filter.h>
23	21	#include <linux/bpf.h>
24	22	#include <linux/bpf_trace.h>
	23	+#include "preload/bpf_preload.h"
25	24
26	25	enum bpf_type {
27	26	BPF_TYPE_UNSPEC = 0,
28	27	BPF_TYPE_PROG,
29	28	BPF_TYPE_MAP,
	29	+ BPF_TYPE_LINK,
30	30	};
31	31
32	33	static void *bpf_any_get(void *raw, enum bpf_type type)
33	33	{
34	34	switch (type) {
35	35	case BPF_TYPE_PROG:
36		- raw = bpf_prog_inc(raw);
	36	+ bpf_prog_inc(raw);
37	37	break;
38	38	case BPF_TYPE_MAP:
39		- raw = bpf_map_inc(raw, true);
	39	+ bpf_map_inc_with_uref(raw);
	40	+ break;
	41	+ case BPF_TYPE_LINK:
	42	+ bpf_link_inc(raw);
40	43	break;
41	44	default:
42	45	WARN_ON_ONCE(1);
..	..	@@ -55,6 +58,9 @@
55	58	case BPF_TYPE_MAP:
56	59	bpf_map_put_with_uref(raw);
57	60	break;
	61	+ case BPF_TYPE_LINK:
	62	+ bpf_link_put(raw);
	63	+ break;
58	64	default:
59	65	WARN_ON_ONCE(1);
60	66	break;
..	..	@@ -65,20 +71,32 @@
65	71	{
66	72	void *raw;
67	73
68		- *type = BPF_TYPE_MAP;
69	74	raw = bpf_map_get_with_uref(ufd);
70		- if (IS_ERR(raw)) {
71		- *type = BPF_TYPE_PROG;
72		- raw = bpf_prog_get(ufd);
	75	+ if (!IS_ERR(raw)) {
	76	+ *type = BPF_TYPE_MAP;
	77	+ return raw;
73	78	}
74	79
75		- return raw;
	80	+ raw = bpf_prog_get(ufd);
	81	+ if (!IS_ERR(raw)) {
	82	+ *type = BPF_TYPE_PROG;
	83	+ return raw;
	84	+ }
	85	+
	86	+ raw = bpf_link_get_from_fd(ufd);
	87	+ if (!IS_ERR(raw)) {
	88	+ *type = BPF_TYPE_LINK;
	89	+ return raw;
	90	+ }
	91	+
	92	+ return ERR_PTR(-EINVAL);
76	93	}
77	94
78	95	static const struct inode_operations bpf_dir_iops;
79	96
80	97	static const struct inode_operations bpf_prog_iops = { };
81	98	static const struct inode_operations bpf_map_iops = { };
	99	+static const struct inode_operations bpf_link_iops = { };
82	100
83	101	static struct inode *bpf_get_inode(struct super_block *sb,
84	102	const struct inode *dir,
..	..	@@ -116,6 +134,8 @@
116	134	*type = BPF_TYPE_PROG;
117	135	else if (inode->i_op == &bpf_map_iops)
118	136	*type = BPF_TYPE_MAP;
	137	+ else if (inode->i_op == &bpf_link_iops)
	138	+ *type = BPF_TYPE_LINK;
119	139	else
120	140	return -EACCES;
121	141
..	..	@@ -339,13 +359,23 @@
339	359	&bpffs_map_fops : &bpffs_obj_fops);
340	360	}
341	361
	362	+static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
	363	+{
	364	+ struct bpf_link *link = arg;
	365	+
	366	+ return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
	367	+ bpf_link_is_iter(link) ?
	368	+ &bpf_iter_fops : &bpffs_obj_fops);
	369	+}
	370	+
342	371	static struct dentry *
343	372	bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
344	373	{
345	374	/* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
346		- * extensions.
	375	+ * extensions. That allows populate_bpffs() to create special files.
347	376	*/
348		- if (strchr(dentry->d_name.name, '.'))
	377	+ if ((dir->i_mode & S_IALLUGO) &&
	378	+ strchr(dentry->d_name.name, '.'))
349	379	return ERR_PTR(-EPERM);
350	380
351	381	return simple_lookup(dir, dentry, flags);
..	..	@@ -383,7 +413,28 @@
383	413	.unlink = simple_unlink,
384	414	};
385	415
386		-static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
	416	+/* pin iterator link into bpffs */
	417	+static int bpf_iter_link_pin_kernel(struct dentry *parent,
	418	+ const char *name, struct bpf_link *link)
	419	+{
	420	+ umode_t mode = S_IFREG | S_IRUSR;
	421	+ struct dentry *dentry;
	422	+ int ret;
	423	+
	424	+ inode_lock(parent->d_inode);
	425	+ dentry = lookup_one_len(name, parent, strlen(name));
	426	+ if (IS_ERR(dentry)) {
	427	+ inode_unlock(parent->d_inode);
	428	+ return PTR_ERR(dentry);
	429	+ }
	430	+ ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops,
	431	+ &bpf_iter_fops);
	432	+ dput(dentry);
	433	+ inode_unlock(parent->d_inode);
	434	+ return ret;
	435	+}
	436	+
	437	+static int bpf_obj_do_pin(const char __user *pathname, void *raw,
387	438	enum bpf_type type)
388	439	{
389	440	struct dentry *dentry;
..	..	@@ -392,7 +443,7 @@
392	443	umode_t mode;
393	444	int ret;
394	445
395		- dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0);
	446	+ dentry = user_path_create(AT_FDCWD, pathname, &path, 0);
396	447	if (IS_ERR(dentry))
397	448	return PTR_ERR(dentry);
398	449
..	..	@@ -415,6 +466,9 @@
415	466	case BPF_TYPE_MAP:
416	467	ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
417	468	break;
	469	+ case BPF_TYPE_LINK:
	470	+ ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
	471	+ break;
418	472	default:
419	473	ret = -EPERM;
420	474	}
..	..	@@ -425,30 +479,22 @@
425	479
426	480	int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
427	481	{
428		- struct filename *pname;
429	482	enum bpf_type type;
430	483	void *raw;
431	484	int ret;
432	485
433		- pname = getname(pathname);
434		- if (IS_ERR(pname))
435		- return PTR_ERR(pname);
436		-
437	486	raw = bpf_fd_probe_obj(ufd, &type);
438		- if (IS_ERR(raw)) {
439		- ret = PTR_ERR(raw);
440		- goto out;
441		- }
	487	+ if (IS_ERR(raw))
	488	+ return PTR_ERR(raw);
442	489
443		- ret = bpf_obj_do_pin(pname, raw, type);
	490	+ ret = bpf_obj_do_pin(pathname, raw, type);
444	491	if (ret != 0)
445	492	bpf_any_put(raw, type);
446		-out:
447		- putname(pname);
	493	+
448	494	return ret;
449	495	}
450	496
451		-static void *bpf_obj_do_get(const struct filename *pathname,
	497	+static void *bpf_obj_do_get(const char __user *pathname,
452	498	enum bpf_type *type, int flags)
453	499	{
454	500	struct inode *inode;
..	..	@@ -456,7 +502,7 @@
456	502	void *raw;
457	503	int ret;
458	504
459		- ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path);
	505	+ ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path);
460	506	if (ret)
461	507	return ERR_PTR(ret);
462	508
..	..	@@ -483,36 +529,29 @@
483	529	int bpf_obj_get_user(const char __user *pathname, int flags)
484	530	{
485	531	enum bpf_type type = BPF_TYPE_UNSPEC;
486		- struct filename *pname;
487		- int ret = -ENOENT;
488	532	int f_flags;
489	533	void *raw;
	534	+ int ret;
490	535
491	536	f_flags = bpf_get_file_flag(flags);
492	537	if (f_flags < 0)
493	538	return f_flags;
494	539
495		- pname = getname(pathname);
496		- if (IS_ERR(pname))
497		- return PTR_ERR(pname);
498		-
499		- raw = bpf_obj_do_get(pname, &type, f_flags);
500		- if (IS_ERR(raw)) {
501		- ret = PTR_ERR(raw);
502		- goto out;
503		- }
	540	+ raw = bpf_obj_do_get(pathname, &type, f_flags);
	541	+ if (IS_ERR(raw))
	542	+ return PTR_ERR(raw);
504	543
505	544	if (type == BPF_TYPE_PROG)
506	545	ret = bpf_prog_new_fd(raw);
507	546	else if (type == BPF_TYPE_MAP)
508	547	ret = bpf_map_new_fd(raw, f_flags);
	548	+ else if (type == BPF_TYPE_LINK)
	549	+ ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw);
509	550	else
510		- goto out;
	551	+ return -ENOENT;
511	552
512	553	if (ret < 0)
513	554	bpf_any_put(raw, type);
514		-out:
515		- putname(pname);
516	555	return ret;
517	556	}
518	557
..	..	@@ -524,6 +563,8 @@
524	563	return ERR_PTR(ret);
525	564
526	565	if (inode->i_op == &bpf_map_iops)
	566	+ return ERR_PTR(-EINVAL);
	567	+ if (inode->i_op == &bpf_link_iops)
527	568	return ERR_PTR(-EINVAL);
528	569	if (inode->i_op != &bpf_prog_iops)
529	570	return ERR_PTR(-EACCES);
..	..	@@ -537,7 +578,8 @@
537	578	if (!bpf_prog_get_ok(prog, &type, false))
538	579	return ERR_PTR(-EINVAL);
539	580
540		- return bpf_prog_inc(prog);
	581	+ bpf_prog_inc(prog);
	582	+ return prog;
541	583	}
542	584
543	585	struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
..	..	@@ -567,9 +609,8 @@
567	609	return 0;
568	610	}
569	611
570		-static void bpf_destroy_inode_deferred(struct rcu_head *head)
	612	+static void bpf_free_inode(struct inode *inode)
571	613	{
572		- struct inode *inode = container_of(head, struct inode, i_rcu);
573	614	enum bpf_type type;
574	615
575	616	if (S_ISLNK(inode->i_mode))
..	..	@@ -579,71 +620,140 @@
579	620	free_inode_nonrcu(inode);
580	621	}
581	622
582		-static void bpf_destroy_inode(struct inode *inode)
583		-{
584		- call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred);
585		-}
586		-
587	623	static const struct super_operations bpf_super_ops = {
588	624	.statfs = simple_statfs,
589	625	.drop_inode = generic_delete_inode,
590	626	.show_options = bpf_show_options,
591		- .destroy_inode = bpf_destroy_inode,
	627	+ .free_inode = bpf_free_inode,
592	628	};
593	629
594	630	enum {
595	631	OPT_MODE,
596		- OPT_ERR,
597	632	};
598	633
599		-static const match_table_t bpf_mount_tokens = {
600		- { OPT_MODE, "mode=%o" },
601		- { OPT_ERR, NULL },
	634	+static const struct fs_parameter_spec bpf_fs_parameters[] = {
	635	+ fsparam_u32oct ("mode", OPT_MODE),
	636	+ {}
602	637	};
603	638
604	639	struct bpf_mount_opts {
605	640	umode_t mode;
606	641	};
607	642
608		-static int bpf_parse_options(char *data, struct bpf_mount_opts *opts)
	643	+static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
609	644	{
610		- substring_t args[MAX_OPT_ARGS];
611		- int option, token;
612		- char *ptr;
	645	+ struct bpf_mount_opts *opts = fc->fs_private;
	646	+ struct fs_parse_result result;
	647	+ int opt;
613	648
614		- opts->mode = S_IRWXUGO;
615		-
616		- while ((ptr = strsep(&data, ",")) != NULL) {
617		- if (!*ptr)
618		- continue;
619		-
620		- token = match_token(ptr, bpf_mount_tokens, args);
621		- switch (token) {
622		- case OPT_MODE:
623		- if (match_octal(&args[0], &option))
624		- return -EINVAL;
625		- opts->mode = option & S_IALLUGO;
626		- break;
	649	+ opt = fs_parse(fc, bpf_fs_parameters, param, &result);
	650	+ if (opt < 0)
627	651	/* We might like to report bad mount options here, but
628	652	* traditionally we've ignored all mount options, so we'd
629	653	* better continue to ignore non-existing options for bpf.
630	654	*/
631		- }
	655	+ return opt == -ENOPARAM ? 0 : opt;
	656	+
	657	+ switch (opt) {
	658	+ case OPT_MODE:
	659	+ opts->mode = result.uint_32 & S_IALLUGO;
	660	+ break;
632	661	}
633	662
634	663	return 0;
635	664	}
636	665
637		-static int bpf_fill_super(struct super_block *sb, void *data, int silent)
	666	+struct bpf_preload_ops *bpf_preload_ops;
	667	+EXPORT_SYMBOL_GPL(bpf_preload_ops);
	668	+
	669	+static bool bpf_preload_mod_get(void)
	670	+{
	671	+ /* If bpf_preload.ko wasn't loaded earlier then load it now.
	672	+ * When bpf_preload is built into vmlinux the module's __init
	673	+ * function will populate it.
	674	+ */
	675	+ if (!bpf_preload_ops) {
	676	+ request_module("bpf_preload");
	677	+ if (!bpf_preload_ops)
	678	+ return false;
	679	+ }
	680	+ /* And grab the reference, so the module doesn't disappear while the
	681	+ * kernel is interacting with the kernel module and its UMD.
	682	+ */
	683	+ if (!try_module_get(bpf_preload_ops->owner)) {
	684	+ pr_err("bpf_preload module get failed.\n");
	685	+ return false;
	686	+ }
	687	+ return true;
	688	+}
	689	+
	690	+static void bpf_preload_mod_put(void)
	691	+{
	692	+ if (bpf_preload_ops)
	693	+ /* now user can "rmmod bpf_preload" if necessary */
	694	+ module_put(bpf_preload_ops->owner);
	695	+}
	696	+
	697	+static DEFINE_MUTEX(bpf_preload_lock);
	698	+
	699	+static int populate_bpffs(struct dentry *parent)
	700	+{
	701	+ struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {};
	702	+ struct bpf_link *links[BPF_PRELOAD_LINKS] = {};
	703	+ int err = 0, i;
	704	+
	705	+ /* grab the mutex to make sure the kernel interactions with bpf_preload
	706	+ * UMD are serialized
	707	+ */
	708	+ mutex_lock(&bpf_preload_lock);
	709	+
	710	+ /* if bpf_preload.ko wasn't built into vmlinux then load it */
	711	+ if (!bpf_preload_mod_get())
	712	+ goto out;
	713	+
	714	+ if (!bpf_preload_ops->info.tgid) {
	715	+ /* preload() will start UMD that will load BPF iterator programs */
	716	+ err = bpf_preload_ops->preload(objs);
	717	+ if (err)
	718	+ goto out_put;
	719	+ for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
	720	+ links[i] = bpf_link_by_id(objs[i].link_id);
	721	+ if (IS_ERR(links[i])) {
	722	+ err = PTR_ERR(links[i]);
	723	+ goto out_put;
	724	+ }
	725	+ }
	726	+ for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
	727	+ err = bpf_iter_link_pin_kernel(parent,
	728	+ objs[i].link_name, links[i]);
	729	+ if (err)
	730	+ goto out_put;
	731	+ /* do not unlink successfully pinned links even
	732	+ * if later link fails to pin
	733	+ */
	734	+ links[i] = NULL;
	735	+ }
	736	+ /* finish() will tell UMD process to exit */
	737	+ err = bpf_preload_ops->finish();
	738	+ if (err)
	739	+ goto out_put;
	740	+ }
	741	+out_put:
	742	+ bpf_preload_mod_put();
	743	+out:
	744	+ mutex_unlock(&bpf_preload_lock);
	745	+ for (i = 0; i < BPF_PRELOAD_LINKS && err; i++)
	746	+ if (!IS_ERR_OR_NULL(links[i]))
	747	+ bpf_link_put(links[i]);
	748	+ return err;
	749	+}
	750	+
	751	+static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
638	752	{
639	753	static const struct tree_descr bpf_rfiles[] = { { "" } };
640		- struct bpf_mount_opts opts;
	754	+ struct bpf_mount_opts *opts = fc->fs_private;
641	755	struct inode *inode;
642	756	int ret;
643		-
644		- ret = bpf_parse_options(data, &opts);
645		- if (ret)
646		- return ret;
647	757
648	758	ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
649	759	if (ret)
..	..	@@ -654,21 +764,50 @@
654	764	inode = sb->s_root->d_inode;
655	765	inode->i_op = &bpf_dir_iops;
656	766	inode->i_mode &= ~S_IALLUGO;
657		- inode->i_mode |= S_ISVTX | opts.mode;
658		-
	767	+ populate_bpffs(sb->s_root);
	768	+ inode->i_mode |= S_ISVTX | opts->mode;
659	769	return 0;
660	770	}
661	771
662		-static struct dentry *bpf_mount(struct file_system_type *type, int flags,
663		- const char *dev_name, void *data)
	772	+static int bpf_get_tree(struct fs_context *fc)
664	773	{
665		- return mount_nodev(type, flags, data, bpf_fill_super);
	774	+ return get_tree_nodev(fc, bpf_fill_super);
	775	+}
	776	+
	777	+static void bpf_free_fc(struct fs_context *fc)
	778	+{
	779	+ kfree(fc->fs_private);
	780	+}
	781	+
	782	+static const struct fs_context_operations bpf_context_ops = {
	783	+ .free = bpf_free_fc,
	784	+ .parse_param = bpf_parse_param,
	785	+ .get_tree = bpf_get_tree,
	786	+};
	787	+
	788	+/*
	789	+ * Set up the filesystem mount context.
	790	+ */
	791	+static int bpf_init_fs_context(struct fs_context *fc)
	792	+{
	793	+ struct bpf_mount_opts *opts;
	794	+
	795	+ opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL);
	796	+ if (!opts)
	797	+ return -ENOMEM;
	798	+
	799	+ opts->mode = S_IRWXUGO;
	800	+
	801	+ fc->fs_private = opts;
	802	+ fc->ops = &bpf_context_ops;
	803	+ return 0;
666	804	}
667	805
668	806	static struct file_system_type bpf_fs_type = {
669	807	.owner = THIS_MODULE,
670	808	.name = "bpf",
671		- .mount = bpf_mount,
	809	+ .init_fs_context = bpf_init_fs_context,
	810	+ .parameters = bpf_fs_parameters,
672	811	.kill_sb = kill_litter_super,
673	812	};
674	813
..	..	@@ -676,6 +815,8 @@
676	815	{
677	816	int ret;
678	817
	818	+ mutex_init(&bpf_preload_lock);
	819	+
679	820	ret = sysfs_create_mount_point(fs_kobj, "bpf");
680	821	if (ret)
681	822	return ret;