--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2016 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
 */
 #include <linux/bpf.h>
 #include <linux/btf.h>
...
 #include <linux/rculist_nulls.h>
 #include <linux/random.h>
 #include <uapi/linux/btf.h>
+#include <linux/rcupdate_trace.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
 #include "map_in_map.h"
 
 #define HTAB_CREATE_FLAG_MASK						\
 	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
-	 BPF_F_RDONLY | BPF_F_WRONLY)
+	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)
 
+#define BATCH_OPS(_name)			\
+	.map_lookup_batch =			\
+	_name##_map_lookup_batch,		\
+	.map_lookup_and_delete_batch =		\
+	_name##_map_lookup_and_delete_batch,	\
+	.map_update_batch =			\
+	generic_map_update_batch,		\
+	.map_delete_batch =			\
+	generic_map_delete_batch
+
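For orientation, this is what `BATCH_OPS(htab)` expands to when used in the `htab_map_ops` initializer near the end of this patch. The `##` operator pastes the map-type prefix onto the two lookup callbacks, while update and delete share the generic implementations:

```c
/* Expansion of BATCH_OPS(htab) inside a bpf_map_ops initializer: */
.map_lookup_batch = htab_map_lookup_batch,
.map_lookup_and_delete_batch = htab_map_lookup_and_delete_batch,
.map_update_batch = generic_map_update_batch,
.map_delete_batch = generic_map_delete_batch
```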
---|
+/*
+ * The bucket lock has two protection scopes:
+ *
+ * 1) Serializing concurrent operations from BPF programs on different
+ *    CPUs
+ *
+ * 2) Serializing concurrent operations from BPF programs and sys_bpf()
+ *
+ * BPF programs can execute in any context including perf, kprobes and
+ * tracing. As there are almost no limits where perf, kprobes and tracing
+ * can be invoked from, the lock operations need to be protected against
+ * deadlocks. Deadlocks can be caused by recursion and by an invocation in
+ * the lock held section when functions which acquire this lock are invoked
+ * from sys_bpf(). BPF recursion is prevented by incrementing the per CPU
+ * variable bpf_prog_active, which prevents BPF programs attached to perf
+ * events, kprobes and tracing from being invoked before the prior invocation
+ * from one of these contexts has completed. sys_bpf() uses the same mechanism
+ * by pinning the task to the current CPU and incrementing the recursion
+ * protection across the map operation.
+ *
+ * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
+ * operations like memory allocations (even with GFP_ATOMIC) from atomic
+ * contexts. This is required because even with GFP_ATOMIC the memory
+ * allocator calls into code paths which acquire locks with long held lock
+ * sections. To ensure deterministic behaviour these locks are regular
+ * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
+ * true atomic contexts on an RT kernel are the low level hardware
+ * handling, scheduling, low level interrupt handling, NMIs etc. None of
+ * these contexts should ever do memory allocations.
+ *
+ * As regular device interrupt handlers and soft interrupts are forced into
+ * thread context, the existing code which does
+ *   spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*();
+ * just works.
+ *
+ * In theory the BPF locks could be converted to regular spinlocks as well,
+ * but the bucket locks and percpu_freelist locks can be taken from
+ * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
+ * atomic contexts even on RT. These mechanisms require preallocated maps,
+ * so there is no need to invoke memory allocations within the lock held
+ * sections.
+ *
+ * BPF maps which need dynamic allocation are only used from (forced)
+ * thread context on RT and can therefore use regular spinlocks, which in
+ * turn allows memory allocations from within the lock held section.
+ *
+ * On a non-RT kernel this distinction is neither possible nor required:
+ * spinlock maps to raw_spinlock and the extra code is optimized out by the
+ * compiler.
+ */
---|
 struct bucket {
 	struct hlist_nulls_head head;
-	raw_spinlock_t lock;
+	union {
+		raw_spinlock_t raw_lock;
+		spinlock_t     lock;
+	};
 };
 
 struct bpf_htab {
...
 			union {
 				struct bpf_htab *htab;
 				struct pcpu_freelist_node fnode;
+				struct htab_elem *batch_flink;
 			};
 		};
 	};
...
 		struct bpf_lru_node lru_node;
 	};
 	u32 hash;
-	char key[0] __aligned(8);
+	char key[] __aligned(8);
 };
+
+static inline bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
+static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
+{
+	return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
+}
+
+static void htab_init_buckets(struct bpf_htab *htab)
+{
+	unsigned i;
+
+	for (i = 0; i < htab->n_buckets; i++) {
+		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
+		if (htab_use_raw_lock(htab))
+			raw_spin_lock_init(&htab->buckets[i].raw_lock);
+		else
+			spin_lock_init(&htab->buckets[i].lock);
+	}
+}
+
+static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
+					     struct bucket *b)
+{
+	unsigned long flags;
+
+	if (htab_use_raw_lock(htab))
+		raw_spin_lock_irqsave(&b->raw_lock, flags);
+	else
+		spin_lock_irqsave(&b->lock, flags);
+	return flags;
+}
+
+static inline void htab_unlock_bucket(const struct bpf_htab *htab,
+				      struct bucket *b,
+				      unsigned long flags)
+{
+	if (htab_use_raw_lock(htab))
+		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
+	else
+		spin_unlock_irqrestore(&b->lock, flags);
+}
---|
 
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
 
...
 {
 	return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	       htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
-}
-
-static bool htab_is_prealloc(const struct bpf_htab *htab)
-{
-	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
 }
 
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
...
 	bpf_map_area_free(htab->elems);
 }
 
---|
+/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
+ * (bucket_lock). If both locks need to be acquired together, the lock
+ * order is always lru_lock -> bucket_lock and this only happens in
+ * bpf_lru_list.c logic. For example, certain code paths of
+ * bpf_lru_pop_free(), which is called by prealloc_lru_pop(), will
+ * acquire lru_lock first followed by acquiring bucket_lock.
+ *
+ * In hashtab.c, to avoid deadlock, lock acquisition of
+ * bucket_lock followed by lru_lock is not allowed. In such cases,
+ * bucket_lock needs to be released first before acquiring lru_lock.
+ */
 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 					  u32 hash)
 {
...
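The deadlock-avoidance rule in the comment above is exactly what the batch lookup/delete code later in this patch implements for LRU maps; condensed, the pattern is:

```c
/* Collect LRU nodes while holding the bucket lock; free them to the
 * LRU list (which takes lru_lock) only after htab_unlock_bucket(),
 * so the forbidden bucket_lock -> lru_lock order is never taken.
 */
struct htab_elem *node_to_free = NULL;

flags = htab_lock_bucket(htab, b);
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
	hlist_nulls_del_rcu(&l->hash_node);
	l->batch_flink = node_to_free;	/* chain for deferred free */
	node_to_free = l;
}
htab_unlock_bucket(htab, b, flags);

while (node_to_free) {
	l = node_to_free;
	node_to_free = node_to_free->batch_flink;
	bpf_lru_push_free(&htab->lru, &l->lru_node);	/* takes lru_lock */
}
```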
---|
 	 */
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
 	int numa_node = bpf_map_attr_numa_node(attr);
 
 	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
...
 	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
 		     offsetof(struct htab_elem, hash_node.pprev));
 
-	if (lru && !capable(CAP_SYS_ADMIN))
+	if (lru && !bpf_capable())
 		/* The LRU implementation is much more complicated than other
-		 * maps. Hence, limit to CAP_SYS_ADMIN for now.
+		 * maps. Hence, limit to CAP_BPF.
 		 */
 		return -EPERM;
 
-	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
-		/* reserved bits should not be used */
+	if (zero_seed && !capable(CAP_SYS_ADMIN))
+		/* Guard against local DoS, and discourage production use. */
+		return -EPERM;
+
+	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
+	    !bpf_map_flags_access_ok(attr->map_flags))
 		return -EINVAL;
 
 	if (!lru && percpu_lru)
---|
...
 	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
 	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
 	struct bpf_htab *htab;
-	int err, i;
 	u64 cost;
+	int err;
 
 	htab = kzalloc(sizeof(*htab), GFP_USER);
 	if (!htab)
...
 	else
 		cost += (u64) htab->elem_size * num_possible_cpus();
 
-	if (cost >= U32_MAX - PAGE_SIZE)
-		/* make sure page count doesn't overflow */
-		goto free_htab;
-
-	htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-	/* if map size is larger than memlock limit, reject it early */
-	err = bpf_map_precharge_memlock(htab->map.pages);
+	/* if map size is larger than memlock limit, reject it */
+	err = bpf_map_charge_init(&htab->map.memory, cost);
 	if (err)
 		goto free_htab;
 
...
 					   sizeof(struct bucket),
 					   htab->map.numa_node);
 	if (!htab->buckets)
-		goto free_htab;
+		goto free_charge;
 
-	htab->hashrnd = get_random_int();
-	for (i = 0; i < htab->n_buckets; i++) {
-		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
-		raw_spin_lock_init(&htab->buckets[i].lock);
-	}
+	if (htab->map.map_flags & BPF_F_ZERO_SEED)
+		htab->hashrnd = 0;
+	else
+		htab->hashrnd = get_random_int();
+
+	htab_init_buckets(htab);
 
 	if (prealloc) {
 		err = prealloc_init(htab);
...
 		prealloc_destroy(htab);
 free_buckets:
 	bpf_map_area_free(htab->buckets);
+free_charge:
+	bpf_map_charge_finish(&htab->map.memory);
 free_htab:
 	kfree(htab);
 	return ERR_PTR(err);
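From userspace, a deterministic-seed map for benchmarking can be created roughly like this (a hedged sketch using libbpf's `bpf_map_create()`; the map name, key/value sizes, and entry count are illustrative):

```c
#include <bpf/bpf.h>

/* BPF_F_ZERO_SEED makes htab->hashrnd 0 above, so bucket placement is
 * reproducible across runs. The kernel gates the flag behind
 * CAP_SYS_ADMIN because a predictable seed invites collision-based
 * local DoS, per the alloc_check comment.
 */
LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_ZERO_SEED);
int fd = bpf_map_create(BPF_MAP_TYPE_HASH, "bench_htab",
			sizeof(__u32), sizeof(__u64), 10240, &opts);
```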
---|
...
 	struct htab_elem *l;
 	u32 hash, key_size;
 
-	/* Must be called with rcu_read_lock. */
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
...
 	 * bpf_prog
 	 *   __htab_map_lookup_elem
 	 */
-static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
 	const int ret = BPF_REG_0;
...
 	return __htab_lru_map_lookup_elem(map, key, false);
 }
 
-static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
+static int htab_lru_map_gen_lookup(struct bpf_map *map,
 				   struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
...
 	b = __select_bucket(htab, tgt_l->hash);
 	head = &b->head;
 
-	raw_spin_lock_irqsave(&b->lock, flags);
+	flags = htab_lock_bucket(htab, b);
 
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		if (l == tgt_l) {
...
 			break;
 		}
 
-	raw_spin_unlock_irqrestore(&b->lock, flags);
+	htab_unlock_bucket(htab, b, flags);
 
 	return l == tgt_l;
 }
---|
...
 	}
 }
 
+static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
+			    void *value, bool onallcpus)
+{
+	/* When using prealloc and not setting the initial value on all CPUs,
+	 * zero-fill element values for the other CPUs (just as happens when
+	 * not using prealloc). Otherwise, the BPF program has no way to
+	 * ensure known initial values for CPUs other than the current one
+	 * (onallcpus=false always when coming from a BPF program).
+	 */
+	if (htab_is_prealloc(htab) && !onallcpus) {
+		u32 size = round_up(htab->map.value_size, 8);
+		int current_cpu = raw_smp_processor_id();
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			if (cpu == current_cpu)
+				bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
+						size);
+			else
+				memset(per_cpu_ptr(pptr, cpu), 0, size);
+		}
+	} else {
+		pcpu_copy_value(htab, pptr, value, onallcpus);
+	}
+}
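The guarantee this gives is observable from userspace, since a lookup on a per-CPU hash map returns one value slot per possible CPU (a hedged sketch; `fd` is assumed to be a preallocated `BPF_MAP_TYPE_PERCPU_HASH` with u64 values, and `dump_percpu` is an illustrative helper):

```c
#include <stdio.h>
#include <stdlib.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

void dump_percpu(int fd, __u32 key)
{
	int ncpus = libbpf_num_possible_cpus();
	__u64 *vals = calloc(ncpus, sizeof(*vals));

	/* After a BPF program updated this key on one CPU, the other
	 * CPUs' slots read back as 0 rather than stale freelist data.
	 */
	if (vals && !bpf_map_lookup_elem(fd, &key, vals))
		for (int i = 0; i < ncpus; i++)
			printf("cpu%d: %llu\n", i,
			       (unsigned long long)vals[i]);
	free(vals);
}
```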
---|
+
 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 {
 	return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
 	       BITS_PER_LONG == 64;
-}
-
-static u32 htab_size_value(const struct bpf_htab *htab, bool percpu)
-{
-	u32 size = htab->map.value_size;
-
-	if (percpu || fd_htab_map_needs_adjust(htab))
-		size = round_up(size, 8);
-	return size;
 }
 
---|
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
...
 					 bool percpu, bool onallcpus,
 					 struct htab_elem *old_elem)
 {
-	u32 size = htab_size_value(htab, percpu);
+	u32 size = htab->map.value_size;
 	bool prealloc = htab_is_prealloc(htab);
 	struct htab_elem *l_new, **pl_new;
 	void __percpu *pptr;
...
 			l_new = ERR_PTR(-ENOMEM);
 			goto dec_count;
 		}
+		check_and_init_map_lock(&htab->map,
+					l_new->key + round_up(key_size, 8));
 	}
 
 	memcpy(l_new->key, key, key_size);
 	if (percpu) {
+		size = round_up(size, 8);
 		if (prealloc) {
 			pptr = htab_elem_get_ptr(l_new, key_size);
 		} else {
...
 			}
 		}
 
-		pcpu_copy_value(htab, pptr, value, onallcpus);
+		pcpu_init_value(htab, pptr, value, onallcpus);
 
 		if (!prealloc)
 			htab_elem_set_ptr(l_new, key_size, pptr);
-	} else {
+	} else if (fd_htab_map_needs_adjust(htab)) {
+		size = round_up(size, 8);
 		memcpy(l_new->key + round_up(key_size, 8), value, size);
+	} else {
+		copy_map_value(&htab->map,
+			       l_new->key + round_up(key_size, 8),
+			       value);
 	}
 
 	l_new->hash = hash;
---|
...
 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
 		       u64 map_flags)
 {
-	if (l_old && map_flags == BPF_NOEXIST)
+	if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
 		/* elem already exists */
 		return -EEXIST;
 
-	if (!l_old && map_flags == BPF_EXIST)
+	if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
 		/* elem doesn't exist, cannot update it */
 		return -ENOENT;
 
...
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
...
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	/* bpf_map_update_elem() can be called in_irq() */
-	raw_spin_lock_irqsave(&b->lock, flags);
+	if (unlikely(map_flags & BPF_F_LOCK)) {
+		if (unlikely(!map_value_has_spin_lock(map)))
+			return -EINVAL;
+		/* find an element without taking the bucket lock */
+		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
+					      htab->n_buckets);
+		ret = check_flags(htab, l_old, map_flags);
+		if (ret)
+			return ret;
+		if (l_old) {
+			/* grab the element lock and update value in place */
+			copy_map_value_locked(map,
+					      l_old->key + round_up(key_size, 8),
+					      value, false);
+			return 0;
+		}
+		/* fall through, grab the bucket lock and lookup again.
+		 * 99.9% chance that the element won't be found,
+		 * but second lookup under lock has to be done.
+		 */
+	}
+
+	flags = htab_lock_bucket(htab, b);
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
 	ret = check_flags(htab, l_old, map_flags);
 	if (ret)
 		goto err;
+
+	if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
+		/* first lookup without the bucket lock didn't find the element,
+		 * but second lookup with the bucket lock found it.
+		 * This case is highly unlikely, but has to be dealt with:
+		 * grab the element lock in addition to the bucket lock
+		 * and update element in place
+		 */
+		copy_map_value_locked(map,
+				      l_old->key + round_up(key_size, 8),
+				      value, false);
+		ret = 0;
+		goto err;
+	}
 
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
 				l_old);
...
 	}
 	ret = 0;
 err:
-	raw_spin_unlock_irqrestore(&b->lock, flags);
+	htab_unlock_bucket(htab, b, flags);
 	return ret;
 }
 
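From userspace, the in-place locked update this enables looks roughly like the following (hedged sketch: the map's value type must embed a `struct bpf_spin_lock` and the map must carry BTF, or `map_value_has_spin_lock()` above rejects the flag; field names are illustrative):

```c
struct map_value {
	struct bpf_spin_lock lock;	/* required for BPF_F_LOCK */
	__u64 counter;
};

struct map_value val = { .counter = 42 };
__u32 key = 0;

/* Copies the value fields under the element's spin lock instead of
 * replacing the whole element; fails with -EINVAL if the value type
 * has no bpf_spin_lock.
 */
if (bpf_map_update_elem(map_fd, &key, &val, BPF_F_LOCK))
	perror("bpf_map_update_elem");
```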
---|
...
 		/* unknown flags */
 		return -EINVAL;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
...
 		return -ENOMEM;
 	memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
 
-	/* bpf_map_update_elem() can be called in_irq() */
-	raw_spin_lock_irqsave(&b->lock, flags);
+	flags = htab_lock_bucket(htab, b);
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
...
 	ret = 0;
 
 err:
-	raw_spin_unlock_irqrestore(&b->lock, flags);
+	htab_unlock_bucket(htab, b, flags);
 
 	if (ret)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
...
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	/* bpf_map_update_elem() can be called in_irq() */
-	raw_spin_lock_irqsave(&b->lock, flags);
+	flags = htab_lock_bucket(htab, b);
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
...
 	}
 	ret = 0;
 err:
-	raw_spin_unlock_irqrestore(&b->lock, flags);
+	htab_unlock_bucket(htab, b, flags);
 	return ret;
 }
 
...
 		return -ENOMEM;
 	}
 
-	/* bpf_map_update_elem() can be called in_irq() */
-	raw_spin_lock_irqsave(&b->lock, flags);
+	flags = htab_lock_bucket(htab, b);
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
...
 		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
 				value, onallcpus);
 	} else {
-		pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
+		pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
 				value, onallcpus);
 		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 		l_new = NULL;
 	}
 	ret = 0;
 err:
-	raw_spin_unlock_irqrestore(&b->lock, flags);
+	htab_unlock_bucket(htab, b, flags);
 	if (l_new)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
 	return ret;
---|
...
 	u32 hash, key_size;
 	int ret = -ENOENT;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
...
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	raw_spin_lock_irqsave(&b->lock, flags);
+	flags = htab_lock_bucket(htab, b);
 
 	l = lookup_elem_raw(head, hash, key, key_size);
 
...
 		ret = 0;
 	}
 
-	raw_spin_unlock_irqrestore(&b->lock, flags);
+	htab_unlock_bucket(htab, b, flags);
 	return ret;
 }
 
...
 	u32 hash, key_size;
 	int ret = -ENOENT;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
 
 	key_size = map->key_size;
 
...
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
-	raw_spin_lock_irqsave(&b->lock, flags);
+	flags = htab_lock_bucket(htab, b);
 
 	l = lookup_elem_raw(head, hash, key, key_size);
 
...
 		ret = 0;
 	}
 
-	raw_spin_unlock_irqrestore(&b->lock, flags);
+	htab_unlock_bucket(htab, b, flags);
 	if (l)
 		bpf_lru_push_free(&htab->lru, &l->lru_node);
 	return ret;
...
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 
-	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
-	 * so the programs (can be more than one that used this map) were
-	 * disconnected from events. Wait for outstanding critical sections in
-	 * these programs to complete
+	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
+	 * bpf_free_used_maps() is called after the bpf prog is no longer executing.
+	 * There is no need to synchronize_rcu() here to protect map elements.
 	 */
-	synchronize_rcu();
 
 	/* some of the free_htab_elem() callbacks for elements of this map may
 	 * not have executed. Wait for them.
...
 	rcu_read_unlock();
 }
 
---|
+static int
+__htab_map_lookup_and_delete_batch(struct bpf_map *map,
+				   const union bpf_attr *attr,
+				   union bpf_attr __user *uattr,
+				   bool do_delete, bool is_lru_map,
+				   bool is_percpu)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
+	void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
+	void __user *uvalues = u64_to_user_ptr(attr->batch.values);
+	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
+	void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+	u32 batch, max_count, size, bucket_size;
+	struct htab_elem *node_to_free = NULL;
+	u64 elem_map_flags, map_flags;
+	struct hlist_nulls_head *head;
+	struct hlist_nulls_node *n;
+	unsigned long flags = 0;
+	bool locked = false;
+	struct htab_elem *l;
+	struct bucket *b;
+	int ret = 0;
+
+	elem_map_flags = attr->batch.elem_flags;
+	if ((elem_map_flags & ~BPF_F_LOCK) ||
+	    ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
+		return -EINVAL;
+
+	map_flags = attr->batch.flags;
+	if (map_flags)
+		return -EINVAL;
+
+	max_count = attr->batch.count;
+	if (!max_count)
+		return 0;
+
+	if (put_user(0, &uattr->batch.count))
+		return -EFAULT;
+
+	batch = 0;
+	if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
+		return -EFAULT;
+
+	if (batch >= htab->n_buckets)
+		return -ENOENT;
+
+	key_size = htab->map.key_size;
+	roundup_key_size = round_up(htab->map.key_size, 8);
+	value_size = htab->map.value_size;
+	size = round_up(value_size, 8);
+	if (is_percpu)
+		value_size = size * num_possible_cpus();
+	total = 0;
+	/* while experimenting with hash tables with sizes ranging from 10 to
+	 * 1000, it was observed that a bucket can have up to 5 entries.
+	 */
+	bucket_size = 5;
+
+alloc:
+	/* We cannot do copy_from_user or copy_to_user inside
+	 * the rcu_read_lock. Allocate enough space here.
+	 */
+	keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN);
+	values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN);
+	if (!keys || !values) {
+		ret = -ENOMEM;
+		goto after_loop;
+	}
+
+again:
+	bpf_disable_instrumentation();
+	rcu_read_lock();
+again_nocopy:
+	dst_key = keys;
+	dst_val = values;
+	b = &htab->buckets[batch];
+	head = &b->head;
+	/* do not grab the lock unless we need it (bucket_cnt > 0). */
+	if (locked)
+		flags = htab_lock_bucket(htab, b);
+
+	bucket_cnt = 0;
+	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+		bucket_cnt++;
+
+	if (bucket_cnt && !locked) {
+		locked = true;
+		goto again_nocopy;
+	}
+
+	if (bucket_cnt > (max_count - total)) {
+		if (total == 0)
+			ret = -ENOSPC;
+		/* Note that since bucket_cnt > 0 here, it is implicit
+		 * that the lock was grabbed, so release it.
+		 */
+		htab_unlock_bucket(htab, b, flags);
+		rcu_read_unlock();
+		bpf_enable_instrumentation();
+		goto after_loop;
+	}
+
+	if (bucket_cnt > bucket_size) {
+		bucket_size = bucket_cnt;
+		/* Note that since bucket_cnt > 0 here, it is implicit
+		 * that the lock was grabbed, so release it.
+		 */
+		htab_unlock_bucket(htab, b, flags);
+		rcu_read_unlock();
+		bpf_enable_instrumentation();
+		kvfree(keys);
+		kvfree(values);
+		goto alloc;
+	}
+
+	/* Next block is only safe to run if you have grabbed the lock */
+	if (!locked)
+		goto next_batch;
+
+	hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+		memcpy(dst_key, l->key, key_size);
+
+		if (is_percpu) {
+			int off = 0, cpu;
+			void __percpu *pptr;
+
+			pptr = htab_elem_get_ptr(l, map->key_size);
+			for_each_possible_cpu(cpu) {
+				bpf_long_memcpy(dst_val + off,
+						per_cpu_ptr(pptr, cpu), size);
+				off += size;
+			}
+		} else {
+			value = l->key + roundup_key_size;
+			if (elem_map_flags & BPF_F_LOCK)
+				copy_map_value_locked(map, dst_val, value,
+						      true);
+			else
+				copy_map_value(map, dst_val, value);
+			check_and_init_map_lock(map, dst_val);
+		}
+		if (do_delete) {
+			hlist_nulls_del_rcu(&l->hash_node);
+
+			/* bpf_lru_push_free() will acquire lru_lock, which
+			 * may cause deadlock. See comments in function
+			 * prealloc_lru_pop(). Let us do bpf_lru_push_free()
+			 * after releasing the bucket lock.
+			 */
+			if (is_lru_map) {
+				l->batch_flink = node_to_free;
+				node_to_free = l;
+			} else {
+				free_htab_elem(htab, l);
+			}
+		}
+		dst_key += key_size;
+		dst_val += value_size;
+	}
+
+	htab_unlock_bucket(htab, b, flags);
+	locked = false;
+
+	while (node_to_free) {
+		l = node_to_free;
+		node_to_free = node_to_free->batch_flink;
+		bpf_lru_push_free(&htab->lru, &l->lru_node);
+	}
+
+next_batch:
+	/* If we are not copying data, we can go to the next bucket and
+	 * avoid unlocking the RCU read lock.
+	 */
+	if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
+		batch++;
+		goto again_nocopy;
+	}
+
+	rcu_read_unlock();
+	bpf_enable_instrumentation();
+	if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
+	    key_size * bucket_cnt) ||
+	    copy_to_user(uvalues + total * value_size, values,
+	    value_size * bucket_cnt))) {
+		ret = -EFAULT;
+		goto after_loop;
+	}
+
+	total += bucket_cnt;
+	batch++;
+	if (batch >= htab->n_buckets) {
+		ret = -ENOENT;
+		goto after_loop;
+	}
+	goto again;
+
+after_loop:
+	if (ret == -EFAULT)
+		goto out;
+
+	/* copy # of entries and next batch */
+	ubatch = u64_to_user_ptr(attr->batch.out_batch);
+	if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
+	    put_user(total, &uattr->batch.count))
+		ret = -EFAULT;
+
+out:
+	kvfree(keys);
+	kvfree(values);
+	return ret;
+}
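The userspace side of this bucket-at-a-time protocol looks roughly like the following (a hedged sketch using libbpf's `bpf_map_lookup_batch()` on a map with u32 keys and u64 values; depending on the libbpf version, the error comes back directly or via errno):

```c
#include <errno.h>
#include <bpf/bpf.h>

__u32 out_batch, count;
__u32 keys[64];
__u64 vals[64];
void *in = NULL;	/* NULL on the first call: start from bucket 0 */
int err;

do {
	count = 64;	/* in/out: requested vs. actually copied entries */
	err = bpf_map_lookup_batch(map_fd, in, &out_batch,
				   keys, vals, &count, NULL);
	/* process the 'count' entries returned in keys[]/vals[];
	 * the final call may return entries together with ENOENT,
	 * which signals that all buckets have been visited.
	 */
	in = &out_batch;	/* resume from the bucket the kernel reported */
} while (!err);
```

`bpf_map_lookup_and_delete_batch()` follows the same shape and additionally drains the visited buckets, mapping onto the `do_delete` path above.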
---|
+
+static int
+htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+			     union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  false, true);
+}
+
+static int
+htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
+					const union bpf_attr *attr,
+					union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  false, true);
+}
+
+static int
+htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+		      union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  false, false);
+}
+
+static int
+htab_map_lookup_and_delete_batch(struct bpf_map *map,
+				 const union bpf_attr *attr,
+				 union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  false, false);
+}
+
+static int
+htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
+				 const union bpf_attr *attr,
+				 union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  true, true);
+}
+
+static int
+htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
+					    const union bpf_attr *attr,
+					    union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  true, true);
+}
+
+static int
+htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
+			  union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
+						  true, false);
+}
+
+static int
+htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
+				     const union bpf_attr *attr,
+				     union bpf_attr __user *uattr)
+{
+	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
+						  true, false);
+}
---|
+
+struct bpf_iter_seq_hash_map_info {
+	struct bpf_map *map;
+	struct bpf_htab *htab;
+	void *percpu_value_buf; /* non-zero means percpu hash */
+	u32 bucket_id;
+	u32 skip_elems;
+};
+
+static struct htab_elem *
+bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
+			   struct htab_elem *prev_elem)
+{
+	const struct bpf_htab *htab = info->htab;
+	u32 skip_elems = info->skip_elems;
+	u32 bucket_id = info->bucket_id;
+	struct hlist_nulls_head *head;
+	struct hlist_nulls_node *n;
+	struct htab_elem *elem;
+	struct bucket *b;
+	u32 i, count;
+
+	if (bucket_id >= htab->n_buckets)
+		return NULL;
+
+	/* try to find next elem in the same bucket */
+	if (prev_elem) {
+		/* no update/deletion on this bucket, prev_elem should still
+		 * be valid and we won't skip elements.
+		 */
+		n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
+		elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
+		if (elem)
+			return elem;
+
+		/* not found, unlock and go to the next bucket */
+		b = &htab->buckets[bucket_id++];
+		rcu_read_unlock();
+		skip_elems = 0;
+	}
+
+	for (i = bucket_id; i < htab->n_buckets; i++) {
+		b = &htab->buckets[i];
+		rcu_read_lock();
+
+		count = 0;
+		head = &b->head;
+		hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
+			if (count >= skip_elems) {
+				info->bucket_id = i;
+				info->skip_elems = count;
+				return elem;
+			}
+			count++;
+		}
+
+		rcu_read_unlock();
+		skip_elems = 0;
+	}
+
+	info->bucket_id = i;
+	info->skip_elems = 0;
+	return NULL;
+}
+
+static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct bpf_iter_seq_hash_map_info *info = seq->private;
+	struct htab_elem *elem;
+
+	elem = bpf_hash_map_seq_find_next(info, NULL);
+	if (!elem)
+		return NULL;
+
+	if (*pos == 0)
+		++*pos;
+	return elem;
+}
+
+static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_iter_seq_hash_map_info *info = seq->private;
+
+	++*pos;
+	++info->skip_elems;
+	return bpf_hash_map_seq_find_next(info, v);
+}
+
+static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
+{
+	struct bpf_iter_seq_hash_map_info *info = seq->private;
+	u32 roundup_key_size, roundup_value_size;
+	struct bpf_iter__bpf_map_elem ctx = {};
+	struct bpf_map *map = info->map;
+	struct bpf_iter_meta meta;
+	int ret = 0, off = 0, cpu;
+	struct bpf_prog *prog;
+	void __percpu *pptr;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, elem == NULL);
+	if (prog) {
+		ctx.meta = &meta;
+		ctx.map = info->map;
+		if (elem) {
+			roundup_key_size = round_up(map->key_size, 8);
+			ctx.key = elem->key;
+			if (!info->percpu_value_buf) {
+				ctx.value = elem->key + roundup_key_size;
+			} else {
+				roundup_value_size = round_up(map->value_size, 8);
+				pptr = htab_elem_get_ptr(elem, map->key_size);
+				for_each_possible_cpu(cpu) {
+					bpf_long_memcpy(info->percpu_value_buf + off,
+							per_cpu_ptr(pptr, cpu),
+							roundup_value_size);
+					off += roundup_value_size;
+				}
+				ctx.value = info->percpu_value_buf;
+			}
+		}
+		ret = bpf_iter_run_prog(prog, &ctx);
+	}
+
+	return ret;
+}
+
+static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
+{
+	return __bpf_hash_map_seq_show(seq, v);
+}
+
+static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
+{
+	if (!v)
+		(void)__bpf_hash_map_seq_show(seq, NULL);
+	else
+		rcu_read_unlock();
+}
+
+static int bpf_iter_init_hash_map(void *priv_data,
+				  struct bpf_iter_aux_info *aux)
+{
+	struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+	struct bpf_map *map = aux->map;
+	void *value_buf;
+	u32 buf_size;
+
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
+		if (!value_buf)
+			return -ENOMEM;
+
+		seq_info->percpu_value_buf = value_buf;
+	}
+
+	bpf_map_inc_with_uref(map);
+	seq_info->map = map;
+	seq_info->htab = container_of(map, struct bpf_htab, map);
+	return 0;
+}
+
+static void bpf_iter_fini_hash_map(void *priv_data)
+{
+	struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
+
+	bpf_map_put_with_uref(seq_info->map);
+	kfree(seq_info->percpu_value_buf);
+}
+
+static const struct seq_operations bpf_hash_map_seq_ops = {
+	.start	= bpf_hash_map_seq_start,
+	.next	= bpf_hash_map_seq_next,
+	.stop	= bpf_hash_map_seq_stop,
+	.show	= bpf_hash_map_seq_show,
+};
+
+static const struct bpf_iter_seq_info iter_seq_info = {
+	.seq_ops		= &bpf_hash_map_seq_ops,
+	.init_seq_private	= bpf_iter_init_hash_map,
+	.fini_seq_private	= bpf_iter_fini_hash_map,
+	.seq_priv_size		= sizeof(struct bpf_iter_seq_hash_map_info),
+};
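A BPF program consuming this seq_info looks roughly as follows (hedged sketch: the `iter/bpf_map_elem` section name and `struct bpf_iter__bpf_map_elem` match the ctx type built in `__bpf_hash_map_seq_show()` above; the u32/u64 key and value types are illustrative, and `BPF_SEQ_PRINTF` is assumed to come from libbpf's tracing header):

```c
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("iter/bpf_map_elem")
int dump_htab(struct bpf_iter__bpf_map_elem *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	__u32 *key = ctx->key;	/* NULL on the final (stop) invocation */
	__u64 *val = ctx->value;

	if (!key || !val)
		return 0;

	BPF_SEQ_PRINTF(seq, "%u: %llu\n", *key, *val);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
```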
---|
+
+static int htab_map_btf_id;
 const struct bpf_map_ops htab_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
...
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
+	BATCH_OPS(htab),
+	.map_btf_name = "bpf_htab",
+	.map_btf_id = &htab_map_btf_id,
+	.iter_seq_info = &iter_seq_info,
 };
 
+static int htab_lru_map_btf_id;
 const struct bpf_map_ops htab_lru_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
...
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_gen_lookup = htab_lru_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
+	BATCH_OPS(htab_lru),
+	.map_btf_name = "bpf_htab",
+	.map_btf_id = &htab_lru_map_btf_id,
+	.iter_seq_info = &iter_seq_info,
 };
 
 /* Called from eBPF program */
...
 	return ret;
 }
 
+static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
+					  struct seq_file *m)
+{
+	struct htab_elem *l;
+	void __percpu *pptr;
+	int cpu;
+
+	rcu_read_lock();
+
+	l = __htab_map_lookup_elem(map, key);
+	if (!l) {
+		rcu_read_unlock();
+		return;
+	}
+
+	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
+	seq_puts(m, ": {\n");
+	pptr = htab_elem_get_ptr(l, map->key_size);
+	for_each_possible_cpu(cpu) {
+		seq_printf(m, "\tcpu%d: ", cpu);
+		btf_type_seq_show(map->btf, map->btf_value_type_id,
+				  per_cpu_ptr(pptr, cpu), m);
+		seq_puts(m, "\n");
+	}
+	seq_puts(m, "}\n");
+
+	rcu_read_unlock();
+}
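When a per-CPU hash map with BTF is pinned in bpffs, reading the pin renders each element through this callback; per the `seq_puts`/`seq_printf` calls above, the output shape is one line per possible CPU (illustrative key and values):

```
5: {
	cpu0: 7
	cpu1: 0
	cpu2: 3
	cpu3: 0
}
```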
---|
+
+static int htab_percpu_map_btf_id;
 const struct bpf_map_ops htab_percpu_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
...
 	.map_lookup_elem = htab_percpu_map_lookup_elem,
 	.map_update_elem = htab_percpu_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
+	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+	BATCH_OPS(htab_percpu),
+	.map_btf_name = "bpf_htab",
+	.map_btf_id = &htab_percpu_map_btf_id,
+	.iter_seq_info = &iter_seq_info,
 };
 
+static int htab_lru_percpu_map_btf_id;
 const struct bpf_map_ops htab_lru_percpu_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
 	.map_free = htab_map_free,
...
 	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
 	.map_update_elem = htab_lru_percpu_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
+	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+	BATCH_OPS(htab_lru_percpu),
+	.map_btf_name = "bpf_htab",
+	.map_btf_id = &htab_lru_percpu_map_btf_id,
+	.iter_seq_info = &iter_seq_info,
 };
 
 static int fd_htab_map_alloc_check(union bpf_attr *attr)
...
 	return READ_ONCE(*inner_map);
 }
 
-static u32 htab_of_map_gen_lookup(struct bpf_map *map,
+static int htab_of_map_gen_lookup(struct bpf_map *map,
 				  struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
...
 	fd_htab_map_free(map);
 }
 
+static int htab_of_maps_map_btf_id;
 const struct bpf_map_ops htab_of_maps_map_ops = {
 	.map_alloc_check = fd_htab_map_alloc_check,
 	.map_alloc = htab_of_map_alloc,
...
 	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 	.map_gen_lookup = htab_of_map_gen_lookup,
 	.map_check_btf = map_check_no_btf,
+	.map_btf_name = "bpf_htab",
+	.map_btf_id = &htab_of_maps_map_btf_id,
 };