2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/net/core/lwt_bpf.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
 */
 
 #include <linux/kernel.h>
@@ -16,6 +8,9 @@
 #include <linux/types.h>
 #include <linux/bpf.h>
 #include <net/lwtunnel.h>
+#include <net/gre.h>
+#include <net/ip6_route.h>
+#include <net/ipv6_stubs.h>
 
 struct bpf_lwt_prog {
 	struct bpf_prog *prog;
@@ -44,16 +39,17 @@
 {
 	int ret;
 
-	/* Preempt disable and BH disable are needed to protect per-cpu
+	/* Migration disable and BH disable are needed to protect per-cpu
 	 * redirect_info between BPF prog and skb_do_redirect().
 	 */
-	preempt_disable();
+	migrate_disable();
 	local_bh_disable();
 	bpf_compute_data_pointers(skb);
 	ret = bpf_prog_run_save_cb(lwt->prog, skb);
 
 	switch (ret) {
 	case BPF_OK:
+	case BPF_LWT_REROUTE:
 		break;
 
 	case BPF_REDIRECT:
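
A side note on the hunk above: migrate_disable() pins the task to its current CPU without disabling preemption outright (which also keeps this section valid on PREEMPT_RT), while local_bh_disable() keeps softirq code away from the same per-CPU slot; together they make the per-cpu redirect_info stable between the BPF program and skb_do_redirect(). The out-of-tree sketch below only illustrates that pairing; the names are invented for the example.

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/bottom_half.h>

/* Stand-in for per-cpu state such as redirect_info (illustrative only). */
static DEFINE_PER_CPU(int, demo_scratch);

static int demo_use_per_cpu_scratch(void)
{
	int val;

	migrate_disable();	/* stay on this CPU; preemption stays enabled */
	local_bh_disable();	/* keep softirqs off the same per-CPU slot */

	this_cpu_write(demo_scratch, 42);
	val = this_cpu_read(demo_scratch);	/* same CPU as the write above */

	local_bh_enable();
	migrate_enable();

	return val;
}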
@@ -82,9 +78,38 @@
 	}
 
 	local_bh_enable();
-	preempt_enable();
+	migrate_enable();
 
 	return ret;
+}
+
+static int bpf_lwt_input_reroute(struct sk_buff *skb)
+{
+	int err = -EINVAL;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct net_device *dev = skb_dst(skb)->dev;
+		struct iphdr *iph = ip_hdr(skb);
+
+		dev_hold(dev);
+		skb_dst_drop(skb);
+		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					   iph->tos, dev);
+		dev_put(dev);
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		skb_dst_drop(skb);
+		err = ipv6_stub->ipv6_route_input(skb);
+	} else {
+		err = -EAFNOSUPPORT;
+	}
+
+	if (err)
+		goto err;
+	return dst_input(skb);
+
+err:
+	kfree_skb(skb);
+	return err;
 }
 
 static int bpf_input(struct sk_buff *skb)
@@ -98,11 +123,11 @@
 		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
 		if (ret < 0)
 			return ret;
+		if (ret == BPF_LWT_REROUTE)
+			return bpf_lwt_input_reroute(skb);
 	}
 
 	if (unlikely(!dst->lwtstate->orig_input)) {
-		pr_warn_once("orig_input not set on dst for prog %s\n",
-			     bpf->out.name);
 		kfree_skb(skb);
 		return -EINVAL;
 	}
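
For reference, this is roughly what a program driving the new ingress reroute path could look like, loosely modeled on the kernel's test_lwt_ip_encap selftest. It is a sketch only: the section name, the addresses, and the IPIP outer protocol are illustrative assumptions rather than part of this patch, and non-GSO traffic is assumed.

#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("lwt_in")
int encap_ipip_and_reroute(struct __sk_buff *skb)
{
	struct iphdr outer = {};

	outer.version	= 4;
	outer.ihl	= 5;
	outer.ttl	= 64;
	outer.protocol	= IPPROTO_IPIP;			/* inner packet is IPv4 */
	outer.tot_len	= bpf_htons(skb->len + sizeof(outer));
	outer.saddr	= bpf_htonl(0x0a000001);	/* 10.0.0.1, placeholder */
	outer.daddr	= bpf_htonl(0x0a000002);	/* 10.0.0.2, placeholder */
	/* outer.check stays 0; bpf_lwt_push_ip_encap() computes it */

	if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &outer, sizeof(outer)))
		return BPF_DROP;

	/* ask bpf_input() to revalidate the route for the new outer header */
	return BPF_LWT_REROUTE;
}

char _license[] SEC("license") = "GPL";

Such a program would typically be attached with something along the lines of "ip route add <prefix> encap bpf in obj prog.o section lwt_in dev <dev>"; exact iproute2 syntax may vary between versions.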
@@ -133,10 +158,8 @@
 	return dst->lwtstate->orig_output(net, sk, skb);
 }
 
-static int xmit_check_hhlen(struct sk_buff *skb)
+static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
 {
-	int hh_len = skb_dst(skb)->dev->hard_header_len;
-
 	if (skb_headroom(skb) < hh_len) {
 		int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
 
@@ -147,6 +170,100 @@
 	return 0;
 }
 
+static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
+{
+	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
+	int oif = l3mdev ? l3mdev->ifindex : 0;
+	struct dst_entry *dst = NULL;
+	int err = -EAFNOSUPPORT;
+	struct sock *sk;
+	struct net *net;
+	bool ipv4;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		ipv4 = true;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		ipv4 = false;
+	else
+		goto err;
+
+	sk = sk_to_full_sk(skb->sk);
+	if (sk) {
+		if (sk->sk_bound_dev_if)
+			oif = sk->sk_bound_dev_if;
+		net = sock_net(sk);
+	} else {
+		net = dev_net(skb_dst(skb)->dev);
+	}
+
+	if (ipv4) {
+		struct iphdr *iph = ip_hdr(skb);
+		struct flowi4 fl4 = {};
+		struct rtable *rt;
+
+		fl4.flowi4_oif = oif;
+		fl4.flowi4_mark = skb->mark;
+		fl4.flowi4_uid = sock_net_uid(net, sk);
+		fl4.flowi4_tos = RT_TOS(iph->tos);
+		fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+		fl4.flowi4_proto = iph->protocol;
+		fl4.daddr = iph->daddr;
+		fl4.saddr = iph->saddr;
+
+		rt = ip_route_output_key(net, &fl4);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			goto err;
+		}
+		dst = &rt->dst;
+	} else {
+		struct ipv6hdr *iph6 = ipv6_hdr(skb);
+		struct flowi6 fl6 = {};
+
+		fl6.flowi6_oif = oif;
+		fl6.flowi6_mark = skb->mark;
+		fl6.flowi6_uid = sock_net_uid(net, sk);
+		fl6.flowlabel = ip6_flowinfo(iph6);
+		fl6.flowi6_proto = iph6->nexthdr;
+		fl6.daddr = iph6->daddr;
+		fl6.saddr = iph6->saddr;
+
+		dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			goto err;
+		}
+	}
+	if (unlikely(dst->error)) {
+		err = dst->error;
+		dst_release(dst);
+		goto err;
+	}
+
+	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
+	 * was done for the previous dst, so we are doing it here again, in
+	 * case the new dst needs much more space. The call below is a noop
+	 * if there is enough header space in skb.
+	 */
+	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+	if (unlikely(err))
+		goto err;
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+	if (unlikely(err))
+		return err;
+
+	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
+	return LWTUNNEL_XMIT_DONE;
+
+err:
+	kfree_skb(skb);
+	return err;
+}
+
 static int bpf_xmit(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -154,21 +271,33 @@
 
 	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
 	if (bpf->xmit.prog) {
+		int hh_len = dst->dev->hard_header_len;
+		__be16 proto = skb->protocol;
 		int ret;
 
 		ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
 		switch (ret) {
 		case BPF_OK:
+			/* If the header changed, e.g. via bpf_lwt_push_encap,
+			 * BPF_LWT_REROUTE below should have been used if the
+			 * protocol was also changed.
+			 */
+			if (skb->protocol != proto) {
+				kfree_skb(skb);
+				return -EINVAL;
+			}
			/* If the header was expanded, headroom might be too
 			 * small for L2 header to come, expand as needed.
 			 */
-			ret = xmit_check_hhlen(skb);
+			ret = xmit_check_hhlen(skb, hh_len);
 			if (unlikely(ret))
 				return ret;
 
 			return LWTUNNEL_XMIT_CONTINUE;
 		case BPF_REDIRECT:
 			return LWTUNNEL_XMIT_DONE;
+		case BPF_LWT_REROUTE:
+			return bpf_lwt_xmit_reroute(skb);
 		default:
 			return ret;
 		}
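
To see why the skb->protocol check above matters, consider a hypothetical lwt_xmit program that wraps an IPv4 packet in a placeholder IPv6 header: skb->protocol changes from ETH_P_IP to ETH_P_IPV6, so returning BPF_OK would now get the packet dropped with -EINVAL, and the program has to return BPF_LWT_REROUTE so that bpf_lwt_xmit_reroute() looks up a route for the new outer header. A sketch, with placeholder addresses and non-GSO traffic assumed:

#include <linux/bpf.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("lwt_xmit")
int encap_ip6_and_reroute(struct __sk_buff *skb)
{
	struct ipv6hdr outer = {};

	outer.version	  = 6;
	outer.payload_len = bpf_htons(skb->len);	/* inner IPv4 packet */
	outer.nexthdr	  = IPPROTO_IPIP;		/* IPv4-in-IPv6 */
	outer.hop_limit	  = 64;
	outer.saddr.s6_addr[0]	= 0xfc;			/* fc00::1, placeholder */
	outer.saddr.s6_addr[15]	= 0x01;
	outer.daddr.s6_addr[0]	= 0xfc;			/* fc00::2, placeholder */
	outer.daddr.s6_addr[15]	= 0x02;

	if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &outer, sizeof(outer)))
		return BPF_DROP;

	/* BPF_OK would be -EINVAL here because skb->protocol changed */
	return BPF_LWT_REROUTE;
}

char _license[] SEC("license") = "GPL";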
@@ -208,8 +337,8 @@
 	int ret;
 	u32 fd;
 
-	ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy,
-			       NULL);
+	ret = nla_parse_nested_deprecated(tb, LWT_BPF_PROG_MAX, attr,
+					  bpf_prog_policy, NULL);
 	if (ret < 0)
 		return ret;
 
@@ -237,7 +366,7 @@
 	[LWT_BPF_XMIT_HEADROOM] = { .type = NLA_U32 },
 };
 
-static int bpf_build_state(struct nlattr *nla,
+static int bpf_build_state(struct net *net, struct nlattr *nla,
 			   unsigned int family, const void *cfg,
 			   struct lwtunnel_state **ts,
 			   struct netlink_ext_ack *extack)
@@ -250,7 +379,8 @@
 	if (family != AF_INET && family != AF_INET6)
 		return -EAFNOSUPPORT;
 
-	ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack);
+	ret = nla_parse_nested_deprecated(tb, LWT_BPF_MAX, nla, bpf_nl_policy,
+					  extack);
 	if (ret < 0)
 		return ret;
 
@@ -318,7 +448,7 @@
 	if (!prog->prog)
 		return 0;
 
-	nest = nla_nest_start(skb, attr);
+	nest = nla_nest_start_noflag(skb, attr);
 	if (!nest)
 		return -EMSGSIZE;
 
@@ -390,6 +520,135 @@
 	.owner = THIS_MODULE,
 };
 
+static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type,
+			   int encap_len)
+{
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	gso_type |= SKB_GSO_DODGY;
+	shinfo->gso_type |= gso_type;
+	skb_decrease_gso_size(shinfo, encap_len);
+	shinfo->gso_segs = 0;
+	return 0;
+}
+
+static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
+{
+	int next_hdr_offset;
+	void *next_hdr;
+	__u8 protocol;
+
+	/* SCTP and UDP_L4 gso need more nuanced handling than what
+	 * handle_gso_type() does above: skb_decrease_gso_size() is not enough.
+	 * So at the moment only TCP GSO packets are let through.
+	 */
+	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+		return -ENOTSUPP;
+
+	if (ipv4) {
+		protocol = ip_hdr(skb)->protocol;
+		next_hdr_offset = sizeof(struct iphdr);
+		next_hdr = skb_network_header(skb) + next_hdr_offset;
+	} else {
+		protocol = ipv6_hdr(skb)->nexthdr;
+		next_hdr_offset = sizeof(struct ipv6hdr);
+		next_hdr = skb_network_header(skb) + next_hdr_offset;
+	}
+
+	switch (protocol) {
+	case IPPROTO_GRE:
+		next_hdr_offset += sizeof(struct gre_base_hdr);
+		if (next_hdr_offset > encap_len)
+			return -EINVAL;
+
+		if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM)
+			return handle_gso_type(skb, SKB_GSO_GRE_CSUM,
+					       encap_len);
+		return handle_gso_type(skb, SKB_GSO_GRE, encap_len);
+
+	case IPPROTO_UDP:
+		next_hdr_offset += sizeof(struct udphdr);
+		if (next_hdr_offset > encap_len)
+			return -EINVAL;
+
+		if (((struct udphdr *)next_hdr)->check)
+			return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM,
+					       encap_len);
+		return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len);
+
+	case IPPROTO_IP:
+	case IPPROTO_IPV6:
+		if (ipv4)
+			return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len);
+		else
+			return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len);
+
+	default:
+		return -EPROTONOSUPPORT;
+	}
+}
+
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
+{
+	struct iphdr *iph;
+	bool ipv4;
+	int err;
+
+	if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
+		return -EINVAL;
+
+	/* validate protocol and length */
+	iph = (struct iphdr *)hdr;
+	if (iph->version == 4) {
+		ipv4 = true;
+		if (unlikely(len < iph->ihl * 4))
+			return -EINVAL;
+	} else if (iph->version == 6) {
+		ipv4 = false;
+		if (unlikely(len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	if (ingress)
+		err = skb_cow_head(skb, len + skb->mac_len);
+	else
+		err = skb_cow_head(skb,
+				   len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
+	if (unlikely(err))
+		return err;
+
+	/* push the encap headers and fix pointers */
+	skb_reset_inner_headers(skb);
+	skb_reset_inner_mac_header(skb); /* mac header is not yet set */
+	skb_set_inner_protocol(skb, skb->protocol);
+	skb->encapsulation = 1;
+	skb_push(skb, len);
+	if (ingress)
+		skb_postpush_rcsum(skb, iph, len);
+	skb_reset_network_header(skb);
+	memcpy(skb_network_header(skb), hdr, len);
+	bpf_compute_data_pointers(skb);
+	skb_clear_hash(skb);
+
+	if (ipv4) {
+		skb->protocol = htons(ETH_P_IP);
+		iph = ip_hdr(skb);
+
+		if (!iph->check)
+			iph->check = ip_fast_csum((unsigned char *)iph,
+						  iph->ihl);
+	} else {
+		skb->protocol = htons(ETH_P_IPV6);
+	}
+
+	if (skb_is_gso(skb))
+		return handle_gso_encap(skb, ipv4, len);
+
+	return 0;
+}
+
 static int __init bpf_lwt_init(void)
 {
 	return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
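
One note on the GSO handling in the last hunk: handle_gso_encap() only lets TCP GSO packets through, and only when it can classify the pushed headers, e.g. an outer IPv4 header immediately followed by a base GRE header (SKB_GSO_GRE, or SKB_GSO_GRE_CSUM when the GRE_CSUM flag is set). The sketch below, modeled on the test_lwt_ip_encap selftest, builds exactly that layout from a BPF program; the addresses are placeholders and the GRE header struct is defined locally for the example.

#include <linux/bpf.h>
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

struct grehdr {			/* base GRE header: flags + protocol */
	__be16 flags;
	__be16 protocol;
};

struct encap_gre_hdr {		/* layout accepted by handle_gso_encap() */
	struct iphdr ip;
	struct grehdr gre;
} __attribute__((packed));

SEC("lwt_xmit")
int encap_gre(struct __sk_buff *skb)
{
	struct encap_gre_hdr hdr = {};

	hdr.ip.version	= 4;
	hdr.ip.ihl	= 5;
	hdr.ip.ttl	= 64;
	hdr.ip.protocol	= IPPROTO_GRE;
	hdr.ip.tot_len	= bpf_htons(skb->len + sizeof(hdr));
	hdr.ip.saddr	= bpf_htonl(0x0a000001);	/* 10.0.0.1, placeholder */
	hdr.ip.daddr	= bpf_htonl(0x0a000002);	/* 10.0.0.2, placeholder */

	/* flags stay 0 (no GRE checksum), so GSO skbs get SKB_GSO_GRE;
	 * the low 16 bits of skb->protocol already hold the __be16 EtherType.
	 */
	hdr.gre.protocol = skb->protocol;

	if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, sizeof(hdr)))
		return BPF_DROP;

	return BPF_LWT_REROUTE;
}

char _license[] SEC("license") = "GPL";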