2024-05-10 23fa18eaa71266feff7ba8d83022d9e1cc83c65a
kernel/net/core/lwt_bpf.c
@@ -1,13 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
  */
 
 #include <linux/kernel.h>
@@ -16,6 +8,9 @@
 #include <linux/types.h>
 #include <linux/bpf.h>
 #include <net/lwtunnel.h>
+#include <net/gre.h>
+#include <net/ip6_route.h>
+#include <net/ipv6_stubs.h>
 
 struct bpf_lwt_prog {
 	struct bpf_prog *prog;
@@ -44,16 +39,17 @@
 {
 	int ret;
 
-	/* Preempt disable and BH disable are needed to protect per-cpu
+	/* Migration disable and BH disable are needed to protect per-cpu
 	 * redirect_info between BPF prog and skb_do_redirect().
 	 */
-	preempt_disable();
+	migrate_disable();
 	local_bh_disable();
 	bpf_compute_data_pointers(skb);
 	ret = bpf_prog_run_save_cb(lwt->prog, skb);
 
 	switch (ret) {
 	case BPF_OK:
+	case BPF_LWT_REROUTE:
 		break;
 
 	case BPF_REDIRECT:
@@ -63,9 +59,8 @@
 			ret = BPF_OK;
 		} else {
 			skb_reset_mac_header(skb);
-			ret = skb_do_redirect(skb);
-			if (ret == 0)
-				ret = BPF_REDIRECT;
+			skb_do_redirect(skb);
+			ret = BPF_REDIRECT;
 		}
 		break;
 
@@ -82,9 +77,38 @@
 	}
 
 	local_bh_enable();
-	preempt_enable();
+	migrate_enable();
 
 	return ret;
+}
+
+static int bpf_lwt_input_reroute(struct sk_buff *skb)
+{
+	int err = -EINVAL;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct net_device *dev = skb_dst(skb)->dev;
+		struct iphdr *iph = ip_hdr(skb);
+
+		dev_hold(dev);
+		skb_dst_drop(skb);
+		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					   iph->tos, dev);
+		dev_put(dev);
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		skb_dst_drop(skb);
+		err = ipv6_stub->ipv6_route_input(skb);
+	} else {
+		err = -EAFNOSUPPORT;
+	}
+
+	if (err)
+		goto err;
+	return dst_input(skb);
+
+err:
+	kfree_skb(skb);
+	return err;
 }
 
 static int bpf_input(struct sk_buff *skb)
@@ -98,11 +122,11 @@
 		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
 		if (ret < 0)
 			return ret;
+		if (ret == BPF_LWT_REROUTE)
+			return bpf_lwt_input_reroute(skb);
 	}
 
 	if (unlikely(!dst->lwtstate->orig_input)) {
-		pr_warn_once("orig_input not set on dst for prog %s\n",
-			     bpf->out.name);
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -133,10 +157,8 @@
 	return dst->lwtstate->orig_output(net, sk, skb);
 }
 
-static int xmit_check_hhlen(struct sk_buff *skb)
+static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
 {
-	int hh_len = skb_dst(skb)->dev->hard_header_len;
-
 	if (skb_headroom(skb) < hh_len) {
 		int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
 
@@ -147,6 +169,100 @@
 	return 0;
 }
 
+static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
+{
+	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
+	int oif = l3mdev ? l3mdev->ifindex : 0;
+	struct dst_entry *dst = NULL;
+	int err = -EAFNOSUPPORT;
+	struct sock *sk;
+	struct net *net;
+	bool ipv4;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		ipv4 = true;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		ipv4 = false;
+	else
+		goto err;
+
+	sk = sk_to_full_sk(skb->sk);
+	if (sk) {
+		if (sk->sk_bound_dev_if)
+			oif = sk->sk_bound_dev_if;
+		net = sock_net(sk);
+	} else {
+		net = dev_net(skb_dst(skb)->dev);
+	}
+
+	if (ipv4) {
+		struct iphdr *iph = ip_hdr(skb);
+		struct flowi4 fl4 = {};
+		struct rtable *rt;
+
+		fl4.flowi4_oif = oif;
+		fl4.flowi4_mark = skb->mark;
+		fl4.flowi4_uid = sock_net_uid(net, sk);
+		fl4.flowi4_tos = RT_TOS(iph->tos);
+		fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+		fl4.flowi4_proto = iph->protocol;
+		fl4.daddr = iph->daddr;
+		fl4.saddr = iph->saddr;
+
+		rt = ip_route_output_key(net, &fl4);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			goto err;
+		}
+		dst = &rt->dst;
+	} else {
+		struct ipv6hdr *iph6 = ipv6_hdr(skb);
+		struct flowi6 fl6 = {};
+
+		fl6.flowi6_oif = oif;
+		fl6.flowi6_mark = skb->mark;
+		fl6.flowi6_uid = sock_net_uid(net, sk);
+		fl6.flowlabel = ip6_flowinfo(iph6);
+		fl6.flowi6_proto = iph6->nexthdr;
+		fl6.daddr = iph6->daddr;
+		fl6.saddr = iph6->saddr;
+
+		dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			goto err;
+		}
+	}
+	if (unlikely(dst->error)) {
+		err = dst->error;
+		dst_release(dst);
+		goto err;
+	}
+
+	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
+	 * was done for the previous dst, so we are doing it here again, in
+	 * case the new dst needs much more space. The call below is a noop
+	 * if there is enough header space in skb.
+	 */
+	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+	if (unlikely(err))
+		goto err;
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+	if (unlikely(err))
+		return net_xmit_errno(err);
+
+	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
+	return LWTUNNEL_XMIT_DONE;
+
+err:
+	kfree_skb(skb);
+	return err;
+}
+
 static int bpf_xmit(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -154,21 +270,33 @@
 
 	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
 	if (bpf->xmit.prog) {
+		int hh_len = dst->dev->hard_header_len;
+		__be16 proto = skb->protocol;
 		int ret;
 
 		ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
 		switch (ret) {
 		case BPF_OK:
+			/* If the header changed, e.g. via bpf_lwt_push_encap,
+			 * BPF_LWT_REROUTE below should have been used if the
+			 * protocol was also changed.
+			 */
+			if (skb->protocol != proto) {
+				kfree_skb(skb);
+				return -EINVAL;
+			}
 			/* If the header was expanded, headroom might be too
 			 * small for L2 header to come, expand as needed.
 			 */
-			ret = xmit_check_hhlen(skb);
+			ret = xmit_check_hhlen(skb, hh_len);
 			if (unlikely(ret))
 				return ret;
 
 			return LWTUNNEL_XMIT_CONTINUE;
 		case BPF_REDIRECT:
 			return LWTUNNEL_XMIT_DONE;
+		case BPF_LWT_REROUTE:
+			return bpf_lwt_xmit_reroute(skb);
 		default:
 			return ret;
 		}
@@ -208,8 +336,8 @@
 	int ret;
 	u32 fd;
 
-	ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy,
-			       NULL);
+	ret = nla_parse_nested_deprecated(tb, LWT_BPF_PROG_MAX, attr,
+					  bpf_prog_policy, NULL);
 	if (ret < 0)
 		return ret;
 
@@ -237,7 +365,7 @@
 	[LWT_BPF_XMIT_HEADROOM]	= { .type = NLA_U32 },
 };
 
-static int bpf_build_state(struct nlattr *nla,
+static int bpf_build_state(struct net *net, struct nlattr *nla,
 			   unsigned int family, const void *cfg,
 			   struct lwtunnel_state **ts,
 			   struct netlink_ext_ack *extack)
@@ -250,7 +378,8 @@
 	if (family != AF_INET && family != AF_INET6)
 		return -EAFNOSUPPORT;
 
-	ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack);
+	ret = nla_parse_nested_deprecated(tb, LWT_BPF_MAX, nla, bpf_nl_policy,
+					  extack);
 	if (ret < 0)
 		return ret;
 
@@ -318,7 +447,7 @@
 	if (!prog->prog)
 		return 0;
 
-	nest = nla_nest_start(skb, attr);
+	nest = nla_nest_start_noflag(skb, attr);
 	if (!nest)
 		return -EMSGSIZE;
 
@@ -390,6 +519,135 @@
 	.owner		= THIS_MODULE,
 };
 
+static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type,
+			   int encap_len)
+{
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	gso_type |= SKB_GSO_DODGY;
+	shinfo->gso_type |= gso_type;
+	skb_decrease_gso_size(shinfo, encap_len);
+	shinfo->gso_segs = 0;
+	return 0;
+}
+
+static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
+{
+	int next_hdr_offset;
+	void *next_hdr;
+	__u8 protocol;
+
+	/* SCTP and UDP_L4 gso need more nuanced handling than what
+	 * handle_gso_type() does above: skb_decrease_gso_size() is not enough.
+	 * So at the moment only TCP GSO packets are let through.
+	 */
+	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+		return -ENOTSUPP;
+
+	if (ipv4) {
+		protocol = ip_hdr(skb)->protocol;
+		next_hdr_offset = sizeof(struct iphdr);
+		next_hdr = skb_network_header(skb) + next_hdr_offset;
+	} else {
+		protocol = ipv6_hdr(skb)->nexthdr;
+		next_hdr_offset = sizeof(struct ipv6hdr);
+		next_hdr = skb_network_header(skb) + next_hdr_offset;
+	}
+
+	switch (protocol) {
+	case IPPROTO_GRE:
+		next_hdr_offset += sizeof(struct gre_base_hdr);
+		if (next_hdr_offset > encap_len)
+			return -EINVAL;
+
+		if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM)
+			return handle_gso_type(skb, SKB_GSO_GRE_CSUM,
+					       encap_len);
+		return handle_gso_type(skb, SKB_GSO_GRE, encap_len);
+
+	case IPPROTO_UDP:
+		next_hdr_offset += sizeof(struct udphdr);
+		if (next_hdr_offset > encap_len)
+			return -EINVAL;
+
+		if (((struct udphdr *)next_hdr)->check)
+			return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM,
+					       encap_len);
+		return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len);
+
+	case IPPROTO_IP:
+	case IPPROTO_IPV6:
+		if (ipv4)
+			return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len);
+		else
+			return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len);
+
+	default:
+		return -EPROTONOSUPPORT;
+	}
+}
+
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
+{
+	struct iphdr *iph;
+	bool ipv4;
+	int err;
+
+	if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
+		return -EINVAL;
+
+	/* validate protocol and length */
+	iph = (struct iphdr *)hdr;
+	if (iph->version == 4) {
+		ipv4 = true;
+		if (unlikely(len < iph->ihl * 4))
+			return -EINVAL;
+	} else if (iph->version == 6) {
+		ipv4 = false;
+		if (unlikely(len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	if (ingress)
+		err = skb_cow_head(skb, len + skb->mac_len);
+	else
+		err = skb_cow_head(skb,
+				   len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
+	if (unlikely(err))
+		return err;
+
+	/* push the encap headers and fix pointers */
+	skb_reset_inner_headers(skb);
+	skb_reset_inner_mac_header(skb);  /* mac header is not yet set */
+	skb_set_inner_protocol(skb, skb->protocol);
+	skb->encapsulation = 1;
+	skb_push(skb, len);
+	if (ingress)
+		skb_postpush_rcsum(skb, iph, len);
+	skb_reset_network_header(skb);
+	memcpy(skb_network_header(skb), hdr, len);
+	bpf_compute_data_pointers(skb);
+	skb_clear_hash(skb);
+
+	if (ipv4) {
+		skb->protocol = htons(ETH_P_IP);
+		iph = ip_hdr(skb);
+
+		if (!iph->check)
+			iph->check = ip_fast_csum((unsigned char *)iph,
+						  iph->ihl);
+	} else {
+		skb->protocol = htons(ETH_P_IPV6);
+	}
+
+	if (skb_is_gso(skb))
+		return handle_gso_encap(skb, ipv4, len);
+
+	return 0;
+}
+
 static int __init bpf_lwt_init(void)
 {
 	return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
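
The kernel side above is driven from a BPF program attached to a lightweight-tunnel route. Below is a minimal sketch of such an lwt_xmit program: it pushes an outer IPv4 header with bpf_lwt_push_encap(BPF_LWT_ENCAP_IP), which ends up in bpf_lwt_push_ip_encap() from the patch, and returns BPF_LWT_REROUTE so that bpf_lwt_xmit_reroute() looks up a dst for the now-encapsulated packet. The helper, encap mode, section name and return codes are the existing kernel/libbpf API; the program name, addresses and the plain-IPIP choice are illustrative only.

// SPDX-License-Identifier: GPL-2.0
/* Sketch of an lwt_xmit program using BPF_LWT_ENCAP_IP + BPF_LWT_REROUTE
 * (not part of the patch above; addresses are examples).
 */
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("lwt_xmit")
int ipip_encap_reroute(struct __sk_buff *skb)
{
	struct iphdr hdr = {
		.version  = 4,
		.ihl      = 5,			/* 20-byte header, no options */
		.ttl      = 64,
		.protocol = IPPROTO_IPIP,	/* IPv4 in IPv4 */
		.tot_len  = bpf_htons(skb->len + sizeof(struct iphdr)),
		.saddr    = bpf_htonl(0x0a000001),	/* 10.0.0.1, example */
		.daddr    = bpf_htonl(0x0a000002),	/* 10.0.0.2, example */
	};

	/* checksum left at 0: bpf_lwt_push_ip_encap() computes it */
	if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, sizeof(hdr)))
		return BPF_DROP;

	/* the cached dst matched the inner header; ask the stack to re-route */
	return BPF_LWT_REROUTE;
}

char _license[] SEC("license") = "GPL";

Attached with something along the lines of iproute2's "ip route add <prefix> encap bpf xmit obj <prog.o> section lwt_xmit dev <dev>", the program runs at transmit time; BPF_OK would instead keep the original dst (subject to the new skb->protocol check in bpf_xmit()), and on ingress the analogous path goes through bpf_lwt_input_reroute().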