hc
2024-05-10 10ebd8556b7990499c896a550e3d416b444211e6
kernel/net/ipv4/ip_tunnel.c
....@@ -1,19 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (c) 2013 Nicira, Inc.
3
- *
4
- * This program is free software; you can redistribute it and/or
5
- * modify it under the terms of version 2 of the GNU General Public
6
- * License as published by the Free Software Foundation.
7
- *
8
- * This program is distributed in the hope that it will be useful, but
9
- * WITHOUT ANY WARRANTY; without even the implied warranty of
10
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
- * General Public License for more details.
12
- *
13
- * You should have received a copy of the GNU General Public License
14
- * along with this program; if not, write to the Free Software
15
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16
- * 02110-1301, USA
174 */
185
196 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
....@@ -308,7 +295,7 @@
308295 ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
309296 iph->saddr, tunnel->parms.o_key,
310297 RT_TOS(iph->tos), tunnel->parms.link,
311
- tunnel->fwmark);
298
+ tunnel->fwmark, 0);
312299 rt = ip_route_output_key(tunnel->net, &fl4);
313300
314301 if (!IS_ERR(rt)) {
....@@ -376,7 +363,6 @@
376363 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
377364 bool log_ecn_error)
378365 {
379
- struct pcpu_sw_netstats *tstats;
380366 const struct iphdr *iph = ip_hdr(skb);
381367 int err;
382368
....@@ -404,7 +390,7 @@
404390 tunnel->i_seqno = ntohl(tpi->seq) + 1;
405391 }
406392
407
- skb_reset_network_header(skb);
393
+ skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
408394
409395 err = IP_ECN_decapsulate(iph, skb);
410396 if (unlikely(err)) {
....@@ -418,12 +404,7 @@
418404 }
419405 }
420406
421
- tstats = this_cpu_ptr(tunnel->dev->tstats);
422
- u64_stats_update_begin(&tstats->syncp);
423
- tstats->rx_packets++;
424
- tstats->rx_bytes += skb->len;
425
- u64_stats_update_end(&tstats->syncp);
426
-
407
+ dev_sw_netstats_rx_add(tunnel->dev, skb->len);
427408 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
428409
429410 if (tunnel->dev->type == ARPHRD_ETHER) {
....@@ -502,41 +483,47 @@
502483
503484 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
504485 struct rtable *rt, __be16 df,
505
- const struct iphdr *inner_iph)
486
+ const struct iphdr *inner_iph,
487
+ int tunnel_hlen, __be32 dst, bool md)
506488 {
507489 struct ip_tunnel *tunnel = netdev_priv(dev);
508490 int pkt_size;
509491 int mtu;
510492
511
- pkt_size = skb->len - tunnel->hlen;
493
+ tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
494
+ pkt_size = skb->len - tunnel_hlen;
512495 pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
513496
514497 if (df) {
515
- mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel->hlen);
498
+ mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
516499 mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
517500 } else {
518
- mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
501
+ mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
519502 }
520503
521
- skb_dst_update_pmtu_no_confirm(skb, mtu);
504
+ if (skb_valid_dst(skb))
505
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
522506
523507 if (skb->protocol == htons(ETH_P_IP)) {
524508 if (!skb_is_gso(skb) &&
525509 (inner_iph->frag_off & htons(IP_DF)) &&
526510 mtu < pkt_size) {
527
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
528
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
511
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
529512 return -E2BIG;
530513 }
531514 }
532515 #if IS_ENABLED(CONFIG_IPV6)
533516 else if (skb->protocol == htons(ETH_P_IPV6)) {
534
- struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
517
+ struct rt6_info *rt6;
518
+ __be32 daddr;
519
+
520
+ rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
521
+ NULL;
522
+ daddr = md ? dst : tunnel->parms.iph.daddr;
535523
536524 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
537525 mtu >= IPV6_MIN_MTU) {
538
- if ((tunnel->parms.iph.daddr &&
539
- !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
526
+ if ((daddr && !ipv4_is_multicast(daddr)) ||
540527 rt6->rt6i_dst.plen == 128) {
541528 rt6->rt6i_flags |= RTF_MODIFIED;
542529 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
....@@ -545,7 +532,7 @@
545532
546533 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
547534 mtu < pkt_size) {
548
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
535
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
549536 return -E2BIG;
550537 }
551538 }
....@@ -553,17 +540,19 @@
553540 return 0;
554541 }
555542
556
-void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
543
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
544
+ u8 proto, int tunnel_hlen)
557545 {
558546 struct ip_tunnel *tunnel = netdev_priv(dev);
559547 u32 headroom = sizeof(struct iphdr);
560548 struct ip_tunnel_info *tun_info;
561549 const struct ip_tunnel_key *key;
562550 const struct iphdr *inner_iph;
563
- struct rtable *rt;
551
+ struct rtable *rt = NULL;
564552 struct flowi4 fl4;
565553 __be16 df = 0;
566554 u8 tos, ttl;
555
+ bool use_cache;
567556
568557 tun_info = skb_tunnel_info(skb);
569558 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
....@@ -581,19 +570,37 @@
581570 }
582571 ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
583572 tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
584
- 0, skb->mark);
573
+ 0, skb->mark, skb_get_hash(skb));
585574 if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
586575 goto tx_error;
587
- rt = ip_route_output_key(tunnel->net, &fl4);
588
- if (IS_ERR(rt)) {
589
- dev->stats.tx_carrier_errors++;
590
- goto tx_error;
576
+
577
+ use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
578
+ if (use_cache)
579
+ rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
580
+ if (!rt) {
581
+ rt = ip_route_output_key(tunnel->net, &fl4);
582
+ if (IS_ERR(rt)) {
583
+ dev->stats.tx_carrier_errors++;
584
+ goto tx_error;
585
+ }
586
+ if (use_cache)
587
+ dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
588
+ fl4.saddr);
591589 }
592590 if (rt->dst.dev == dev) {
593591 ip_rt_put(rt);
594592 dev->stats.collisions++;
595593 goto tx_error;
596594 }
595
+
596
+ if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
597
+ df = htons(IP_DF);
598
+ if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
599
+ key->u.ipv4.dst, true)) {
600
+ ip_rt_put(rt);
601
+ goto tx_error;
602
+ }
603
+
597604 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
598605 ttl = key->ttl;
599606 if (ttl == 0) {
....@@ -604,15 +611,12 @@
604611 else
605612 ttl = ip4_dst_hoplimit(&rt->dst);
606613 }
607
- if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
608
- df = htons(IP_DF);
609
- else if (skb->protocol == htons(ETH_P_IP))
610
- df = inner_iph->frag_off & htons(IP_DF);
611
- headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
612
- if (headroom > dev->needed_headroom)
613
- dev->needed_headroom = headroom;
614614
615
- if (skb_cow_head(skb, dev->needed_headroom)) {
615
+ headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
616
+ if (headroom > READ_ONCE(dev->needed_headroom))
617
+ WRITE_ONCE(dev->needed_headroom, headroom);
618
+
619
+ if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
616620 ip_rt_put(rt);
617621 goto tx_dropped;
618622 }
....@@ -633,14 +637,17 @@
633637 const struct iphdr *tnl_params, u8 protocol)
634638 {
635639 struct ip_tunnel *tunnel = netdev_priv(dev);
640
+ struct ip_tunnel_info *tun_info = NULL;
636641 const struct iphdr *inner_iph;
637
- struct flowi4 fl4;
638
- u8 tos, ttl;
639
- __be16 df;
640
- struct rtable *rt; /* Route to the other host */
641642 unsigned int max_headroom; /* The extra header space needed */
642
- __be32 dst;
643
+ struct rtable *rt = NULL; /* Route to the other host */
644
+ bool use_cache = false;
645
+ struct flowi4 fl4;
646
+ bool md = false;
643647 bool connected;
648
+ u8 tos, ttl;
649
+ __be32 dst;
650
+ __be16 df;
644651
645652 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
646653 connected = (tunnel->parms.iph.daddr != 0);
....@@ -650,7 +657,6 @@
650657 dst = tnl_params->daddr;
651658 if (dst == 0) {
652659 /* NBMA tunnel */
653
- struct ip_tunnel_info *tun_info;
654660
655661 if (!skb_dst(skb)) {
656662 dev->stats.tx_fifo_errors++;
....@@ -660,8 +666,11 @@
660666 tun_info = skb_tunnel_info(skb);
661667 if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
662668 ip_tunnel_info_af(tun_info) == AF_INET &&
663
- tun_info->key.u.ipv4.dst)
669
+ tun_info->key.u.ipv4.dst) {
664670 dst = tun_info->key.u.ipv4.dst;
671
+ md = true;
672
+ connected = true;
673
+ }
665674 else if (skb->protocol == htons(ETH_P_IP)) {
666675 rt = skb_rtable(skb);
667676 dst = rt_nexthop(rt, inner_iph->daddr);
....@@ -700,7 +709,8 @@
700709 else
701710 goto tx_error;
702711
703
- connected = false;
712
+ if (!md)
713
+ connected = false;
704714 }
705715
706716 tos = tnl_params->tos;
....@@ -717,13 +727,20 @@
717727
718728 ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
719729 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
720
- tunnel->fwmark);
730
+ tunnel->fwmark, skb_get_hash(skb));
721731
722732 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
723733 goto tx_error;
724734
725
- rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
726
- NULL;
735
+ if (connected && md) {
736
+ use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
737
+ if (use_cache)
738
+ rt = dst_cache_get_ip4(&tun_info->dst_cache,
739
+ &fl4.saddr);
740
+ } else {
741
+ rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
742
+ &fl4.saddr) : NULL;
743
+ }
727744
728745 if (!rt) {
729746 rt = ip_route_output_key(tunnel->net, &fl4);
....@@ -732,7 +749,10 @@
732749 dev->stats.tx_carrier_errors++;
733750 goto tx_error;
734751 }
735
- if (connected)
752
+ if (use_cache)
753
+ dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
754
+ fl4.saddr);
755
+ else if (!md && connected)
736756 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
737757 fl4.saddr);
738758 }
....@@ -747,7 +767,7 @@
747767 if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
748768 df |= (inner_iph->frag_off & htons(IP_DF));
749769
750
- if (tnl_update_pmtu(dev, skb, rt, df, inner_iph)) {
770
+ if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
751771 ip_rt_put(rt);
752772 goto tx_error;
753773 }
....@@ -777,10 +797,10 @@
777797
778798 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
779799 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
780
- if (max_headroom > dev->needed_headroom)
781
- dev->needed_headroom = max_headroom;
800
+ if (max_headroom > READ_ONCE(dev->needed_headroom))
801
+ WRITE_ONCE(dev->needed_headroom, max_headroom);
782802
783
- if (skb_cow_head(skb, dev->needed_headroom)) {
803
+ if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
784804 ip_rt_put(rt);
785805 dev->stats.tx_dropped++;
786806 kfree_skb(skb);
....@@ -836,7 +856,7 @@
836856 netdev_state_change(dev);
837857 }
838858
839
-int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
859
+int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
840860 {
841861 int err = 0;
842862 struct ip_tunnel *t = netdev_priv(dev);
....@@ -936,6 +956,20 @@
936956 done:
937957 return err;
938958 }
959
+EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
960
+
961
+int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
962
+{
963
+ struct ip_tunnel_parm p;
964
+ int err;
965
+
966
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
967
+ return -EFAULT;
968
+ err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
969
+ if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
970
+ return -EFAULT;
971
+ return err;
972
+}
939973 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
940974
941975 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)