| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Copyright (c) 2013 Nicira, Inc. |
|---|
| 3 | | - * |
|---|
| 4 | | - * This program is free software; you can redistribute it and/or |
|---|
| 5 | | - * modify it under the terms of version 2 of the GNU General Public |
|---|
| 6 | | - * License as published by the Free Software Foundation. |
|---|
| 7 | | - * |
|---|
| 8 | | - * This program is distributed in the hope that it will be useful, but |
|---|
| 9 | | - * WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 10 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|---|
| 11 | | - * General Public License for more details. |
|---|
| 12 | | - * |
|---|
| 13 | | - * You should have received a copy of the GNU General Public License |
|---|
| 14 | | - * along with this program; if not, write to the Free Software |
|---|
| 15 | | - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
|---|
| 16 | | - * 02110-1301, USA |
|---|
| 17 | 4 | */ |
|---|
| 18 | 5 | |
|---|
| 19 | 6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
|---|
| .. | .. |
|---|
| 308 | 295 | ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, |
|---|
| 309 | 296 | iph->saddr, tunnel->parms.o_key, |
|---|
| 310 | 297 | RT_TOS(iph->tos), tunnel->parms.link, |
|---|
| 311 | | - tunnel->fwmark); |
|---|
| 298 | + tunnel->fwmark, 0); |
|---|
| 312 | 299 | rt = ip_route_output_key(tunnel->net, &fl4); |
|---|
| 313 | 300 | |
|---|
| 314 | 301 | if (!IS_ERR(rt)) { |
|---|
| .. | .. |
|---|
| 376 | 363 | const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, |
|---|
| 377 | 364 | bool log_ecn_error) |
|---|
| 378 | 365 | { |
|---|
| 379 | | - struct pcpu_sw_netstats *tstats; |
|---|
| 380 | 366 | const struct iphdr *iph = ip_hdr(skb); |
|---|
| 381 | 367 | int err; |
|---|
| 382 | 368 | |
|---|
| .. | .. |
|---|
| 404 | 390 | tunnel->i_seqno = ntohl(tpi->seq) + 1; |
|---|
| 405 | 391 | } |
|---|
| 406 | 392 | |
|---|
| 407 | | - skb_reset_network_header(skb); |
|---|
| 393 | + skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); |
|---|
| 408 | 394 | |
|---|
| 409 | 395 | err = IP_ECN_decapsulate(iph, skb); |
|---|
| 410 | 396 | if (unlikely(err)) { |
|---|
| .. | .. |
|---|
| 418 | 404 | } |
|---|
| 419 | 405 | } |
|---|
| 420 | 406 | |
|---|
| 421 | | - tstats = this_cpu_ptr(tunnel->dev->tstats); |
|---|
| 422 | | - u64_stats_update_begin(&tstats->syncp); |
|---|
| 423 | | - tstats->rx_packets++; |
|---|
| 424 | | - tstats->rx_bytes += skb->len; |
|---|
| 425 | | - u64_stats_update_end(&tstats->syncp); |
|---|
| 426 | | - |
|---|
| 407 | + dev_sw_netstats_rx_add(tunnel->dev, skb->len); |
|---|
| 427 | 408 | skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); |
|---|
| 428 | 409 | |
|---|
| 429 | 410 | if (tunnel->dev->type == ARPHRD_ETHER) { |
|---|
| .. | .. |
|---|
| 502 | 483 | |
|---|
| 503 | 484 | static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, |
|---|
| 504 | 485 | struct rtable *rt, __be16 df, |
|---|
| 505 | | - const struct iphdr *inner_iph) |
|---|
| 486 | + const struct iphdr *inner_iph, |
|---|
| 487 | + int tunnel_hlen, __be32 dst, bool md) |
|---|
| 506 | 488 | { |
|---|
| 507 | 489 | struct ip_tunnel *tunnel = netdev_priv(dev); |
|---|
| 508 | 490 | int pkt_size; |
|---|
| 509 | 491 | int mtu; |
|---|
| 510 | 492 | |
|---|
| 511 | | - pkt_size = skb->len - tunnel->hlen; |
|---|
| 493 | + tunnel_hlen = md ? tunnel_hlen : tunnel->hlen; |
|---|
| 494 | + pkt_size = skb->len - tunnel_hlen; |
|---|
| 512 | 495 | pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0; |
|---|
| 513 | 496 | |
|---|
| 514 | 497 | if (df) { |
|---|
| 515 | | - mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel->hlen); |
|---|
| 498 | + mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen); |
|---|
| 516 | 499 | mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0; |
|---|
| 517 | 500 | } else { |
|---|
| 518 | | - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; |
|---|
| 501 | + mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; |
|---|
| 519 | 502 | } |
|---|
| 520 | 503 | |
|---|
| 521 | | - skb_dst_update_pmtu_no_confirm(skb, mtu); |
|---|
| 504 | + if (skb_valid_dst(skb)) |
|---|
| 505 | + skb_dst_update_pmtu_no_confirm(skb, mtu); |
|---|
| 522 | 506 | |
|---|
| 523 | 507 | if (skb->protocol == htons(ETH_P_IP)) { |
|---|
| 524 | 508 | if (!skb_is_gso(skb) && |
|---|
| 525 | 509 | (inner_iph->frag_off & htons(IP_DF)) && |
|---|
| 526 | 510 | mtu < pkt_size) { |
|---|
| 527 | | - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); |
|---|
| 528 | | - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
|---|
| 511 | + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
|---|
| 529 | 512 | return -E2BIG; |
|---|
| 530 | 513 | } |
|---|
| 531 | 514 | } |
|---|
| 532 | 515 | #if IS_ENABLED(CONFIG_IPV6) |
|---|
| 533 | 516 | else if (skb->protocol == htons(ETH_P_IPV6)) { |
|---|
| 534 | | - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); |
|---|
| 517 | + struct rt6_info *rt6; |
|---|
| 518 | + __be32 daddr; |
|---|
| 519 | + |
|---|
| 520 | + rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) : |
|---|
| 521 | + NULL; |
|---|
| 522 | + daddr = md ? dst : tunnel->parms.iph.daddr; |
|---|
| 535 | 523 | |
|---|
| 536 | 524 | if (rt6 && mtu < dst_mtu(skb_dst(skb)) && |
|---|
| 537 | 525 | mtu >= IPV6_MIN_MTU) { |
|---|
| 538 | | - if ((tunnel->parms.iph.daddr && |
|---|
| 539 | | - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || |
|---|
| 526 | + if ((daddr && !ipv4_is_multicast(daddr)) || |
|---|
| 540 | 527 | rt6->rt6i_dst.plen == 128) { |
|---|
| 541 | 528 | rt6->rt6i_flags |= RTF_MODIFIED; |
|---|
| 542 | 529 | dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); |
|---|
| .. | .. |
|---|
| 545 | 532 | |
|---|
| 546 | 533 | if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && |
|---|
| 547 | 534 | mtu < pkt_size) { |
|---|
| 548 | | - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
|---|
| 535 | + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); |
|---|
| 549 | 536 | return -E2BIG; |
|---|
| 550 | 537 | } |
|---|
| 551 | 538 | } |
|---|
| .. | .. |
|---|
| 553 | 540 | return 0; |
|---|
| 554 | 541 | } |
|---|
| 555 | 542 | |
|---|
| 556 | | -void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) |
|---|
| 543 | +void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, |
|---|
| 544 | + u8 proto, int tunnel_hlen) |
|---|
| 557 | 545 | { |
|---|
| 558 | 546 | struct ip_tunnel *tunnel = netdev_priv(dev); |
|---|
| 559 | 547 | u32 headroom = sizeof(struct iphdr); |
|---|
| 560 | 548 | struct ip_tunnel_info *tun_info; |
|---|
| 561 | 549 | const struct ip_tunnel_key *key; |
|---|
| 562 | 550 | const struct iphdr *inner_iph; |
|---|
| 563 | | - struct rtable *rt; |
|---|
| 551 | + struct rtable *rt = NULL; |
|---|
| 564 | 552 | struct flowi4 fl4; |
|---|
| 565 | 553 | __be16 df = 0; |
|---|
| 566 | 554 | u8 tos, ttl; |
|---|
| 555 | + bool use_cache; |
|---|
| 567 | 556 | |
|---|
| 568 | 557 | tun_info = skb_tunnel_info(skb); |
|---|
| 569 | 558 | if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || |
|---|
| .. | .. |
|---|
| 581 | 570 | } |
|---|
| 582 | 571 | ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, |
|---|
| 583 | 572 | tunnel_id_to_key32(key->tun_id), RT_TOS(tos), |
|---|
| 584 | | - 0, skb->mark); |
|---|
| 573 | + 0, skb->mark, skb_get_hash(skb)); |
|---|
| 585 | 574 | if (tunnel->encap.type != TUNNEL_ENCAP_NONE) |
|---|
| 586 | 575 | goto tx_error; |
|---|
| 587 | | - rt = ip_route_output_key(tunnel->net, &fl4); |
|---|
| 588 | | - if (IS_ERR(rt)) { |
|---|
| 589 | | - dev->stats.tx_carrier_errors++; |
|---|
| 590 | | - goto tx_error; |
|---|
| 576 | + |
|---|
| 577 | + use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); |
|---|
| 578 | + if (use_cache) |
|---|
| 579 | + rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr); |
|---|
| 580 | + if (!rt) { |
|---|
| 581 | + rt = ip_route_output_key(tunnel->net, &fl4); |
|---|
| 582 | + if (IS_ERR(rt)) { |
|---|
| 583 | + dev->stats.tx_carrier_errors++; |
|---|
| 584 | + goto tx_error; |
|---|
| 585 | + } |
|---|
| 586 | + if (use_cache) |
|---|
| 587 | + dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, |
|---|
| 588 | + fl4.saddr); |
|---|
| 591 | 589 | } |
|---|
| 592 | 590 | if (rt->dst.dev == dev) { |
|---|
| 593 | 591 | ip_rt_put(rt); |
|---|
| 594 | 592 | dev->stats.collisions++; |
|---|
| 595 | 593 | goto tx_error; |
|---|
| 596 | 594 | } |
|---|
| 595 | + |
|---|
| 596 | + if (key->tun_flags & TUNNEL_DONT_FRAGMENT) |
|---|
| 597 | + df = htons(IP_DF); |
|---|
| 598 | + if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen, |
|---|
| 599 | + key->u.ipv4.dst, true)) { |
|---|
| 600 | + ip_rt_put(rt); |
|---|
| 601 | + goto tx_error; |
|---|
| 602 | + } |
|---|
| 603 | + |
|---|
| 597 | 604 | tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); |
|---|
| 598 | 605 | ttl = key->ttl; |
|---|
| 599 | 606 | if (ttl == 0) { |
|---|
| .. | .. |
|---|
| 604 | 611 | else |
|---|
| 605 | 612 | ttl = ip4_dst_hoplimit(&rt->dst); |
|---|
| 606 | 613 | } |
|---|
| 607 | | - if (key->tun_flags & TUNNEL_DONT_FRAGMENT) |
|---|
| 608 | | - df = htons(IP_DF); |
|---|
| 609 | | - else if (skb->protocol == htons(ETH_P_IP)) |
|---|
| 610 | | - df = inner_iph->frag_off & htons(IP_DF); |
|---|
| 611 | | - headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; |
|---|
| 612 | | - if (headroom > dev->needed_headroom) |
|---|
| 613 | | - dev->needed_headroom = headroom; |
|---|
| 614 | 614 | |
|---|
| 615 | | - if (skb_cow_head(skb, dev->needed_headroom)) { |
|---|
| 615 | + headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; |
|---|
| 616 | + if (headroom > READ_ONCE(dev->needed_headroom)) |
|---|
| 617 | + WRITE_ONCE(dev->needed_headroom, headroom); |
|---|
| 618 | + |
|---|
| 619 | + if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { |
|---|
| 616 | 620 | ip_rt_put(rt); |
|---|
| 617 | 621 | goto tx_dropped; |
|---|
| 618 | 622 | } |
|---|
| .. | .. |
|---|
| 633 | 637 | const struct iphdr *tnl_params, u8 protocol) |
|---|
| 634 | 638 | { |
|---|
| 635 | 639 | struct ip_tunnel *tunnel = netdev_priv(dev); |
|---|
| 640 | + struct ip_tunnel_info *tun_info = NULL; |
|---|
| 636 | 641 | const struct iphdr *inner_iph; |
|---|
| 637 | | - struct flowi4 fl4; |
|---|
| 638 | | - u8 tos, ttl; |
|---|
| 639 | | - __be16 df; |
|---|
| 640 | | - struct rtable *rt; /* Route to the other host */ |
|---|
| 641 | 642 | unsigned int max_headroom; /* The extra header space needed */ |
|---|
| 642 | | - __be32 dst; |
|---|
| 643 | + struct rtable *rt = NULL; /* Route to the other host */ |
|---|
| 644 | + bool use_cache = false; |
|---|
| 645 | + struct flowi4 fl4; |
|---|
| 646 | + bool md = false; |
|---|
| 643 | 647 | bool connected; |
|---|
| 648 | + u8 tos, ttl; |
|---|
| 649 | + __be32 dst; |
|---|
| 650 | + __be16 df; |
|---|
| 644 | 651 | |
|---|
| 645 | 652 | inner_iph = (const struct iphdr *)skb_inner_network_header(skb); |
|---|
| 646 | 653 | connected = (tunnel->parms.iph.daddr != 0); |
|---|
| .. | .. |
|---|
| 650 | 657 | dst = tnl_params->daddr; |
|---|
| 651 | 658 | if (dst == 0) { |
|---|
| 652 | 659 | /* NBMA tunnel */ |
|---|
| 653 | | - struct ip_tunnel_info *tun_info; |
|---|
| 654 | 660 | |
|---|
| 655 | 661 | if (!skb_dst(skb)) { |
|---|
| 656 | 662 | dev->stats.tx_fifo_errors++; |
|---|
| .. | .. |
|---|
| 660 | 666 | tun_info = skb_tunnel_info(skb); |
|---|
| 661 | 667 | if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) && |
|---|
| 662 | 668 | ip_tunnel_info_af(tun_info) == AF_INET && |
|---|
| 663 | | - tun_info->key.u.ipv4.dst) |
|---|
| 669 | + tun_info->key.u.ipv4.dst) { |
|---|
| 664 | 670 | dst = tun_info->key.u.ipv4.dst; |
|---|
| 671 | + md = true; |
|---|
| 672 | + connected = true; |
|---|
| 673 | + } |
|---|
| 665 | 674 | else if (skb->protocol == htons(ETH_P_IP)) { |
|---|
| 666 | 675 | rt = skb_rtable(skb); |
|---|
| 667 | 676 | dst = rt_nexthop(rt, inner_iph->daddr); |
|---|
| .. | .. |
|---|
| 700 | 709 | else |
|---|
| 701 | 710 | goto tx_error; |
|---|
| 702 | 711 | |
|---|
| 703 | | - connected = false; |
|---|
| 712 | + if (!md) |
|---|
| 713 | + connected = false; |
|---|
| 704 | 714 | } |
|---|
| 705 | 715 | |
|---|
| 706 | 716 | tos = tnl_params->tos; |
|---|
| .. | .. |
|---|
| 717 | 727 | |
|---|
| 718 | 728 | ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, |
|---|
| 719 | 729 | tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, |
|---|
| 720 | | - tunnel->fwmark); |
|---|
| 730 | + tunnel->fwmark, skb_get_hash(skb)); |
|---|
| 721 | 731 | |
|---|
| 722 | 732 | if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) |
|---|
| 723 | 733 | goto tx_error; |
|---|
| 724 | 734 | |
|---|
| 725 | | - rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) : |
|---|
| 726 | | - NULL; |
|---|
| 735 | + if (connected && md) { |
|---|
| 736 | + use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); |
|---|
| 737 | + if (use_cache) |
|---|
| 738 | + rt = dst_cache_get_ip4(&tun_info->dst_cache, |
|---|
| 739 | + &fl4.saddr); |
|---|
| 740 | + } else { |
|---|
| 741 | + rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, |
|---|
| 742 | + &fl4.saddr) : NULL; |
|---|
| 743 | + } |
|---|
| 727 | 744 | |
|---|
| 728 | 745 | if (!rt) { |
|---|
| 729 | 746 | rt = ip_route_output_key(tunnel->net, &fl4); |
|---|
| .. | .. |
|---|
| 732 | 749 | dev->stats.tx_carrier_errors++; |
|---|
| 733 | 750 | goto tx_error; |
|---|
| 734 | 751 | } |
|---|
| 735 | | - if (connected) |
|---|
| 752 | + if (use_cache) |
|---|
| 753 | + dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, |
|---|
| 754 | + fl4.saddr); |
|---|
| 755 | + else if (!md && connected) |
|---|
| 736 | 756 | dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, |
|---|
| 737 | 757 | fl4.saddr); |
|---|
| 738 | 758 | } |
|---|
| .. | .. |
|---|
| 747 | 767 | if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df) |
|---|
| 748 | 768 | df |= (inner_iph->frag_off & htons(IP_DF)); |
|---|
| 749 | 769 | |
|---|
| 750 | | - if (tnl_update_pmtu(dev, skb, rt, df, inner_iph)) { |
|---|
| 770 | + if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) { |
|---|
| 751 | 771 | ip_rt_put(rt); |
|---|
| 752 | 772 | goto tx_error; |
|---|
| 753 | 773 | } |
|---|
| .. | .. |
|---|
| 777 | 797 | |
|---|
| 778 | 798 | max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) |
|---|
| 779 | 799 | + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); |
|---|
| 780 | | - if (max_headroom > dev->needed_headroom) |
|---|
| 781 | | - dev->needed_headroom = max_headroom; |
|---|
| 800 | + if (max_headroom > READ_ONCE(dev->needed_headroom)) |
|---|
| 801 | + WRITE_ONCE(dev->needed_headroom, max_headroom); |
|---|
| 782 | 802 | |
|---|
| 783 | | - if (skb_cow_head(skb, dev->needed_headroom)) { |
|---|
| 803 | + if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { |
|---|
| 784 | 804 | ip_rt_put(rt); |
|---|
| 785 | 805 | dev->stats.tx_dropped++; |
|---|
| 786 | 806 | kfree_skb(skb); |
|---|
| .. | .. |
|---|
| 836 | 856 | netdev_state_change(dev); |
|---|
| 837 | 857 | } |
|---|
| 838 | 858 | |
|---|
| 839 | | -int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) |
|---|
| 859 | +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) |
|---|
| 840 | 860 | { |
|---|
| 841 | 861 | int err = 0; |
|---|
| 842 | 862 | struct ip_tunnel *t = netdev_priv(dev); |
|---|
| .. | .. |
|---|
| 936 | 956 | done: |
|---|
| 937 | 957 | return err; |
|---|
| 938 | 958 | } |
|---|
| 959 | +EXPORT_SYMBOL_GPL(ip_tunnel_ctl); |
|---|
| 960 | + |
|---|
| 961 | +int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) |
|---|
| 962 | +{ |
|---|
| 963 | + struct ip_tunnel_parm p; |
|---|
| 964 | + int err; |
|---|
| 965 | + |
|---|
| 966 | + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) |
|---|
| 967 | + return -EFAULT; |
|---|
| 968 | + err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd); |
|---|
| 969 | + if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) |
|---|
| 970 | + return -EFAULT; |
|---|
| 971 | + return err; |
|---|
| 972 | +} |
|---|
| 939 | 973 | EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); |
|---|
| 940 | 974 | |
|---|
| 941 | 975 | int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) |
|---|