hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/net/ipv6/ip6_output.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * IPv6 output functions
34 * Linux INET6 implementation
....@@ -6,11 +7,6 @@
67 * Pedro Roque <roque@di.fc.ul.pt>
78 *
89 * Based on linux/net/ipv4/ip_output.c
9
- *
10
- * This program is free software; you can redistribute it and/or
11
- * modify it under the terms of the GNU General Public License
12
- * as published by the Free Software Foundation; either version
13
- * 2 of the License, or (at your option) any later version.
1410 *
1511 * Changes:
1612 * A.N.Kuznetsov : arithmetic in fragmentation.
....@@ -58,15 +54,43 @@
5854 #include <linux/mroute6.h>
5955 #include <net/l3mdev.h>
6056 #include <net/lwtunnel.h>
61
-#include <soc/rockchip/android-version.h>
57
+#include <net/ip_tunnels.h>
6258
6359 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
6460 {
6561 struct dst_entry *dst = skb_dst(skb);
6662 struct net_device *dev = dst->dev;
63
+ unsigned int hh_len = LL_RESERVED_SPACE(dev);
64
+ int delta = hh_len - skb_headroom(skb);
65
+ const struct in6_addr *nexthop;
6766 struct neighbour *neigh;
68
- struct in6_addr *nexthop;
6967 int ret;
68
+
69
+ /* Be paranoid, rather than too clever. */
70
+ if (unlikely(delta > 0) && dev->header_ops) {
71
+ /* pskb_expand_head() might crash, if skb is shared */
72
+ if (skb_shared(skb)) {
73
+ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
74
+
75
+ if (likely(nskb)) {
76
+ if (skb->sk)
77
+ skb_set_owner_w(nskb, skb->sk);
78
+ consume_skb(skb);
79
+ } else {
80
+ kfree_skb(skb);
81
+ }
82
+ skb = nskb;
83
+ }
84
+ if (skb &&
85
+ pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
86
+ kfree_skb(skb);
87
+ skb = NULL;
88
+ }
89
+ if (!skb) {
90
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
91
+ return -ENOMEM;
92
+ }
93
+ }
7094
7195 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
7296 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
....@@ -107,7 +131,7 @@
107131 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
108132 int res = lwtunnel_xmit(skb);
109133
110
- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
134
+ if (res != LWTUNNEL_XMIT_CONTINUE)
111135 return res;
112136 }
113137
....@@ -118,7 +142,7 @@
118142 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
119143 if (!IS_ERR(neigh)) {
120144 sock_confirm_neigh(skb, neigh);
121
- ret = neigh_output(neigh, skb);
145
+ ret = neigh_output(neigh, skb, false);
122146 rcu_read_unlock_bh();
123147 return ret;
124148 }
....@@ -162,16 +186,9 @@
162186 return ret;
163187 }
164188
165
-static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
189
+static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
166190 {
167191 unsigned int mtu;
168
- int ret;
169
-
170
- ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
171
- if (ret) {
172
- kfree_skb(skb);
173
- return ret;
174
- }
175192
176193 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
177194 /* Policy lookup after SNAT yielded a new policy */
....@@ -193,9 +210,25 @@
193210 return ip6_finish_output2(net, sk, skb);
194211 }
195212
213
+static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
214
+{
215
+ int ret;
216
+
217
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
218
+ switch (ret) {
219
+ case NET_XMIT_SUCCESS:
220
+ return __ip6_finish_output(net, sk, skb);
221
+ case NET_XMIT_CN:
222
+ return __ip6_finish_output(net, sk, skb) ? : ret;
223
+ default:
224
+ kfree_skb(skb);
225
+ return ret;
226
+ }
227
+}
228
+
196229 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
197230 {
198
- struct net_device *dev = skb_dst(skb)->dev;
231
+ struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
199232 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
200233
201234 skb->protocol = htons(ETH_P_IPV6);
....@@ -208,7 +241,7 @@
208241 }
209242
210243 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
211
- net, sk, skb, NULL, dev,
244
+ net, sk, skb, indev, dev,
212245 ip6_finish_output,
213246 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
214247 }
....@@ -228,7 +261,7 @@
228261 * which are using proper atomic operations or spinlocks.
229262 */
230263 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
231
- __u32 mark, struct ipv6_txoptions *opt, int tclass)
264
+ __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
232265 {
233266 struct net *net = sock_net(sk);
234267 const struct ipv6_pinfo *np = inet6_sk(sk);
....@@ -293,7 +326,7 @@
293326 hdr->daddr = *first_hop;
294327
295328 skb->protocol = htons(ETH_P_IPV6);
296
- skb->priority = sk->sk_priority;
329
+ skb->priority = priority;
297330 skb->mark = mark;
298331
299332 mtu = dst_mtu(dst);
....@@ -339,6 +372,12 @@
339372 if (sk && ra->sel == sel &&
340373 (!sk->sk_bound_dev_if ||
341374 sk->sk_bound_dev_if == skb->dev->ifindex)) {
375
+ struct ipv6_pinfo *np = inet6_sk(sk);
376
+
377
+ if (np && np->rtalert_isolate &&
378
+ !net_eq(sock_net(sk), dev_net(skb->dev))) {
379
+ continue;
380
+ }
342381 if (last) {
343382 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
344383 if (skb2)
....@@ -417,6 +456,13 @@
417456 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
418457 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
419458
459
+#ifdef CONFIG_NET_SWITCHDEV
460
+ if (skb->offload_l3_fwd_mark) {
461
+ consume_skb(skb);
462
+ return 0;
463
+ }
464
+#endif
465
+
420466 skb->tstamp = 0;
421467 return dst_output(net, sk, skb);
422468 }
....@@ -441,13 +487,14 @@
441487
442488 int ip6_forward(struct sk_buff *skb)
443489 {
444
- struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
445490 struct dst_entry *dst = skb_dst(skb);
446491 struct ipv6hdr *hdr = ipv6_hdr(skb);
447492 struct inet6_skb_parm *opt = IP6CB(skb);
448493 struct net *net = dev_net(dst->dev);
494
+ struct inet6_dev *idev;
449495 u32 mtu;
450496
497
+ idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
451498 if (net->ipv6.devconf_all->forwarding == 0)
452499 goto error;
453500
....@@ -461,7 +508,7 @@
461508 goto drop;
462509
463510 if (!net->ipv6.devconf_all->disable_policy &&
464
- !idev->cnf.disable_policy &&
511
+ (!idev || !idev->cnf.disable_policy) &&
465512 !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
466513 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
467514 goto drop;
....@@ -491,8 +538,6 @@
491538 * check and decrement ttl
492539 */
493540 if (hdr->hop_limit <= 1) {
494
- /* Force OUTPUT device used as source address */
495
- skb->dev = dst->dev;
496541 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
497542 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
498543
....@@ -616,8 +661,172 @@
616661 to->tc_index = from->tc_index;
617662 #endif
618663 nf_copy(to, from);
664
+ skb_ext_copy(to, from);
619665 skb_copy_secmark(to, from);
620666 }
667
+
668
+int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
669
+ u8 nexthdr, __be32 frag_id,
670
+ struct ip6_fraglist_iter *iter)
671
+{
672
+ unsigned int first_len;
673
+ struct frag_hdr *fh;
674
+
675
+ /* BUILD HEADER */
676
+ *prevhdr = NEXTHDR_FRAGMENT;
677
+ iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
678
+ if (!iter->tmp_hdr)
679
+ return -ENOMEM;
680
+
681
+ iter->frag = skb_shinfo(skb)->frag_list;
682
+ skb_frag_list_init(skb);
683
+
684
+ iter->offset = 0;
685
+ iter->hlen = hlen;
686
+ iter->frag_id = frag_id;
687
+ iter->nexthdr = nexthdr;
688
+
689
+ __skb_pull(skb, hlen);
690
+ fh = __skb_push(skb, sizeof(struct frag_hdr));
691
+ __skb_push(skb, hlen);
692
+ skb_reset_network_header(skb);
693
+ memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
694
+
695
+ fh->nexthdr = nexthdr;
696
+ fh->reserved = 0;
697
+ fh->frag_off = htons(IP6_MF);
698
+ fh->identification = frag_id;
699
+
700
+ first_len = skb_pagelen(skb);
701
+ skb->data_len = first_len - skb_headlen(skb);
702
+ skb->len = first_len;
703
+ ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
704
+
705
+ return 0;
706
+}
707
+EXPORT_SYMBOL(ip6_fraglist_init);
708
+
709
+void ip6_fraglist_prepare(struct sk_buff *skb,
710
+ struct ip6_fraglist_iter *iter)
711
+{
712
+ struct sk_buff *frag = iter->frag;
713
+ unsigned int hlen = iter->hlen;
714
+ struct frag_hdr *fh;
715
+
716
+ frag->ip_summed = CHECKSUM_NONE;
717
+ skb_reset_transport_header(frag);
718
+ fh = __skb_push(frag, sizeof(struct frag_hdr));
719
+ __skb_push(frag, hlen);
720
+ skb_reset_network_header(frag);
721
+ memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
722
+ iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
723
+ fh->nexthdr = iter->nexthdr;
724
+ fh->reserved = 0;
725
+ fh->frag_off = htons(iter->offset);
726
+ if (frag->next)
727
+ fh->frag_off |= htons(IP6_MF);
728
+ fh->identification = iter->frag_id;
729
+ ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
730
+ ip6_copy_metadata(frag, skb);
731
+}
732
+EXPORT_SYMBOL(ip6_fraglist_prepare);
733
+
734
+void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
735
+ unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
736
+ u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
737
+{
738
+ state->prevhdr = prevhdr;
739
+ state->nexthdr = nexthdr;
740
+ state->frag_id = frag_id;
741
+
742
+ state->hlen = hlen;
743
+ state->mtu = mtu;
744
+
745
+ state->left = skb->len - hlen; /* Space per frame */
746
+ state->ptr = hlen; /* Where to start from */
747
+
748
+ state->hroom = hdr_room;
749
+ state->troom = needed_tailroom;
750
+
751
+ state->offset = 0;
752
+}
753
+EXPORT_SYMBOL(ip6_frag_init);
754
+
755
+struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
756
+{
757
+ u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
758
+ struct sk_buff *frag;
759
+ struct frag_hdr *fh;
760
+ unsigned int len;
761
+
762
+ len = state->left;
763
+ /* IF: it doesn't fit, use 'mtu' - the data space left */
764
+ if (len > state->mtu)
765
+ len = state->mtu;
766
+ /* IF: we are not sending up to and including the packet end
767
+ then align the next start on an eight byte boundary */
768
+ if (len < state->left)
769
+ len &= ~7;
770
+
771
+ /* Allocate buffer */
772
+ frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
773
+ state->hroom + state->troom, GFP_ATOMIC);
774
+ if (!frag)
775
+ return ERR_PTR(-ENOMEM);
776
+
777
+ /*
778
+ * Set up data on packet
779
+ */
780
+
781
+ ip6_copy_metadata(frag, skb);
782
+ skb_reserve(frag, state->hroom);
783
+ skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
784
+ skb_reset_network_header(frag);
785
+ fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
786
+ frag->transport_header = (frag->network_header + state->hlen +
787
+ sizeof(struct frag_hdr));
788
+
789
+ /*
790
+ * Charge the memory for the fragment to any owner
791
+ * it might possess
792
+ */
793
+ if (skb->sk)
794
+ skb_set_owner_w(frag, skb->sk);
795
+
796
+ /*
797
+ * Copy the packet header into the new buffer.
798
+ */
799
+ skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
800
+
801
+ fragnexthdr_offset = skb_network_header(frag);
802
+ fragnexthdr_offset += prevhdr - skb_network_header(skb);
803
+ *fragnexthdr_offset = NEXTHDR_FRAGMENT;
804
+
805
+ /*
806
+ * Build fragment header.
807
+ */
808
+ fh->nexthdr = state->nexthdr;
809
+ fh->reserved = 0;
810
+ fh->identification = state->frag_id;
811
+
812
+ /*
813
+ * Copy a block of the IP datagram.
814
+ */
815
+ BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
816
+ len));
817
+ state->left -= len;
818
+
819
+ fh->frag_off = htons(state->offset);
820
+ if (state->left > 0)
821
+ fh->frag_off |= htons(IP6_MF);
822
+ ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
823
+
824
+ state->ptr += len;
825
+ state->offset += len;
826
+
827
+ return frag;
828
+}
829
+EXPORT_SYMBOL(ip6_frag_next);
621830
622831 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
623832 int (*output)(struct net *, struct sock *, struct sk_buff *))
....@@ -626,12 +835,11 @@
626835 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
627836 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
628837 inet6_sk(skb->sk) : NULL;
629
- struct ipv6hdr *tmp_hdr;
630
- struct frag_hdr *fh;
631
- unsigned int mtu, hlen, left, len, nexthdr_offset;
632
- int hroom, troom;
838
+ struct ip6_frag_state state;
839
+ unsigned int mtu, hlen, nexthdr_offset;
840
+ ktime_t tstamp = skb->tstamp;
841
+ int hroom, err = 0;
633842 __be32 frag_id;
634
- int ptr, offset = 0, err = 0;
635843 u8 *prevhdr, nexthdr = 0;
636844
637845 err = ip6_find_1stfragopt(skb, &prevhdr);
....@@ -678,6 +886,7 @@
678886 hroom = LL_RESERVED_SPACE(rt->dst.dev);
679887 if (skb_has_frag_list(skb)) {
680888 unsigned int first_len = skb_pagelen(skb);
889
+ struct ip6_fraglist_iter iter;
681890 struct sk_buff *frag2;
682891
683892 if (first_len - hlen > mtu ||
....@@ -705,85 +914,46 @@
705914 skb->truesize -= frag->truesize;
706915 }
707916
708
- err = 0;
709
- offset = 0;
710
- /* BUILD HEADER */
711
-
712
- *prevhdr = NEXTHDR_FRAGMENT;
713
- tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
714
- if (!tmp_hdr) {
715
- err = -ENOMEM;
917
+ err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
918
+ &iter);
919
+ if (err < 0)
716920 goto fail;
717
- }
718
- frag = skb_shinfo(skb)->frag_list;
719
- skb_frag_list_init(skb);
720921
721
- __skb_pull(skb, hlen);
722
- fh = __skb_push(skb, sizeof(struct frag_hdr));
723
- __skb_push(skb, hlen);
724
- skb_reset_network_header(skb);
725
- memcpy(skb_network_header(skb), tmp_hdr, hlen);
726
-
727
- fh->nexthdr = nexthdr;
728
- fh->reserved = 0;
729
- fh->frag_off = htons(IP6_MF);
730
- fh->identification = frag_id;
731
-
732
- first_len = skb_pagelen(skb);
733
- skb->data_len = first_len - skb_headlen(skb);
734
- skb->len = first_len;
735
- ipv6_hdr(skb)->payload_len = htons(first_len -
736
- sizeof(struct ipv6hdr));
922
+ /* We prevent @rt from being freed. */
923
+ rcu_read_lock();
737924
738925 for (;;) {
739926 /* Prepare header of the next frame,
740927 * before previous one went down. */
741
- if (frag) {
742
- frag->ip_summed = CHECKSUM_NONE;
743
- skb_reset_transport_header(frag);
744
- fh = __skb_push(frag, sizeof(struct frag_hdr));
745
- __skb_push(frag, hlen);
746
- skb_reset_network_header(frag);
747
- memcpy(skb_network_header(frag), tmp_hdr,
748
- hlen);
749
- offset += skb->len - hlen - sizeof(struct frag_hdr);
750
- fh->nexthdr = nexthdr;
751
- fh->reserved = 0;
752
- fh->frag_off = htons(offset);
753
- if (frag->next)
754
- fh->frag_off |= htons(IP6_MF);
755
- fh->identification = frag_id;
756
- ipv6_hdr(frag)->payload_len =
757
- htons(frag->len -
758
- sizeof(struct ipv6hdr));
759
- ip6_copy_metadata(frag, skb);
760
- }
928
+ if (iter.frag)
929
+ ip6_fraglist_prepare(skb, &iter);
761930
931
+ skb->tstamp = tstamp;
762932 err = output(net, sk, skb);
763933 if (!err)
764934 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
765935 IPSTATS_MIB_FRAGCREATES);
766936
767
- if (err || !frag)
937
+ if (err || !iter.frag)
768938 break;
769939
770
- skb = frag;
771
- frag = skb->next;
772
- skb->next = NULL;
940
+ skb = ip6_fraglist_next(&iter);
773941 }
774942
775
- kfree(tmp_hdr);
943
+ kfree(iter.tmp_hdr);
776944
777945 if (err == 0) {
778946 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
779947 IPSTATS_MIB_FRAGOKS);
948
+ rcu_read_unlock();
780949 return 0;
781950 }
782951
783
- kfree_skb_list(frag);
952
+ kfree_skb_list(iter.frag);
784953
785954 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
786955 IPSTATS_MIB_FRAGFAILS);
956
+ rcu_read_unlock();
787957 return err;
788958
789959 slow_path_clean:
....@@ -797,93 +967,29 @@
797967 }
798968
799969 slow_path:
800
- left = skb->len - hlen; /* Space per frame */
801
- ptr = hlen; /* Where to start from */
802
-
803970 /*
804971 * Fragment the datagram.
805972 */
806973
807
- troom = rt->dst.dev->needed_tailroom;
974
+ ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
975
+ LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
976
+ &state);
808977
809978 /*
810979 * Keep copying data until we run out.
811980 */
812
- while (left > 0) {
813
- u8 *fragnexthdr_offset;
814981
815
- len = left;
816
- /* IF: it doesn't fit, use 'mtu' - the data space left */
817
- if (len > mtu)
818
- len = mtu;
819
- /* IF: we are not sending up to and including the packet end
820
- then align the next start on an eight byte boundary */
821
- if (len < left) {
822
- len &= ~7;
823
- }
824
-
825
- /* Allocate buffer */
826
- frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
827
- hroom + troom, GFP_ATOMIC);
828
- if (!frag) {
829
- err = -ENOMEM;
982
+ while (state.left > 0) {
983
+ frag = ip6_frag_next(skb, &state);
984
+ if (IS_ERR(frag)) {
985
+ err = PTR_ERR(frag);
830986 goto fail;
831987 }
832988
833989 /*
834
- * Set up data on packet
835
- */
836
-
837
- ip6_copy_metadata(frag, skb);
838
- skb_reserve(frag, hroom);
839
- skb_put(frag, len + hlen + sizeof(struct frag_hdr));
840
- skb_reset_network_header(frag);
841
- fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
842
- frag->transport_header = (frag->network_header + hlen +
843
- sizeof(struct frag_hdr));
844
-
845
- /*
846
- * Charge the memory for the fragment to any owner
847
- * it might possess
848
- */
849
- if (skb->sk)
850
- skb_set_owner_w(frag, skb->sk);
851
-
852
- /*
853
- * Copy the packet header into the new buffer.
854
- */
855
- skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
856
-
857
- fragnexthdr_offset = skb_network_header(frag);
858
- fragnexthdr_offset += prevhdr - skb_network_header(skb);
859
- *fragnexthdr_offset = NEXTHDR_FRAGMENT;
860
-
861
- /*
862
- * Build fragment header.
863
- */
864
- fh->nexthdr = nexthdr;
865
- fh->reserved = 0;
866
- fh->identification = frag_id;
867
-
868
- /*
869
- * Copy a block of the IP datagram.
870
- */
871
- BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
872
- len));
873
- left -= len;
874
-
875
- fh->frag_off = htons(offset);
876
- if (left > 0)
877
- fh->frag_off |= htons(IP6_MF);
878
- ipv6_hdr(frag)->payload_len = htons(frag->len -
879
- sizeof(struct ipv6hdr));
880
-
881
- ptr += len;
882
- offset += len;
883
-
884
- /*
885990 * Put this fragment into the sending queue.
886991 */
992
+ frag->tstamp = tstamp;
887993 err = output(net, sk, frag);
888994 if (err)
889995 goto fail;
....@@ -1066,14 +1172,11 @@
10661172 }
10671173 }
10681174 #endif
1069
-#if defined(CONFIG_ANDROID_VERSION) && CONFIG_ANDROID_VERSION < ANDROID_VERSION(8, 0, 0, 0)
1070
-#else
10711175 if (ipv6_addr_v4mapped(&fl6->saddr) &&
10721176 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
10731177 err = -EAFNOSUPPORT;
10741178 goto out_err_release;
10751179 }
1076
-#endif
10771180
10781181 return 0;
10791182
....@@ -1088,6 +1191,7 @@
10881191
10891192 /**
10901193 * ip6_dst_lookup - perform route lookup on flow
1194
+ * @net: Network namespace to perform lookup in
10911195 * @sk: socket which provides route info
10921196 * @dst: pointer to dst_entry * for result
10931197 * @fl6: flow to lookup
....@@ -1106,6 +1210,7 @@
11061210
11071211 /**
11081212 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1213
+ * @net: Network namespace to perform lookup in
11091214 * @sk: socket which provides route info
11101215 * @fl6: flow to lookup
11111216 * @final_dst: final destination address for ipsec lookup
....@@ -1166,6 +1271,74 @@
11661271 return dst;
11671272 }
11681273 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1274
+
1275
+/**
1276
+ * ip6_dst_lookup_tunnel - perform route lookup on tunnel
1277
+ * @skb: Packet for which lookup is done
1278
+ * @dev: Tunnel device
1279
+ * @net: Network namespace of tunnel device
1280
+ * @sock: Socket which provides route info
1281
+ * @saddr: Memory to store the src ip address
1282
+ * @info: Tunnel information
1283
+ * @protocol: IP protocol
1284
+ * @use_cache: Flag to enable cache usage
1285
+ * This function performs a route lookup on a tunnel
1286
+ *
1287
+ * It returns a valid dst pointer and stores src address to be used in
1288
+ * tunnel in param saddr on success, else a pointer encoded error code.
1289
+ */
1290
+
1291
+struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1292
+ struct net_device *dev,
1293
+ struct net *net,
1294
+ struct socket *sock,
1295
+ struct in6_addr *saddr,
1296
+ const struct ip_tunnel_info *info,
1297
+ u8 protocol,
1298
+ bool use_cache)
1299
+{
1300
+ struct dst_entry *dst = NULL;
1301
+#ifdef CONFIG_DST_CACHE
1302
+ struct dst_cache *dst_cache;
1303
+#endif
1304
+ struct flowi6 fl6;
1305
+ __u8 prio;
1306
+
1307
+#ifdef CONFIG_DST_CACHE
1308
+ dst_cache = (struct dst_cache *)&info->dst_cache;
1309
+ if (use_cache) {
1310
+ dst = dst_cache_get_ip6(dst_cache, saddr);
1311
+ if (dst)
1312
+ return dst;
1313
+ }
1314
+#endif
1315
+ memset(&fl6, 0, sizeof(fl6));
1316
+ fl6.flowi6_mark = skb->mark;
1317
+ fl6.flowi6_proto = protocol;
1318
+ fl6.daddr = info->key.u.ipv6.dst;
1319
+ fl6.saddr = info->key.u.ipv6.src;
1320
+ prio = info->key.tos;
1321
+ fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);
1322
+
1323
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1324
+ NULL);
1325
+ if (IS_ERR(dst)) {
1326
+ netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1327
+ return ERR_PTR(-ENETUNREACH);
1328
+ }
1329
+ if (dst->dev == dev) { /* is this necessary? */
1330
+ netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1331
+ dst_release(dst);
1332
+ return ERR_PTR(-ELOOP);
1333
+ }
1334
+#ifdef CONFIG_DST_CACHE
1335
+ if (use_cache)
1336
+ dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1337
+#endif
1338
+ *saddr = fl6.saddr;
1339
+ return dst;
1340
+}
1341
+EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
11691342
11701343 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
11711344 gfp_t gfp)
....@@ -1263,11 +1436,10 @@
12631436 if (np->frag_size)
12641437 mtu = np->frag_size;
12651438 }
1266
- if (mtu < IPV6_MIN_MTU)
1267
- return -EINVAL;
12681439 cork->base.fragsize = mtu;
12691440 cork->base.gso_size = ipc6->gso_size;
12701441 cork->base.tx_flags = 0;
1442
+ cork->base.mark = ipc6->sockc.mark;
12711443 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
12721444
12731445 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
....@@ -1292,6 +1464,7 @@
12921464 {
12931465 struct sk_buff *skb, *skb_prev = NULL;
12941466 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1467
+ struct ubuf_info *uarg = NULL;
12951468 int exthdrlen = 0;
12961469 int dst_exthdrlen = 0;
12971470 int hh_len;
....@@ -1304,7 +1477,7 @@
13041477 int csummode = CHECKSUM_NONE;
13051478 unsigned int maxnonfragsize, headersize;
13061479 unsigned int wmem_alloc_delta = 0;
1307
- bool paged;
1480
+ bool paged, extra_uref = false;
13081481
13091482 skb = skb_peek_tail(queue);
13101483 if (!skb) {
....@@ -1324,14 +1497,19 @@
13241497
13251498 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
13261499 (opt ? opt->opt_nflen : 0);
1327
- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1328
- sizeof(struct frag_hdr);
13291500
13301501 headersize = sizeof(struct ipv6hdr) +
13311502 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
13321503 (dst_allfrag(&rt->dst) ?
13331504 sizeof(struct frag_hdr) : 0) +
13341505 rt->rt6i_nfheader_len;
1506
+
1507
+ if (mtu <= fragheaderlen ||
1508
+ ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
1509
+ goto emsgsize;
1510
+
1511
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1512
+ sizeof(struct frag_hdr);
13351513
13361514 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
13371515 * the first fragment
....@@ -1369,13 +1547,27 @@
13691547 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
13701548 csummode = CHECKSUM_PARTIAL;
13711549
1550
+ if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1551
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1552
+ if (!uarg)
1553
+ return -ENOBUFS;
1554
+ extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
1555
+ if (rt->dst.dev->features & NETIF_F_SG &&
1556
+ csummode == CHECKSUM_PARTIAL) {
1557
+ paged = true;
1558
+ } else {
1559
+ uarg->zerocopy = 0;
1560
+ skb_zcopy_set(skb, uarg, &extra_uref);
1561
+ }
1562
+ }
1563
+
13721564 /*
13731565 * Let's try using as much space as possible.
13741566 * Use MTU if total length of the message fits into the MTU.
13751567 * Otherwise, we need to reserve fragment header and
13761568 * fragment alignment (= 8-15 octets, in total).
13771569 *
1378
- * Note that we may need to "move" the data from the tail of
1570
+ * Note that we may need to "move" the data from the tail
13791571 * of the buffer to the new fragment when we split
13801572 * the message.
13811573 *
....@@ -1489,12 +1681,6 @@
14891681 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
14901682 dst_exthdrlen);
14911683
1492
- /* Only the initial fragment is time stamped */
1493
- skb_shinfo(skb)->tx_flags = cork->tx_flags;
1494
- cork->tx_flags = 0;
1495
- skb_shinfo(skb)->tskey = tskey;
1496
- tskey = 0;
1497
-
14981684 /*
14991685 * Find where to start putting bytes
15001686 */
....@@ -1506,7 +1692,7 @@
15061692 if (fraggap) {
15071693 skb->csum = skb_copy_and_csum_bits(
15081694 skb_prev, maxfraglen,
1509
- data + transhdrlen, fraggap, 0);
1695
+ data + transhdrlen, fraggap);
15101696 skb_prev->csum = csum_sub(skb_prev->csum,
15111697 skb->csum);
15121698 data += fraggap;
....@@ -1525,6 +1711,13 @@
15251711 transhdrlen = 0;
15261712 exthdrlen = 0;
15271713 dst_exthdrlen = 0;
1714
+
1715
+ /* Only the initial fragment is time stamped */
1716
+ skb_shinfo(skb)->tx_flags = cork->tx_flags;
1717
+ cork->tx_flags = 0;
1718
+ skb_shinfo(skb)->tskey = tskey;
1719
+ tskey = 0;
1720
+ skb_zcopy_set(skb, uarg, &extra_uref);
15281721
15291722 if ((flags & MSG_CONFIRM) && !skb_prev)
15301723 skb_set_dst_pending_confirm(skb, 1);
....@@ -1555,7 +1748,7 @@
15551748 err = -EFAULT;
15561749 goto error;
15571750 }
1558
- } else {
1751
+ } else if (!uarg || !uarg->zerocopy) {
15591752 int i = skb_shinfo(skb)->nr_frags;
15601753
15611754 err = -ENOMEM;
....@@ -1585,6 +1778,10 @@
15851778 skb->data_len += copy;
15861779 skb->truesize += copy;
15871780 wmem_alloc_delta += copy;
1781
+ } else {
1782
+ err = skb_zerocopy_iter_dgram(skb, from, copy);
1783
+ if (err < 0)
1784
+ goto error;
15881785 }
15891786 offset += copy;
15901787 length -= copy;
....@@ -1597,6 +1794,8 @@
15971794 error_efault:
15981795 err = -EFAULT;
15991796 error:
1797
+ if (uarg)
1798
+ sock_zerocopy_put_abort(uarg, extra_uref);
16001799 cork->length -= length;
16011800 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
16021801 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
....@@ -1718,7 +1917,7 @@
17181917 hdr->daddr = *final_dst;
17191918
17201919 skb->priority = sk->sk_priority;
1721
- skb->mark = sk->sk_mark;
1920
+ skb->mark = cork->base.mark;
17221921
17231922 skb->tstamp = cork->base.transmit_time;
17241923
....@@ -1726,8 +1925,13 @@
17261925 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
17271926 if (proto == IPPROTO_ICMPV6) {
17281927 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1928
+ u8 icmp6_type;
17291929
1730
- ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1930
+ if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
1931
+ icmp6_type = fl6->fl6_icmp_type;
1932
+ else
1933
+ icmp6_type = icmp6_hdr(skb)->icmp6_type;
1934
+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
17311935 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
17321936 }
17331937