hc
2023-12-11 1f93a7dfd1f8d5ff7a5c53246c7534fe2332d6f4
kernel/net/ipv6/tcp_ipv6.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * TCP over IPv6
34 * Linux INET6 implementation
....@@ -16,11 +17,6 @@
1617 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
1718 * a single port at the same time.
1819 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
19
- *
20
- * This program is free software; you can redistribute it and/or
21
- * modify it under the terms of the GNU General Public License
22
- * as published by the Free Software Foundation; either version
23
- * 2 of the License, or (at your option) any later version.
2420 */
2521
2622 #include <linux/bottom_half.h>
....@@ -43,6 +39,7 @@
4339 #include <linux/ipv6.h>
4440 #include <linux/icmpv6.h>
4541 #include <linux/random.h>
42
+#include <linux/indirect_call_wrapper.h>
4643
4744 #include <net/tcp.h>
4845 #include <net/ndisc.h>
....@@ -78,17 +75,30 @@
7875 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
7976
8077 static const struct inet_connection_sock_af_ops ipv6_mapped;
81
-static const struct inet_connection_sock_af_ops ipv6_specific;
78
+const struct inet_connection_sock_af_ops ipv6_specific;
8279 #ifdef CONFIG_TCP_MD5SIG
8380 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
8481 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
8582 #else
8683 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
87
- const struct in6_addr *addr)
84
+ const struct in6_addr *addr,
85
+ int l3index)
8886 {
8987 return NULL;
9088 }
9189 #endif
90
+
91
+/* Helper returning the inet6 address from a given tcp socket.
92
+ * It can be used in TCP stack instead of inet6_sk(sk).
93
+ * This avoids a dereference and allows compiler optimizations.
94
+ * It is a specialized version of inet6_sk_generic().
95
+ */
96
+static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97
+{
98
+ unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
+
100
+ return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101
+}
92102
93103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
94104 {
....@@ -97,9 +107,9 @@
97107 if (dst && dst_hold_safe(dst)) {
98108 const struct rt6_info *rt = (const struct rt6_info *)dst;
99109
100
- sk->sk_rx_dst = dst;
110
+ rcu_assign_pointer(sk->sk_rx_dst, dst);
101111 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
102
- inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
112
+ tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
103113 }
104114 }
105115
....@@ -138,7 +148,7 @@
138148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
139149 struct inet_sock *inet = inet_sk(sk);
140150 struct inet_connection_sock *icsk = inet_csk(sk);
141
- struct ipv6_pinfo *np = inet6_sk(sk);
151
+ struct ipv6_pinfo *np = tcp_inet6_sk(sk);
142152 struct tcp_sock *tp = tcp_sk(sk);
143153 struct in6_addr *saddr = NULL, *final_p, final;
144154 struct ipv6_txoptions *opt;
....@@ -162,7 +172,7 @@
162172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
163173 struct ip6_flowlabel *flowlabel;
164174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
165
- if (!flowlabel)
175
+ if (IS_ERR(flowlabel))
166176 return -EINVAL;
167177 fl6_sock_release(flowlabel);
168178 }
....@@ -220,8 +230,6 @@
220230 u32 exthdrlen = icsk->icsk_ext_hdr_len;
221231 struct sockaddr_in sin;
222232
223
- SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
224
-
225233 if (__ipv6_only_sock(sk))
226234 return -ENETUNREACH;
227235
....@@ -230,6 +238,8 @@
230238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
231239
232240 icsk->icsk_af_ops = &ipv6_mapped;
241
+ if (sk_is_mptcp(sk))
242
+ mptcpv6_handle_mapped(sk, true);
233243 sk->sk_backlog_rcv = tcp_v4_do_rcv;
234244 #ifdef CONFIG_TCP_MD5SIG
235245 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
....@@ -240,6 +250,8 @@
240250 if (err) {
241251 icsk->icsk_ext_hdr_len = exthdrlen;
242252 icsk->icsk_af_ops = &ipv6_specific;
253
+ if (sk_is_mptcp(sk))
254
+ mptcpv6_handle_mapped(sk, false);
243255 sk->sk_backlog_rcv = tcp_v6_do_rcv;
244256 #ifdef CONFIG_TCP_MD5SIG
245257 tp->af_specific = &tcp_sock_ipv6_specific;
....@@ -266,7 +278,7 @@
266278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
267279 final_p = fl6_update_dst(&fl6, opt, &final);
268280
269
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
281
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
270282
271283 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
272284 if (IS_ERR(dst)) {
....@@ -327,6 +339,8 @@
327339
328340 late_failure:
329341 tcp_set_state(sk, TCP_CLOSE);
342
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
343
+ inet_reset_saddr(sk);
330344 failure:
331345 inet->inet_dport = 0;
332346 sk->sk_route_caps = 0;
....@@ -359,7 +373,7 @@
359373 }
360374 }
361375
362
-static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
376
+static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
363377 u8 type, u8 code, int offset, __be32 info)
364378 {
365379 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
....@@ -381,17 +395,19 @@
381395 if (!sk) {
382396 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
383397 ICMP6_MIB_INERRORS);
384
- return;
398
+ return -ENOENT;
385399 }
386400
387401 if (sk->sk_state == TCP_TIME_WAIT) {
388402 inet_twsk_put(inet_twsk(sk));
389
- return;
403
+ return 0;
390404 }
391405 seq = ntohl(th->seq);
392406 fatal = icmpv6_err_convert(type, code, &err);
393
- if (sk->sk_state == TCP_NEW_SYN_RECV)
394
- return tcp_req_err(sk, seq, fatal);
407
+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
408
+ tcp_req_err(sk, seq, fatal);
409
+ return 0;
410
+ }
395411
396412 bh_lock_sock(sk);
397413 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
....@@ -400,14 +416,14 @@
400416 if (sk->sk_state == TCP_CLOSE)
401417 goto out;
402418
403
- if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
419
+ if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
404420 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
405421 goto out;
406422 }
407423
408424 tp = tcp_sk(sk);
409425 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
410
- fastopen = tp->fastopen_rsk;
426
+ fastopen = rcu_dereference(tp->fastopen_rsk);
411427 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
412428 if (sk->sk_state != TCP_LISTEN &&
413429 !between(seq, snd_una, tp->snd_nxt)) {
....@@ -415,7 +431,7 @@
415431 goto out;
416432 }
417433
418
- np = inet6_sk(sk);
434
+ np = tcp_inet6_sk(sk);
419435
420436 if (type == NDISC_REDIRECT) {
421437 if (!sock_owned_by_user(sk)) {
....@@ -459,10 +475,12 @@
459475 case TCP_SYN_SENT:
460476 case TCP_SYN_RECV:
461477 /* Only in fast or simultaneous open. If a fast open socket is
462
- * is already accepted it is treated as a connected one below.
478
+ * already accepted it is treated as a connected one below.
463479 */
464480 if (fastopen && !fastopen->sk)
465481 break;
482
+
483
+ ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
466484
467485 if (!sock_owned_by_user(sk)) {
468486 sk->sk_err = err;
....@@ -472,6 +490,15 @@
472490 } else
473491 sk->sk_err_soft = err;
474492 goto out;
493
+ case TCP_LISTEN:
494
+ break;
495
+ default:
496
+ /* check if this ICMP message allows revert of backoff.
497
+ * (see RFC 6069)
498
+ */
499
+ if (!fastopen && type == ICMPV6_DEST_UNREACH &&
500
+ code == ICMPV6_NOROUTE)
501
+ tcp_ld_RTO_revert(sk, seq);
475502 }
476503
477504 if (!sock_owned_by_user(sk) && np->recverr) {
....@@ -483,6 +510,7 @@
483510 out:
484511 bh_unlock_sock(sk);
485512 sock_put(sk);
513
+ return 0;
486514 }
487515
488516
....@@ -490,21 +518,23 @@
490518 struct flowi *fl,
491519 struct request_sock *req,
492520 struct tcp_fastopen_cookie *foc,
493
- enum tcp_synack_type synack_type)
521
+ enum tcp_synack_type synack_type,
522
+ struct sk_buff *syn_skb)
494523 {
495524 struct inet_request_sock *ireq = inet_rsk(req);
496
- struct ipv6_pinfo *np = inet6_sk(sk);
525
+ struct ipv6_pinfo *np = tcp_inet6_sk(sk);
497526 struct ipv6_txoptions *opt;
498527 struct flowi6 *fl6 = &fl->u.ip6;
499528 struct sk_buff *skb;
500529 int err = -ENOMEM;
530
+ u8 tclass;
501531
502532 /* First, grab a route. */
503533 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
504534 IPPROTO_TCP)) == NULL)
505535 goto done;
506536
507
- skb = tcp_make_synack(sk, dst, req, foc, synack_type);
537
+ skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
508538
509539 if (skb) {
510540 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
....@@ -514,12 +544,21 @@
514544 if (np->repflow && ireq->pktopts)
515545 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
516546
547
+ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
548
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
549
+ (np->tclass & INET_ECN_MASK) :
550
+ np->tclass;
551
+
552
+ if (!INET_ECN_is_capable(tclass) &&
553
+ tcp_bpf_ca_needs_ecn((struct sock *)req))
554
+ tclass |= INET_ECN_ECT_0;
555
+
517556 rcu_read_lock();
518557 opt = ireq->ipv6_opt;
519558 if (!opt)
520559 opt = rcu_dereference(np->opt);
521560 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
522
- np->tclass);
561
+ tclass, sk->sk_priority);
523562 rcu_read_unlock();
524563 err = net_xmit_eval(err);
525564 }
....@@ -537,28 +576,36 @@
537576
538577 #ifdef CONFIG_TCP_MD5SIG
539578 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
540
- const struct in6_addr *addr)
579
+ const struct in6_addr *addr,
580
+ int l3index)
541581 {
542
- return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
582
+ return tcp_md5_do_lookup(sk, l3index,
583
+ (union tcp_md5_addr *)addr, AF_INET6);
543584 }
544585
545586 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
546587 const struct sock *addr_sk)
547588 {
548
- return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
589
+ int l3index;
590
+
591
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
592
+ addr_sk->sk_bound_dev_if);
593
+ return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
594
+ l3index);
549595 }
550596
551597 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
552
- char __user *optval, int optlen)
598
+ sockptr_t optval, int optlen)
553599 {
554600 struct tcp_md5sig cmd;
555601 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
602
+ int l3index = 0;
556603 u8 prefixlen;
557604
558605 if (optlen < sizeof(cmd))
559606 return -EINVAL;
560607
561
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
608
+ if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
562609 return -EFAULT;
563610
564611 if (sin6->sin6_family != AF_INET6)
....@@ -574,12 +621,30 @@
574621 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
575622 }
576623
624
+ if (optname == TCP_MD5SIG_EXT &&
625
+ cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
626
+ struct net_device *dev;
627
+
628
+ rcu_read_lock();
629
+ dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
630
+ if (dev && netif_is_l3_master(dev))
631
+ l3index = dev->ifindex;
632
+ rcu_read_unlock();
633
+
634
+ /* ok to reference set/not set outside of rcu;
635
+ * right now device MUST be an L3 master
636
+ */
637
+ if (!dev || !l3index)
638
+ return -EINVAL;
639
+ }
640
+
577641 if (!cmd.tcpm_keylen) {
578642 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
579643 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
580
- AF_INET, prefixlen);
644
+ AF_INET, prefixlen,
645
+ l3index);
581646 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
582
- AF_INET6, prefixlen);
647
+ AF_INET6, prefixlen, l3index);
583648 }
584649
585650 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
....@@ -587,12 +652,13 @@
587652
588653 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
589654 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
590
- AF_INET, prefixlen, cmd.tcpm_key,
591
- cmd.tcpm_keylen, GFP_KERNEL);
655
+ AF_INET, prefixlen, l3index,
656
+ cmd.tcpm_key, cmd.tcpm_keylen,
657
+ GFP_KERNEL);
592658
593659 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
594
- AF_INET6, prefixlen, cmd.tcpm_key,
595
- cmd.tcpm_keylen, GFP_KERNEL);
660
+ AF_INET6, prefixlen, l3index,
661
+ cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
596662 }
597663
598664 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
....@@ -703,17 +769,23 @@
703769 #endif
704770
705771 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
706
- const struct sk_buff *skb)
772
+ const struct sk_buff *skb,
773
+ int dif, int sdif)
707774 {
708775 #ifdef CONFIG_TCP_MD5SIG
709776 const __u8 *hash_location = NULL;
710777 struct tcp_md5sig_key *hash_expected;
711778 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
712779 const struct tcphdr *th = tcp_hdr(skb);
713
- int genhash;
780
+ int genhash, l3index;
714781 u8 newhash[16];
715782
716
- hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
783
+ /* sdif set, means packet ingressed via a device
784
+ * in an L3 domain and dif is set to the l3mdev
785
+ */
786
+ l3index = sdif ? dif : 0;
787
+
788
+ hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
717789 hash_location = tcp_parse_md5sig_option(th);
718790
719791 /* We've parsed the options - do we have a hash? */
....@@ -737,10 +809,10 @@
737809
738810 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
739811 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
740
- net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
812
+ net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
741813 genhash ? "failed" : "mismatch",
742814 &ip6h->saddr, ntohs(th->source),
743
- &ip6h->daddr, ntohs(th->dest));
815
+ &ip6h->daddr, ntohs(th->dest), l3index);
744816 return true;
745817 }
746818 #endif
....@@ -751,14 +823,15 @@
751823 const struct sock *sk_listener,
752824 struct sk_buff *skb)
753825 {
826
+ bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
754827 struct inet_request_sock *ireq = inet_rsk(req);
755
- const struct ipv6_pinfo *np = inet6_sk(sk_listener);
828
+ const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
756829
757830 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
758831 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
759832
760833 /* So that link locals have meaning */
761
- if (!sk_listener->sk_bound_dev_if &&
834
+ if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
762835 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
763836 ireq->ir_iif = tcp_v6_iif(skb);
764837
....@@ -789,7 +862,7 @@
789862 .syn_ack_timeout = tcp_syn_ack_timeout,
790863 };
791864
792
-static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
865
+const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
793866 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
794867 sizeof(struct ipv6hdr),
795868 #ifdef CONFIG_TCP_MD5SIG
....@@ -809,7 +882,7 @@
809882 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
810883 u32 ack, u32 win, u32 tsval, u32 tsecr,
811884 int oif, struct tcp_md5sig_key *key, int rst,
812
- u8 tclass, __be32 label)
885
+ u8 tclass, __be32 label, u32 priority)
813886 {
814887 const struct tcphdr *th = tcp_hdr(skb);
815888 struct tcphdr *t1;
....@@ -889,14 +962,22 @@
889962 fl6.flowi6_oif = oif;
890963 }
891964
892
- if (sk)
893
- mark = (sk->sk_state == TCP_TIME_WAIT) ?
894
- inet_twsk(sk)->tw_mark : sk->sk_mark;
965
+ if (sk) {
966
+ if (sk->sk_state == TCP_TIME_WAIT) {
967
+ mark = inet_twsk(sk)->tw_mark;
968
+ /* autoflowlabel relies on buff->hash */
969
+ skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
970
+ PKT_HASH_TYPE_L4);
971
+ } else {
972
+ mark = sk->sk_mark;
973
+ }
974
+ buff->tstamp = tcp_transmit_time(sk);
975
+ }
895976 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
896977 fl6.fl6_dport = t1->dest;
897978 fl6.fl6_sport = t1->source;
898979 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
899
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
980
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
900981
901982 /* Pass a socket to ip6_dst_lookup either it is for RST
902983 * Underlying function will use this to retrieve the network
....@@ -905,7 +986,8 @@
905986 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
906987 if (!IS_ERR(dst)) {
907988 skb_dst_set(buff, dst);
908
- ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
989
+ ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
990
+ tclass & ~INET_ECN_MASK, priority);
909991 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
910992 if (rst)
911993 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
....@@ -918,15 +1000,18 @@
9181000 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
9191001 {
9201002 const struct tcphdr *th = tcp_hdr(skb);
1003
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
9211004 u32 seq = 0, ack_seq = 0;
9221005 struct tcp_md5sig_key *key = NULL;
9231006 #ifdef CONFIG_TCP_MD5SIG
9241007 const __u8 *hash_location = NULL;
925
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
9261008 unsigned char newhash[16];
9271009 int genhash;
9281010 struct sock *sk1 = NULL;
9291011 #endif
1012
+ __be32 label = 0;
1013
+ u32 priority = 0;
1014
+ struct net *net;
9301015 int oif = 0;
9311016
9321017 if (th->rst)
....@@ -938,12 +1023,23 @@
9381023 if (!sk && !ipv6_unicast_destination(skb))
9391024 return;
9401025
1026
+ net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
9411027 #ifdef CONFIG_TCP_MD5SIG
9421028 rcu_read_lock();
9431029 hash_location = tcp_parse_md5sig_option(th);
9441030 if (sk && sk_fullsock(sk)) {
945
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
1031
+ int l3index;
1032
+
1033
+ /* sdif set, means packet ingressed via a device
1034
+ * in an L3 domain and inet_iif is set to it.
1035
+ */
1036
+ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1037
+ key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
9461038 } else if (hash_location) {
1039
+ int dif = tcp_v6_iif_l3_slave(skb);
1040
+ int sdif = tcp_v6_sdif(skb);
1041
+ int l3index;
1042
+
9471043 /*
9481044 * active side is lost. Try to find listening socket through
9491045 * source port, and then find md5 key through listening socket.
....@@ -951,17 +1047,20 @@
9511047 * Incoming packet is checked with md5 hash with finding key,
9521048 * no RST generated if md5 hash doesn't match.
9531049 */
954
- sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
1050
+ sk1 = inet6_lookup_listener(net,
9551051 &tcp_hashinfo, NULL, 0,
9561052 &ipv6h->saddr,
9571053 th->source, &ipv6h->daddr,
958
- ntohs(th->source),
959
- tcp_v6_iif_l3_slave(skb),
960
- tcp_v6_sdif(skb));
1054
+ ntohs(th->source), dif, sdif);
9611055 if (!sk1)
9621056 goto out;
9631057
964
- key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
1058
+ /* sdif set, means packet ingressed via a device
1059
+ * in an L3 domain and dif is set to it.
1060
+ */
1061
+ l3index = tcp_v6_sdif(skb) ? dif : 0;
1062
+
1063
+ key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
9651064 if (!key)
9661065 goto out;
9671066
....@@ -979,11 +1078,25 @@
9791078
9801079 if (sk) {
9811080 oif = sk->sk_bound_dev_if;
982
- if (sk_fullsock(sk))
1081
+ if (sk_fullsock(sk)) {
1082
+ const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1083
+
9831084 trace_tcp_send_reset(sk, skb);
1085
+ if (np->repflow)
1086
+ label = ip6_flowlabel(ipv6h);
1087
+ priority = sk->sk_priority;
1088
+ }
1089
+ if (sk->sk_state == TCP_TIME_WAIT) {
1090
+ label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1091
+ priority = inet_twsk(sk)->tw_priority;
1092
+ }
1093
+ } else {
1094
+ if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1095
+ label = ip6_flowlabel(ipv6h);
9841096 }
9851097
986
- tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
1098
+ tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1099
+ ipv6_get_dsfield(ipv6h), label, priority);
9871100
9881101 #ifdef CONFIG_TCP_MD5SIG
9891102 out:
....@@ -994,10 +1107,10 @@
9941107 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
9951108 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
9961109 struct tcp_md5sig_key *key, u8 tclass,
997
- __be32 label)
1110
+ __be32 label, u32 priority)
9981111 {
9991112 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1000
- tclass, label);
1113
+ tclass, label, priority);
10011114 }
10021115
10031116 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
....@@ -1009,7 +1122,7 @@
10091122 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
10101123 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
10111124 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1012
- tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
1125
+ tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
10131126
10141127 inet_twsk_put(tw);
10151128 }
....@@ -1017,6 +1130,10 @@
10171130 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
10181131 struct request_sock *req)
10191132 {
1133
+ int l3index;
1134
+
1135
+ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1136
+
10201137 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
10211138 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
10221139 */
....@@ -1031,8 +1148,8 @@
10311148 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
10321149 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
10331150 req->ts_recent, sk->sk_bound_dev_if,
1034
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
1035
- 0, 0);
1151
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1152
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
10361153 }
10371154
10381155
....@@ -1045,6 +1162,21 @@
10451162 sk = cookie_v6_check(sk, skb);
10461163 #endif
10471164 return sk;
1165
+}
1166
+
1167
+u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1168
+ struct tcphdr *th, u32 *cookie)
1169
+{
1170
+ u16 mss = 0;
1171
+#ifdef CONFIG_SYN_COOKIES
1172
+ mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1173
+ &tcp_request_sock_ipv6_ops, sk, th);
1174
+ if (mss) {
1175
+ *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1176
+ tcp_synq_overflow(sk);
1177
+ }
1178
+#endif
1179
+ return mss;
10481180 }
10491181
10501182 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
....@@ -1086,14 +1218,15 @@
10861218 {
10871219 struct inet_request_sock *ireq;
10881220 struct ipv6_pinfo *newnp;
1089
- const struct ipv6_pinfo *np = inet6_sk(sk);
1221
+ const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
10901222 struct ipv6_txoptions *opt;
1091
- struct tcp6_sock *newtcp6sk;
10921223 struct inet_sock *newinet;
1224
+ bool found_dup_sk = false;
10931225 struct tcp_sock *newtp;
10941226 struct sock *newsk;
10951227 #ifdef CONFIG_TCP_MD5SIG
10961228 struct tcp_md5sig_key *key;
1229
+ int l3index;
10971230 #endif
10981231 struct flowi6 fl6;
10991232
....@@ -1108,11 +1241,10 @@
11081241 if (!newsk)
11091242 return NULL;
11101243
1111
- newtcp6sk = (struct tcp6_sock *)newsk;
1112
- inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1244
+ inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
11131245
11141246 newinet = inet_sk(newsk);
1115
- newnp = inet6_sk(newsk);
1247
+ newnp = tcp_inet6_sk(newsk);
11161248 newtp = tcp_sk(newsk);
11171249
11181250 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
....@@ -1120,6 +1252,8 @@
11201252 newnp->saddr = newsk->sk_v6_rcv_saddr;
11211253
11221254 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1255
+ if (sk_is_mptcp(newsk))
1256
+ mptcpv6_handle_mapped(newsk, true);
11231257 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
11241258 #ifdef CONFIG_TCP_MD5SIG
11251259 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
....@@ -1176,12 +1310,11 @@
11761310 ip6_dst_store(newsk, dst, NULL, NULL);
11771311 inet6_sk_rx_dst_set(newsk, skb);
11781312
1179
- newtcp6sk = (struct tcp6_sock *)newsk;
1180
- inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1313
+ inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
11811314
11821315 newtp = tcp_sk(newsk);
11831316 newinet = inet_sk(newsk);
1184
- newnp = inet6_sk(newsk);
1317
+ newnp = tcp_inet6_sk(newsk);
11851318
11861319 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
11871320
....@@ -1209,6 +1342,12 @@
12091342 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
12101343 if (np->repflow)
12111344 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1345
+
1346
+ /* Set ToS of the new socket based upon the value of incoming SYN.
1347
+ * ECT bits are set later in tcp_init_transfer().
1348
+ */
1349
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1350
+ newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
12121351
12131352 /* Clone native IPv6 options from listening socket (if any)
12141353
....@@ -1239,8 +1378,10 @@
12391378 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
12401379
12411380 #ifdef CONFIG_TCP_MD5SIG
1381
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1382
+
12421383 /* Copy over the MD5 key from the original socket */
1243
- key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1384
+ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
12441385 if (key) {
12451386 /* We're using one, so create a matching key
12461387 * on the newsk structure. If we fail to get
....@@ -1248,7 +1389,7 @@
12481389 * across. Shucks.
12491390 */
12501391 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1251
- AF_INET6, 128, key->key, key->keylen,
1392
+ AF_INET6, 128, l3index, key->key, key->keylen,
12521393 sk_gfp_mask(sk, GFP_ATOMIC));
12531394 }
12541395 #endif
....@@ -1258,7 +1399,8 @@
12581399 tcp_done(newsk);
12591400 goto out;
12601401 }
1261
- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1402
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1403
+ &found_dup_sk);
12621404 if (*own_req) {
12631405 tcp_move_syn(newtp, req);
12641406
....@@ -1272,6 +1414,15 @@
12721414 tcp_v6_restore_cb(newnp->pktoptions);
12731415 skb_set_owner_r(newnp->pktoptions, newsk);
12741416 }
1417
+ }
1418
+ } else {
1419
+ if (!req_unhash && found_dup_sk) {
1420
+ /* This code path should only be executed in the
1421
+ * syncookie case
1422
+ */
1423
+ bh_unlock_sock(newsk);
1424
+ sock_put(newsk);
1425
+ newsk = NULL;
12751426 }
12761427 }
12771428
....@@ -1296,9 +1447,9 @@
12961447 */
12971448 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
12981449 {
1299
- struct ipv6_pinfo *np = inet6_sk(sk);
1300
- struct tcp_sock *tp;
1450
+ struct ipv6_pinfo *np = tcp_inet6_sk(sk);
13011451 struct sk_buff *opt_skb = NULL;
1452
+ struct tcp_sock *tp;
13021453
13031454 /* Imagine: socket is IPv6. IPv4 packet arrives,
13041455 goes to IPv4 receive handler and backlogged.
....@@ -1333,15 +1484,18 @@
13331484 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
13341485
13351486 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1336
- struct dst_entry *dst = sk->sk_rx_dst;
1487
+ struct dst_entry *dst;
1488
+
1489
+ dst = rcu_dereference_protected(sk->sk_rx_dst,
1490
+ lockdep_sock_is_held(sk));
13371491
13381492 sock_rps_save_rxhash(sk, skb);
13391493 sk_mark_napi_id(sk, skb);
13401494 if (dst) {
13411495 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
13421496 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1497
+ RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
13431498 dst_release(dst);
1344
- sk->sk_rx_dst = NULL;
13451499 }
13461500 }
13471501
....@@ -1446,9 +1600,11 @@
14461600 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
14471601 }
14481602
1449
-static int tcp_v6_rcv(struct sk_buff *skb)
1603
+INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
14501604 {
1605
+ struct sk_buff *skb_to_free;
14511606 int sdif = inet6_sdif(skb);
1607
+ int dif = inet6_iif(skb);
14521608 const struct tcphdr *th;
14531609 const struct ipv6hdr *hdr;
14541610 bool refcounted;
....@@ -1497,7 +1653,7 @@
14971653 struct sock *nsk;
14981654
14991655 sk = req->rsk_listener;
1500
- if (tcp_v6_inbound_md5_hash(sk, skb)) {
1656
+ if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
15011657 sk_drops_add(sk, skb);
15021658 reqsk_put(req);
15031659 goto discard_it;
....@@ -1544,7 +1700,7 @@
15441700 return 0;
15451701 }
15461702 }
1547
- if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1703
+ if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
15481704 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
15491705 goto discard_and_relse;
15501706 }
....@@ -1552,7 +1708,7 @@
15521708 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
15531709 goto discard_and_relse;
15541710
1555
- if (tcp_v6_inbound_md5_hash(sk, skb))
1711
+ if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
15561712 goto discard_and_relse;
15571713
15581714 if (tcp_filter(sk, skb))
....@@ -1574,12 +1730,17 @@
15741730 tcp_segs_in(tcp_sk(sk), skb);
15751731 ret = 0;
15761732 if (!sock_owned_by_user(sk)) {
1733
+ skb_to_free = sk->sk_rx_skb_cache;
1734
+ sk->sk_rx_skb_cache = NULL;
15771735 ret = tcp_v6_do_rcv(sk, skb);
1578
- } else if (tcp_add_backlog(sk, skb)) {
1579
- goto discard_and_relse;
1736
+ } else {
1737
+ if (tcp_add_backlog(sk, skb))
1738
+ goto discard_and_relse;
1739
+ skb_to_free = NULL;
15801740 }
15811741 bh_unlock_sock(sk);
1582
-
1742
+ if (skb_to_free)
1743
+ __kfree_skb(skb_to_free);
15831744 put_and_return:
15841745 if (refcounted)
15851746 sock_put(sk);
....@@ -1645,7 +1806,7 @@
16451806 }
16461807 }
16471808 /* to ACK */
1648
- /* fall through */
1809
+ fallthrough;
16491810 case TCP_TW_ACK:
16501811 tcp_v6_timewait_ack(sk, skb);
16511812 break;
....@@ -1659,7 +1820,7 @@
16591820 goto discard_it;
16601821 }
16611822
1662
-static void tcp_v6_early_demux(struct sk_buff *skb)
1823
+void tcp_v6_early_demux(struct sk_buff *skb)
16631824 {
16641825 const struct ipv6hdr *hdr;
16651826 const struct tcphdr *th;
....@@ -1686,10 +1847,10 @@
16861847 skb->sk = sk;
16871848 skb->destructor = sock_edemux;
16881849 if (sk_fullsock(sk)) {
1689
- struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1850
+ struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
16901851
16911852 if (dst)
1692
- dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1853
+ dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
16931854 if (dst &&
16941855 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
16951856 skb_dst_set_noref(skb, dst);
....@@ -1703,7 +1864,14 @@
17031864 .twsk_destructor = tcp_twsk_destructor,
17041865 };
17051866
1706
-static const struct inet_connection_sock_af_ops ipv6_specific = {
1867
+INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1868
+{
1869
+ struct ipv6_pinfo *np = inet6_sk(sk);
1870
+
1871
+ __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1872
+}
1873
+
1874
+const struct inet_connection_sock_af_ops ipv6_specific = {
17071875 .queue_xmit = inet6_csk_xmit,
17081876 .send_check = tcp_v6_send_check,
17091877 .rebuild_header = inet6_sk_rebuild_header,
....@@ -1716,10 +1884,6 @@
17161884 .getsockopt = ipv6_getsockopt,
17171885 .addr2sockaddr = inet6_csk_addr2sockaddr,
17181886 .sockaddr_len = sizeof(struct sockaddr_in6),
1719
-#ifdef CONFIG_COMPAT
1720
- .compat_setsockopt = compat_ipv6_setsockopt,
1721
- .compat_getsockopt = compat_ipv6_getsockopt,
1722
-#endif
17231887 .mtu_reduced = tcp_v6_mtu_reduced,
17241888 };
17251889
....@@ -1746,10 +1910,6 @@
17461910 .getsockopt = ipv6_getsockopt,
17471911 .addr2sockaddr = inet6_csk_addr2sockaddr,
17481912 .sockaddr_len = sizeof(struct sockaddr_in6),
1749
-#ifdef CONFIG_COMPAT
1750
- .compat_setsockopt = compat_ipv6_setsockopt,
1751
- .compat_getsockopt = compat_ipv6_getsockopt,
1752
-#endif
17531913 .mtu_reduced = tcp_v4_mtu_reduced,
17541914 };
17551915
....@@ -1855,7 +2015,7 @@
18552015
18562016 state = inet_sk_state_load(sp);
18572017 if (state == TCP_LISTEN)
1858
- rx_queue = sp->sk_ack_backlog;
2018
+ rx_queue = READ_ONCE(sp->sk_ack_backlog);
18592019 else
18602020 /* Because we don't lock the socket,
18612021 * we might find a transient negative value.
....@@ -1883,7 +2043,7 @@
18832043 refcount_read(&sp->sk_refcnt), sp,
18842044 jiffies_to_clock_t(icsk->icsk_rto),
18852045 jiffies_to_clock_t(icsk->icsk_ack.ato),
1886
- (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2046
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
18872047 tp->snd_cwnd,
18882048 state == TCP_LISTEN ?
18892049 fastopenq->max_qlen :
....@@ -2007,19 +2167,11 @@
20072167 .rsk_prot = &tcp6_request_sock_ops,
20082168 .h.hashinfo = &tcp_hashinfo,
20092169 .no_autobind = true,
2010
-#ifdef CONFIG_COMPAT
2011
- .compat_setsockopt = compat_tcp_setsockopt,
2012
- .compat_getsockopt = compat_tcp_getsockopt,
2013
-#endif
20142170 .diag_destroy = tcp_abort,
20152171 };
2172
+EXPORT_SYMBOL_GPL(tcpv6_prot);
20162173
2017
-/* thinking of making this const? Don't.
2018
- * early_demux can change based on sysctl.
2019
- */
2020
-static struct inet6_protocol tcpv6_protocol = {
2021
- .early_demux = tcp_v6_early_demux,
2022
- .early_demux_handler = tcp_v6_early_demux,
2174
+static const struct inet6_protocol tcpv6_protocol = {
20232175 .handler = tcp_v6_rcv,
20242176 .err_handler = tcp_v6_err,
20252177 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
....@@ -2072,9 +2224,16 @@
20722224 ret = register_pernet_subsys(&tcpv6_net_ops);
20732225 if (ret)
20742226 goto out_tcpv6_protosw;
2227
+
2228
+ ret = mptcpv6_init();
2229
+ if (ret)
2230
+ goto out_tcpv6_pernet_subsys;
2231
+
20752232 out:
20762233 return ret;
20772234
2235
+out_tcpv6_pernet_subsys:
2236
+ unregister_pernet_subsys(&tcpv6_net_ops);
20782237 out_tcpv6_protosw:
20792238 inet6_unregister_protosw(&tcpv6_protosw);
20802239 out_tcpv6_protocol: