hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/net/ipv4/inet_connection_sock.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * INET An implementation of the TCP/IP protocol suite for the LINUX
34 * operating system. INET is implemented using the BSD Socket
....@@ -6,11 +7,6 @@
67 * Support for INET connection oriented protocols.
78 *
89 * Authors: See the TCP sources
9
- *
10
- * This program is free software; you can redistribute it and/or
11
- * modify it under the terms of the GNU General Public License
12
- * as published by the Free Software Foundation; either version
13
- * 2 of the License, or(at your option) any later version.
1410 */
1511
1612 #include <linux/module.h>
....@@ -140,7 +136,7 @@
140136 {
141137 struct sock *sk2;
142138 bool reuse = sk->sk_reuse;
143
- bool reuseport = !!sk->sk_reuseport && reuseport_ok;
139
+ bool reuseport = !!sk->sk_reuseport;
144140 kuid_t uid = sock_i_uid((struct sock *)sk);
145141
146142 /*
....@@ -151,21 +147,29 @@
151147 */
152148
153149 sk_for_each_bound(sk2, &tb->owners) {
154
- if (sk != sk2 &&
155
- (!sk->sk_bound_dev_if ||
156
- !sk2->sk_bound_dev_if ||
157
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
158
- if ((!reuse || !sk2->sk_reuse ||
159
- sk2->sk_state == TCP_LISTEN) &&
160
- (!reuseport || !sk2->sk_reuseport ||
161
- rcu_access_pointer(sk->sk_reuseport_cb) ||
162
- (sk2->sk_state != TCP_TIME_WAIT &&
163
- !uid_eq(uid, sock_i_uid(sk2))))) {
164
- if (inet_rcv_saddr_equal(sk, sk2, true))
165
- break;
166
- }
167
- if (!relax && reuse && sk2->sk_reuse &&
150
+ int bound_dev_if2;
151
+
152
+ if (sk == sk2)
153
+ continue;
154
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
155
+ if ((!sk->sk_bound_dev_if ||
156
+ !bound_dev_if2 ||
157
+ sk->sk_bound_dev_if == bound_dev_if2)) {
158
+ if (reuse && sk2->sk_reuse &&
168159 sk2->sk_state != TCP_LISTEN) {
160
+ if ((!relax ||
161
+ (!reuseport_ok &&
162
+ reuseport && sk2->sk_reuseport &&
163
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
164
+ (sk2->sk_state == TCP_TIME_WAIT ||
165
+ uid_eq(uid, sock_i_uid(sk2))))) &&
166
+ inet_rcv_saddr_equal(sk, sk2, true))
167
+ break;
168
+ } else if (!reuseport_ok ||
169
+ !reuseport || !sk2->sk_reuseport ||
170
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
171
+ (sk2->sk_state != TCP_TIME_WAIT &&
172
+ !uid_eq(uid, sock_i_uid(sk2)))) {
169173 if (inet_rcv_saddr_equal(sk, sk2, true))
170174 break;
171175 }
....@@ -185,10 +189,14 @@
185189 int port = 0;
186190 struct inet_bind_hashbucket *head;
187191 struct net *net = sock_net(sk);
192
+ bool relax = false;
188193 int i, low, high, attempt_half;
189194 struct inet_bind_bucket *tb;
190195 u32 remaining, offset;
196
+ int l3mdev;
191197
198
+ l3mdev = inet_sk_bound_l3mdev(sk);
199
+ports_exhausted:
192200 attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
193201 other_half_scan:
194202 inet_get_local_port_range(net, &low, &high);
....@@ -224,8 +232,9 @@
224232 hinfo->bhash_size)];
225233 spin_lock_bh(&head->lock);
226234 inet_bind_bucket_for_each(tb, &head->chain)
227
- if (net_eq(ib_net(tb), net) && tb->port == port) {
228
- if (!inet_csk_bind_conflict(sk, tb, false, false))
235
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
236
+ tb->port == port) {
237
+ if (!inet_csk_bind_conflict(sk, tb, relax, false))
229238 goto success;
230239 goto next_port;
231240 }
....@@ -244,6 +253,12 @@
244253 /* OK we now try the upper half of the range */
245254 attempt_half = 2;
246255 goto other_half_scan;
256
+ }
257
+
258
+ if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
259
+ /* We still have a chance to connect to different destinations */
260
+ relax = true;
261
+ goto ports_exhausted;
247262 }
248263 return NULL;
249264 success:
....@@ -348,6 +363,9 @@
348363 struct inet_bind_hashbucket *head;
349364 struct net *net = sock_net(sk);
350365 struct inet_bind_bucket *tb = NULL;
366
+ int l3mdev;
367
+
368
+ l3mdev = inet_sk_bound_l3mdev(sk);
351369
352370 if (!port) {
353371 head = inet_csk_find_open_port(sk, &tb, &port);
....@@ -361,11 +379,12 @@
361379 hinfo->bhash_size)];
362380 spin_lock_bh(&head->lock);
363381 inet_bind_bucket_for_each(tb, &head->chain)
364
- if (net_eq(ib_net(tb), net) && tb->port == port)
382
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
383
+ tb->port == port)
365384 goto tb_found;
366385 tb_not_found:
367386 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
368
- net, head, port);
387
+ net, head, port, l3mdev);
369388 if (!tb)
370389 goto fail_unlock;
371390 tb_found:
....@@ -549,7 +568,7 @@
549568 {
550569 struct inet_connection_sock *icsk = inet_csk(sk);
551570
552
- icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;
571
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
553572
554573 sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
555574 sk_stop_timer(sk, &icsk->icsk_delack_timer);
....@@ -587,7 +606,7 @@
587606 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
588607 ireq->ir_loc_addr, ireq->ir_rmt_port,
589608 htons(ireq->ir_num), sk->sk_uid);
590
- security_req_classify_flow(req, flowi4_to_flowi(fl4));
609
+ security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
591610 rt = ip_route_output_flow(net, fl4, sk);
592611 if (IS_ERR(rt))
593612 goto no_route;
....@@ -625,7 +644,7 @@
625644 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
626645 ireq->ir_loc_addr, ireq->ir_rmt_port,
627646 htons(ireq->ir_num), sk->sk_uid);
628
- security_req_classify_flow(req, flowi4_to_flowi(fl4));
647
+ security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
629648 rt = ip_route_output_flow(net, fl4, sk);
630649 if (IS_ERR(rt))
631650 goto no_route;
....@@ -641,27 +660,20 @@
641660 }
642661 EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
643662
644
-#if IS_ENABLED(CONFIG_IPV6)
645
-#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
646
-#else
647
-#define AF_INET_FAMILY(fam) true
648
-#endif
649
-
650663 /* Decide when to expire the request and when to resend SYN-ACK */
651
-static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
652
- const int max_retries,
653
- const u8 rskq_defer_accept,
654
- int *expire, int *resend)
664
+static void syn_ack_recalc(struct request_sock *req,
665
+ const int max_syn_ack_retries,
666
+ const u8 rskq_defer_accept,
667
+ int *expire, int *resend)
655668 {
656669 if (!rskq_defer_accept) {
657
- *expire = req->num_timeout >= thresh;
670
+ *expire = req->num_timeout >= max_syn_ack_retries;
658671 *resend = 1;
659672 return;
660673 }
661
- *expire = req->num_timeout >= thresh &&
662
- (!inet_rsk(req)->acked || req->num_timeout >= max_retries);
663
- /*
664
- * Do not resend while waiting for data after ACK,
674
+ *expire = req->num_timeout >= max_syn_ack_retries &&
675
+ (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept);
676
+ /* Do not resend while waiting for data after ACK,
665677 * start to resend on end of deferring period to give
666678 * last chance for data or ACK to create established socket.
667679 */
....@@ -680,8 +692,7 @@
680692 EXPORT_SYMBOL(inet_rtx_syn_ack);
681693
682694 /* return true if req was found in the ehash table */
683
-static bool reqsk_queue_unlink(struct request_sock_queue *queue,
684
- struct request_sock *req)
695
+static bool reqsk_queue_unlink(struct request_sock *req)
685696 {
686697 struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
687698 bool found = false;
....@@ -700,7 +711,7 @@
700711
701712 bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
702713 {
703
- bool unlinked = reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req);
714
+ bool unlinked = reqsk_queue_unlink(req);
704715
705716 if (unlinked) {
706717 reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
....@@ -724,15 +735,12 @@
724735 struct net *net = sock_net(sk_listener);
725736 struct inet_connection_sock *icsk = inet_csk(sk_listener);
726737 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
727
- int qlen, expire = 0, resend = 0;
728
- int max_retries, thresh;
729
- u8 defer_accept;
738
+ int max_syn_ack_retries, qlen, expire = 0, resend = 0;
730739
731740 if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
732741 goto drop;
733742
734
- max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
735
- thresh = max_retries;
743
+ max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
736744 /* Normally all the openreqs are young and become mature
737745 * (i.e. converted to established socket) for first timeout.
738746 * If synack was not acknowledged for 1 second, it means
....@@ -751,20 +759,17 @@
751759 * ones are about to clog our table.
752760 */
753761 qlen = reqsk_queue_len(queue);
754
- if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
762
+ if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
755763 int young = reqsk_queue_len_young(queue) << 1;
756764
757
- while (thresh > 2) {
765
+ while (max_syn_ack_retries > 2) {
758766 if (qlen < young)
759767 break;
760
- thresh--;
768
+ max_syn_ack_retries--;
761769 young <<= 1;
762770 }
763771 }
764
- defer_accept = READ_ONCE(queue->rskq_defer_accept);
765
- if (defer_accept)
766
- max_retries = defer_accept;
767
- syn_ack_recalc(req, thresh, max_retries, defer_accept,
772
+ syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept),
768773 &expire, &resend);
769774 req->rsk_ops->syn_ack_timeout(req);
770775 if (!expire &&
....@@ -786,14 +791,10 @@
786791 static void reqsk_queue_hash_req(struct request_sock *req,
787792 unsigned long timeout)
788793 {
789
- req->num_retrans = 0;
790
- req->num_timeout = 0;
791
- req->sk = NULL;
792
-
793794 timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
794795 mod_timer(&req->rsk_timer, jiffies + timeout);
795796
796
- inet_ehash_insert(req_to_sk(req), NULL);
797
+ inet_ehash_insert(req_to_sk(req), NULL, NULL);
797798 /* before letting lookups find us, make sure all req fields
798799 * are committed to memory and refcnt initialized.
799800 */
....@@ -808,6 +809,18 @@
808809 inet_csk_reqsk_queue_added(sk);
809810 }
810811 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
812
+
813
+static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
814
+ const gfp_t priority)
815
+{
816
+ struct inet_connection_sock *icsk = inet_csk(newsk);
817
+
818
+ if (!icsk->icsk_ulp_ops)
819
+ return;
820
+
821
+ if (icsk->icsk_ulp_ops->clone)
822
+ icsk->icsk_ulp_ops->clone(req, newsk, priority);
823
+}
811824
812825 /**
813826 * inet_csk_clone_lock - clone an inet socket, and lock its clone
....@@ -845,9 +858,12 @@
845858 newicsk->icsk_retransmits = 0;
846859 newicsk->icsk_backoff = 0;
847860 newicsk->icsk_probes_out = 0;
861
+ newicsk->icsk_probes_tstamp = 0;
848862
849863 /* Deinitialize accept_queue to trap illegal accesses. */
850864 memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
865
+
866
+ inet_clone_ulp(req, newsk, priority);
851867
852868 security_inet_csk_clone(newsk, req);
853869 }
....@@ -895,23 +911,33 @@
895911 /* sk_clone_lock locked the socket and set refcnt to 2 */
896912 bh_unlock_sock(sk);
897913 sock_put(sk);
898
-
899
- /* The below has to be done to allow calling inet_csk_destroy_sock */
900
- sock_set_flag(sk, SOCK_DEAD);
901
- percpu_counter_inc(sk->sk_prot->orphan_count);
914
+ inet_csk_prepare_for_destroy_sock(sk);
902915 inet_sk(sk)->inet_num = 0;
903916 }
904917 EXPORT_SYMBOL(inet_csk_prepare_forced_close);
918
+
919
+static int inet_ulp_can_listen(const struct sock *sk)
920
+{
921
+ const struct inet_connection_sock *icsk = inet_csk(sk);
922
+
923
+ if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone)
924
+ return -EINVAL;
925
+
926
+ return 0;
927
+}
905928
906929 int inet_csk_listen_start(struct sock *sk, int backlog)
907930 {
908931 struct inet_connection_sock *icsk = inet_csk(sk);
909932 struct inet_sock *inet = inet_sk(sk);
910
- int err = -EADDRINUSE;
933
+ int err;
934
+
935
+ err = inet_ulp_can_listen(sk);
936
+ if (unlikely(err))
937
+ return err;
911938
912939 reqsk_queue_alloc(&icsk->icsk_accept_queue);
913940
914
- sk->sk_max_ack_backlog = backlog;
915941 sk->sk_ack_backlog = 0;
916942 inet_csk_delack_init(sk);
917943
....@@ -920,6 +946,7 @@
920946 * It is OK, because this socket enters to hash table only
921947 * after validation is complete.
922948 */
949
+ err = -EADDRINUSE;
923950 inet_sk_state_store(sk, TCP_LISTEN);
924951 if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
925952 inet->inet_sport = htons(inet->inet_num);
....@@ -946,7 +973,7 @@
946973 percpu_counter_inc(sk->sk_prot->orphan_count);
947974
948975 if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
949
- BUG_ON(tcp_sk(child)->fastopen_rsk != req);
976
+ BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
950977 BUG_ON(sk != req->rsk_listener);
951978
952979 /* Paranoid, to prevent race condition if
....@@ -955,7 +982,7 @@
955982 * Also to satisfy an assertion in
956983 * tcp_v4_destroy_sock().
957984 */
958
- tcp_sk(child)->fastopen_rsk = NULL;
985
+ RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
959986 }
960987 inet_csk_destroy_sock(child);
961988 }
....@@ -1061,34 +1088,6 @@
10611088 sin->sin_port = inet->inet_dport;
10621089 }
10631090 EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
1064
-
1065
-#ifdef CONFIG_COMPAT
1066
-int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
1067
- char __user *optval, int __user *optlen)
1068
-{
1069
- const struct inet_connection_sock *icsk = inet_csk(sk);
1070
-
1071
- if (icsk->icsk_af_ops->compat_getsockopt)
1072
- return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname,
1073
- optval, optlen);
1074
- return icsk->icsk_af_ops->getsockopt(sk, level, optname,
1075
- optval, optlen);
1076
-}
1077
-EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);
1078
-
1079
-int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
1080
- char __user *optval, unsigned int optlen)
1081
-{
1082
- const struct inet_connection_sock *icsk = inet_csk(sk);
1083
-
1084
- if (icsk->icsk_af_ops->compat_setsockopt)
1085
- return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
1086
- optval, optlen);
1087
- return icsk->icsk_af_ops->setsockopt(sk, level, optname,
1088
- optval, optlen);
1089
-}
1090
-EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
1091
-#endif
10921091
10931092 static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
10941093 {