| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
|---|
| 3 | 4 | * operating system. INET is implemented using the BSD Socket |
|---|
| .. | .. |
|---|
| 7 | 8 | * |
|---|
| 8 | 9 | * Authors: Lotsa people, from code originally in tcp, generalised here |
|---|
| 9 | 10 | * by Arnaldo Carvalho de Melo <acme@mandriva.com> |
|---|
| 10 | | - * |
|---|
| 11 | | - * This program is free software; you can redistribute it and/or |
|---|
| 12 | | - * modify it under the terms of the GNU General Public License |
|---|
| 13 | | - * as published by the Free Software Foundation; either version |
|---|
| 14 | | - * 2 of the License, or (at your option) any later version. |
|---|
| 15 | 11 | */ |
|---|
| 16 | 12 | |
|---|
| 17 | 13 | #include <linux/module.h> |
|---|
| .. | .. |
|---|
| 24 | 20 | #include <net/secure_seq.h> |
|---|
| 25 | 21 | #include <net/ip.h> |
|---|
| 26 | 22 | #include <net/sock_reuseport.h> |
|---|
| 23 | + |
|---|
| 24 | +extern struct inet_hashinfo tcp_hashinfo; |
|---|
| 27 | 25 | |
|---|
| 28 | 26 | u32 inet6_ehashfn(const struct net *net, |
|---|
| 29 | 27 | const struct in6_addr *laddr, const u16 lport, |
|---|
| .. | .. |
|---|
| 73 | 71 | sk_nulls_for_each_rcu(sk, node, &head->chain) { |
|---|
| 74 | 72 | if (sk->sk_hash != hash) |
|---|
| 75 | 73 | continue; |
|---|
| 76 | | - if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif)) |
|---|
| 74 | + if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif)) |
|---|
| 77 | 75 | continue; |
|---|
| 78 | 76 | if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) |
|---|
| 79 | 77 | goto out; |
|---|
| 80 | 78 | |
|---|
| 81 | | - if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) { |
|---|
| 79 | + if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) { |
|---|
| 82 | 80 | sock_gen_put(sk); |
|---|
| 83 | 81 | goto begin; |
|---|
| 84 | 82 | } |
|---|
| .. | .. |
|---|
| 96 | 94 | static inline int compute_score(struct sock *sk, struct net *net, |
|---|
| 97 | 95 | const unsigned short hnum, |
|---|
| 98 | 96 | const struct in6_addr *daddr, |
|---|
| 99 | | - const int dif, const int sdif, bool exact_dif) |
|---|
| 97 | + const int dif, const int sdif) |
|---|
| 100 | 98 | { |
|---|
| 101 | 99 | int score = -1; |
|---|
| 102 | 100 | |
|---|
| 103 | 101 | if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && |
|---|
| 104 | 102 | sk->sk_family == PF_INET6) { |
|---|
| 103 | + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) |
|---|
| 104 | + return -1; |
|---|
| 105 | 105 | |
|---|
| 106 | | - score = 1; |
|---|
| 107 | | - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { |
|---|
| 108 | | - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) |
|---|
| 109 | | - return -1; |
|---|
| 110 | | - score++; |
|---|
| 111 | | - } |
|---|
| 112 | | - if (sk->sk_bound_dev_if || exact_dif) { |
|---|
| 113 | | - bool dev_match = (sk->sk_bound_dev_if == dif || |
|---|
| 114 | | - sk->sk_bound_dev_if == sdif); |
|---|
| 106 | + if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) |
|---|
| 107 | + return -1; |
|---|
| 115 | 108 | |
|---|
| 116 | | - if (!dev_match) |
|---|
| 117 | | - return -1; |
|---|
| 118 | | - if (sk->sk_bound_dev_if) |
|---|
| 119 | | - score++; |
|---|
| 120 | | - } |
|---|
| 109 | + score = sk->sk_bound_dev_if ? 2 : 1; |
|---|
| 121 | 110 | if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) |
|---|
| 122 | 111 | score++; |
|---|
| 123 | 112 | } |
|---|
| 124 | 113 | return score; |
|---|
| 114 | +} |
|---|
| 115 | + |
|---|
| 116 | +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, |
|---|
| 117 | + struct sk_buff *skb, int doff, |
|---|
| 118 | + const struct in6_addr *saddr, |
|---|
| 119 | + __be16 sport, |
|---|
| 120 | + const struct in6_addr *daddr, |
|---|
| 121 | + unsigned short hnum) |
|---|
| 122 | +{ |
|---|
| 123 | + struct sock *reuse_sk = NULL; |
|---|
| 124 | + u32 phash; |
|---|
| 125 | + |
|---|
| 126 | + if (sk->sk_reuseport) { |
|---|
| 127 | + phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); |
|---|
| 128 | + reuse_sk = reuseport_select_sock(sk, phash, skb, doff); |
|---|
| 129 | + } |
|---|
| 130 | + return reuse_sk; |
|---|
| 125 | 131 | } |
|---|
| 126 | 132 | |
|---|
| 127 | 133 | /* called with rcu_read_lock() */ |
|---|
| .. | .. |
|---|
| 132 | 138 | const __be16 sport, const struct in6_addr *daddr, |
|---|
| 133 | 139 | const unsigned short hnum, const int dif, const int sdif) |
|---|
| 134 | 140 | { |
|---|
| 135 | | - bool exact_dif = inet6_exact_dif_match(net, skb); |
|---|
| 136 | 141 | struct inet_connection_sock *icsk; |
|---|
| 137 | 142 | struct sock *sk, *result = NULL; |
|---|
| 138 | 143 | int score, hiscore = 0; |
|---|
| 139 | | - u32 phash = 0; |
|---|
| 140 | 144 | |
|---|
| 141 | 145 | inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { |
|---|
| 142 | 146 | sk = (struct sock *)icsk; |
|---|
| 143 | | - score = compute_score(sk, net, hnum, daddr, dif, sdif, |
|---|
| 144 | | - exact_dif); |
|---|
| 147 | + score = compute_score(sk, net, hnum, daddr, dif, sdif); |
|---|
| 145 | 148 | if (score > hiscore) { |
|---|
| 146 | | - if (sk->sk_reuseport) { |
|---|
| 147 | | - phash = inet6_ehashfn(net, daddr, hnum, |
|---|
| 148 | | - saddr, sport); |
|---|
| 149 | | - result = reuseport_select_sock(sk, phash, |
|---|
| 150 | | - skb, doff); |
|---|
| 151 | | - if (result) |
|---|
| 152 | | - return result; |
|---|
| 153 | | - } |
|---|
| 149 | + result = lookup_reuseport(net, sk, skb, doff, |
|---|
| 150 | + saddr, sport, daddr, hnum); |
|---|
| 151 | + if (result) |
|---|
| 152 | + return result; |
|---|
| 153 | + |
|---|
| 154 | 154 | result = sk; |
|---|
| 155 | 155 | hiscore = score; |
|---|
| 156 | 156 | } |
|---|
| 157 | 157 | } |
|---|
| 158 | 158 | |
|---|
| 159 | 159 | return result; |
|---|
| 160 | +} |
|---|
| 161 | + |
|---|
| 162 | +static inline struct sock *inet6_lookup_run_bpf(struct net *net, |
|---|
| 163 | + struct inet_hashinfo *hashinfo, |
|---|
| 164 | + struct sk_buff *skb, int doff, |
|---|
| 165 | + const struct in6_addr *saddr, |
|---|
| 166 | + const __be16 sport, |
|---|
| 167 | + const struct in6_addr *daddr, |
|---|
| 168 | + const u16 hnum) |
|---|
| 169 | +{ |
|---|
| 170 | + struct sock *sk, *reuse_sk; |
|---|
| 171 | + bool no_reuseport; |
|---|
| 172 | + |
|---|
| 173 | + if (hashinfo != &tcp_hashinfo) |
|---|
| 174 | + return NULL; /* only TCP is supported */ |
|---|
| 175 | + |
|---|
| 176 | + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, |
|---|
| 177 | + saddr, sport, daddr, hnum, &sk); |
|---|
| 178 | + if (no_reuseport || IS_ERR_OR_NULL(sk)) |
|---|
| 179 | + return sk; |
|---|
| 180 | + |
|---|
| 181 | + reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); |
|---|
| 182 | + if (reuse_sk) |
|---|
| 183 | + sk = reuse_sk; |
|---|
| 184 | + return sk; |
|---|
| 160 | 185 | } |
|---|
| 161 | 186 | |
|---|
| 162 | 187 | struct sock *inet6_lookup_listener(struct net *net, |
|---|
| .. | .. |
|---|
| 166 | 191 | const __be16 sport, const struct in6_addr *daddr, |
|---|
| 167 | 192 | const unsigned short hnum, const int dif, const int sdif) |
|---|
| 168 | 193 | { |
|---|
| 169 | | - unsigned int hash = inet_lhashfn(net, hnum); |
|---|
| 170 | | - struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; |
|---|
| 171 | | - bool exact_dif = inet6_exact_dif_match(net, skb); |
|---|
| 172 | 194 | struct inet_listen_hashbucket *ilb2; |
|---|
| 173 | | - struct sock *sk, *result = NULL; |
|---|
| 174 | | - struct hlist_nulls_node *node; |
|---|
| 175 | | - int score, hiscore = 0; |
|---|
| 195 | + struct sock *result = NULL; |
|---|
| 176 | 196 | unsigned int hash2; |
|---|
| 177 | | - u32 phash = 0; |
|---|
| 178 | 197 | |
|---|
| 179 | | - if (ilb->count <= 10 || !hashinfo->lhash2) |
|---|
| 180 | | - goto port_lookup; |
|---|
| 181 | | - |
|---|
| 182 | | - /* Too many sk in the ilb bucket (which is hashed by port alone). |
|---|
| 183 | | - * Try lhash2 (which is hashed by port and addr) instead. |
|---|
| 184 | | - */ |
|---|
| 198 | + /* Lookup redirect from BPF */ |
|---|
| 199 | + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { |
|---|
| 200 | + result = inet6_lookup_run_bpf(net, hashinfo, skb, doff, |
|---|
| 201 | + saddr, sport, daddr, hnum); |
|---|
| 202 | + if (result) |
|---|
| 203 | + goto done; |
|---|
| 204 | + } |
|---|
| 185 | 205 | |
|---|
| 186 | 206 | hash2 = ipv6_portaddr_hash(net, daddr, hnum); |
|---|
| 187 | 207 | ilb2 = inet_lhash2_bucket(hashinfo, hash2); |
|---|
| 188 | | - if (ilb2->count > ilb->count) |
|---|
| 189 | | - goto port_lookup; |
|---|
| 190 | 208 | |
|---|
| 191 | 209 | result = inet6_lhash2_lookup(net, ilb2, skb, doff, |
|---|
| 192 | 210 | saddr, sport, daddr, hnum, |
|---|
| .. | .. |
|---|
| 195 | 213 | goto done; |
|---|
| 196 | 214 | |
|---|
| 197 | 215 | /* Lookup lhash2 with in6addr_any */ |
|---|
| 198 | | - |
|---|
| 199 | 216 | hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); |
|---|
| 200 | 217 | ilb2 = inet_lhash2_bucket(hashinfo, hash2); |
|---|
| 201 | | - if (ilb2->count > ilb->count) |
|---|
| 202 | | - goto port_lookup; |
|---|
| 203 | 218 | |
|---|
| 204 | 219 | result = inet6_lhash2_lookup(net, ilb2, skb, doff, |
|---|
| 205 | | - saddr, sport, daddr, hnum, |
|---|
| 220 | + saddr, sport, &in6addr_any, hnum, |
|---|
| 206 | 221 | dif, sdif); |
|---|
| 207 | | - goto done; |
|---|
| 208 | | - |
|---|
| 209 | | -port_lookup: |
|---|
| 210 | | - sk_nulls_for_each(sk, node, &ilb->nulls_head) { |
|---|
| 211 | | - score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); |
|---|
| 212 | | - if (score > hiscore) { |
|---|
| 213 | | - if (sk->sk_reuseport) { |
|---|
| 214 | | - phash = inet6_ehashfn(net, daddr, hnum, |
|---|
| 215 | | - saddr, sport); |
|---|
| 216 | | - result = reuseport_select_sock(sk, phash, |
|---|
| 217 | | - skb, doff); |
|---|
| 218 | | - if (result) |
|---|
| 219 | | - goto done; |
|---|
| 220 | | - } |
|---|
| 221 | | - result = sk; |
|---|
| 222 | | - hiscore = score; |
|---|
| 223 | | - } |
|---|
| 224 | | - } |
|---|
| 225 | 222 | done: |
|---|
| 226 | | - if (unlikely(IS_ERR(result))) |
|---|
| 223 | + if (IS_ERR(result)) |
|---|
| 227 | 224 | return NULL; |
|---|
| 228 | 225 | return result; |
|---|
| 229 | 226 | } |
|---|
| .. | .. |
|---|
| 272 | 269 | if (sk2->sk_hash != hash) |
|---|
| 273 | 270 | continue; |
|---|
| 274 | 271 | |
|---|
| 275 | | - if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, |
|---|
| 272 | + if (likely(inet6_match(net, sk2, saddr, daddr, ports, |
|---|
| 276 | 273 | dif, sdif))) { |
|---|
| 277 | 274 | if (sk2->sk_state == TCP_TIME_WAIT) { |
|---|
| 278 | 275 | tw = inet_twsk(sk2); |
|---|
| .. | .. |
|---|
| 311 | 308 | return -EADDRNOTAVAIL; |
|---|
| 312 | 309 | } |
|---|
| 313 | 310 | |
|---|
| 314 | | -static u32 inet6_sk_port_offset(const struct sock *sk) |
|---|
| 311 | +static u64 inet6_sk_port_offset(const struct sock *sk) |
|---|
| 315 | 312 | { |
|---|
| 316 | 313 | const struct inet_sock *inet = inet_sk(sk); |
|---|
| 317 | 314 | |
|---|
| .. | .. |
|---|
| 323 | 320 | int inet6_hash_connect(struct inet_timewait_death_row *death_row, |
|---|
| 324 | 321 | struct sock *sk) |
|---|
| 325 | 322 | { |
|---|
| 326 | | - u32 port_offset = 0; |
|---|
| 323 | + u64 port_offset = 0; |
|---|
| 327 | 324 | |
|---|
| 328 | 325 | if (!inet_sk(sk)->inet_num) |
|---|
| 329 | 326 | port_offset = inet6_sk_port_offset(sk); |
|---|
| .. | .. |
|---|
| 336 | 333 | { |
|---|
| 337 | 334 | int err = 0; |
|---|
| 338 | 335 | |
|---|
| 339 | | - if (sk->sk_state != TCP_CLOSE) { |
|---|
| 340 | | - local_bh_disable(); |
|---|
| 336 | + if (sk->sk_state != TCP_CLOSE) |
|---|
| 341 | 337 | err = __inet_hash(sk, NULL); |
|---|
| 342 | | - local_bh_enable(); |
|---|
| 343 | | - } |
|---|
| 344 | 338 | |
|---|
| 345 | 339 | return err; |
|---|
| 346 | 340 | } |
|---|