hc
2024-05-10 23fa18eaa71266feff7ba8d83022d9e1cc83c65a
kernel/net/ipv4/inet_hashtables.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * INET An implementation of the TCP/IP protocol suite for the LINUX
34 * operating system. INET is implemented using the BSD Socket
....@@ -6,11 +7,6 @@
67 * Generic INET transport hashtables
78 *
89 * Authors: Lotsa people, from code originally in tcp
9
- *
10
- * This program is free software; you can redistribute it and/or
11
- * modify it under the terms of the GNU General Public License
12
- * as published by the Free Software Foundation; either version
13
- * 2 of the License, or (at your option) any later version.
1410 */
1511
1612 #include <linux/module.h>
....@@ -19,11 +15,14 @@
1915 #include <linux/slab.h>
2016 #include <linux/wait.h>
2117 #include <linux/vmalloc.h>
22
-#include <linux/bootmem.h>
18
+#include <linux/memblock.h>
2319
2420 #include <net/addrconf.h>
2521 #include <net/inet_connection_sock.h>
2622 #include <net/inet_hashtables.h>
23
+#if IS_ENABLED(CONFIG_IPV6)
24
+#include <net/inet6_hashtables.h>
25
+#endif
2726 #include <net/secure_seq.h>
2827 #include <net/ip.h>
2928 #include <net/tcp.h>
....@@ -65,12 +64,14 @@
6564 struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
6665 struct net *net,
6766 struct inet_bind_hashbucket *head,
68
- const unsigned short snum)
67
+ const unsigned short snum,
68
+ int l3mdev)
6969 {
7070 struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
7171
7272 if (tb) {
7373 write_pnet(&tb->ib_net, net);
74
+ tb->l3mdev = l3mdev;
7475 tb->port = snum;
7576 tb->fastreuse = 0;
7677 tb->fastreuseport = 0;
....@@ -135,6 +136,7 @@
135136 table->bhash_size);
136137 struct inet_bind_hashbucket *head = &table->bhash[bhash];
137138 struct inet_bind_bucket *tb;
139
+ int l3mdev;
138140
139141 spin_lock(&head->lock);
140142 tb = inet_csk(sk)->icsk_bind_hash;
....@@ -143,6 +145,8 @@
143145 return -ENOENT;
144146 }
145147 if (tb->port != port) {
148
+ l3mdev = inet_sk_bound_l3mdev(sk);
149
+
146150 /* NOTE: using tproxy and redirecting skbs to a proxy
147151 * on a different listener port breaks the assumption
148152 * that the listener socket's icsk_bind_hash is the same
....@@ -150,12 +154,13 @@
150154 * create a new bind bucket for the child here. */
151155 inet_bind_bucket_for_each(tb, &head->chain) {
152156 if (net_eq(ib_net(tb), sock_net(sk)) &&
153
- tb->port == port)
157
+ tb->l3mdev == l3mdev && tb->port == port)
154158 break;
155159 }
156160 if (!tb) {
157161 tb = inet_bind_bucket_create(table->bind_bucket_cachep,
158
- sock_net(sk), head, port);
162
+ sock_net(sk), head, port,
163
+ l3mdev);
159164 if (!tb) {
160165 spin_unlock(&head->lock);
161166 return -ENOMEM;
....@@ -226,33 +231,40 @@
226231
227232 static inline int compute_score(struct sock *sk, struct net *net,
228233 const unsigned short hnum, const __be32 daddr,
229
- const int dif, const int sdif, bool exact_dif)
234
+ const int dif, const int sdif)
230235 {
231236 int score = -1;
232
- struct inet_sock *inet = inet_sk(sk);
233237
234
- if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
238
+ if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
235239 !ipv6_only_sock(sk)) {
236
- __be32 rcv_saddr = inet->inet_rcv_saddr;
237
- score = sk->sk_family == PF_INET ? 2 : 1;
238
- if (rcv_saddr) {
239
- if (rcv_saddr != daddr)
240
- return -1;
241
- score += 4;
242
- }
243
- if (sk->sk_bound_dev_if || exact_dif) {
244
- bool dev_match = (sk->sk_bound_dev_if == dif ||
245
- sk->sk_bound_dev_if == sdif);
240
+ if (sk->sk_rcv_saddr != daddr)
241
+ return -1;
246242
247
- if (!dev_match)
248
- return -1;
249
- if (sk->sk_bound_dev_if)
250
- score += 4;
251
- }
243
+ if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
244
+ return -1;
245
+ score = sk->sk_bound_dev_if ? 2 : 1;
246
+
247
+ if (sk->sk_family == PF_INET)
248
+ score++;
252249 if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
253250 score++;
254251 }
255252 return score;
253
+}
254
+
255
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
256
+ struct sk_buff *skb, int doff,
257
+ __be32 saddr, __be16 sport,
258
+ __be32 daddr, unsigned short hnum)
259
+{
260
+ struct sock *reuse_sk = NULL;
261
+ u32 phash;
262
+
263
+ if (sk->sk_reuseport) {
264
+ phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
265
+ reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
266
+ }
267
+ return reuse_sk;
256268 }
257269
258270 /*
....@@ -270,31 +282,48 @@
270282 const __be32 daddr, const unsigned short hnum,
271283 const int dif, const int sdif)
272284 {
273
- bool exact_dif = inet_exact_dif_match(net, skb);
274285 struct inet_connection_sock *icsk;
275286 struct sock *sk, *result = NULL;
276287 int score, hiscore = 0;
277
- u32 phash = 0;
278288
279289 inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
280290 sk = (struct sock *)icsk;
281
- score = compute_score(sk, net, hnum, daddr,
282
- dif, sdif, exact_dif);
291
+ score = compute_score(sk, net, hnum, daddr, dif, sdif);
283292 if (score > hiscore) {
284
- if (sk->sk_reuseport) {
285
- phash = inet_ehashfn(net, daddr, hnum,
286
- saddr, sport);
287
- result = reuseport_select_sock(sk, phash,
288
- skb, doff);
289
- if (result)
290
- return result;
291
- }
293
+ result = lookup_reuseport(net, sk, skb, doff,
294
+ saddr, sport, daddr, hnum);
295
+ if (result)
296
+ return result;
297
+
292298 result = sk;
293299 hiscore = score;
294300 }
295301 }
296302
297303 return result;
304
+}
305
+
306
+static inline struct sock *inet_lookup_run_bpf(struct net *net,
307
+ struct inet_hashinfo *hashinfo,
308
+ struct sk_buff *skb, int doff,
309
+ __be32 saddr, __be16 sport,
310
+ __be32 daddr, u16 hnum)
311
+{
312
+ struct sock *sk, *reuse_sk;
313
+ bool no_reuseport;
314
+
315
+ if (hashinfo != &tcp_hashinfo)
316
+ return NULL; /* only TCP is supported */
317
+
318
+ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
319
+ saddr, sport, daddr, hnum, &sk);
320
+ if (no_reuseport || IS_ERR_OR_NULL(sk))
321
+ return sk;
322
+
323
+ reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
324
+ if (reuse_sk)
325
+ sk = reuse_sk;
326
+ return sk;
298327 }
299328
300329 struct sock *__inet_lookup_listener(struct net *net,
....@@ -304,27 +333,20 @@
304333 const __be32 daddr, const unsigned short hnum,
305334 const int dif, const int sdif)
306335 {
307
- unsigned int hash = inet_lhashfn(net, hnum);
308
- struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
309
- bool exact_dif = inet_exact_dif_match(net, skb);
310336 struct inet_listen_hashbucket *ilb2;
311
- struct sock *sk, *result = NULL;
312
- struct hlist_nulls_node *node;
313
- int score, hiscore = 0;
337
+ struct sock *result = NULL;
314338 unsigned int hash2;
315
- u32 phash = 0;
316339
317
- if (ilb->count <= 10 || !hashinfo->lhash2)
318
- goto port_lookup;
319
-
320
- /* Too many sk in the ilb bucket (which is hashed by port alone).
321
- * Try lhash2 (which is hashed by port and addr) instead.
322
- */
340
+ /* Lookup redirect from BPF */
341
+ if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
342
+ result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
343
+ saddr, sport, daddr, hnum);
344
+ if (result)
345
+ goto done;
346
+ }
323347
324348 hash2 = ipv4_portaddr_hash(net, daddr, hnum);
325349 ilb2 = inet_lhash2_bucket(hashinfo, hash2);
326
- if (ilb2->count > ilb->count)
327
- goto port_lookup;
328350
329351 result = inet_lhash2_lookup(net, ilb2, skb, doff,
330352 saddr, sport, daddr, hnum,
....@@ -333,36 +355,14 @@
333355 goto done;
334356
335357 /* Lookup lhash2 with INADDR_ANY */
336
-
337358 hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
338359 ilb2 = inet_lhash2_bucket(hashinfo, hash2);
339
- if (ilb2->count > ilb->count)
340
- goto port_lookup;
341360
342361 result = inet_lhash2_lookup(net, ilb2, skb, doff,
343
- saddr, sport, daddr, hnum,
362
+ saddr, sport, htonl(INADDR_ANY), hnum,
344363 dif, sdif);
345
- goto done;
346
-
347
-port_lookup:
348
- sk_nulls_for_each_rcu(sk, node, &ilb->nulls_head) {
349
- score = compute_score(sk, net, hnum, daddr,
350
- dif, sdif, exact_dif);
351
- if (score > hiscore) {
352
- if (sk->sk_reuseport) {
353
- phash = inet_ehashfn(net, daddr, hnum,
354
- saddr, sport);
355
- result = reuseport_select_sock(sk, phash,
356
- skb, doff);
357
- if (result)
358
- goto done;
359
- }
360
- result = sk;
361
- hiscore = score;
362
- }
363
- }
364364 done:
365
- if (unlikely(IS_ERR(result)))
365
+ if (IS_ERR(result))
366366 return NULL;
367367 return result;
368368 }
....@@ -410,13 +410,11 @@
410410 sk_nulls_for_each_rcu(sk, node, &head->chain) {
411411 if (sk->sk_hash != hash)
412412 continue;
413
- if (likely(INET_MATCH(sk, net, acookie,
414
- saddr, daddr, ports, dif, sdif))) {
413
+ if (likely(INET_MATCH(net, sk, acookie, ports, dif, sdif))) {
415414 if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
416415 goto out;
417
- if (unlikely(!INET_MATCH(sk, net, acookie,
418
- saddr, daddr, ports,
419
- dif, sdif))) {
416
+ if (unlikely(!INET_MATCH(net, sk, acookie,
417
+ ports, dif, sdif))) {
420418 sock_gen_put(sk);
421419 goto begin;
422420 }
....@@ -465,8 +463,7 @@
465463 if (sk2->sk_hash != hash)
466464 continue;
467465
468
- if (likely(INET_MATCH(sk2, net, acookie,
469
- saddr, daddr, ports, dif, sdif))) {
466
+ if (likely(INET_MATCH(net, sk2, acookie, ports, dif, sdif))) {
470467 if (sk2->sk_state == TCP_TIME_WAIT) {
471468 tw = inet_twsk(sk2);
472469 if (twsk_unique(sk, sk2, twp))
....@@ -504,7 +501,7 @@
504501 return -EADDRNOTAVAIL;
505502 }
506503
507
-static u32 inet_sk_port_offset(const struct sock *sk)
504
+static u64 inet_sk_port_offset(const struct sock *sk)
508505 {
509506 const struct inet_sock *inet = inet_sk(sk);
510507
....@@ -513,10 +510,50 @@
513510 inet->inet_dport);
514511 }
515512
516
-/* insert a socket into ehash, and eventually remove another one
517
- * (The another one can be a SYN_RECV or TIMEWAIT
513
+/* Searches for an existing socket in the ehash bucket list.
514
+ * Returns true if found, false otherwise.
518515 */
519
-bool inet_ehash_insert(struct sock *sk, struct sock *osk)
516
+static bool inet_ehash_lookup_by_sk(struct sock *sk,
517
+ struct hlist_nulls_head *list)
518
+{
519
+ const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
520
+ const int sdif = sk->sk_bound_dev_if;
521
+ const int dif = sk->sk_bound_dev_if;
522
+ const struct hlist_nulls_node *node;
523
+ struct net *net = sock_net(sk);
524
+ struct sock *esk;
525
+
526
+ INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
527
+
528
+ sk_nulls_for_each_rcu(esk, node, list) {
529
+ if (esk->sk_hash != sk->sk_hash)
530
+ continue;
531
+ if (sk->sk_family == AF_INET) {
532
+ if (unlikely(INET_MATCH(net, esk, acookie,
533
+ ports, dif, sdif))) {
534
+ return true;
535
+ }
536
+ }
537
+#if IS_ENABLED(CONFIG_IPV6)
538
+ else if (sk->sk_family == AF_INET6) {
539
+ if (unlikely(inet6_match(net, esk,
540
+ &sk->sk_v6_daddr,
541
+ &sk->sk_v6_rcv_saddr,
542
+ ports, dif, sdif))) {
543
+ return true;
544
+ }
545
+ }
546
+#endif
547
+ }
548
+ return false;
549
+}
550
+
551
+/* Insert a socket into ehash, and eventually remove another one
552
+ * (The other one can be a SYN_RECV or TIMEWAIT)
553
+ * If an existing socket already exists, socket sk is not inserted,
554
+ * and sets found_dup_sk parameter to true.
555
+ */
556
+bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
520557 {
521558 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
522559 struct hlist_nulls_head *list;
....@@ -535,16 +572,23 @@
535572 if (osk) {
536573 WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
537574 ret = sk_nulls_del_node_init_rcu(osk);
575
+ } else if (found_dup_sk) {
576
+ *found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
577
+ if (*found_dup_sk)
578
+ ret = false;
538579 }
580
+
539581 if (ret)
540582 __sk_nulls_add_node_rcu(sk, list);
583
+
541584 spin_unlock(lock);
585
+
542586 return ret;
543587 }
544588
545
-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
589
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
546590 {
547
- bool ok = inet_ehash_insert(sk, osk);
591
+ bool ok = inet_ehash_insert(sk, osk, found_dup_sk);
548592
549593 if (ok) {
550594 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
....@@ -588,7 +632,9 @@
588632 int err = 0;
589633
590634 if (sk->sk_state != TCP_LISTEN) {
591
- inet_ehash_nolisten(sk, osk);
635
+ local_bh_disable();
636
+ inet_ehash_nolisten(sk, osk, NULL);
637
+ local_bh_enable();
592638 return 0;
593639 }
594640 WARN_ON(!sk_unhashed(sk));
....@@ -620,50 +666,72 @@
620666 {
621667 int err = 0;
622668
623
- if (sk->sk_state != TCP_CLOSE) {
624
- local_bh_disable();
669
+ if (sk->sk_state != TCP_CLOSE)
625670 err = __inet_hash(sk, NULL);
626
- local_bh_enable();
627
- }
628671
629672 return err;
630673 }
631674 EXPORT_SYMBOL_GPL(inet_hash);
632675
633
-void inet_unhash(struct sock *sk)
676
+static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
634677 {
635
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
636
- struct inet_listen_hashbucket *ilb = NULL;
637
- spinlock_t *lock;
638
-
639678 if (sk_unhashed(sk))
640679 return;
641
-
642
- if (sk->sk_state == TCP_LISTEN) {
643
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
644
- lock = &ilb->lock;
645
- } else {
646
- lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
647
- }
648
- spin_lock_bh(lock);
649
- if (sk_unhashed(sk))
650
- goto unlock;
651680
652681 if (rcu_access_pointer(sk->sk_reuseport_cb))
653682 reuseport_detach_sock(sk);
654683 if (ilb) {
684
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
685
+
655686 inet_unhash2(hashinfo, sk);
656687 ilb->count--;
657688 }
658689 __sk_nulls_del_node_init_rcu(sk);
659690 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
660
-unlock:
661
- spin_unlock_bh(lock);
691
+}
692
+
693
+void inet_unhash(struct sock *sk)
694
+{
695
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
696
+
697
+ if (sk_unhashed(sk))
698
+ return;
699
+
700
+ if (sk->sk_state == TCP_LISTEN) {
701
+ struct inet_listen_hashbucket *ilb;
702
+
703
+ ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
704
+ /* Don't disable bottom halves while acquiring the lock to
705
+ * avoid circular locking dependency on PREEMPT_RT.
706
+ */
707
+ spin_lock(&ilb->lock);
708
+ __inet_unhash(sk, ilb);
709
+ spin_unlock(&ilb->lock);
710
+ } else {
711
+ spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
712
+
713
+ spin_lock_bh(lock);
714
+ __inet_unhash(sk, NULL);
715
+ spin_unlock_bh(lock);
716
+ }
662717 }
663718 EXPORT_SYMBOL_GPL(inet_unhash);
664719
720
+/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
721
+ * Note that we use 32bit integers (vs RFC 'short integers')
722
+ * because 2^16 is not a multiple of num_ephemeral and this
723
+ * property might be used by clever attacker.
724
+ *
725
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though
726
+ * attacks were since demonstrated, thus we use 65536 by default instead
727
+ * to really give more isolation and privacy, at the expense of 256kB
728
+ * of kernel memory.
729
+ */
730
+#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER)
731
+static u32 *table_perturb;
732
+
665733 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
666
- struct sock *sk, u32 port_offset,
734
+ struct sock *sk, u64 port_offset,
667735 int (*check_established)(struct inet_timewait_death_row *,
668736 struct sock *, __u16, struct inet_timewait_sock **))
669737 {
....@@ -675,24 +743,17 @@
675743 struct inet_bind_bucket *tb;
676744 u32 remaining, offset;
677745 int ret, i, low, high;
678
- static u32 hint;
746
+ int l3mdev;
747
+ u32 index;
679748
680749 if (port) {
681
- head = &hinfo->bhash[inet_bhashfn(net, port,
682
- hinfo->bhash_size)];
683
- tb = inet_csk(sk)->icsk_bind_hash;
684
- spin_lock_bh(&head->lock);
685
- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
686
- inet_ehash_nolisten(sk, NULL);
687
- spin_unlock_bh(&head->lock);
688
- return 0;
689
- }
690
- spin_unlock(&head->lock);
691
- /* No definite answer... Walk to established hash table */
750
+ local_bh_disable();
692751 ret = check_established(death_row, sk, port, NULL);
693752 local_bh_enable();
694753 return ret;
695754 }
755
+
756
+ l3mdev = inet_sk_bound_l3mdev(sk);
696757
697758 inet_get_local_port_range(net, &low, &high);
698759 high++; /* [32768, 60999] -> [32768, 61000[ */
....@@ -700,7 +761,13 @@
700761 if (likely(remaining > 1))
701762 remaining &= ~1U;
702763
703
- offset = (hint + port_offset) % remaining;
764
+ get_random_slow_once(table_perturb,
765
+ INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
766
+ index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
767
+
768
+ offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
769
+ offset %= remaining;
770
+
704771 /* In first pass we try ports of @low parity.
705772 * inet_csk_get_port() does the opposite choice.
706773 */
....@@ -720,7 +787,8 @@
720787 * the established check is already unique enough.
721788 */
722789 inet_bind_bucket_for_each(tb, &head->chain) {
723
- if (net_eq(ib_net(tb), net) && tb->port == port) {
790
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
791
+ tb->port == port) {
724792 if (tb->fastreuse >= 0 ||
725793 tb->fastreuseport >= 0)
726794 goto next_port;
....@@ -733,7 +801,7 @@
733801 }
734802
735803 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
736
- net, head, port);
804
+ net, head, port, l3mdev);
737805 if (!tb) {
738806 spin_unlock_bh(&head->lock);
739807 return -ENOMEM;
....@@ -753,13 +821,19 @@
753821 return -EADDRNOTAVAIL;
754822
755823 ok:
756
- hint += i + 2;
824
+ /* Here we want to add a little bit of randomness to the next source
825
+ * port that will be chosen. We use a max() with a random here so that
826
+ * on low contention the randomness is maximal and on high contention
827
+ * it may be inexistent.
828
+ */
829
+ i = max_t(int, i, (prandom_u32() & 7) * 2);
830
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
757831
758832 /* Head lock still held and bh's disabled */
759833 inet_bind_hash(sk, tb, port);
760834 if (sk_unhashed(sk)) {
761835 inet_sk(sk)->inet_sport = htons(port);
762
- inet_ehash_nolisten(sk, (struct sock *)tw);
836
+ inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
763837 }
764838 if (tw)
765839 inet_twsk_bind_unhash(tw, hinfo);
....@@ -776,7 +850,7 @@
776850 int inet_hash_connect(struct inet_timewait_death_row *death_row,
777851 struct sock *sk)
778852 {
779
- u32 port_offset = 0;
853
+ u64 port_offset = 0;
780854
781855 if (!inet_sk(sk)->inet_num)
782856 port_offset = inet_sk_port_offset(sk);
....@@ -800,13 +874,22 @@
800874 }
801875 EXPORT_SYMBOL_GPL(inet_hashinfo_init);
802876
877
+static void init_hashinfo_lhash2(struct inet_hashinfo *h)
878
+{
879
+ int i;
880
+
881
+ for (i = 0; i <= h->lhash2_mask; i++) {
882
+ spin_lock_init(&h->lhash2[i].lock);
883
+ INIT_HLIST_HEAD(&h->lhash2[i].head);
884
+ h->lhash2[i].count = 0;
885
+ }
886
+}
887
+
803888 void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
804889 unsigned long numentries, int scale,
805890 unsigned long low_limit,
806891 unsigned long high_limit)
807892 {
808
- unsigned int i;
809
-
810893 h->lhash2 = alloc_large_system_hash(name,
811894 sizeof(*h->lhash2),
812895 numentries,
....@@ -816,14 +899,30 @@
816899 &h->lhash2_mask,
817900 low_limit,
818901 high_limit);
902
+ init_hashinfo_lhash2(h);
819903
820
- for (i = 0; i <= h->lhash2_mask; i++) {
821
- spin_lock_init(&h->lhash2[i].lock);
822
- INIT_HLIST_HEAD(&h->lhash2[i].head);
823
- h->lhash2[i].count = 0;
824
- }
904
+ /* this one is used for source ports of outgoing connections */
905
+ table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
906
+ sizeof(*table_perturb), GFP_KERNEL);
907
+ if (!table_perturb)
908
+ panic("TCP: failed to alloc table_perturb");
825909 }
826910
911
+int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
912
+{
913
+ h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2), GFP_KERNEL);
914
+ if (!h->lhash2)
915
+ return -ENOMEM;
916
+
917
+ h->lhash2_mask = INET_LHTABLE_SIZE - 1;
918
+ /* INET_LHTABLE_SIZE must be a power of 2 */
919
+ BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask);
920
+
921
+ init_hashinfo_lhash2(h);
922
+ return 0;
923
+}
924
+EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod);
925
+
827926 int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
828927 {
829928 unsigned int locksz = sizeof(spinlock_t);