hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/net/ipv6/ip6_flowlabel.c
....@@ -1,10 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * ip6_flowlabel.c IPv6 flowlabel manager.
3
- *
4
- * This program is free software; you can redistribute it and/or
5
- * modify it under the terms of the GNU General Public License
6
- * as published by the Free Software Foundation; either version
7
- * 2 of the License, or (at your option) any later version.
84 *
95 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
106 */
....@@ -21,6 +17,7 @@
2117 #include <linux/slab.h>
2218 #include <linux/export.h>
2319 #include <linux/pid_namespace.h>
20
+#include <linux/jump_label_ratelimit.h>
2421
2522 #include <net/net_namespace.h>
2623 #include <net/sock.h>
....@@ -56,6 +53,9 @@
5653 /* Big socket lock */
5754
5855 static DEFINE_SPINLOCK(ip6_sk_fl_lock);
56
+
57
+DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
58
+EXPORT_SYMBOL(ipv6_flowlabel_exclusive);
5959
6060 #define for_each_fl_rcu(hash, fl) \
6161 for (fl = rcu_dereference_bh(fl_ht[(hash)]); \
....@@ -94,6 +94,13 @@
9494 return fl;
9595 }
9696
97
+static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
98
+{
99
+ return fl->share == IPV6_FL_S_EXCL ||
100
+ fl->share == IPV6_FL_S_PROCESS ||
101
+ fl->share == IPV6_FL_S_USER;
102
+}
103
+
97104 static void fl_free_rcu(struct rcu_head *head)
98105 {
99106 struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
....@@ -107,8 +114,13 @@
107114
108115 static void fl_free(struct ip6_flowlabel *fl)
109116 {
110
- if (fl)
111
- call_rcu(&fl->rcu, fl_free_rcu);
117
+ if (!fl)
118
+ return;
119
+
120
+ if (fl_shared_exclusive(fl) || fl->opt)
121
+ static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);
122
+
123
+ call_rcu(&fl->rcu, fl_free_rcu);
112124 }
113125
114126 static void fl_release(struct ip6_flowlabel *fl)
....@@ -244,7 +256,7 @@
244256
245257 /* Socket flowlabel lists */
246258
247
-struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label)
259
+struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
248260 {
249261 struct ipv6_fl_socklist *sfl;
250262 struct ipv6_pinfo *np = inet6_sk(sk);
....@@ -264,7 +276,7 @@
264276 rcu_read_unlock_bh();
265277 return NULL;
266278 }
267
-EXPORT_SYMBOL_GPL(fl6_sock_lookup);
279
+EXPORT_SYMBOL_GPL(__fl6_sock_lookup);
268280
269281 void fl6_free_socklist(struct sock *sk)
270282 {
....@@ -359,7 +371,7 @@
359371
360372 static struct ip6_flowlabel *
361373 fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
362
- char __user *optval, int optlen, int *err_p)
374
+ sockptr_t optval, int optlen, int *err_p)
363375 {
364376 struct ip6_flowlabel *fl = NULL;
365377 int olen;
....@@ -389,7 +401,8 @@
389401 memset(fl->opt, 0, sizeof(*fl->opt));
390402 fl->opt->tot_len = sizeof(*fl->opt) + olen;
391403 err = -EFAULT;
392
- if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
404
+ if (copy_from_sockptr_offset(fl->opt + 1, optval,
405
+ CMSG_ALIGN(sizeof(*freq)), olen))
393406 goto done;
394407
395408 msg.msg_controllen = olen;
....@@ -437,10 +450,15 @@
437450 err = -EINVAL;
438451 goto done;
439452 }
453
+ if (fl_shared_exclusive(fl) || fl->opt)
454
+ static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
440455 return fl;
441456
442457 done:
443
- fl_free(fl);
458
+ if (fl) {
459
+ kfree(fl->opt);
460
+ kfree(fl);
461
+ }
444462 *err_p = err;
445463 return NULL;
446464 }
....@@ -516,185 +534,210 @@
516534 return -ENOENT;
517535 }
518536
519
-int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
537
+#define socklist_dereference(__sflp) \
538
+ rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))
539
+
540
+static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
520541 {
521
- int uninitialized_var(err);
522
- struct net *net = sock_net(sk);
523542 struct ipv6_pinfo *np = inet6_sk(sk);
524
- struct in6_flowlabel_req freq;
525
- struct ipv6_fl_socklist *sfl1 = NULL;
526
- struct ipv6_fl_socklist *sfl;
527543 struct ipv6_fl_socklist __rcu **sflp;
544
+ struct ipv6_fl_socklist *sfl;
545
+
546
+ if (freq->flr_flags & IPV6_FL_F_REFLECT) {
547
+ if (sk->sk_protocol != IPPROTO_TCP)
548
+ return -ENOPROTOOPT;
549
+ if (!np->repflow)
550
+ return -ESRCH;
551
+ np->flow_label = 0;
552
+ np->repflow = 0;
553
+ return 0;
554
+ }
555
+
556
+ spin_lock_bh(&ip6_sk_fl_lock);
557
+ for (sflp = &np->ipv6_fl_list;
558
+ (sfl = socklist_dereference(*sflp)) != NULL;
559
+ sflp = &sfl->next) {
560
+ if (sfl->fl->label == freq->flr_label)
561
+ goto found;
562
+ }
563
+ spin_unlock_bh(&ip6_sk_fl_lock);
564
+ return -ESRCH;
565
+found:
566
+ if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
567
+ np->flow_label &= ~IPV6_FLOWLABEL_MASK;
568
+ *sflp = sfl->next;
569
+ spin_unlock_bh(&ip6_sk_fl_lock);
570
+ fl_release(sfl->fl);
571
+ kfree_rcu(sfl, rcu);
572
+ return 0;
573
+}
574
+
575
+static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
576
+{
577
+ struct ipv6_pinfo *np = inet6_sk(sk);
578
+ struct net *net = sock_net(sk);
579
+ struct ipv6_fl_socklist *sfl;
580
+ int err;
581
+
582
+ rcu_read_lock_bh();
583
+ for_each_sk_fl_rcu(np, sfl) {
584
+ if (sfl->fl->label == freq->flr_label) {
585
+ err = fl6_renew(sfl->fl, freq->flr_linger,
586
+ freq->flr_expires);
587
+ rcu_read_unlock_bh();
588
+ return err;
589
+ }
590
+ }
591
+ rcu_read_unlock_bh();
592
+
593
+ if (freq->flr_share == IPV6_FL_S_NONE &&
594
+ ns_capable(net->user_ns, CAP_NET_ADMIN)) {
595
+ struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);
596
+
597
+ if (fl) {
598
+ err = fl6_renew(fl, freq->flr_linger,
599
+ freq->flr_expires);
600
+ fl_release(fl);
601
+ return err;
602
+ }
603
+ }
604
+ return -ESRCH;
605
+}
606
+
607
+static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
608
+ sockptr_t optval, int optlen)
609
+{
610
+ struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
528611 struct ip6_flowlabel *fl, *fl1 = NULL;
612
+ struct ipv6_pinfo *np = inet6_sk(sk);
613
+ struct net *net = sock_net(sk);
614
+ int err;
529615
616
+ if (freq->flr_flags & IPV6_FL_F_REFLECT) {
617
+ if (net->ipv6.sysctl.flowlabel_consistency) {
618
+ net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
619
+ return -EPERM;
620
+ }
530621
531
- if (optlen < sizeof(freq))
622
+ if (sk->sk_protocol != IPPROTO_TCP)
623
+ return -ENOPROTOOPT;
624
+ np->repflow = 1;
625
+ return 0;
626
+ }
627
+
628
+ if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
532629 return -EINVAL;
630
+ if (net->ipv6.sysctl.flowlabel_state_ranges &&
631
+ (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
632
+ return -ERANGE;
533633
534
- if (copy_from_user(&freq, optval, sizeof(freq)))
535
- return -EFAULT;
634
+ fl = fl_create(net, sk, freq, optval, optlen, &err);
635
+ if (!fl)
636
+ return err;
536637
537
- switch (freq.flr_action) {
538
- case IPV6_FL_A_PUT:
539
- if (freq.flr_flags & IPV6_FL_F_REFLECT) {
540
- if (sk->sk_protocol != IPPROTO_TCP)
541
- return -ENOPROTOOPT;
542
- if (!np->repflow)
543
- return -ESRCH;
544
- np->flow_label = 0;
545
- np->repflow = 0;
546
- return 0;
547
- }
548
- spin_lock_bh(&ip6_sk_fl_lock);
549
- for (sflp = &np->ipv6_fl_list;
550
- (sfl = rcu_dereference_protected(*sflp,
551
- lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
552
- sflp = &sfl->next) {
553
- if (sfl->fl->label == freq.flr_label) {
554
- if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
555
- np->flow_label &= ~IPV6_FLOWLABEL_MASK;
556
- *sflp = sfl->next;
557
- spin_unlock_bh(&ip6_sk_fl_lock);
558
- fl_release(sfl->fl);
559
- kfree_rcu(sfl, rcu);
560
- return 0;
561
- }
562
- }
563
- spin_unlock_bh(&ip6_sk_fl_lock);
564
- return -ESRCH;
638
+ sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
565639
566
- case IPV6_FL_A_RENEW:
640
+ if (freq->flr_label) {
641
+ err = -EEXIST;
567642 rcu_read_lock_bh();
568643 for_each_sk_fl_rcu(np, sfl) {
569
- if (sfl->fl->label == freq.flr_label) {
570
- err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
571
- rcu_read_unlock_bh();
572
- return err;
644
+ if (sfl->fl->label == freq->flr_label) {
645
+ if (freq->flr_flags & IPV6_FL_F_EXCL) {
646
+ rcu_read_unlock_bh();
647
+ goto done;
648
+ }
649
+ fl1 = sfl->fl;
650
+ if (!atomic_inc_not_zero(&fl1->users))
651
+ fl1 = NULL;
652
+ break;
573653 }
574654 }
575655 rcu_read_unlock_bh();
576656
577
- if (freq.flr_share == IPV6_FL_S_NONE &&
578
- ns_capable(net->user_ns, CAP_NET_ADMIN)) {
579
- fl = fl_lookup(net, freq.flr_label);
580
- if (fl) {
581
- err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
582
- fl_release(fl);
583
- return err;
584
- }
585
- }
586
- return -ESRCH;
587
-
588
- case IPV6_FL_A_GET:
589
- if (freq.flr_flags & IPV6_FL_F_REFLECT) {
590
- struct net *net = sock_net(sk);
591
- if (net->ipv6.sysctl.flowlabel_consistency) {
592
- net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
593
- return -EPERM;
594
- }
595
-
596
- if (sk->sk_protocol != IPPROTO_TCP)
597
- return -ENOPROTOOPT;
598
-
599
- np->repflow = 1;
600
- return 0;
601
- }
602
-
603
- if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
604
- return -EINVAL;
605
-
606
- if (net->ipv6.sysctl.flowlabel_state_ranges &&
607
- (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
608
- return -ERANGE;
609
-
610
- fl = fl_create(net, sk, &freq, optval, optlen, &err);
611
- if (!fl)
612
- return err;
613
- sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
614
-
615
- if (freq.flr_label) {
616
- err = -EEXIST;
617
- rcu_read_lock_bh();
618
- for_each_sk_fl_rcu(np, sfl) {
619
- if (sfl->fl->label == freq.flr_label) {
620
- if (freq.flr_flags&IPV6_FL_F_EXCL) {
621
- rcu_read_unlock_bh();
622
- goto done;
623
- }
624
- fl1 = sfl->fl;
625
- if (!atomic_inc_not_zero(&fl1->users))
626
- fl1 = NULL;
627
- break;
628
- }
629
- }
630
- rcu_read_unlock_bh();
631
-
632
- if (!fl1)
633
- fl1 = fl_lookup(net, freq.flr_label);
634
- if (fl1) {
657
+ if (!fl1)
658
+ fl1 = fl_lookup(net, freq->flr_label);
659
+ if (fl1) {
635660 recheck:
636
- err = -EEXIST;
637
- if (freq.flr_flags&IPV6_FL_F_EXCL)
638
- goto release;
639
- err = -EPERM;
640
- if (fl1->share == IPV6_FL_S_EXCL ||
641
- fl1->share != fl->share ||
642
- ((fl1->share == IPV6_FL_S_PROCESS) &&
643
- (fl1->owner.pid != fl->owner.pid)) ||
644
- ((fl1->share == IPV6_FL_S_USER) &&
645
- !uid_eq(fl1->owner.uid, fl->owner.uid)))
646
- goto release;
661
+ err = -EEXIST;
662
+ if (freq->flr_flags&IPV6_FL_F_EXCL)
663
+ goto release;
664
+ err = -EPERM;
665
+ if (fl1->share == IPV6_FL_S_EXCL ||
666
+ fl1->share != fl->share ||
667
+ ((fl1->share == IPV6_FL_S_PROCESS) &&
668
+ (fl1->owner.pid != fl->owner.pid)) ||
669
+ ((fl1->share == IPV6_FL_S_USER) &&
670
+ !uid_eq(fl1->owner.uid, fl->owner.uid)))
671
+ goto release;
647672
648
- err = -ENOMEM;
649
- if (!sfl1)
650
- goto release;
651
- if (fl->linger > fl1->linger)
652
- fl1->linger = fl->linger;
653
- if ((long)(fl->expires - fl1->expires) > 0)
654
- fl1->expires = fl->expires;
655
- fl_link(np, sfl1, fl1);
656
- fl_free(fl);
657
- return 0;
673
+ err = -ENOMEM;
674
+ if (!sfl1)
675
+ goto release;
676
+ if (fl->linger > fl1->linger)
677
+ fl1->linger = fl->linger;
678
+ if ((long)(fl->expires - fl1->expires) > 0)
679
+ fl1->expires = fl->expires;
680
+ fl_link(np, sfl1, fl1);
681
+ fl_free(fl);
682
+ return 0;
658683
659684 release:
660
- fl_release(fl1);
661
- goto done;
662
- }
685
+ fl_release(fl1);
686
+ goto done;
663687 }
664
- err = -ENOENT;
665
- if (!(freq.flr_flags&IPV6_FL_F_CREATE))
666
- goto done;
688
+ }
689
+ err = -ENOENT;
690
+ if (!(freq->flr_flags & IPV6_FL_F_CREATE))
691
+ goto done;
667692
668
- err = -ENOMEM;
669
- if (!sfl1)
670
- goto done;
693
+ err = -ENOMEM;
694
+ if (!sfl1)
695
+ goto done;
671696
672
- err = mem_check(sk);
673
- if (err != 0)
674
- goto done;
697
+ err = mem_check(sk);
698
+ if (err != 0)
699
+ goto done;
675700
676
- fl1 = fl_intern(net, fl, freq.flr_label);
677
- if (fl1)
678
- goto recheck;
701
+ fl1 = fl_intern(net, fl, freq->flr_label);
702
+ if (fl1)
703
+ goto recheck;
679704
680
- if (!freq.flr_label) {
681
- if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
682
- &fl->label, sizeof(fl->label))) {
683
- /* Intentionally ignore fault. */
684
- }
705
+ if (!freq->flr_label) {
706
+ size_t offset = offsetof(struct in6_flowlabel_req, flr_label);
707
+
708
+ if (copy_to_sockptr_offset(optval, offset, &fl->label,
709
+ sizeof(fl->label))) {
710
+ /* Intentionally ignore fault. */
685711 }
686
-
687
- fl_link(np, sfl1, fl);
688
- return 0;
689
-
690
- default:
691
- return -EINVAL;
692712 }
693713
714
+ fl_link(np, sfl1, fl);
715
+ return 0;
694716 done:
695717 fl_free(fl);
696718 kfree(sfl1);
697719 return err;
720
+}
721
+
722
+int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
723
+{
724
+ struct in6_flowlabel_req freq;
725
+
726
+ if (optlen < sizeof(freq))
727
+ return -EINVAL;
728
+ if (copy_from_sockptr(&freq, optval, sizeof(freq)))
729
+ return -EFAULT;
730
+
731
+ switch (freq.flr_action) {
732
+ case IPV6_FL_A_PUT:
733
+ return ipv6_flowlabel_put(sk, &freq);
734
+ case IPV6_FL_A_RENEW:
735
+ return ipv6_flowlabel_renew(sk, &freq);
736
+ case IPV6_FL_A_GET:
737
+ return ipv6_flowlabel_get(sk, &freq, optval, optlen);
738
+ default:
739
+ return -EINVAL;
740
+ }
698741 }
699742
700743 #ifdef CONFIG_PROC_FS
....@@ -762,7 +805,7 @@
762805 {
763806 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
764807
765
- state->pid_ns = proc_pid_ns(file_inode(seq->file));
808
+ state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);
766809
767810 rcu_read_lock_bh();
768811 return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
....@@ -858,6 +901,7 @@
858901
859902 void ip6_flowlabel_cleanup(void)
860903 {
904
+ static_key_deferred_flush(&ipv6_flowlabel_exclusive);
861905 del_timer(&ip6_fl_gc_timer);
862906 unregister_pernet_subsys(&ip6_flowlabel_net_ops);
863907 }