2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/net/core/sock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system. INET is implemented using the BSD Socket
@@ -5,7 +6,6 @@
  *
  *		Generic socket support routines. Memory allocators, socket lock/release
  *		handler for protocols to use and generic option handler.
- *
  *
  *		Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -81,12 +81,6 @@
  *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
  *
  *	To Fix:
- *
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -119,6 +113,7 @@
 #include <linux/static_key.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/compat.h>
 
 #include <linux/uaccess.h>
 
@@ -137,8 +132,10 @@
 
 #include <linux/filter.h>
 #include <net/sock_reuseport.h>
+#include <net/bpf_sk_storage.h>
 
 #include <trace/events/sock.h>
+#include <trace/hooks/sched.h>
 
 #include <net/tcp.h>
 #include <net/busy_poll.h>
@@ -335,14 +332,66 @@
 }
 EXPORT_SYMBOL(__sk_backlog_rcv);
 
-static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
+static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
 {
-	struct timeval tv;
+	struct __kernel_sock_timeval tv;
 
-	if (optlen < sizeof(tv))
-		return -EINVAL;
-	if (copy_from_user(&tv, optval, sizeof(tv)))
-		return -EFAULT;
+	if (timeo == MAX_SCHEDULE_TIMEOUT) {
+		tv.tv_sec = 0;
+		tv.tv_usec = 0;
+	} else {
+		tv.tv_sec = timeo / HZ;
+		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
+	}
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
+		*(struct old_timeval32 *)optval = tv32;
+		return sizeof(tv32);
+	}
+
+	if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+		old_tv.tv_sec = tv.tv_sec;
+		old_tv.tv_usec = tv.tv_usec;
+		*(struct __kernel_old_timeval *)optval = old_tv;
+		return sizeof(old_tv);
+	}
+
+	*(struct __kernel_sock_timeval *)optval = tv;
+	return sizeof(tv);
+}
+
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+			    bool old_timeval)
+{
+	struct __kernel_sock_timeval tv;
+
+	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+		struct old_timeval32 tv32;
+
+		if (optlen < sizeof(tv32))
+			return -EINVAL;
+
+		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
+			return -EFAULT;
+		tv.tv_sec = tv32.tv_sec;
+		tv.tv_usec = tv32.tv_usec;
+	} else if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+
+		if (optlen < sizeof(old_tv))
+			return -EINVAL;
+		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
+			return -EFAULT;
+		tv.tv_sec = old_tv.tv_sec;
+		tv.tv_usec = old_tv.tv_usec;
+	} else {
+		if (optlen < sizeof(tv))
+			return -EINVAL;
+		if (copy_from_sockptr(&tv, optval, sizeof(tv)))
+			return -EFAULT;
+	}
 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
 		return -EDOM;
 
@@ -360,21 +409,9 @@
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
 		return 0;
-	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
-		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
+	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
+		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
 	return 0;
-}
-
-static void sock_warn_obsolete_bsdism(const char *name)
-{
-	static int warned;
-	static char warncomm[TASK_COMM_LEN];
-	if (strcmp(warncomm, current->comm) && warned < 5) {
-		strcpy(warncomm, current->comm);
-		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
-			warncomm, name);
-		warned++;
-	}
 }
 
 static bool sock_needs_netstamp(const struct sock *sk)
@@ -472,8 +509,8 @@
 
 		rc = sk_backlog_rcv(sk, skb);
 
-		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
+	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
@@ -520,19 +557,55 @@
 }
 EXPORT_SYMBOL(sk_dst_check);
 
-static int sock_setbindtodevice(struct sock *sk, char __user *optval,
-				int optlen)
+static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
+{
+	int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+	struct net *net = sock_net(sk);
+
+	/* Sorry... */
+	ret = -EPERM;
+	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
+		goto out;
+
+	ret = -EINVAL;
+	if (ifindex < 0)
+		goto out;
+
+	sk->sk_bound_dev_if = ifindex;
+	if (sk->sk_prot->rehash)
+		sk->sk_prot->rehash(sk);
+	sk_dst_reset(sk);
+
+	ret = 0;
+
+out:
+#endif
+
+	return ret;
+}
+
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
+{
+	int ret;
+
+	if (lock_sk)
+		lock_sock(sk);
+	ret = sock_bindtoindex_locked(sk, ifindex);
+	if (lock_sk)
+		release_sock(sk);
+
+	return ret;
+}
+EXPORT_SYMBOL(sock_bindtoindex);
+
+static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
 {
 	int ret = -ENOPROTOOPT;
 #ifdef CONFIG_NETDEVICES
 	struct net *net = sock_net(sk);
 	char devname[IFNAMSIZ];
 	int index;
-
-	/* Sorry... */
-	ret = -EPERM;
-	if (!ns_capable(net->user_ns, CAP_NET_RAW))
-		goto out;
 
 	ret = -EINVAL;
 	if (optlen < 0)
@@ -548,7 +621,7 @@
 	memset(devname, 0, sizeof(devname));
 
 	ret = -EFAULT;
-	if (copy_from_user(devname, optval, optlen))
+	if (copy_from_sockptr(devname, optval, optlen))
 		goto out;
 
 	index = 0;
@@ -565,13 +638,7 @@
 		goto out;
 	}
 
-	lock_sock(sk);
-	sk->sk_bound_dev_if = index;
-	sk_dst_reset(sk);
-	release_sock(sk);
-
-	ret = 0;
-
+	return sock_bindtoindex(sk, index, true);
 out:
 #endif
 
@@ -618,21 +685,14 @@
 	return ret;
 }
 
-static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
-{
-	if (valbool)
-		sock_set_flag(sk, bit);
-	else
-		sock_reset_flag(sk, bit);
-}
-
 bool sk_mc_loop(struct sock *sk)
 {
 	if (dev_recursion_level())
 		return false;
 	if (!sk)
 		return true;
-	switch (sk->sk_family) {
+	/* IPV6_ADDRFORM can change sk->sk_family under us. */
+	switch (READ_ONCE(sk->sk_family)) {
 	case AF_INET:
 		return inet_sk(sk)->mc_loop;
 #if IS_ENABLED(CONFIG_IPV6)
@@ -645,13 +705,133 @@
 }
 EXPORT_SYMBOL(sk_mc_loop);
 
+void sock_set_reuseaddr(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuse = SK_CAN_REUSE;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseaddr);
+
+void sock_set_reuseport(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuseport = true;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseport);
+
+void sock_no_linger(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_lingertime = 0;
+	sock_set_flag(sk, SOCK_LINGER);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_no_linger);
+
+void sock_set_priority(struct sock *sk, u32 priority)
+{
+	lock_sock(sk);
+	sk->sk_priority = priority;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_priority);
+
+void sock_set_sndtimeo(struct sock *sk, s64 secs)
+{
+	lock_sock(sk);
+	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+		sk->sk_sndtimeo = secs * HZ;
+	else
+		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_sndtimeo);
+
+static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+	if (val) {
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
+		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
+		sock_set_flag(sk, SOCK_RCVTSTAMP);
+		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+	} else {
+		sock_reset_flag(sk, SOCK_RCVTSTAMP);
+		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+	}
+}
+
+void sock_enable_timestamps(struct sock *sk)
+{
+	lock_sock(sk);
+	__sock_set_timestamps(sk, true, false, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_enable_timestamps);
+
+void sock_set_keepalive(struct sock *sk)
+{
+	lock_sock(sk);
+	if (sk->sk_prot->keepalive)
+		sk->sk_prot->keepalive(sk, true);
+	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_keepalive);
+
+static void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+	 * as a negative value.
+	 */
+	val = min_t(int, val, INT_MAX / 2);
+	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+	/* We double it on the way in to account for "struct sk_buff" etc.
+	 * overhead. Applications assume that the SO_RCVBUF setting they make
+	 * will allow that much actual data to be received on that socket.
+	 *
+	 * Applications are unaware that "struct sk_buff" and other overheads
+	 * allocate from the receive buffer during socket buffer allocation.
+	 *
+	 * And after considering the possible alternatives, returning the value
+	 * we actually used in getsockopt is the most desirable behavior.
+	 */
+	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+	lock_sock(sk);
+	__sock_set_rcvbuf(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
+
+static void __sock_set_mark(struct sock *sk, u32 val)
+{
+	if (val != sk->sk_mark) {
+		sk->sk_mark = val;
+		sk_dst_reset(sk);
+	}
+}
+
+void sock_set_mark(struct sock *sk, u32 val)
+{
+	lock_sock(sk);
+	__sock_set_mark(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_mark);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
  */
 
 int sock_setsockopt(struct socket *sock, int level, int optname,
-		    char __user *optval, unsigned int optlen)
+		    sockptr_t optval, unsigned int optlen)
 {
 	struct sock_txtime sk_txtime;
 	struct sock *sk = sock->sk;
@@ -670,7 +850,7 @@
 	if (optlen < sizeof(int))
 		return -EINVAL;
 
-	if (get_user(val, (int __user *)optval))
+	if (copy_from_sockptr(&val, optval, sizeof(val)))
 		return -EFAULT;
 
 	valbool = val ? 1 : 0;
@@ -709,10 +889,15 @@
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_wmem_max);
+		val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
 set_sndbuf:
+		/* Ensure val * 2 fits into an int, to prevent max_t()
+		 * from treating it as a negative value.
+		 */
+		val = min_t(int, val, INT_MAX / 2);
 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+		WRITE_ONCE(sk->sk_sndbuf,
+			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
 		/* Wake up sending tasks if we upped the value. */
 		sk->sk_write_space(sk);
 		break;
@@ -722,6 +907,12 @@
 			ret = -EPERM;
 			break;
 		}
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		if (val < 0)
+			val = 0;
 		goto set_sndbuf;
 
 	case SO_RCVBUF:
@@ -730,25 +921,7 @@
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
-		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-		/*
-		 * We double it on the way in to account for
-		 * "struct sk_buff" etc. overhead. Applications
-		 * assume that the SO_RCVBUF setting they make will
-		 * allow that much actual data to be received on that
-		 * socket.
-		 *
-		 * Applications are unaware that "struct sk_buff" and
-		 * other overheads allocate from the receive buffer
-		 * during socket buffer allocation.
-		 *
-		 * And after considering the possible alternatives,
-		 * returning the value we actually used in getsockopt
-		 * is the most desirable behavior.
-		 */
-		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+		__sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
 		break;
 
 	case SO_RCVBUFFORCE:
@@ -756,7 +929,12 @@
 			ret = -EPERM;
 			break;
 		}
-		goto set_rcvbuf;
+
+		/* No negative values (to prevent underflow, as val will be
+		 * multiplied by 2).
+		 */
+		__sock_set_rcvbuf(sk, max(val, 0));
+		break;
 
 	case SO_KEEPALIVE:
 		if (sk->sk_prot->keepalive)
@@ -785,7 +963,7 @@
 			ret = -EINVAL;	/* 1003.1g */
 			break;
 		}
-		if (copy_from_user(&ling, optval, sizeof(ling))) {
+		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
 			ret = -EFAULT;
 			break;
 		}
@@ -803,7 +981,6 @@
 		break;
 
 	case SO_BSDCOMPAT:
-		sock_warn_obsolete_bsdism("setsockopt");
 		break;
 
 	case SO_PASSCRED:
@@ -813,22 +990,20 @@
 		clear_bit(SOCK_PASSCRED, &sock->flags);
 		break;
 
-	case SO_TIMESTAMP:
-	case SO_TIMESTAMPNS:
-		if (valbool) {
-			if (optname == SO_TIMESTAMP)
-				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-			else
-				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
-			sock_set_flag(sk, SOCK_RCVTSTAMP);
-			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-		} else {
-			sock_reset_flag(sk, SOCK_RCVTSTAMP);
-			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-		}
+	case SO_TIMESTAMP_OLD:
+		__sock_set_timestamps(sk, valbool, false, false);
 		break;
-
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		__sock_set_timestamps(sk, valbool, true, false);
+		break;
+	case SO_TIMESTAMPNS_OLD:
+		__sock_set_timestamps(sk, valbool, false, true);
+		break;
+	case SO_TIMESTAMPNS_NEW:
+		__sock_set_timestamps(sk, valbool, true, true);
+		break;
+	case SO_TIMESTAMPING_NEW:
+	case SO_TIMESTAMPING_OLD:
 		if (val & ~SOF_TIMESTAMPING_MASK) {
 			ret = -EINVAL;
 			break;
@@ -856,6 +1031,8 @@
 		}
 
 		sk->sk_tsflags = val;
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 			sock_enable_timestamp(sk,
 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
@@ -870,67 +1047,65 @@
 		if (sock->ops->set_rcvlowat)
 			ret = sock->ops->set_rcvlowat(sk, val);
 		else
-			sk->sk_rcvlowat = val ? : 1;
+			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
 		break;
 
-	case SO_RCVTIMEO:
-		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
+				       optlen, optname == SO_RCVTIMEO_OLD);
 		break;
 
-	case SO_SNDTIMEO:
-		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
+				       optlen, optname == SO_SNDTIMEO_OLD);
 		break;
 
-	case SO_ATTACH_FILTER:
-		ret = -EINVAL;
-		if (optlen == sizeof(struct sock_fprog)) {
-			struct sock_fprog fprog;
+	case SO_ATTACH_FILTER: {
+		struct sock_fprog fprog;
 
-			ret = -EFAULT;
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-				break;
-
+		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+		if (!ret)
 			ret = sk_attach_filter(&fprog, sk);
-		}
 		break;
-
+	}
 	case SO_ATTACH_BPF:
 		ret = -EINVAL;
 		if (optlen == sizeof(u32)) {
 			u32 ufd;
 
 			ret = -EFAULT;
-			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
 				break;
 
 			ret = sk_attach_bpf(ufd, sk);
 		}
 		break;
 
-	case SO_ATTACH_REUSEPORT_CBPF:
-		ret = -EINVAL;
-		if (optlen == sizeof(struct sock_fprog)) {
-			struct sock_fprog fprog;
+	case SO_ATTACH_REUSEPORT_CBPF: {
+		struct sock_fprog fprog;
 
-			ret = -EFAULT;
-			if (copy_from_user(&fprog, optval, sizeof(fprog)))
-				break;
-
+		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
+		if (!ret)
 			ret = sk_reuseport_attach_filter(&fprog, sk);
-		}
 		break;
-
+	}
 	case SO_ATTACH_REUSEPORT_EBPF:
 		ret = -EINVAL;
 		if (optlen == sizeof(u32)) {
 			u32 ufd;
 
 			ret = -EFAULT;
-			if (copy_from_user(&ufd, optval, sizeof(ufd)))
+			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
 				break;
 
 			ret = sk_reuseport_attach_bpf(ufd, sk);
 		}
+		break;
+
+	case SO_DETACH_REUSEPORT_BPF:
+		ret = reuseport_detach_prog(sk);
 		break;
 
 	case SO_DETACH_FILTER:
@@ -951,10 +1126,12 @@
 		clear_bit(SOCK_PASSSEC, &sock->flags);
 		break;
 	case SO_MARK:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
 			ret = -EPERM;
-		else
-			sk->sk_mark = val;
+			break;
+		}
+
+		__sock_set_mark(sk, val);
 		break;
 
 	case SO_RXQ_OVFL:
@@ -995,15 +1172,24 @@
 #endif
 
 	case SO_MAX_PACING_RATE:
-		if (val != ~0U)
+		{
+		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
+
+		if (sizeof(ulval) != sizeof(val) &&
+		    optlen >= sizeof(ulval) &&
+		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (ulval != ~0UL)
 			cmpxchg(&sk->sk_pacing_status,
 				SK_PACING_NONE,
 				SK_PACING_NEEDED);
-		sk->sk_max_pacing_rate = val;
-		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
-					 sk->sk_max_pacing_rate);
+		/* Pairs with READ_ONCE() from sk_getsockopt() */
+		WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
+		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
 		break;
-
+		}
 	case SO_INCOMING_CPU:
 		WRITE_ONCE(sk->sk_incoming_cpu, val);
 		break;
@@ -1015,7 +1201,10 @@
 
 	case SO_ZEROCOPY:
 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
-			if (sk->sk_protocol != IPPROTO_TCP)
+			if (!((sk->sk_type == SOCK_STREAM &&
+			       sk->sk_protocol == IPPROTO_TCP) ||
+			      (sk->sk_type == SOCK_DGRAM &&
+			       sk->sk_protocol == IPPROTO_UDP)))
 				ret = -ENOTSUPP;
 		} else if (sk->sk_family != PF_RDS) {
 			ret = -ENOTSUPP;
@@ -1029,23 +1218,35 @@
 		break;
 
 	case SO_TXTIME:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
-			ret = -EPERM;
-		} else if (optlen != sizeof(struct sock_txtime)) {
+		if (optlen != sizeof(struct sock_txtime)) {
 			ret = -EINVAL;
-		} else if (copy_from_user(&sk_txtime, optval,
+			break;
+		} else if (copy_from_sockptr(&sk_txtime, optval,
 					  sizeof(struct sock_txtime))) {
 			ret = -EFAULT;
+			break;
 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
 			ret = -EINVAL;
-		} else {
-			sock_valbool_flag(sk, SOCK_TXTIME, true);
-			sk->sk_clockid = sk_txtime.clockid;
-			sk->sk_txtime_deadline_mode =
-				!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
-			sk->sk_txtime_report_errors =
-				!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+			break;
 		}
+		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
+		 * scheduler has enough safe guards.
+		 */
+		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
+		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		sock_valbool_flag(sk, SOCK_TXTIME, true);
+		sk->sk_clockid = sk_txtime.clockid;
+		sk->sk_txtime_deadline_mode =
+			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
+		sk->sk_txtime_report_errors =
+			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+		break;
+
+	case SO_BINDTOIFINDEX:
+		ret = sock_bindtoindex_locked(sk, val);
 		break;
 
 	default:
@@ -1101,8 +1302,11 @@
 	union {
 		int val;
 		u64 val64;
+		unsigned long ulval;
 		struct linger ling;
-		struct timeval tm;
+		struct old_timeval32 tm32;
+		struct __kernel_old_timeval tm;
+		struct __kernel_sock_timeval stm;
 		struct sock_txtime txtime;
 	} v;
 
@@ -1130,11 +1334,11 @@
 		break;
 
 	case SO_SNDBUF:
-		v.val = sk->sk_sndbuf;
+		v.val = READ_ONCE(sk->sk_sndbuf);
 		break;
 
 	case SO_RCVBUF:
-		v.val = sk->sk_rcvbuf;
+		v.val = READ_ONCE(sk->sk_rcvbuf);
 		break;
 
 	case SO_REUSEADDR:
@@ -1186,46 +1390,42 @@
 		break;
 
 	case SO_BSDCOMPAT:
-		sock_warn_obsolete_bsdism("getsockopt");
 		break;
 
-	case SO_TIMESTAMP:
+	case SO_TIMESTAMP_OLD:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
-	case SO_TIMESTAMPNS:
-		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+	case SO_TIMESTAMPNS_OLD:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
 		break;
 
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMP_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPNS_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPING_OLD:
 		v.val = sk->sk_tsflags;
 		break;
 
-	case SO_RCVTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
 		break;
 
-	case SO_SNDTIMEO:
-		lv = sizeof(struct timeval);
-		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
-			v.tm.tv_sec = 0;
-			v.tm.tv_usec = 0;
-		} else {
-			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
-		}
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
 		break;
 
 	case SO_RCVLOWAT:
-		v.val = sk->sk_rcvlowat;
+		v.val = READ_ONCE(sk->sk_rcvlowat);
 		break;
 
 	case SO_SNDLOWAT:
@@ -1319,7 +1519,7 @@
 		if (!sock->ops->set_peek_off)
 			return -EOPNOTSUPP;
 
-		v.val = sk->sk_peek_off;
+		v.val = READ_ONCE(sk->sk_peek_off);
 		break;
 	case SO_NOFCS:
 		v.val = sock_flag(sk, SOCK_NOFCS);
@@ -1349,12 +1549,20 @@
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	case SO_BUSY_POLL:
-		v.val = sk->sk_ll_usec;
+		v.val = READ_ONCE(sk->sk_ll_usec);
 		break;
 #endif
 
 	case SO_MAX_PACING_RATE:
-		v.val = sk->sk_max_pacing_rate;
+		/* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
+		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
+			lv = sizeof(v.ulval);
+			v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
+		} else {
+			/* 32bit version */
+			v.val = min_t(unsigned long, ~0U,
+				      READ_ONCE(sk->sk_max_pacing_rate));
+		}
 		break;
 
 	case SO_INCOMING_CPU:
@@ -1405,6 +1613,17 @@
 			SOF_TXTIME_REPORT_ERRORS : 0;
 		break;
 
+	case SO_BINDTOIFINDEX:
+		v.val = sk->sk_bound_dev_if;
+		break;
+
+	case SO_NETNS_COOKIE:
+		lv = sizeof(u64);
+		if (len != lv)
+			return -EINVAL;
+		v.val64 = atomic64_read(&sock_net(sk)->net_cookie);
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -1452,13 +1671,14 @@
  */
 static void sock_copy(struct sock *nsk, const struct sock *osk)
 {
+	const struct proto *prot = READ_ONCE(osk->sk_prot);
#ifdef CONFIG_SECURITY_NETWORK
 	void *sptr = nsk->sk_security;
 #endif
 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
 
 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
-	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
 
 #ifdef CONFIG_SECURITY_NETWORK
 	nsk->sk_security = sptr;
@@ -1584,6 +1804,10 @@
 
 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
 
+#ifdef CONFIG_BPF_SYSCALL
+	bpf_sk_storage_free(sk);
+#endif
+
 	if (atomic_read(&sk->sk_omem_alloc))
 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
 			 __func__, atomic_read(&sk->sk_omem_alloc));
@@ -1670,112 +1894,121 @@
  */
 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 {
-	struct sock *newsk;
+	struct proto *prot = READ_ONCE(sk->sk_prot);
+	struct sk_filter *filter;
 	bool is_charged = true;
+	struct sock *newsk;
 
-	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
-	if (newsk != NULL) {
-		struct sk_filter *filter;
+	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
+	if (!newsk)
+		goto out;
 
-		sock_copy(newsk, sk);
+	sock_copy(newsk, sk);
 
-		newsk->sk_prot_creator = sk->sk_prot;
+	newsk->sk_prot_creator = prot;
 
-		/* SANITY */
-		if (likely(newsk->sk_net_refcnt))
-			get_net(sock_net(newsk));
-		sk_node_init(&newsk->sk_node);
-		sock_lock_init(newsk);
-		bh_lock_sock(newsk);
-		newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
-		newsk->sk_backlog.len = 0;
-
-		atomic_set(&newsk->sk_rmem_alloc, 0);
-		/*
-		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
-		 */
-		refcount_set(&newsk->sk_wmem_alloc, 1);
-		atomic_set(&newsk->sk_omem_alloc, 0);
-		sk_init_common(newsk);
-
-		newsk->sk_dst_cache = NULL;
-		newsk->sk_dst_pending_confirm = 0;
-		newsk->sk_wmem_queued = 0;
-		newsk->sk_forward_alloc = 0;
-		atomic_set(&newsk->sk_drops, 0);
-		newsk->sk_send_head = NULL;
-		newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
-		atomic_set(&newsk->sk_zckey, 0);
-
-		sock_reset_flag(newsk, SOCK_DONE);
-
-		/* sk->sk_memcg will be populated at accept() time */
-		newsk->sk_memcg = NULL;
-
-		cgroup_sk_clone(&newsk->sk_cgrp_data);
-
-		rcu_read_lock();
-		filter = rcu_dereference(sk->sk_filter);
-		if (filter != NULL)
-			/* though it's an empty new sock, the charging may fail
-			 * if sysctl_optmem_max was changed between creation of
-			 * original socket and cloning
-			 */
-			is_charged = sk_filter_charge(newsk, filter);
-		RCU_INIT_POINTER(newsk->sk_filter, filter);
-		rcu_read_unlock();
-
-		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
-			/* We need to make sure that we don't uncharge the new
-			 * socket if we couldn't charge it in the first place
-			 * as otherwise we uncharge the parent's filter.
-			 */
-			if (!is_charged)
-				RCU_INIT_POINTER(newsk->sk_filter, NULL);
-			sk_free_unlock_clone(newsk);
-			newsk = NULL;
-			goto out;
-		}
-		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-
-		newsk->sk_err = 0;
-		newsk->sk_err_soft = 0;
-		newsk->sk_priority = 0;
-		newsk->sk_incoming_cpu = raw_smp_processor_id();
-		atomic64_set(&newsk->sk_cookie, 0);
-		if (likely(newsk->sk_net_refcnt))
-			sock_inuse_add(sock_net(newsk), 1);
-
-		/*
-		 * Before updating sk_refcnt, we must commit prior changes to memory
-		 * (Documentation/RCU/rculist_nulls.txt for details)
-		 */
-		smp_wmb();
-		refcount_set(&newsk->sk_refcnt, 2);
-
-		/*
-		 * Increment the counter in the same struct proto as the master
-		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
-		 * is the same as sk->sk_prot->socks, as this field was copied
-		 * with memcpy).
-		 *
-		 * This _changes_ the previous behaviour, where
-		 * tcp_create_openreq_child always was incrementing the
-		 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
-		 * to be taken into account in all callers. -acme
-		 */
-		sk_refcnt_debug_inc(newsk);
-		sk_set_socket(newsk, NULL);
-		sk_tx_queue_clear(newsk);
-		newsk->sk_wq = NULL;
-
-		if (newsk->sk_prot->sockets_allocated)
-			sk_sockets_allocated_inc(newsk);
-
-		if (sock_needs_netstamp(sk) &&
-		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
-			net_enable_timestamp();
+	/* SANITY */
+	if (likely(newsk->sk_net_refcnt)) {
+		get_net(sock_net(newsk));
+		sock_inuse_add(sock_net(newsk), 1);
 	}
+	sk_node_init(&newsk->sk_node);
+	sock_lock_init(newsk);
+	bh_lock_sock(newsk);
+	newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+	newsk->sk_backlog.len = 0;
+
+	atomic_set(&newsk->sk_rmem_alloc, 0);
+
+	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
+	refcount_set(&newsk->sk_wmem_alloc, 1);
+
+	atomic_set(&newsk->sk_omem_alloc, 0);
+	sk_init_common(newsk);
+
+	newsk->sk_dst_cache = NULL;
+	newsk->sk_dst_pending_confirm = 0;
+	newsk->sk_wmem_queued = 0;
+	newsk->sk_forward_alloc = 0;
+	atomic_set(&newsk->sk_drops, 0);
+	newsk->sk_send_head = NULL;
+	newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+	atomic_set(&newsk->sk_zckey, 0);
+
+	sock_reset_flag(newsk, SOCK_DONE);
+
+	/* sk->sk_memcg will be populated at accept() time */
+	newsk->sk_memcg = NULL;
+
+	cgroup_sk_clone(&newsk->sk_cgrp_data);
+
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter != NULL)
+		/* though it's an empty new sock, the charging may fail
+		 * if sysctl_optmem_max was changed between creation of
+		 * original socket and cloning
+		 */
+		is_charged = sk_filter_charge(newsk, filter);
+	RCU_INIT_POINTER(newsk->sk_filter, filter);
+	rcu_read_unlock();
+
+	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+		/* We need to make sure that we don't uncharge the new
+		 * socket if we couldn't charge it in the first place
+		 * as otherwise we uncharge the parent's filter.
+		 */
+		if (!is_charged)
+			RCU_INIT_POINTER(newsk->sk_filter, NULL);
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
+	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+
+	if (bpf_sk_storage_clone(sk, newsk)) {
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
+
+	/* Clear sk_user_data if parent had the pointer tagged
+	 * as not suitable for copying when cloning.
+	 */
+	if (sk_user_data_is_nocopy(newsk))
+		newsk->sk_user_data = NULL;
+
+	newsk->sk_err = 0;
+	newsk->sk_err_soft = 0;
+	newsk->sk_priority = 0;
+	newsk->sk_incoming_cpu = raw_smp_processor_id();
+
+	/* Before updating sk_refcnt, we must commit prior changes to memory
+	 * (Documentation/RCU/rculist_nulls.rst for details)
+	 */
+	smp_wmb();
+	refcount_set(&newsk->sk_refcnt, 2);
+
+	/* Increment the counter in the same struct proto as the master
+	 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+	 * is the same as sk->sk_prot->socks, as this field was copied
+	 * with memcpy).
+	 *
+	 * This _changes_ the previous behaviour, where
+	 * tcp_create_openreq_child always was incrementing the
+	 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
+	 * to be taken into account in all callers. -acme
+	 */
+	sk_refcnt_debug_inc(newsk);
+	sk_set_socket(newsk, NULL);
+	sk_tx_queue_clear(newsk);
+	RCU_INIT_POINTER(newsk->sk_wq, NULL);
+
+	if (newsk->sk_prot->sockets_allocated)
+		sk_sockets_allocated_inc(newsk);
+
+	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+		net_enable_timestamp();
 out:
 	return newsk;
 }
@@ -1795,7 +2028,6 @@
 {
 	u32 max_segs = 1;
 
-	sk_dst_set(sk, dst);
 	sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
@@ -1810,6 +2042,7 @@
 		}
 	}
 	sk->sk_gso_max_segs = max_segs;
+	sk_dst_set(sk, dst);
 }
 EXPORT_SYMBOL_GPL(sk_setup_caps);
 
@@ -1877,6 +2110,19 @@
 }
 EXPORT_SYMBOL(skb_set_owner_w);
 
+static bool can_skb_orphan_partial(const struct sk_buff *skb)
+{
+#ifdef CONFIG_TLS_DEVICE
+	/* Drivers depend on in-order delivery for crypto offload,
+	 * partial orphan breaks out-of-order-OK logic.
+	 */
+	if (skb->decrypted)
+		return false;
+#endif
+	return (skb->destructor == sock_wfree ||
+		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
+}
+
 /* This helper is used by netem, as it can hold packets in its
  * delay queue. We want to allow the owner socket to send more
  * packets, as if they were already TX completed by a typical driver.
@@ -1888,20 +2134,10 @@
 	if (skb_is_tcp_pure_ack(skb))
 		return;
 
-	if (skb->destructor == sock_wfree
-#ifdef CONFIG_INET
-	    || skb->destructor == tcp_wfree
-#endif
-		) {
-		struct sock *sk = skb->sk;
+	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
+		return;
 
-		if (refcount_inc_not_zero(&sk->sk_refcnt)) {
-			WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
-			skb->destructor = sock_efree;
-		}
-	} else {
-		skb_orphan(skb);
-	}
+	skb_orphan(skb);
 }
 EXPORT_SYMBOL(skb_orphan_partial);
 
@@ -1928,6 +2164,18 @@
 }
 EXPORT_SYMBOL(sock_efree);
 
+/* Buffer destructor for prefetch/receive path where reference count may
+ * not be held, e.g. for listen sockets.
+ */
+#ifdef CONFIG_INET
+void sock_pfree(struct sk_buff *skb)
+{
+	if (sk_is_refcounted(skb->sk))
+		sock_gen_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_pfree);
+#endif /* CONFIG_INET */
+
 kuid_t sock_i_uid(struct sock *sk)
 {
 	kuid_t uid;
@@ -1939,13 +2187,24 @@
 }
 EXPORT_SYMBOL(sock_i_uid);
 
+unsigned long __sock_i_ino(struct sock *sk)
+{
+	unsigned long ino;
+
+	read_lock(&sk->sk_callback_lock);
+	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
+	read_unlock(&sk->sk_callback_lock);
+	return ino;
+}
+EXPORT_SYMBOL(__sock_i_ino);
+
 unsigned long sock_i_ino(struct sock *sk)
 {
 	unsigned long ino;
 
-	read_lock_bh(&sk->sk_callback_lock);
-	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
-	read_unlock_bh(&sk->sk_callback_lock);
+	local_bh_disable();
+	ino = __sock_i_ino(sk);
+	local_bh_enable();
 	return ino;
 }
 EXPORT_SYMBOL(sock_i_ino);
@@ -1956,8 +2215,10 @@
 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
 			     gfp_t priority)
 {
-	if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+	if (force ||
+	    refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
 		struct sk_buff *skb = alloc_skb(size, priority);
+
 		if (skb) {
 			skb_set_owner_w(skb, sk);
 			return skb;
@@ -1981,7 +2242,7 @@
 
 	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
 	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
-	    sysctl_optmem_max)
+	    READ_ONCE(sysctl_optmem_max))
 		return NULL;
 
 	skb = alloc_skb(size, priority);
@@ -1999,8 +2260,10 @@
 */
 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
 {
-	if ((unsigned int)size <= sysctl_optmem_max &&
-	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+	int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+	if ((unsigned int)size <= optmem_max &&
+	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
 		void *mem;
 		/* First do the add, to avoid the race if kmalloc
 		 * might sleep.
@@ -2025,7 +2288,7 @@
 	if (WARN_ON_ONCE(!mem))
 		return;
 	if (nullify)
-		kzfree(mem);
+		kfree_sensitive(mem);
 	else
 		kfree(mem);
 	atomic_sub(size, &sk->sk_omem_alloc);
@@ -2058,11 +2321,11 @@
 			break;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
 			break;
-		if (sk->sk_shutdown & SEND_SHUTDOWN)
+		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
 			break;
-		if (sk->sk_err)
+		if (READ_ONCE(sk->sk_err))
 			break;
 		timeo = schedule_timeout(timeo);
 	}
@@ -2090,10 +2353,10 @@
 			goto failure;
 
 		err = -EPIPE;
-		if (sk->sk_shutdown & SEND_SHUTDOWN)
+		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
 			goto failure;
 
-		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
+		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
 			break;
 
 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -2139,7 +2402,7 @@
 			return -EINVAL;
 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
 		break;
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMPING_OLD:
 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
 			return -EINVAL;
 
@@ -2207,8 +2470,8 @@
 	}
 }
 
-/* On 32bit arches, an skb frag is limited to 2^15 */
 #define SKB_FRAG_PAGE_ORDER	get_order(32768)
+DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
 
 /**
  * skb_page_frag_refill - check that a page_frag contains enough room
@@ -2233,7 +2496,8 @@
 	}
 
 	pfrag->offset = 0;
-	if (SKB_FRAG_PAGE_ORDER) {
+	if (SKB_FRAG_PAGE_ORDER &&
+	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
 		/* Avoid direct reclaim but allow kswapd to wake */
 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
 					  __GFP_COMP | __GFP_NOWARN |
@@ -2263,67 +2527,6 @@
 	return false;
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
-
-int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
-		int first_coalesce)
-{
-	int sg_curr = *sg_curr_index, use = 0, rc = 0;
-	unsigned int size = *sg_curr_size;
-	struct page_frag *pfrag;
-	struct scatterlist *sge;
-
-	len -= size;
-	pfrag = sk_page_frag(sk);
-
-	while (len > 0) {
-		unsigned int orig_offset;
-
-		if (!sk_page_frag_refill(sk, pfrag)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		use = min_t(int, len, pfrag->size - pfrag->offset);
-
-		if (!sk_wmem_schedule(sk, use)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		sk_mem_charge(sk, use);
-		size += use;
-		orig_offset = pfrag->offset;
-		pfrag->offset += use;
-
-		sge = sg + sg_curr - 1;
-		if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page &&
-		    sge->offset + sge->length == orig_offset) {
-			sge->length += use;
-		} else {
-			sge = sg + sg_curr;
-			sg_unmark_end(sge);
-			sg_set_page(sge, pfrag->page, use, orig_offset);
-			get_page(pfrag->page);
-			sg_curr++;
-
-			if (sg_curr == MAX_SKB_FRAGS)
-				sg_curr = 0;
-
-			if (sg_curr == sg_start) {
-				rc = -ENOSPC;
-				break;
-			}
-		}
-
-		len -= use;
-	}
-out:
-	*sg_curr_size = size;
-	*sg_curr_index = sg_curr;
-	return rc;
-}
-EXPORT_SYMBOL(sk_alloc_sg);
 
 static void __lock_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
@@ -2358,7 +2561,7 @@
 		next = skb->next;
 		prefetch(next);
 		WARN_ON_ONCE(skb_dst_is_noref(skb));
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		sk_backlog_rcv(sk, skb);
 
 		cond_resched();
@@ -2530,7 +2733,7 @@
 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
 
-	if (sk_under_memory_pressure(sk) &&
+	if (sk_under_global_memory_pressure(sk) &&
 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
 		sk_leave_memory_pressure(sk);
 }
@@ -2551,7 +2754,7 @@
 
 int sk_set_peek_off(struct sock *sk, int val)
 {
-	sk->sk_peek_off = val;
+	WRITE_ONCE(sk->sk_peek_off, val);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sk_set_peek_off);
@@ -2613,20 +2816,6 @@
 	return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(sock_no_shutdown);
-
-int sock_no_setsockopt(struct socket *sock, int level, int optname,
-		       char __user *optval, unsigned int optlen)
-{
-	return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(sock_no_setsockopt);
-
-int sock_no_getsockopt(struct socket *sock, int level, int optname,
-		       char __user *optval, int __user *optlen)
-{
-	return -EOPNOTSUPP;
-}
-EXPORT_SYMBOL(sock_no_getsockopt);
 
 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 {
@@ -2732,15 +2921,25 @@
 	rcu_read_unlock();
 }
 
-static void sock_def_readable(struct sock *sk)
+void sock_def_readable(struct sock *sk)
 {
 	struct socket_wq *wq;
 
 	rcu_read_lock();
 	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
+
+	if (skwq_has_sleeper(wq)) {
+		int done = 0;
+
+		trace_android_vh_do_wake_up_sync(&wq->wait, &done);
+		if (done)
+			goto out;
+
 		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
 						EPOLLRDNORM | EPOLLRDBAND);
+	}
+
+out:
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	rcu_read_unlock();
 }
@@ -2754,7 +2953,7 @@
 	/* Do not wake up a writer until he can make "significant"
 	 * progress. --DaveM
 	 */
-	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
 		wq = rcu_dereference(sk->sk_wq);
 		if (skwq_has_sleeper(wq))
 			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
@@ -2795,7 +2994,14 @@
 }
 EXPORT_SYMBOL(sk_stop_timer);
 
-void sock_init_data(struct socket *sock, struct sock *sk)
+void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
+{
+	if (del_timer_sync(timer))
+		__sock_put(sk);
+}
+EXPORT_SYMBOL(sk_stop_timer_sync);
+
+void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
 {
 	sk_init_common(sk);
 	sk->sk_send_head = NULL;
@@ -2803,8 +3009,8 @@
 	timer_setup(&sk->sk_timer, NULL, 0);
 
 	sk->sk_allocation = GFP_KERNEL;
-	sk->sk_rcvbuf = sysctl_rmem_default;
-	sk->sk_sndbuf = sysctl_wmem_default;
+	sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
+	sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
 	sk->sk_state = TCP_CLOSE;
 	sk_set_socket(sk, sock);
 
@@ -2812,13 +3018,12 @@
 
 	if (sock) {
 		sk->sk_type = sock->type;
-		sk->sk_wq = sock->wq;
+		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
 		sock->sk = sk;
-		sk->sk_uid = SOCK_INODE(sock)->i_uid;
 	} else {
-		sk->sk_wq = NULL;
-		sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
+		RCU_INIT_POINTER(sk->sk_wq, NULL);
 	}
+	sk->sk_uid = uid;
 
 	rwlock_init(&sk->sk_callback_lock);
 	if (sk->sk_kern_sock)
@@ -2859,22 +3064,32 @@
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id = 0;
-	sk->sk_ll_usec = sysctl_net_busy_read;
+	sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
 #endif
 
-	sk->sk_max_pacing_rate = ~0U;
-	sk->sk_pacing_rate = ~0U;
-	sk->sk_pacing_shift = 10;
+	sk->sk_max_pacing_rate = ~0UL;
+	sk->sk_pacing_rate = ~0UL;
+	WRITE_ONCE(sk->sk_pacing_shift, 10);
 	sk->sk_incoming_cpu = -1;
 
 	sk_rx_queue_clear(sk);
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
-	 * (Documentation/RCU/rculist_nulls.txt for details)
+	 * (Documentation/RCU/rculist_nulls.rst for details)
 	 */
 	smp_wmb();
 	refcount_set(&sk->sk_refcnt, 1);
 	atomic_set(&sk->sk_drops, 0);
+}
+EXPORT_SYMBOL(sock_init_data_uid);
+
+void sock_init_data(struct socket *sock, struct sock *sk)
+{
+	kuid_t uid = sock ?
+		SOCK_INODE(sock)->i_uid :
+		make_kuid(sock_net(sk)->user_ns, 0);
+
+	sock_init_data_uid(sock, sk, uid);
 }
 EXPORT_SYMBOL(sock_init_data);
 
@@ -2949,41 +3164,46 @@
 }
 EXPORT_SYMBOL(lock_sock_fast);
 
-int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
+int sock_gettstamp(struct socket *sock, void __user *userstamp,
+		   bool timeval, bool time32)
 {
-	struct timeval tv;
+	struct sock *sk = sock->sk;
+	struct timespec64 ts;
 
 	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	tv = ktime_to_timeval(sock_read_timestamp(sk));
-	if (tv.tv_sec == -1)
-		return -ENOENT;
-	if (tv.tv_sec == 0) {
-		ktime_t kt = ktime_get_real();
-		sock_write_timestamp(sk, kt);
-		tv = ktime_to_timeval(kt);
-	}
-	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
-}
-EXPORT_SYMBOL(sock_get_timestamp);
-
-int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
-{
-	struct timespec ts;
-
-	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-	ts = ktime_to_timespec(sock_read_timestamp(sk));
+	ts = ktime_to_timespec64(sock_read_timestamp(sk));
 	if (ts.tv_sec == -1)
 		return -ENOENT;
 	if (ts.tv_sec == 0) {
 		ktime_t kt = ktime_get_real();
 		sock_write_timestamp(sk, kt);
-		ts = ktime_to_timespec(sk->sk_stamp);
+		ts = ktime_to_timespec64(kt);
 	}
-	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
-}
-EXPORT_SYMBOL(sock_get_timestampns);
 
-void sock_enable_timestamp(struct sock *sk, int flag)
+	if (timeval)
+		ts.tv_nsec /= 1000;
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+	if (time32)
+		return put_old_timespec32(&ts, userstamp);
+#endif
+#ifdef CONFIG_SPARC64
+	/* beware of padding in sparc64 timeval */
+	if (timeval && !in_compat_syscall()) {
+		struct __kernel_old_timeval __user tv = {
+			.tv_sec = ts.tv_sec,
+			.tv_usec = ts.tv_nsec,
+		};
+		if (copy_to_user(userstamp, &tv, sizeof(tv)))
+			return -EFAULT;
+		return 0;
+	}
+#endif
+	return put_timespec64(&ts, userstamp);
+}
+EXPORT_SYMBOL(sock_gettstamp);
+
+void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
 {
 	if (!sock_flag(sk, flag)) {
 		unsigned long previous_flags = sk->sk_flags;
@@ -3052,20 +3272,6 @@
 }
 EXPORT_SYMBOL(sock_common_getsockopt);
 
-#ifdef CONFIG_COMPAT
-int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
-				  char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-
-	if (sk->sk_prot->compat_getsockopt != NULL)
-		return sk->sk_prot->compat_getsockopt(sk, level, optname,
-						      optval, optlen);
-	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_sock_common_getsockopt);
-#endif
-
 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 			int flags)
 {
@@ -3085,7 +3291,7 @@
  *	Set socket options on an inet socket.
  */
 int sock_common_setsockopt(struct socket *sock, int level, int optname,
-			   char __user *optval, unsigned int optlen)
+			   sockptr_t optval, unsigned int optlen)
 {
 	struct sock *sk = sock->sk;
 
@@ -3093,27 +3299,13 @@
 }
 EXPORT_SYMBOL(sock_common_setsockopt);
 
-#ifdef CONFIG_COMPAT
-int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
-				  char __user *optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-
-	if (sk->sk_prot->compat_setsockopt != NULL)
-		return sk->sk_prot->compat_setsockopt(sk, level, optname,
-						      optval, optlen);
-	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
-}
-EXPORT_SYMBOL(compat_sock_common_setsockopt);
-#endif
-
 void sk_common_release(struct sock *sk)
 {
 	if (sk->sk_prot->destroy)
 		sk->sk_prot->destroy(sk);
 
 	/*
-	 * Observation: when sock_common_release is called, processes have
+	 * Observation: when sk_common_release is called, processes have
 	 * no access to socket. But net still has.
 	 * Step one, detach it from networking:
 	 *
@@ -3149,13 +3341,13 @@
 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
 
 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
-	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
-	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
-	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
-	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
 }
 
@@ -3240,16 +3432,17 @@
 
 core_initcall(net_inuse_init);
 
-static void assign_proto_idx(struct proto *prot)
+static int assign_proto_idx(struct proto *prot)
 {
 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
 
 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
 		pr_err("PROTO_INUSE_NR exhausted\n");
-		return;
+		return -ENOSPC;
 	}
 
 	set_bit(prot->inuse_idx, proto_inuse_idx);
+	return 0;
 }
 
 static void release_proto_idx(struct proto *prot)
@@ -3258,8 +3451,9 @@
 	clear_bit(prot->inuse_idx, proto_inuse_idx);
 }
 #else
-static inline void assign_proto_idx(struct proto *prot)
+static inline int assign_proto_idx(struct proto *prot)
 {
+	return 0;
 }
 
 static inline void release_proto_idx(struct proto *prot)
@@ -3270,6 +3464,16 @@
 {
 }
 #endif
+
+static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
+{
+	if (!twsk_prot)
+		return;
+	kfree(twsk_prot->twsk_slab_name);
+	twsk_prot->twsk_slab_name = NULL;
+	kmem_cache_destroy(twsk_prot->twsk_slab);
+	twsk_prot->twsk_slab = NULL;
+}
 
 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
 {
@@ -3308,6 +3512,8 @@
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
+	int ret = -ENOBUFS;
+
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create_usercopy(prot->name,
 					prot->obj_size, 0,
@@ -3339,25 +3545,32 @@
 							  prot->slab_flags,
 							  NULL);
 			if (prot->twsk_prot->twsk_slab == NULL)
-				goto out_free_timewait_sock_slab_name;
+				goto out_free_timewait_sock_slab;
 		}
 	}
 
 	mutex_lock(&proto_list_mutex);
+	ret = assign_proto_idx(prot);
+	if (ret) {
+		mutex_unlock(&proto_list_mutex);
+		goto out_free_timewait_sock_slab;
+	}
 	list_add(&prot->node, &proto_list);
-	assign_proto_idx(prot);
 	mutex_unlock(&proto_list_mutex);
-	return 0;
+	return ret;
 
-out_free_timewait_sock_slab_name:
-	kfree(prot->twsk_prot->twsk_slab_name);
+out_free_timewait_sock_slab:
+	if (alloc_slab && prot->twsk_prot)
+		tw_prot_cleanup(prot->twsk_prot);
 out_free_request_sock_slab:
-	req_prot_cleanup(prot->rsk_prot);
+	if (alloc_slab) {
+		req_prot_cleanup(prot->rsk_prot);
 
-	kmem_cache_destroy(prot->slab);
-	prot->slab = NULL;
+		kmem_cache_destroy(prot->slab);
+		prot->slab = NULL;
+	}
 out:
-	return -ENOBUFS;
+	return ret;
 }
 EXPORT_SYMBOL(proto_register);
 
@@ -3372,12 +3585,7 @@
 	prot->slab = NULL;
 
 	req_prot_cleanup(prot->rsk_prot);
-
-	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
-		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
-		kfree(prot->twsk_prot->twsk_slab_name);
-		prot->twsk_prot->twsk_slab = NULL;
-	}
+	tw_prot_cleanup(prot->twsk_prot);
}
 EXPORT_SYMBOL(proto_unregister);
 
@@ -3394,6 +3602,7 @@
 #ifdef CONFIG_INET
 	if (family == AF_INET &&
 	    protocol != IPPROTO_RAW &&
+	    protocol < MAX_INET_PROTOS &&
 	    !rcu_access_pointer(inet_protos[protocol]))
 		return -ENOENT;
 #endif
@@ -3431,7 +3640,7 @@
 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
 }
 
-static char *sock_prot_memory_pressure(struct proto *proto)
+static const char *sock_prot_memory_pressure(struct proto *proto)
 {
 	return proto->memory_pressure != NULL ?
 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
@@ -3535,3 +3744,11 @@
 }
 EXPORT_SYMBOL(sk_busy_loop_end);
 #endif /* CONFIG_NET_RX_BUSY_POLL */
+
+int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
+{
+	if (!sk->sk_prot->bind_add)
+		return -EOPNOTSUPP;
+	return sk->sk_prot->bind_add(sk, addr, addr_len);
+}
+EXPORT_SYMBOL(sock_bind_add);
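
Usage sketch (not part of the diff above): a central theme of this patch is replacing the old user-pointer setsockopt plumbing with sockptr_t and exporting kernel-internal helpers (sock_set_reuseaddr(), sock_no_linger(), sock_set_rcvbuf(), sock_set_sndtimeo(), sock_bindtoindex(), and friends), so in-kernel users no longer have to fake __user pointers through kernel_setsockopt(). The fragment below shows how a kernel caller might use these helpers once this patch is applied; the wrapper function and its tuning values are illustrative assumptions, only the sock_set_*/sock_bindtoindex() calls come from the patch.

	#include <net/sock.h>
	#include <linux/net.h>

	/* Hypothetical example: tune a freshly created kernel socket
	 * using the helpers introduced by this patch.
	 */
	static int example_tune_kernel_socket(struct socket *sock)
	{
		struct sock *sk = sock->sk;

		sock_set_reuseaddr(sk);       /* like SO_REUSEADDR = 1 */
		sock_no_linger(sk);           /* like SO_LINGER with l_linger = 0 */
		sock_set_rcvbuf(sk, 1 << 20); /* value is doubled internally */
		sock_set_sndtimeo(sk, 5);     /* 5 second send timeout */

		/* ifindex 0 clears any device binding; lock_sk = true makes
		 * the helper take and release the socket lock itself.
		 */
		return sock_bindtoindex(sk, 0, true);
	}

Each helper wraps lock_sock()/release_sock() around the same field updates the setsockopt() path performs, which is why the sockptr_t copy and optlen validation disappear for in-kernel callers.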