hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/net/socket.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * NET An implementation of the SOCKET network access protocol.
34 *
....@@ -45,13 +46,6 @@
4546 * Tigran Aivazian : Made listen(2) backlog sanity checks
4647 * protocol-independent
4748 *
48
- *
49
- * This program is free software; you can redistribute it and/or
50
- * modify it under the terms of the GNU General Public License
51
- * as published by the Free Software Foundation; either version
52
- * 2 of the License, or (at your option) any later version.
53
- *
54
- *
5549 * This module is effectively the top level interface to the BSD socket
5650 * paradigm.
5751 *
....@@ -79,6 +73,7 @@
7973 #include <linux/module.h>
8074 #include <linux/highmem.h>
8175 #include <linux/mount.h>
76
+#include <linux/pseudo_fs.h>
8277 #include <linux/security.h>
8378 #include <linux/syscalls.h>
8479 #include <linux/compat.h>
....@@ -90,6 +85,7 @@
9085 #include <linux/slab.h>
9186 #include <linux/xattr.h>
9287 #include <linux/nospec.h>
88
+#include <linux/indirect_call_wrapper.h>
9389
9490 #include <linux/uaccess.h>
9591 #include <asm/unistd.h>
....@@ -104,6 +100,7 @@
104100 #include <linux/if_tun.h>
105101 #include <linux/ipv6_route.h>
106102 #include <linux/route.h>
103
+#include <linux/termios.h>
107104 #include <linux/sockios.h>
108105 #include <net/busy_poll.h>
109106 #include <linux/errqueue.h>
....@@ -132,6 +129,18 @@
132129 struct pipe_inode_info *pipe, size_t len,
133130 unsigned int flags);
134131
132
+#ifdef CONFIG_PROC_FS
133
+static void sock_show_fdinfo(struct seq_file *m, struct file *f)
134
+{
135
+ struct socket *sock = f->private_data;
136
+
137
+ if (sock->ops->show_fdinfo)
138
+ sock->ops->show_fdinfo(m, sock);
139
+}
140
+#else
141
+#define sock_show_fdinfo NULL
142
+#endif
143
+
135144 /*
136145 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137146 * in the operation structures but are done directly via the socketcall() multiplexor.
....@@ -153,6 +162,7 @@
153162 .sendpage = sock_sendpage,
154163 .splice_write = generic_splice_sendpage,
155164 .splice_read = sock_splice_read,
165
+ .show_fdinfo = sock_show_fdinfo,
156166 };
157167
158168 /*
....@@ -239,20 +249,13 @@
239249 static struct inode *sock_alloc_inode(struct super_block *sb)
240250 {
241251 struct socket_alloc *ei;
242
- struct socket_wq *wq;
243252
244253 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
245254 if (!ei)
246255 return NULL;
247
- wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248
- if (!wq) {
249
- kmem_cache_free(sock_inode_cachep, ei);
250
- return NULL;
251
- }
252
- init_waitqueue_head(&wq->wait);
253
- wq->fasync_list = NULL;
254
- wq->flags = 0;
255
- ei->socket.wq = wq;
256
+ init_waitqueue_head(&ei->socket.wq.wait);
257
+ ei->socket.wq.fasync_list = NULL;
258
+ ei->socket.wq.flags = 0;
256259
257260 ei->socket.state = SS_UNCONNECTED;
258261 ei->socket.flags = 0;
....@@ -263,12 +266,11 @@
263266 return &ei->vfs_inode;
264267 }
265268
266
-static void sock_destroy_inode(struct inode *inode)
269
+static void sock_free_inode(struct inode *inode)
267270 {
268271 struct socket_alloc *ei;
269272
270273 ei = container_of(inode, struct socket_alloc, vfs_inode);
271
- kfree_rcu(ei->socket.wq, rcu);
272274 kmem_cache_free(sock_inode_cachep, ei);
273275 }
274276
....@@ -293,7 +295,7 @@
293295
294296 static const struct super_operations sockfs_ops = {
295297 .alloc_inode = sock_alloc_inode,
296
- .destroy_inode = sock_destroy_inode,
298
+ .free_inode = sock_free_inode,
297299 .statfs = simple_statfs,
298300 };
299301
....@@ -312,7 +314,8 @@
312314
313315 static int sockfs_xattr_get(const struct xattr_handler *handler,
314316 struct dentry *dentry, struct inode *inode,
315
- const char *suffix, void *value, size_t size)
317
+ const char *suffix, void *value, size_t size,
318
+ int flags)
316319 {
317320 if (value) {
318321 if (dentry->d_name.len + 1 > size)
....@@ -351,19 +354,22 @@
351354 NULL
352355 };
353356
354
-static struct dentry *sockfs_mount(struct file_system_type *fs_type,
355
- int flags, const char *dev_name, void *data)
357
+static int sockfs_init_fs_context(struct fs_context *fc)
356358 {
357
- return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
358
- sockfs_xattr_handlers,
359
- &sockfs_dentry_operations, SOCKFS_MAGIC);
359
+ struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
360
+ if (!ctx)
361
+ return -ENOMEM;
362
+ ctx->ops = &sockfs_ops;
363
+ ctx->dops = &sockfs_dentry_operations;
364
+ ctx->xattr = sockfs_xattr_handlers;
365
+ return 0;
360366 }
361367
362368 static struct vfsmount *sock_mnt __read_mostly;
363369
364370 static struct file_system_type sock_fs_type = {
365371 .name = "sockfs",
366
- .mount = sockfs_mount,
372
+ .init_fs_context = sockfs_init_fs_context,
367373 .kill_sb = kill_anon_super,
368374 };
369375
....@@ -413,6 +419,7 @@
413419
414420 sock->file = file;
415421 file->private_data = sock;
422
+ stream_open(SOCK_INODE(sock), file);
416423 return file;
417424 }
418425 EXPORT_SYMBOL(sock_alloc_file);
....@@ -427,7 +434,7 @@
427434 }
428435
429436 newfile = sock_alloc_file(sock, flags, NULL);
430
- if (likely(!IS_ERR(newfile))) {
437
+ if (!IS_ERR(newfile)) {
431438 fd_install(fd, newfile);
432439 return fd;
433440 }
....@@ -580,15 +587,6 @@
580587 }
581588 EXPORT_SYMBOL(sock_alloc);
582589
583
-/**
584
- * sock_release - close a socket
585
- * @sock: socket to close
586
- *
587
- * The socket is released from the protocol stack if it has a release
588
- * callback, and the inode is then released if the socket is bound to
589
- * an inode not a file.
590
- */
591
-
592590 static void __sock_release(struct socket *sock, struct inode *inode)
593591 {
594592 if (sock->ops) {
....@@ -604,7 +602,7 @@
604602 module_put(owner);
605603 }
606604
607
- if (sock->wq->fasync_list)
605
+ if (sock->wq.fasync_list)
608606 pr_err("%s: fasync list not empty!\n", __func__);
609607
610608 if (!sock->file) {
....@@ -614,6 +612,14 @@
614612 sock->file = NULL;
615613 }
616614
615
+/**
616
+ * sock_release - close a socket
617
+ * @sock: socket to close
618
+ *
619
+ * The socket is released from the protocol stack if it has a release
620
+ * callback, and the inode is then released if the socket is bound to
621
+ * an inode not a file.
622
+ */
617623 void sock_release(struct socket *sock)
618624 {
619625 __sock_release(sock, NULL);
....@@ -637,6 +643,19 @@
637643 }
638644 EXPORT_SYMBOL(__sock_tx_timestamp);
639645
646
+INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
647
+ size_t));
648
+INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
649
+ size_t));
650
+static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
651
+{
652
+ int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
653
+ inet_sendmsg, sock, msg,
654
+ msg_data_left(msg));
655
+ BUG_ON(ret == -EIOCBQUEUED);
656
+ return ret;
657
+}
658
+
640659 /**
641660 * sock_sendmsg - send a message through @sock
642661 * @sock: socket
....@@ -645,14 +664,6 @@
645664 * Sends @msg through @sock, passing through LSM.
646665 * Returns the number of bytes sent, or an error code.
647666 */
648
-
649
-static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
650
-{
651
- int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
652
- BUG_ON(ret == -EIOCBQUEUED);
653
- return ret;
654
-}
655
-
656667 int sock_sendmsg(struct socket *sock, struct msghdr *msg)
657668 {
658669 int err = security_socket_sendmsg(sock, msg,
....@@ -677,7 +688,7 @@
677688 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
678689 struct kvec *vec, size_t num, size_t size)
679690 {
680
- iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
691
+ iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
681692 return sock_sendmsg(sock, msg);
682693 }
683694 EXPORT_SYMBOL(kernel_sendmsg);
....@@ -703,7 +714,7 @@
703714 if (!sock->ops->sendmsg_locked)
704715 return sock_no_sendmsg_locked(sk, msg, size);
705716
706
- iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
717
+ iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
707718
708719 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
709720 }
....@@ -724,7 +735,7 @@
724735 * before the software timestamp is received, a hardware TX timestamp may be
725736 * returned only if there is no software TX timestamp. Ignore false software
726737 * timestamps, which may be made in the __sock_recv_timestamp() call when the
727
- * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
738
+ * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
728739 * hardware timestamp.
729740 */
730741 static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
....@@ -760,7 +771,9 @@
760771 struct sk_buff *skb)
761772 {
762773 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
763
- struct scm_timestamping tss;
774
+ int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
775
+ struct scm_timestamping_internal tss;
776
+
764777 int empty = 1, false_tstamp = 0;
765778 struct skb_shared_hwtstamps *shhwtstamps =
766779 skb_hwtstamps(skb);
....@@ -774,34 +787,54 @@
774787
775788 if (need_software_tstamp) {
776789 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
777
- struct timeval tv;
778
- skb_get_timestamp(skb, &tv);
779
- put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
780
- sizeof(tv), &tv);
790
+ if (new_tstamp) {
791
+ struct __kernel_sock_timeval tv;
792
+
793
+ skb_get_new_timestamp(skb, &tv);
794
+ put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
795
+ sizeof(tv), &tv);
796
+ } else {
797
+ struct __kernel_old_timeval tv;
798
+
799
+ skb_get_timestamp(skb, &tv);
800
+ put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
801
+ sizeof(tv), &tv);
802
+ }
781803 } else {
782
- struct timespec ts;
783
- skb_get_timestampns(skb, &ts);
784
- put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
785
- sizeof(ts), &ts);
804
+ if (new_tstamp) {
805
+ struct __kernel_timespec ts;
806
+
807
+ skb_get_new_timestampns(skb, &ts);
808
+ put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
809
+ sizeof(ts), &ts);
810
+ } else {
811
+ struct __kernel_old_timespec ts;
812
+
813
+ skb_get_timestampns(skb, &ts);
814
+ put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
815
+ sizeof(ts), &ts);
816
+ }
786817 }
787818 }
788819
789820 memset(&tss, 0, sizeof(tss));
790821 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
791
- ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
822
+ ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
792823 empty = 0;
793824 if (shhwtstamps &&
794825 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
795826 !skb_is_swtx_tstamp(skb, false_tstamp) &&
796
- ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
827
+ ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
797828 empty = 0;
798829 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
799830 !skb_is_err_queue(skb))
800831 put_ts_pktinfo(msg, skb);
801832 }
802833 if (!empty) {
803
- put_cmsg(msg, SOL_SOCKET,
804
- SCM_TIMESTAMPING, sizeof(tss), &tss);
834
+ if (sock_flag(sk, SOCK_TSTAMP_NEW))
835
+ put_cmsg_scm_timestamping64(msg, &tss);
836
+ else
837
+ put_cmsg_scm_timestamping(msg, &tss);
805838
806839 if (skb_is_err_queue(skb) && skb->len &&
807840 SKB_EXT_ERR(skb)->opt_stats)
....@@ -843,6 +876,18 @@
843876 }
844877 EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
845878
879
+INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
880
+ size_t, int));
881
+INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
882
+ size_t, int));
883
+static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
884
+ int flags)
885
+{
886
+ return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
887
+ inet_recvmsg, sock, msg, msg_data_left(msg),
888
+ flags);
889
+}
890
+
846891 /**
847892 * sock_recvmsg - receive a message from @sock
848893 * @sock: socket
....@@ -852,13 +897,6 @@
852897 * Receives @msg from @sock, passing through LSM. Returns the total number
853898 * of bytes received, or an error.
854899 */
855
-
856
-static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
857
- int flags)
858
-{
859
- return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
860
-}
861
-
862900 int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
863901 {
864902 int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
....@@ -886,14 +924,9 @@
886924 int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
887925 struct kvec *vec, size_t num, size_t size, int flags)
888926 {
889
- mm_segment_t oldfs = get_fs();
890
- int result;
891
-
892
- iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
893
- set_fs(KERNEL_DS);
894
- result = sock_recvmsg(sock, msg, flags);
895
- set_fs(oldfs);
896
- return result;
927
+ msg->msg_control_is_user = false;
928
+ iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
929
+ return sock_recvmsg(sock, msg, flags);
897930 }
898931 EXPORT_SYMBOL(kernel_recvmsg);
899932
....@@ -919,7 +952,7 @@
919952 struct socket *sock = file->private_data;
920953
921954 if (unlikely(!sock->ops->splice_read))
922
- return -EINVAL;
955
+ return generic_file_splice_read(file, ppos, pipe, len, flags);
923956
924957 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
925958 }
....@@ -1131,6 +1164,26 @@
11311164
11321165 err = open_related_ns(&net->ns, get_net_ns);
11331166 break;
1167
+ case SIOCGSTAMP_OLD:
1168
+ case SIOCGSTAMPNS_OLD:
1169
+ if (!sock->ops->gettstamp) {
1170
+ err = -ENOIOCTLCMD;
1171
+ break;
1172
+ }
1173
+ err = sock->ops->gettstamp(sock, argp,
1174
+ cmd == SIOCGSTAMP_OLD,
1175
+ !IS_ENABLED(CONFIG_64BIT));
1176
+ break;
1177
+ case SIOCGSTAMP_NEW:
1178
+ case SIOCGSTAMPNS_NEW:
1179
+ if (!sock->ops->gettstamp) {
1180
+ err = -ENOIOCTLCMD;
1181
+ break;
1182
+ }
1183
+ err = sock->ops->gettstamp(sock, argp,
1184
+ cmd == SIOCGSTAMP_NEW,
1185
+ false);
1186
+ break;
11341187 default:
11351188 err = sock_do_ioctl(net, sock, cmd, arg);
11361189 break;
....@@ -1230,13 +1283,12 @@
12301283 {
12311284 struct socket *sock = filp->private_data;
12321285 struct sock *sk = sock->sk;
1233
- struct socket_wq *wq;
1286
+ struct socket_wq *wq = &sock->wq;
12341287
12351288 if (sk == NULL)
12361289 return -EINVAL;
12371290
12381291 lock_sock(sk);
1239
- wq = sock->wq;
12401292 fasync_helper(fd, filp, on, &wq->fasync_list);
12411293
12421294 if (!wq->fasync_list)
....@@ -1263,7 +1315,7 @@
12631315 case SOCK_WAKE_SPACE:
12641316 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
12651317 break;
1266
- /* fall through */
1318
+ fallthrough;
12671319 case SOCK_WAKE_IO:
12681320 call_kill:
12691321 kill_fasync(&wq->fasync_list, SIGIO, band);
....@@ -1586,7 +1638,7 @@
15861638 sock = sockfd_lookup_light(fd, &err, &fput_needed);
15871639 if (sock) {
15881640 err = move_addr_to_kernel(umyaddr, addrlen, &address);
1589
- if (err >= 0) {
1641
+ if (!err) {
15901642 err = security_socket_bind(sock,
15911643 (struct sockaddr *)&address,
15921644 addrlen);
....@@ -1619,7 +1671,7 @@
16191671
16201672 sock = sockfd_lookup_light(fd, &err, &fput_needed);
16211673 if (sock) {
1622
- somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
1674
+ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
16231675 if ((unsigned int)backlog > somaxconn)
16241676 backlog = somaxconn;
16251677
....@@ -1637,40 +1689,22 @@
16371689 return __sys_listen(fd, backlog);
16381690 }
16391691
1640
-/*
1641
- * For accept, we attempt to create a new socket, set up the link
1642
- * with the client, wake up the client, then return the new
1643
- * connected fd. We collect the address of the connector in kernel
1644
- * space and move it to user at the very end. This is unclean because
1645
- * we open the socket then return an error.
1646
- *
1647
- * 1003.1g adds the ability to recvmsg() to query connection pending
1648
- * status to recvmsg. We need to add that support in a way thats
1649
- * clean when we restructure accept also.
1650
- */
1651
-
1652
-int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1653
- int __user *upeer_addrlen, int flags)
1692
+struct file *do_accept(struct file *file, unsigned file_flags,
1693
+ struct sockaddr __user *upeer_sockaddr,
1694
+ int __user *upeer_addrlen, int flags)
16541695 {
16551696 struct socket *sock, *newsock;
16561697 struct file *newfile;
1657
- int err, len, newfd, fput_needed;
1698
+ int err, len;
16581699 struct sockaddr_storage address;
16591700
1660
- if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1661
- return -EINVAL;
1662
-
1663
- if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1664
- flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1665
-
1666
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
1701
+ sock = sock_from_file(file, &err);
16671702 if (!sock)
1668
- goto out;
1703
+ return ERR_PTR(err);
16691704
1670
- err = -ENFILE;
16711705 newsock = sock_alloc();
16721706 if (!newsock)
1673
- goto out_put;
1707
+ return ERR_PTR(-ENFILE);
16741708
16751709 newsock->type = sock->type;
16761710 newsock->ops = sock->ops;
....@@ -1681,24 +1715,16 @@
16811715 */
16821716 __module_get(newsock->ops->owner);
16831717
1684
- newfd = get_unused_fd_flags(flags);
1685
- if (unlikely(newfd < 0)) {
1686
- err = newfd;
1687
- sock_release(newsock);
1688
- goto out_put;
1689
- }
16901718 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
1691
- if (IS_ERR(newfile)) {
1692
- err = PTR_ERR(newfile);
1693
- put_unused_fd(newfd);
1694
- goto out_put;
1695
- }
1719
+ if (IS_ERR(newfile))
1720
+ return newfile;
16961721
16971722 err = security_socket_accept(sock, newsock);
16981723 if (err)
16991724 goto out_fd;
17001725
1701
- err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1726
+ err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1727
+ false);
17021728 if (err < 0)
17031729 goto out_fd;
17041730
....@@ -1716,18 +1742,67 @@
17161742 }
17171743
17181744 /* File flags are not inherited via accept() unlike another OSes. */
1719
-
1720
- fd_install(newfd, newfile);
1721
- err = newfd;
1722
-
1723
-out_put:
1724
- fput_light(sock->file, fput_needed);
1725
-out:
1726
- return err;
1745
+ return newfile;
17271746 out_fd:
17281747 fput(newfile);
1729
- put_unused_fd(newfd);
1730
- goto out_put;
1748
+ return ERR_PTR(err);
1749
+}
1750
+
1751
+int __sys_accept4_file(struct file *file, unsigned file_flags,
1752
+ struct sockaddr __user *upeer_sockaddr,
1753
+ int __user *upeer_addrlen, int flags,
1754
+ unsigned long nofile)
1755
+{
1756
+ struct file *newfile;
1757
+ int newfd;
1758
+
1759
+ if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1760
+ return -EINVAL;
1761
+
1762
+ if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1763
+ flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1764
+
1765
+ newfd = __get_unused_fd_flags(flags, nofile);
1766
+ if (unlikely(newfd < 0))
1767
+ return newfd;
1768
+
1769
+ newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen,
1770
+ flags);
1771
+ if (IS_ERR(newfile)) {
1772
+ put_unused_fd(newfd);
1773
+ return PTR_ERR(newfile);
1774
+ }
1775
+ fd_install(newfd, newfile);
1776
+ return newfd;
1777
+}
1778
+
1779
+/*
1780
+ * For accept, we attempt to create a new socket, set up the link
1781
+ * with the client, wake up the client, then return the new
1782
+ * connected fd. We collect the address of the connector in kernel
1783
+ * space and move it to user at the very end. This is unclean because
1784
+ * we open the socket then return an error.
1785
+ *
1786
+ * 1003.1g adds the ability to recvmsg() to query connection pending
1787
+ * status to recvmsg. We need to add that support in a way thats
1788
+ * clean when we restructure accept also.
1789
+ */
1790
+
1791
+int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1792
+ int __user *upeer_addrlen, int flags)
1793
+{
1794
+ int ret = -EBADF;
1795
+ struct fd f;
1796
+
1797
+ f = fdget(fd);
1798
+ if (f.file) {
1799
+ ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
1800
+ upeer_addrlen, flags,
1801
+ rlimit(RLIMIT_NOFILE));
1802
+ fdput(f);
1803
+ }
1804
+
1805
+ return ret;
17311806 }
17321807
17331808 SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
....@@ -1754,30 +1829,43 @@
17541829 * include the -EINPROGRESS status for such sockets.
17551830 */
17561831
1757
-int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1832
+int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
1833
+ int addrlen, int file_flags)
17581834 {
17591835 struct socket *sock;
1760
- struct sockaddr_storage address;
1761
- int err, fput_needed;
1836
+ int err;
17621837
1763
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
1838
+ sock = sock_from_file(file, &err);
17641839 if (!sock)
17651840 goto out;
1766
- err = move_addr_to_kernel(uservaddr, addrlen, &address);
1767
- if (err < 0)
1768
- goto out_put;
17691841
17701842 err =
1771
- security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1843
+ security_socket_connect(sock, (struct sockaddr *)address, addrlen);
17721844 if (err)
1773
- goto out_put;
1845
+ goto out;
17741846
1775
- err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1776
- sock->file->f_flags);
1777
-out_put:
1778
- fput_light(sock->file, fput_needed);
1847
+ err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1848
+ sock->file->f_flags | file_flags);
17791849 out:
17801850 return err;
1851
+}
1852
+
1853
+int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1854
+{
1855
+ int ret = -EBADF;
1856
+ struct fd f;
1857
+
1858
+ f = fdget(fd);
1859
+ if (f.file) {
1860
+ struct sockaddr_storage address;
1861
+
1862
+ ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1863
+ if (!ret)
1864
+ ret = __sys_connect_file(f.file, &address, addrlen, 0);
1865
+ fdput(f);
1866
+ }
1867
+
1868
+ return ret;
17811869 }
17821870
17831871 SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
....@@ -1984,14 +2072,26 @@
19842072 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
19852073 }
19862074
2075
+static bool sock_use_custom_sol_socket(const struct socket *sock)
2076
+{
2077
+ const struct sock *sk = sock->sk;
2078
+
2079
+ /* Use sock->ops->setsockopt() for MPTCP */
2080
+ return IS_ENABLED(CONFIG_MPTCP) &&
2081
+ sk->sk_protocol == IPPROTO_MPTCP &&
2082
+ sk->sk_type == SOCK_STREAM &&
2083
+ (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2084
+}
2085
+
19872086 /*
19882087 * Set a socket option. Because we don't know the option lengths we have
19892088 * to pass the user mode parameter for the protocols to sort out.
19902089 */
1991
-
1992
-static int __sys_setsockopt(int fd, int level, int optname,
1993
- char __user *optval, int optlen)
2090
+int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
2091
+ int optlen)
19942092 {
2093
+ sockptr_t optval = USER_SOCKPTR(user_optval);
2094
+ char *kernel_optval = NULL;
19952095 int err, fput_needed;
19962096 struct socket *sock;
19972097
....@@ -1999,22 +2099,36 @@
19992099 return -EINVAL;
20002100
20012101 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2002
- if (sock != NULL) {
2003
- err = security_socket_setsockopt(sock, level, optname);
2004
- if (err)
2005
- goto out_put;
2102
+ if (!sock)
2103
+ return err;
20062104
2007
- if (level == SOL_SOCKET)
2008
- err =
2009
- sock_setsockopt(sock, level, optname, optval,
2010
- optlen);
2011
- else
2012
- err =
2013
- sock->ops->setsockopt(sock, level, optname, optval,
2014
- optlen);
2015
-out_put:
2016
- fput_light(sock->file, fput_needed);
2105
+ err = security_socket_setsockopt(sock, level, optname);
2106
+ if (err)
2107
+ goto out_put;
2108
+
2109
+ if (!in_compat_syscall())
2110
+ err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
2111
+ user_optval, &optlen,
2112
+ &kernel_optval);
2113
+ if (err < 0)
2114
+ goto out_put;
2115
+ if (err > 0) {
2116
+ err = 0;
2117
+ goto out_put;
20172118 }
2119
+
2120
+ if (kernel_optval)
2121
+ optval = KERNEL_SOCKPTR(kernel_optval);
2122
+ if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
2123
+ err = sock_setsockopt(sock, level, optname, optval, optlen);
2124
+ else if (unlikely(!sock->ops->setsockopt))
2125
+ err = -EOPNOTSUPP;
2126
+ else
2127
+ err = sock->ops->setsockopt(sock, level, optname, optval,
2128
+ optlen);
2129
+ kfree(kernel_optval);
2130
+out_put:
2131
+ fput_light(sock->file, fput_needed);
20182132 return err;
20192133 }
20202134
....@@ -2028,30 +2142,38 @@
20282142 * Get a socket option. Because we don't know the option lengths we have
20292143 * to pass a user mode parameter for the protocols to sort out.
20302144 */
2031
-
2032
-static int __sys_getsockopt(int fd, int level, int optname,
2033
- char __user *optval, int __user *optlen)
2145
+int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2146
+ int __user *optlen)
20342147 {
20352148 int err, fput_needed;
20362149 struct socket *sock;
2150
+ int max_optlen;
20372151
20382152 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2039
- if (sock != NULL) {
2040
- err = security_socket_getsockopt(sock, level, optname);
2041
- if (err)
2042
- goto out_put;
2153
+ if (!sock)
2154
+ return err;
20432155
2044
- if (level == SOL_SOCKET)
2045
- err =
2046
- sock_getsockopt(sock, level, optname, optval,
2156
+ err = security_socket_getsockopt(sock, level, optname);
2157
+ if (err)
2158
+ goto out_put;
2159
+
2160
+ if (!in_compat_syscall())
2161
+ max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2162
+
2163
+ if (level == SOL_SOCKET)
2164
+ err = sock_getsockopt(sock, level, optname, optval, optlen);
2165
+ else if (unlikely(!sock->ops->getsockopt))
2166
+ err = -EOPNOTSUPP;
2167
+ else
2168
+ err = sock->ops->getsockopt(sock, level, optname, optval,
20472169 optlen);
2048
- else
2049
- err =
2050
- sock->ops->getsockopt(sock, level, optname, optval,
2051
- optlen);
2170
+
2171
+ if (!in_compat_syscall())
2172
+ err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2173
+ optval, optlen, max_optlen,
2174
+ err);
20522175 out_put:
2053
- fput_light(sock->file, fput_needed);
2054
- }
2176
+ fput_light(sock->file, fput_needed);
20552177 return err;
20562178 }
20572179
....@@ -2065,6 +2187,17 @@
20652187 * Shutdown a socket.
20662188 */
20672189
2190
+int __sys_shutdown_sock(struct socket *sock, int how)
2191
+{
2192
+ int err;
2193
+
2194
+ err = security_socket_shutdown(sock, how);
2195
+ if (!err)
2196
+ err = sock->ops->shutdown(sock, how);
2197
+
2198
+ return err;
2199
+}
2200
+
20682201 int __sys_shutdown(int fd, int how)
20692202 {
20702203 int err, fput_needed;
....@@ -2072,9 +2205,7 @@
20722205
20732206 sock = sockfd_lookup_light(fd, &err, &fput_needed);
20742207 if (sock != NULL) {
2075
- err = security_socket_shutdown(sock, how);
2076
- if (!err)
2077
- err = sock->ops->shutdown(sock, how);
2208
+ err = __sys_shutdown_sock(sock, how);
20782209 fput_light(sock->file, fput_needed);
20792210 }
20802211 return err;
....@@ -2097,10 +2228,10 @@
20972228 unsigned int name_len;
20982229 };
20992230
2100
-static int copy_msghdr_from_user(struct msghdr *kmsg,
2101
- struct user_msghdr __user *umsg,
2102
- struct sockaddr __user **save_addr,
2103
- struct iovec **iov)
2231
+int __copy_msghdr_from_user(struct msghdr *kmsg,
2232
+ struct user_msghdr __user *umsg,
2233
+ struct sockaddr __user **save_addr,
2234
+ struct iovec __user **uiov, size_t *nsegs)
21042235 {
21052236 struct user_msghdr msg;
21062237 ssize_t err;
....@@ -2108,7 +2239,8 @@
21082239 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
21092240 return -EFAULT;
21102241
2111
- kmsg->msg_control = (void __force *)msg.msg_control;
2242
+ kmsg->msg_control_is_user = true;
2243
+ kmsg->msg_control_user = msg.msg_control;
21122244 kmsg->msg_controllen = msg.msg_controllen;
21132245 kmsg->msg_flags = msg.msg_flags;
21142246
....@@ -2142,21 +2274,34 @@
21422274 return -EMSGSIZE;
21432275
21442276 kmsg->msg_iocb = NULL;
2145
-
2146
- return import_iovec(save_addr ? READ : WRITE,
2147
- msg.msg_iov, msg.msg_iovlen,
2148
- UIO_FASTIOV, iov, &kmsg->msg_iter);
2277
+ *uiov = msg.msg_iov;
2278
+ *nsegs = msg.msg_iovlen;
2279
+ return 0;
21492280 }
21502281
2151
-static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2152
- struct msghdr *msg_sys, unsigned int flags,
2153
- struct used_address *used_address,
2154
- unsigned int allowed_msghdr_flags)
2282
+static int copy_msghdr_from_user(struct msghdr *kmsg,
2283
+ struct user_msghdr __user *umsg,
2284
+ struct sockaddr __user **save_addr,
2285
+ struct iovec **iov)
21552286 {
2156
- struct compat_msghdr __user *msg_compat =
2157
- (struct compat_msghdr __user *)msg;
2158
- struct sockaddr_storage address;
2159
- struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2287
+ struct user_msghdr msg;
2288
+ ssize_t err;
2289
+
2290
+ err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2291
+ &msg.msg_iovlen);
2292
+ if (err)
2293
+ return err;
2294
+
2295
+ err = import_iovec(save_addr ? READ : WRITE,
2296
+ msg.msg_iov, msg.msg_iovlen,
2297
+ UIO_FASTIOV, iov, &kmsg->msg_iter);
2298
+ return err < 0 ? err : 0;
2299
+}
2300
+
2301
+static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2302
+ unsigned int flags, struct used_address *used_address,
2303
+ unsigned int allowed_msghdr_flags)
2304
+{
21602305 unsigned char ctl[sizeof(struct cmsghdr) + 20]
21612306 __aligned(sizeof(__kernel_size_t));
21622307 /* 20 is size of ipv6_pktinfo */
....@@ -2164,19 +2309,10 @@
21642309 int ctl_len;
21652310 ssize_t err;
21662311
2167
- msg_sys->msg_name = &address;
2168
-
2169
- if (MSG_CMSG_COMPAT & flags)
2170
- err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
2171
- else
2172
- err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
2173
- if (err < 0)
2174
- return err;
2175
-
21762312 err = -ENOBUFS;
21772313
21782314 if (msg_sys->msg_controllen > INT_MAX)
2179
- goto out_freeiov;
2315
+ goto out;
21802316 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
21812317 ctl_len = msg_sys->msg_controllen;
21822318 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
....@@ -2184,7 +2320,7 @@
21842320 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
21852321 sizeof(ctl));
21862322 if (err)
2187
- goto out_freeiov;
2323
+ goto out;
21882324 ctl_buf = msg_sys->msg_control;
21892325 ctl_len = msg_sys->msg_controllen;
21902326 } else if (ctl_len) {
....@@ -2193,19 +2329,13 @@
21932329 if (ctl_len > sizeof(ctl)) {
21942330 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
21952331 if (ctl_buf == NULL)
2196
- goto out_freeiov;
2332
+ goto out;
21972333 }
21982334 err = -EFAULT;
2199
- /*
2200
- * Careful! Before this, msg_sys->msg_control contains a user pointer.
2201
- * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2202
- * checking falls down on this.
2203
- */
2204
- if (copy_from_user(ctl_buf,
2205
- (void __user __force *)msg_sys->msg_control,
2206
- ctl_len))
2335
+ if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
22072336 goto out_freectl;
22082337 msg_sys->msg_control = ctl_buf;
2338
+ msg_sys->msg_control_is_user = false;
22092339 }
22102340 msg_sys->msg_flags = flags;
22112341
....@@ -2239,7 +2369,47 @@
22392369 out_freectl:
22402370 if (ctl_buf != ctl)
22412371 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2242
-out_freeiov:
2372
+out:
2373
+ return err;
2374
+}
2375
+
2376
+int sendmsg_copy_msghdr(struct msghdr *msg,
2377
+ struct user_msghdr __user *umsg, unsigned flags,
2378
+ struct iovec **iov)
2379
+{
2380
+ int err;
2381
+
2382
+ if (flags & MSG_CMSG_COMPAT) {
2383
+ struct compat_msghdr __user *msg_compat;
2384
+
2385
+ msg_compat = (struct compat_msghdr __user *) umsg;
2386
+ err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2387
+ } else {
2388
+ err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2389
+ }
2390
+ if (err < 0)
2391
+ return err;
2392
+
2393
+ return 0;
2394
+}
2395
+
2396
+static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2397
+ struct msghdr *msg_sys, unsigned int flags,
2398
+ struct used_address *used_address,
2399
+ unsigned int allowed_msghdr_flags)
2400
+{
2401
+ struct sockaddr_storage address;
2402
+ struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2403
+ ssize_t err;
2404
+
2405
+ msg_sys->msg_name = &address;
2406
+
2407
+ err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2408
+ if (err < 0)
2409
+ return err;
2410
+
2411
+ err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2412
+ allowed_msghdr_flags);
22432413 kfree(iov);
22442414 return err;
22452415 }
....@@ -2247,6 +2417,11 @@
22472417 /*
22482418 * BSD sendmsg interface
22492419 */
2420
+long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
2421
+ unsigned int flags)
2422
+{
2423
+ return ____sys_sendmsg(sock, msg, flags, NULL, 0);
2424
+}
22502425
22512426 long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
22522427 bool forbid_cmsg_compat)
....@@ -2351,33 +2526,41 @@
23512526 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
23522527 }
23532528
2354
-static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2355
- struct msghdr *msg_sys, unsigned int flags, int nosec)
2529
+int recvmsg_copy_msghdr(struct msghdr *msg,
2530
+ struct user_msghdr __user *umsg, unsigned flags,
2531
+ struct sockaddr __user **uaddr,
2532
+ struct iovec **iov)
2533
+{
2534
+ ssize_t err;
2535
+
2536
+ if (MSG_CMSG_COMPAT & flags) {
2537
+ struct compat_msghdr __user *msg_compat;
2538
+
2539
+ msg_compat = (struct compat_msghdr __user *) umsg;
2540
+ err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2541
+ } else {
2542
+ err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2543
+ }
2544
+ if (err < 0)
2545
+ return err;
2546
+
2547
+ return 0;
2548
+}
2549
+
2550
+static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2551
+ struct user_msghdr __user *msg,
2552
+ struct sockaddr __user *uaddr,
2553
+ unsigned int flags, int nosec)
23562554 {
23572555 struct compat_msghdr __user *msg_compat =
2358
- (struct compat_msghdr __user *)msg;
2359
- struct iovec iovstack[UIO_FASTIOV];
2360
- struct iovec *iov = iovstack;
2556
+ (struct compat_msghdr __user *) msg;
2557
+ int __user *uaddr_len = COMPAT_NAMELEN(msg);
2558
+ struct sockaddr_storage addr;
23612559 unsigned long cmsg_ptr;
23622560 int len;
23632561 ssize_t err;
23642562
2365
- /* kernel mode address */
2366
- struct sockaddr_storage addr;
2367
-
2368
- /* user mode address pointers */
2369
- struct sockaddr __user *uaddr;
2370
- int __user *uaddr_len = COMPAT_NAMELEN(msg);
2371
-
23722563 msg_sys->msg_name = &addr;
2373
-
2374
- if (MSG_CMSG_COMPAT & flags)
2375
- err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
2376
- else
2377
- err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
2378
- if (err < 0)
2379
- return err;
2380
-
23812564 cmsg_ptr = (unsigned long)msg_sys->msg_control;
23822565 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
23832566
....@@ -2386,9 +2569,14 @@
23862569
23872570 if (sock->file->f_flags & O_NONBLOCK)
23882571 flags |= MSG_DONTWAIT;
2389
- err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
2572
+
2573
+ if (unlikely(nosec))
2574
+ err = sock_recvmsg_nosec(sock, msg_sys, flags);
2575
+ else
2576
+ err = sock_recvmsg(sock, msg_sys, flags);
2577
+
23902578 if (err < 0)
2391
- goto out_freeiov;
2579
+ goto out;
23922580 len = err;
23932581
23942582 if (uaddr != NULL) {
....@@ -2396,12 +2584,12 @@
23962584 msg_sys->msg_namelen, uaddr,
23972585 uaddr_len);
23982586 if (err < 0)
2399
- goto out_freeiov;
2587
+ goto out;
24002588 }
24012589 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
24022590 COMPAT_FLAGS(msg));
24032591 if (err)
2404
- goto out_freeiov;
2592
+ goto out;
24052593 if (MSG_CMSG_COMPAT & flags)
24062594 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
24072595 &msg_compat->msg_controllen);
....@@ -2409,10 +2597,25 @@
24092597 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
24102598 &msg->msg_controllen);
24112599 if (err)
2412
- goto out_freeiov;
2600
+ goto out;
24132601 err = len;
2602
+out:
2603
+ return err;
2604
+}
24142605
2415
-out_freeiov:
2606
+static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2607
+ struct msghdr *msg_sys, unsigned int flags, int nosec)
2608
+{
2609
+ struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2610
+ /* user mode address pointers */
2611
+ struct sockaddr __user *uaddr;
2612
+ ssize_t err;
2613
+
2614
+ err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2615
+ if (err < 0)
2616
+ return err;
2617
+
2618
+ err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
24162619 kfree(iov);
24172620 return err;
24182621 }
....@@ -2420,6 +2623,13 @@
24202623 /*
24212624 * BSD recvmsg interface
24222625 */
2626
+
2627
+long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2628
+ struct user_msghdr __user *umsg,
2629
+ struct sockaddr __user *uaddr, unsigned int flags)
2630
+{
2631
+ return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
2632
+}
24232633
24242634 long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
24252635 bool forbid_cmsg_compat)
....@@ -2452,8 +2662,9 @@
24522662 * Linux recvmmsg interface
24532663 */
24542664
2455
-int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2456
- unsigned int flags, struct timespec *timeout)
2665
+static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2666
+ unsigned int vlen, unsigned int flags,
2667
+ struct timespec64 *timeout)
24572668 {
24582669 int fput_needed, err, datagrams;
24592670 struct socket *sock;
....@@ -2518,8 +2729,7 @@
25182729
25192730 if (timeout) {
25202731 ktime_get_ts64(&timeout64);
2521
- *timeout = timespec64_to_timespec(
2522
- timespec64_sub(end_time, timeout64));
2732
+ *timeout = timespec64_sub(end_time, timeout64);
25232733 if (timeout->tv_sec < 0) {
25242734 timeout->tv_sec = timeout->tv_nsec = 0;
25252735 break;
....@@ -2563,26 +2773,32 @@
25632773 return datagrams;
25642774 }
25652775
2566
-static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2567
- unsigned int vlen, unsigned int flags,
2568
- struct timespec __user *timeout)
2776
+int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2777
+ unsigned int vlen, unsigned int flags,
2778
+ struct __kernel_timespec __user *timeout,
2779
+ struct old_timespec32 __user *timeout32)
25692780 {
25702781 int datagrams;
2571
- struct timespec timeout_sys;
2782
+ struct timespec64 timeout_sys;
25722783
2573
- if (flags & MSG_CMSG_COMPAT)
2574
- return -EINVAL;
2575
-
2576
- if (!timeout)
2577
- return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2578
-
2579
- if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2784
+ if (timeout && get_timespec64(&timeout_sys, timeout))
25802785 return -EFAULT;
25812786
2582
- datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2787
+ if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
2788
+ return -EFAULT;
25832789
2584
- if (datagrams > 0 &&
2585
- copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2790
+ if (!timeout && !timeout32)
2791
+ return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2792
+
2793
+ datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2794
+
2795
+ if (datagrams <= 0)
2796
+ return datagrams;
2797
+
2798
+ if (timeout && put_timespec64(&timeout_sys, timeout))
2799
+ datagrams = -EFAULT;
2800
+
2801
+ if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
25862802 datagrams = -EFAULT;
25872803
25882804 return datagrams;
....@@ -2590,10 +2806,25 @@
25902806
25912807 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
25922808 unsigned int, vlen, unsigned int, flags,
2593
- struct timespec __user *, timeout)
2809
+ struct __kernel_timespec __user *, timeout)
25942810 {
2595
- return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
2811
+ if (flags & MSG_CMSG_COMPAT)
2812
+ return -EINVAL;
2813
+
2814
+ return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
25962815 }
2816
+
2817
+#ifdef CONFIG_COMPAT_32BIT_TIME
2818
+SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
2819
+ unsigned int, vlen, unsigned int, flags,
2820
+ struct old_timespec32 __user *, timeout)
2821
+{
2822
+ if (flags & MSG_CMSG_COMPAT)
2823
+ return -EINVAL;
2824
+
2825
+ return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
2826
+}
2827
+#endif
25972828
25982829 #ifdef __ARCH_WANT_SYS_SOCKETCALL
25992830 /* Argument list sizes for sys_socketcall */
....@@ -2713,8 +2944,15 @@
27132944 a[2], true);
27142945 break;
27152946 case SYS_RECVMMSG:
2716
- err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2717
- a[3], (struct timespec __user *)a[4]);
2947
+ if (IS_ENABLED(CONFIG_64BIT))
2948
+ err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2949
+ a[2], a[3],
2950
+ (struct __kernel_timespec __user *)a[4],
2951
+ NULL);
2952
+ else
2953
+ err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2954
+ a[2], a[3], NULL,
2955
+ (struct old_timespec32 __user *)a[4]);
27182956 break;
27192957 case SYS_ACCEPT4:
27202958 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
....@@ -2817,7 +3055,7 @@
28173055
28183056 err = register_filesystem(&sock_fs_type);
28193057 if (err)
2820
- goto out_fs;
3058
+ goto out;
28213059 sock_mnt = kern_mount(&sock_fs_type);
28223060 if (IS_ERR(sock_mnt)) {
28233061 err = PTR_ERR(sock_mnt);
....@@ -2840,7 +3078,6 @@
28403078
28413079 out_mount:
28423080 unregister_filesystem(&sock_fs_type);
2843
-out_fs:
28443081 goto out;
28453082 }
28463083
....@@ -2855,38 +3092,6 @@
28553092 #endif /* CONFIG_PROC_FS */
28563093
28573094 #ifdef CONFIG_COMPAT
2858
-static int do_siocgstamp(struct net *net, struct socket *sock,
2859
- unsigned int cmd, void __user *up)
2860
-{
2861
- mm_segment_t old_fs = get_fs();
2862
- struct timeval ktv;
2863
- int err;
2864
-
2865
- set_fs(KERNEL_DS);
2866
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
2867
- set_fs(old_fs);
2868
- if (!err)
2869
- err = compat_put_timeval(&ktv, up);
2870
-
2871
- return err;
2872
-}
2873
-
2874
-static int do_siocgstampns(struct net *net, struct socket *sock,
2875
- unsigned int cmd, void __user *up)
2876
-{
2877
- mm_segment_t old_fs = get_fs();
2878
- struct timespec kts;
2879
- int err;
2880
-
2881
- set_fs(KERNEL_DS);
2882
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
2883
- set_fs(old_fs);
2884
- if (!err)
2885
- err = compat_put_timespec(&kts, up);
2886
-
2887
- return err;
2888
-}
2889
-
28903095 static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
28913096 {
28923097 struct compat_ifconf ifc32;
....@@ -2908,128 +3113,6 @@
29083113 ifc32.ifc_len = ifc.ifc_len;
29093114 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
29103115 return -EFAULT;
2911
-
2912
- return 0;
2913
-}
2914
-
2915
-static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2916
-{
2917
- struct compat_ethtool_rxnfc __user *compat_rxnfc;
2918
- bool convert_in = false, convert_out = false;
2919
- size_t buf_size = 0;
2920
- struct ethtool_rxnfc __user *rxnfc = NULL;
2921
- struct ifreq ifr;
2922
- u32 rule_cnt = 0, actual_rule_cnt;
2923
- u32 ethcmd;
2924
- u32 data;
2925
- int ret;
2926
-
2927
- if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2928
- return -EFAULT;
2929
-
2930
- compat_rxnfc = compat_ptr(data);
2931
-
2932
- if (get_user(ethcmd, &compat_rxnfc->cmd))
2933
- return -EFAULT;
2934
-
2935
- /* Most ethtool structures are defined without padding.
2936
- * Unfortunately struct ethtool_rxnfc is an exception.
2937
- */
2938
- switch (ethcmd) {
2939
- default:
2940
- break;
2941
- case ETHTOOL_GRXCLSRLALL:
2942
- /* Buffer size is variable */
2943
- if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2944
- return -EFAULT;
2945
- if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2946
- return -ENOMEM;
2947
- buf_size += rule_cnt * sizeof(u32);
2948
- /* fall through */
2949
- case ETHTOOL_GRXRINGS:
2950
- case ETHTOOL_GRXCLSRLCNT:
2951
- case ETHTOOL_GRXCLSRULE:
2952
- case ETHTOOL_SRXCLSRLINS:
2953
- convert_out = true;
2954
- /* fall through */
2955
- case ETHTOOL_SRXCLSRLDEL:
2956
- buf_size += sizeof(struct ethtool_rxnfc);
2957
- convert_in = true;
2958
- rxnfc = compat_alloc_user_space(buf_size);
2959
- break;
2960
- }
2961
-
2962
- if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2963
- return -EFAULT;
2964
-
2965
- ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
2966
-
2967
- if (convert_in) {
2968
- /* We expect there to be holes between fs.m_ext and
2969
- * fs.ring_cookie and at the end of fs, but nowhere else.
2970
- */
2971
- BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2972
- sizeof(compat_rxnfc->fs.m_ext) !=
2973
- offsetof(struct ethtool_rxnfc, fs.m_ext) +
2974
- sizeof(rxnfc->fs.m_ext));
2975
- BUILD_BUG_ON(
2976
- offsetof(struct compat_ethtool_rxnfc, fs.location) -
2977
- offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2978
- offsetof(struct ethtool_rxnfc, fs.location) -
2979
- offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2980
-
2981
- if (copy_in_user(rxnfc, compat_rxnfc,
2982
- (void __user *)(&rxnfc->fs.m_ext + 1) -
2983
- (void __user *)rxnfc) ||
2984
- copy_in_user(&rxnfc->fs.ring_cookie,
2985
- &compat_rxnfc->fs.ring_cookie,
2986
- (void __user *)(&rxnfc->fs.location + 1) -
2987
- (void __user *)&rxnfc->fs.ring_cookie))
2988
- return -EFAULT;
2989
- if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2990
- if (put_user(rule_cnt, &rxnfc->rule_cnt))
2991
- return -EFAULT;
2992
- } else if (copy_in_user(&rxnfc->rule_cnt,
2993
- &compat_rxnfc->rule_cnt,
2994
- sizeof(rxnfc->rule_cnt)))
2995
- return -EFAULT;
2996
- }
2997
-
2998
- ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
2999
- if (ret)
3000
- return ret;
3001
-
3002
- if (convert_out) {
3003
- if (copy_in_user(compat_rxnfc, rxnfc,
3004
- (const void __user *)(&rxnfc->fs.m_ext + 1) -
3005
- (const void __user *)rxnfc) ||
3006
- copy_in_user(&compat_rxnfc->fs.ring_cookie,
3007
- &rxnfc->fs.ring_cookie,
3008
- (const void __user *)(&rxnfc->fs.location + 1) -
3009
- (const void __user *)&rxnfc->fs.ring_cookie) ||
3010
- copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3011
- sizeof(rxnfc->rule_cnt)))
3012
- return -EFAULT;
3013
-
3014
- if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3015
- /* As an optimisation, we only copy the actual
3016
- * number of rules that the underlying
3017
- * function returned. Since Mallory might
3018
- * change the rule count in user memory, we
3019
- * check that it is less than the rule count
3020
- * originally given (as the user buffer size),
3021
- * which has been range-checked.
3022
- */
3023
- if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3024
- return -EFAULT;
3025
- if (actual_rule_cnt < rule_cnt)
3026
- rule_cnt = actual_rule_cnt;
3027
- if (copy_in_user(&compat_rxnfc->rule_locs[0],
3028
- &rxnfc->rule_locs[0],
3029
- rule_cnt * sizeof(u32)))
3030
- return -EFAULT;
3031
- }
3032
- }
30333116
30343117 return 0;
30353118 }
....@@ -3159,94 +3242,6 @@
31593242 return err;
31603243 }
31613244
3162
-struct rtentry32 {
3163
- u32 rt_pad1;
3164
- struct sockaddr rt_dst; /* target address */
3165
- struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3166
- struct sockaddr rt_genmask; /* target network mask (IP) */
3167
- unsigned short rt_flags;
3168
- short rt_pad2;
3169
- u32 rt_pad3;
3170
- unsigned char rt_tos;
3171
- unsigned char rt_class;
3172
- short rt_pad4;
3173
- short rt_metric; /* +1 for binary compatibility! */
3174
- /* char * */ u32 rt_dev; /* forcing the device at add */
3175
- u32 rt_mtu; /* per route MTU/Window */
3176
- u32 rt_window; /* Window clamping */
3177
- unsigned short rt_irtt; /* Initial RTT */
3178
-};
3179
-
3180
-struct in6_rtmsg32 {
3181
- struct in6_addr rtmsg_dst;
3182
- struct in6_addr rtmsg_src;
3183
- struct in6_addr rtmsg_gateway;
3184
- u32 rtmsg_type;
3185
- u16 rtmsg_dst_len;
3186
- u16 rtmsg_src_len;
3187
- u32 rtmsg_metric;
3188
- u32 rtmsg_info;
3189
- u32 rtmsg_flags;
3190
- s32 rtmsg_ifindex;
3191
-};
3192
-
3193
-static int routing_ioctl(struct net *net, struct socket *sock,
3194
- unsigned int cmd, void __user *argp)
3195
-{
3196
- int ret;
3197
- void *r = NULL;
3198
- struct in6_rtmsg r6;
3199
- struct rtentry r4;
3200
- char devname[16];
3201
- u32 rtdev;
3202
- mm_segment_t old_fs = get_fs();
3203
-
3204
- if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3205
- struct in6_rtmsg32 __user *ur6 = argp;
3206
- ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
3207
- 3 * sizeof(struct in6_addr));
3208
- ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3209
- ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3210
- ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3211
- ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3212
- ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3213
- ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3214
- ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
3215
-
3216
- r = (void *) &r6;
3217
- } else { /* ipv4 */
3218
- struct rtentry32 __user *ur4 = argp;
3219
- ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
3220
- 3 * sizeof(struct sockaddr));
3221
- ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3222
- ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3223
- ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3224
- ret |= get_user(r4.rt_window, &(ur4->rt_window));
3225
- ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3226
- ret |= get_user(rtdev, &(ur4->rt_dev));
3227
- if (rtdev) {
3228
- ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
3229
- r4.rt_dev = (char __user __force *)devname;
3230
- devname[15] = 0;
3231
- } else
3232
- r4.rt_dev = NULL;
3233
-
3234
- r = (void *) &r4;
3235
- }
3236
-
3237
- if (ret) {
3238
- ret = -EFAULT;
3239
- goto out;
3240
- }
3241
-
3242
- set_fs(KERNEL_DS);
3243
- ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
3244
- set_fs(old_fs);
3245
-
3246
-out:
3247
- return ret;
3248
-}
3249
-
32503245 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
32513246 * for some operations; this forces use of the newer bridge-utils that
32523247 * use compatible ioctls
....@@ -3278,20 +3273,19 @@
32783273 return old_bridge_ioctl(argp);
32793274 case SIOCGIFCONF:
32803275 return compat_dev_ifconf(net, argp);
3281
- case SIOCETHTOOL:
3282
- return ethtool_ioctl(net, argp);
32833276 case SIOCWANDEV:
32843277 return compat_siocwandev(net, argp);
32853278 case SIOCGIFMAP:
32863279 case SIOCSIFMAP:
32873280 return compat_sioc_ifmap(net, cmd, argp);
3288
- case SIOCADDRT:
3289
- case SIOCDELRT:
3290
- return routing_ioctl(net, sock, cmd, argp);
3291
- case SIOCGSTAMP:
3292
- return do_siocgstamp(net, sock, cmd, argp);
3293
- case SIOCGSTAMPNS:
3294
- return do_siocgstampns(net, sock, cmd, argp);
3281
+ case SIOCGSTAMP_OLD:
3282
+ case SIOCGSTAMPNS_OLD:
3283
+ if (!sock->ops->gettstamp)
3284
+ return -ENOIOCTLCMD;
3285
+ return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
3286
+ !COMPAT_USE_64BIT_TIME);
3287
+
3288
+ case SIOCETHTOOL:
32953289 case SIOCBONDSLAVEINFOQUERY:
32963290 case SIOCBONDINFOQUERY:
32973291 case SIOCSHWTSTAMP:
....@@ -3309,6 +3303,8 @@
33093303 case SIOCADDDLCI:
33103304 case SIOCDELDLCI:
33113305 case SIOCGSKNS:
3306
+ case SIOCGSTAMP_NEW:
3307
+ case SIOCGSTAMPNS_NEW:
33123308 return sock_ioctl(file, cmd, arg);
33133309
33143310 case SIOCGIFFLAGS:
....@@ -3354,6 +3350,7 @@
33543350 case SIOCSARP:
33553351 case SIOCGARP:
33563352 case SIOCDARP:
3353
+ case SIOCOUTQ:
33573354 case SIOCOUTQNSD:
33583355 case SIOCATMARK:
33593356 return sock_do_ioctl(net, sock, cmd, arg);
....@@ -3488,7 +3485,7 @@
34883485 EXPORT_SYMBOL(kernel_getsockname);
34893486
34903487 /**
3491
- * kernel_peername - get the address which the socket is connected (kernel space)
3488
+ * kernel_getpeername - get the address which the socket is connected (kernel space)
34923489 * @sock: socket
34933490 * @addr: address holder
34943491 *
....@@ -3501,71 +3498,6 @@
35013498 return sock->ops->getname(sock, addr, 1);
35023499 }
35033500 EXPORT_SYMBOL(kernel_getpeername);
3504
-
3505
-/**
3506
- * kernel_getsockopt - get a socket option (kernel space)
3507
- * @sock: socket
3508
- * @level: API level (SOL_SOCKET, ...)
3509
- * @optname: option tag
3510
- * @optval: option value
3511
- * @optlen: option length
3512
- *
3513
- * Assigns the option length to @optlen.
3514
- * Returns 0 or an error.
3515
- */
3516
-
3517
-int kernel_getsockopt(struct socket *sock, int level, int optname,
3518
- char *optval, int *optlen)
3519
-{
3520
- mm_segment_t oldfs = get_fs();
3521
- char __user *uoptval;
3522
- int __user *uoptlen;
3523
- int err;
3524
-
3525
- uoptval = (char __user __force *) optval;
3526
- uoptlen = (int __user __force *) optlen;
3527
-
3528
- set_fs(KERNEL_DS);
3529
- if (level == SOL_SOCKET)
3530
- err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
3531
- else
3532
- err = sock->ops->getsockopt(sock, level, optname, uoptval,
3533
- uoptlen);
3534
- set_fs(oldfs);
3535
- return err;
3536
-}
3537
-EXPORT_SYMBOL(kernel_getsockopt);
3538
-
3539
-/**
3540
- * kernel_setsockopt - set a socket option (kernel space)
3541
- * @sock: socket
3542
- * @level: API level (SOL_SOCKET, ...)
3543
- * @optname: option tag
3544
- * @optval: option value
3545
- * @optlen: option length
3546
- *
3547
- * Returns 0 or an error.
3548
- */
3549
-
3550
-int kernel_setsockopt(struct socket *sock, int level, int optname,
3551
- char *optval, unsigned int optlen)
3552
-{
3553
- mm_segment_t oldfs = get_fs();
3554
- char __user *uoptval;
3555
- int err;
3556
-
3557
- uoptval = (char __user __force *) optval;
3558
-
3559
- set_fs(KERNEL_DS);
3560
- if (level == SOL_SOCKET)
3561
- err = sock_setsockopt(sock, level, optname, uoptval, optlen);
3562
- else
3563
- err = sock->ops->setsockopt(sock, level, optname, uoptval,
3564
- optlen);
3565
- set_fs(oldfs);
3566
- return err;
3567
-}
3568
-EXPORT_SYMBOL(kernel_setsockopt);
35693501
35703502 /**
35713503 * kernel_sendpage - send a &page through a socket (kernel space)
....@@ -3581,9 +3513,11 @@
35813513 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
35823514 size_t size, int flags)
35833515 {
3584
- if (sock->ops->sendpage)
3516
+ if (sock->ops->sendpage) {
3517
+ /* Warn in case the improper page to zero-copy send */
3518
+ WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
35853519 return sock->ops->sendpage(sock, page, offset, size, flags);
3586
-
3520
+ }
35873521 return sock_no_sendpage(sock, page, offset, size, flags);
35883522 }
35893523 EXPORT_SYMBOL(kernel_sendpage);
....@@ -3614,7 +3548,7 @@
36143548 EXPORT_SYMBOL(kernel_sendpage_locked);
36153549
36163550 /**
3617
- * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3551
+ * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
36183552 * @sock: socket
36193553 * @how: connection part
36203554 *