hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/vmw_vsock/af_vsock.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * VMware vSockets Driver
  *
  * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
  */

 /* Implementation notes:
@@ -134,18 +126,19 @@
  */
 #define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)

-static const struct vsock_transport *transport;
+#define VSOCK_DEFAULT_BUFFER_SIZE     (1024 * 256)
+#define VSOCK_DEFAULT_BUFFER_MAX_SIZE (1024 * 256)
+#define VSOCK_DEFAULT_BUFFER_MIN_SIZE 128
+
+/* Transport used for host->guest communication */
+static const struct vsock_transport *transport_h2g;
+/* Transport used for guest->host communication */
+static const struct vsock_transport *transport_g2h;
+/* Transport used for DGRAM communication */
+static const struct vsock_transport *transport_dgram;
+/* Transport used for local communication */
+static const struct vsock_transport *transport_local;
 static DEFINE_MUTEX(vsock_register_mutex);
-
-/**** EXPORTS ****/
-
-/* Get the ID of the local context. This is transport dependent. */
-
-int vm_sockets_get_local_cid(void)
-{
-	return transport->get_local_cid();
-}
-EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);

 /**** UTILS ****/

@@ -196,7 +189,7 @@
 	return __vsock_bind(sk, &local_addr);
 }

-static int __init vsock_init_tables(void)
+static void vsock_init_tables(void)
 {
 	int i;

@@ -205,7 +198,6 @@

 	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
 		INIT_LIST_HEAD(&vsock_connected_table[i]);
-	return 0;
 }

 static void __vsock_insert_bound(struct list_head *list,
@@ -238,9 +230,15 @@
 {
 	struct vsock_sock *vsk;

-	list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table)
-		if (addr->svm_port == vsk->local_addr.svm_port)
+	list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) {
+		if (vsock_addr_equals_addr(addr, &vsk->local_addr))
 			return sk_vsock(vsk);
+
+		if (addr->svm_port == vsk->local_addr.svm_port &&
+		    (vsk->local_addr.svm_cid == VMADDR_CID_ANY ||
+		     addr->svm_cid == VMADDR_CID_ANY))
+			return sk_vsock(vsk);
+	}

 	return NULL;
 }
@@ -390,6 +388,112 @@
 }
 EXPORT_SYMBOL_GPL(vsock_enqueue_accept);

+static bool vsock_use_local_transport(unsigned int remote_cid)
+{
+	if (!transport_local)
+		return false;
+
+	if (remote_cid == VMADDR_CID_LOCAL)
+		return true;
+
+	if (transport_g2h) {
+		return remote_cid == transport_g2h->get_local_cid();
+	} else {
+		return remote_cid == VMADDR_CID_HOST;
+	}
+}
+
+static void vsock_deassign_transport(struct vsock_sock *vsk)
+{
+	if (!vsk->transport)
+		return;
+
+	vsk->transport->destruct(vsk);
+	module_put(vsk->transport->module);
+	vsk->transport = NULL;
+}
+
+/* Assign a transport to a socket and call the .init transport callback.
+ *
+ * Note: for stream socket this must be called when vsk->remote_addr is set
+ * (e.g. during the connect() or when a connection request on a listener
+ * socket is received).
+ * The vsk->remote_addr is used to decide which transport to use:
+ *  - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if
+ *    g2h is not loaded, will use local transport;
+ *  - remote CID <= VMADDR_CID_HOST will use guest->host transport;
+ *  - remote CID > VMADDR_CID_HOST will use host->guest transport;
+ */
+int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
+{
+	const struct vsock_transport *new_transport;
+	struct sock *sk = sk_vsock(vsk);
+	unsigned int remote_cid = vsk->remote_addr.svm_cid;
+	int ret;
+
+	switch (sk->sk_type) {
+	case SOCK_DGRAM:
+		new_transport = transport_dgram;
+		break;
+	case SOCK_STREAM:
+		if (vsock_use_local_transport(remote_cid))
+			new_transport = transport_local;
+		else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g)
+			new_transport = transport_g2h;
+		else
+			new_transport = transport_h2g;
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	if (vsk->transport) {
+		if (vsk->transport == new_transport)
+			return 0;
+
+		/* transport->release() must be called with sock lock acquired.
+		 * This path can only be taken during vsock_stream_connect(),
+		 * where we have already held the sock lock.
+		 * In the other cases, this function is called on a new socket
+		 * which is not assigned to any transport.
+		 */
+		vsk->transport->release(vsk);
+		vsock_deassign_transport(vsk);
+	}
+
+	/* We increase the module refcnt to prevent the transport unloading
+	 * while there are open sockets assigned to it.
+	 */
+	if (!new_transport || !try_module_get(new_transport->module))
+		return -ENODEV;
+
+	ret = new_transport->init(vsk, psk);
+	if (ret) {
+		module_put(new_transport->module);
+		return ret;
+	}
+
+	vsk->transport = new_transport;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vsock_assign_transport);
+
+bool vsock_find_cid(unsigned int cid)
+{
+	if (transport_g2h && cid == transport_g2h->get_local_cid())
+		return true;
+
+	if (transport_h2g && cid == VMADDR_CID_HOST)
+		return true;
+
+	if (transport_local && cid == VMADDR_CID_LOCAL)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(vsock_find_cid);
+
 static struct sock *vsock_dequeue_accept(struct sock *listener)
 {
 	struct vsock_sock *vlistener;
@@ -426,7 +530,12 @@

 static int vsock_send_shutdown(struct sock *sk, int mode)
 {
-	return transport->shutdown(vsock_sk(sk), mode);
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (!vsk->transport)
+		return -ENODEV;
+
+	return vsk->transport->shutdown(vsk, mode);
 }

 static void vsock_pending_work(struct work_struct *work)
@@ -447,7 +556,7 @@
 	if (vsock_is_pending(sk)) {
 		vsock_remove_pending(listener, sk);

-		listener->sk_ack_backlog--;
+		sk_acceptq_removed(listener);
 	} else if (!vsk->rejected) {
 		/* We are not on the pending list and accept() did not reject
 		 * us, so we must have been accepted by our user process. We
@@ -481,7 +590,7 @@
 static int __vsock_bind_stream(struct vsock_sock *vsk,
 			       struct sockaddr_vm *addr)
 {
-	static u32 port = 0;
+	static u32 port;
 	struct sockaddr_vm new_addr;

 	if (!port)
@@ -536,13 +645,12 @@
 static int __vsock_bind_dgram(struct vsock_sock *vsk,
 			      struct sockaddr_vm *addr)
 {
-	return transport->dgram_bind(vsk, addr);
+	return vsk->transport->dgram_bind(vsk, addr);
 }

 static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
 {
 	struct vsock_sock *vsk = vsock_sk(sk);
-	u32 cid;
 	int retval;

 	/* First ensure this socket isn't already bound. */
@@ -552,10 +660,9 @@
 	/* Now bind to the provided address or select appropriate values if
 	 * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that
 	 * like AF_INET prevents binding to a non-local IP address (in most
-	 * cases), we only allow binding to the local CID.
+	 * cases), we only allow binding to a local CID.
 	 */
-	cid = transport->get_local_cid();
-	if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY)
+	if (addr->svm_cid != VMADDR_CID_ANY && !vsock_find_cid(addr->svm_cid))
 		return -EADDRNOTAVAIL;

 	switch (sk->sk_socket->type) {
@@ -579,12 +686,12 @@

 static void vsock_connect_timeout(struct work_struct *work);

-struct sock *__vsock_create(struct net *net,
-			    struct socket *sock,
-			    struct sock *parent,
-			    gfp_t priority,
-			    unsigned short type,
-			    int kern)
+static struct sock *__vsock_create(struct net *net,
+				   struct socket *sock,
+				   struct sock *parent,
+				   gfp_t priority,
+				   unsigned short type,
+				   int kern)
 {
 	struct sock *sk;
 	struct vsock_sock *psk;
@@ -628,39 +735,30 @@
 		vsk->trusted = psk->trusted;
 		vsk->owner = get_cred(psk->owner);
 		vsk->connect_timeout = psk->connect_timeout;
+		vsk->buffer_size = psk->buffer_size;
+		vsk->buffer_min_size = psk->buffer_min_size;
+		vsk->buffer_max_size = psk->buffer_max_size;
 		security_sk_clone(parent, sk);
 	} else {
 		vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
 		vsk->owner = get_current_cred();
 		vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
+		vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE;
+		vsk->buffer_min_size = VSOCK_DEFAULT_BUFFER_MIN_SIZE;
+		vsk->buffer_max_size = VSOCK_DEFAULT_BUFFER_MAX_SIZE;
 	}
-
-	if (transport->init(vsk, psk) < 0) {
-		sk_free(sk);
-		return NULL;
-	}
-
-	if (sock)
-		vsock_insert_unbound(vsk);

 	return sk;
 }
-EXPORT_SYMBOL_GPL(__vsock_create);

 static void __vsock_release(struct sock *sk, int level)
 {
 	if (sk) {
-		struct sk_buff *skb;
 		struct sock *pending;
 		struct vsock_sock *vsk;

 		vsk = vsock_sk(sk);
 		pending = NULL;	/* Compiler warning. */
-
-		/* The release call is supposed to use lock_sock_nested()
-		 * rather than lock_sock(), if a sock lock should be acquired.
-		 */
-		transport->release(vsk);

 		/* When "level" is SINGLE_DEPTH_NESTING, use the nested
 		 * version to avoid the warning "possible recursive locking
@@ -668,11 +766,16 @@
 		 * is the same as lock_sock(sk).
 		 */
 		lock_sock_nested(sk, level);
+
+		if (vsk->transport)
+			vsk->transport->release(vsk);
+		else if (sk->sk_type == SOCK_STREAM)
+			vsock_remove_sock(vsk);
+
 		sock_orphan(sk);
 		sk->sk_shutdown = SHUTDOWN_MASK;

-		while ((skb = skb_dequeue(&sk->sk_receive_queue)))
-			kfree_skb(skb);
+		skb_queue_purge(&sk->sk_receive_queue);

 		/* Clean up any sockets that never were accepted. */
 		while ((pending = vsock_dequeue_accept(sk)) != NULL) {
@@ -689,7 +792,7 @@
 {
 	struct vsock_sock *vsk = vsock_sk(sk);

-	transport->destruct(vsk);
+	vsock_deassign_transport(vsk);

 	/* When clearing these addresses, there's no need to set the family and
 	 * possibly register the address family with the kernel.
@@ -711,15 +814,22 @@
 	return err;
 }

+struct sock *vsock_create_connected(struct sock *parent)
+{
+	return __vsock_create(sock_net(parent), NULL, parent, GFP_KERNEL,
+			      parent->sk_type, 0);
+}
+EXPORT_SYMBOL_GPL(vsock_create_connected);
+
 s64 vsock_stream_has_data(struct vsock_sock *vsk)
 {
-	return transport->stream_has_data(vsk);
+	return vsk->transport->stream_has_data(vsk);
 }
 EXPORT_SYMBOL_GPL(vsock_stream_has_data);

 s64 vsock_stream_has_space(struct vsock_sock *vsk)
 {
-	return transport->stream_has_space(vsk);
+	return vsk->transport->stream_has_space(vsk);
 }
 EXPORT_SYMBOL_GPL(vsock_stream_has_space);

@@ -890,7 +1000,11 @@
 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

 	} else if (sock->type == SOCK_STREAM) {
+		const struct vsock_transport *transport;
+
 		lock_sock(sk);
+
+		transport = vsk->transport;

 		/* Listening sockets that have connections in their accept
 		 * queue can be read.
@@ -900,7 +1014,7 @@
 			mask |= EPOLLIN | EPOLLRDNORM;

 		/* If there is something in the queue then we can read. */
-		if (transport->stream_is_active(vsk) &&
+		if (transport && transport->stream_is_active(vsk) &&
 		    !(sk->sk_shutdown & RCV_SHUTDOWN)) {
 			bool data_ready_now = false;
 			int ret = transport->notify_poll_in(
@@ -924,7 +1038,7 @@
 		}

 		/* Connected sockets that can produce data can be written. */
-		if (sk->sk_state == TCP_ESTABLISHED) {
+		if (transport && sk->sk_state == TCP_ESTABLISHED) {
 			if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
 				bool space_avail_now = false;
 				int ret = transport->notify_poll_out(
@@ -965,6 +1079,7 @@
 	struct sock *sk;
 	struct vsock_sock *vsk;
 	struct sockaddr_vm *remote_addr;
+	const struct vsock_transport *transport;

 	if (msg->msg_flags & MSG_OOB)
 		return -EOPNOTSUPP;
@@ -975,6 +1090,8 @@
 	vsk = vsock_sk(sk);

 	lock_sock(sk);
+
+	transport = vsk->transport;

 	err = vsock_auto_bind(vsk);
 	if (err)
@@ -1057,8 +1174,8 @@
 		if (err)
 			goto out;

-		if (!transport->dgram_allow(remote_addr->svm_cid,
-					    remote_addr->svm_port)) {
+		if (!vsk->transport->dgram_allow(remote_addr->svm_cid,
+						 remote_addr->svm_port)) {
 			err = -EINVAL;
 			goto out;
 		}
@@ -1074,7 +1191,9 @@
 static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 			       size_t len, int flags)
 {
-	return transport->dgram_dequeue(vsock_sk(sock->sk), msg, len, flags);
+	struct vsock_sock *vsk = vsock_sk(sock->sk);
+
+	return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
 }

 static const struct proto_ops vsock_dgram_ops = {
@@ -1090,8 +1209,6 @@
 	.ioctl = sock_no_ioctl,
 	.listen = sock_no_listen,
 	.shutdown = vsock_shutdown,
-	.setsockopt = sock_no_setsockopt,
-	.getsockopt = sock_no_getsockopt,
 	.sendmsg = vsock_dgram_sendmsg,
 	.recvmsg = vsock_dgram_recvmsg,
 	.mmap = sock_no_mmap,
@@ -1100,7 +1217,9 @@

 static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
 {
-	if (!transport->cancel_pkt)
+	const struct vsock_transport *transport = vsk->transport;
+
+	if (!transport || !transport->cancel_pkt)
 		return -EOPNOTSUPP;

 	return transport->cancel_pkt(vsk);
@@ -1118,6 +1237,7 @@
 	if (sk->sk_state == TCP_SYN_SENT &&
 	    (sk->sk_shutdown != SHUTDOWN_MASK)) {
 		sk->sk_state = TCP_CLOSE;
+		sk->sk_socket->state = SS_UNCONNECTED;
 		sk->sk_err = ETIMEDOUT;
 		sk->sk_error_report(sk);
 		vsock_transport_cancel_pkt(vsk);
@@ -1133,6 +1253,7 @@
 	int err;
 	struct sock *sk;
 	struct vsock_sock *vsk;
+	const struct vsock_transport *transport;
 	struct sockaddr_vm *remote_addr;
 	long timeout;
 	DEFINE_WAIT(wait);
@@ -1169,18 +1290,25 @@
 		goto out;
 	}

+	/* Set the remote address that we are connecting to. */
+	memcpy(&vsk->remote_addr, remote_addr,
+	       sizeof(vsk->remote_addr));
+
+	err = vsock_assign_transport(vsk, NULL);
+	if (err)
+		goto out;
+
+	transport = vsk->transport;
+
 	/* The hypervisor and well-known contexts do not have socket
 	 * endpoints.
 	 */
-	if (!transport->stream_allow(remote_addr->svm_cid,
+	if (!transport ||
+	    !transport->stream_allow(remote_addr->svm_cid,
 				     remote_addr->svm_port)) {
 		err = -ENETUNREACH;
 		goto out;
 	}
-
-	/* Set the remote address that we are connecting to. */
-	memcpy(&vsk->remote_addr, remote_addr,
-	       sizeof(vsk->remote_addr));

 	err = vsock_auto_bind(vsk);
 	if (err)
@@ -1215,7 +1343,14 @@
 		 * timeout fires.
 		 */
 		sock_hold(sk);
-		schedule_delayed_work(&vsk->connect_work, timeout);
+
+		/* If the timeout function is already scheduled,
+		 * reschedule it, then ungrab the socket refcount to
+		 * keep it balanced.
+		 */
+		if (mod_delayed_work(system_wq, &vsk->connect_work,
+				     timeout))
+			sock_put(sk);

 		/* Skip ahead to preserve error code set above. */
 		goto out_wait;
@@ -1232,7 +1367,7 @@
 			vsock_transport_cancel_pkt(vsk);
 			vsock_remove_connected(vsk);
 			goto out_wait;
-		} else if (timeout == 0) {
+		} else if ((sk->sk_state != TCP_ESTABLISHED) && (timeout == 0)) {
 			err = -ETIMEDOUT;
 			sk->sk_state = TCP_CLOSE;
 			sock->state = SS_UNCONNECTED;
@@ -1312,7 +1447,7 @@
 		err = -listener->sk_err;

 	if (connected) {
-		listener->sk_ack_backlog--;
+		sk_acceptq_removed(listener);

 		lock_sock_nested(connected, SINGLE_DEPTH_NESTING);
 		vconnected = vsock_sk(connected);
@@ -1377,15 +1512,33 @@
 	return err;
 }

+static void vsock_update_buffer_size(struct vsock_sock *vsk,
+				     const struct vsock_transport *transport,
+				     u64 val)
+{
+	if (val > vsk->buffer_max_size)
+		val = vsk->buffer_max_size;
+
+	if (val < vsk->buffer_min_size)
+		val = vsk->buffer_min_size;
+
+	if (val != vsk->buffer_size &&
+	    transport && transport->notify_buffer_size)
+		transport->notify_buffer_size(vsk, &val);
+
+	vsk->buffer_size = val;
+}
+
 static int vsock_stream_setsockopt(struct socket *sock,
 				   int level,
 				   int optname,
-				   char __user *optval,
+				   sockptr_t optval,
 				   unsigned int optlen)
 {
 	int err;
 	struct sock *sk;
 	struct vsock_sock *vsk;
+	const struct vsock_transport *transport;
 	u64 val;

 	if (level != AF_VSOCK)
@@ -1397,7 +1550,7 @@
 			err = -EINVAL;		\
 			goto exit;		\
 		}				\
-		if (copy_from_user(&_v, optval, sizeof(_v)) != 0) {	\
+		if (copy_from_sockptr(&_v, optval, sizeof(_v)) != 0) {	\
 			err = -EFAULT;		\
 			goto exit;		\
 		}				\
@@ -1409,24 +1562,28 @@

 	lock_sock(sk);

+	transport = vsk->transport;
+
 	switch (optname) {
 	case SO_VM_SOCKETS_BUFFER_SIZE:
 		COPY_IN(val);
-		transport->set_buffer_size(vsk, val);
+		vsock_update_buffer_size(vsk, transport, val);
 		break;

 	case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
 		COPY_IN(val);
-		transport->set_max_buffer_size(vsk, val);
+		vsk->buffer_max_size = val;
+		vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
 		break;

 	case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
 		COPY_IN(val);
-		transport->set_min_buffer_size(vsk, val);
+		vsk->buffer_min_size = val;
+		vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
 		break;

 	case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
-		struct timeval tv;
+		struct __kernel_old_timeval tv;
 		COPY_IN(tv);
 		if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC &&
 		    tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) {
@@ -1489,22 +1646,22 @@

 	switch (optname) {
 	case SO_VM_SOCKETS_BUFFER_SIZE:
-		val = transport->get_buffer_size(vsk);
+		val = vsk->buffer_size;
 		COPY_OUT(val);
 		break;

 	case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
-		val = transport->get_max_buffer_size(vsk);
+		val = vsk->buffer_max_size;
 		COPY_OUT(val);
 		break;

 	case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
-		val = transport->get_min_buffer_size(vsk);
+		val = vsk->buffer_min_size;
 		COPY_OUT(val);
 		break;

 	case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
-		struct timeval tv;
+		struct __kernel_old_timeval tv;
 		tv.tv_sec = vsk->connect_timeout / HZ;
 		tv.tv_usec =
 		    (vsk->connect_timeout -
@@ -1530,6 +1687,7 @@
 {
 	struct sock *sk;
 	struct vsock_sock *vsk;
+	const struct vsock_transport *transport;
 	ssize_t total_written;
 	long timeout;
 	int err;
@@ -1546,6 +1704,8 @@

 	lock_sock(sk);

+	transport = vsk->transport;
+
 	/* Callers should not provide a destination with stream sockets. */
 	if (msg->msg_namelen) {
 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
@@ -1559,7 +1719,7 @@
 		goto out;
 	}

-	if (sk->sk_state != TCP_ESTABLISHED ||
+	if (!transport || sk->sk_state != TCP_ESTABLISHED ||
 	    !vsock_addr_bound(&vsk->local_addr)) {
 		err = -ENOTCONN;
 		goto out;
@@ -1669,6 +1829,7 @@
 {
 	struct sock *sk;
 	struct vsock_sock *vsk;
+	const struct vsock_transport *transport;
 	int err;
 	size_t target;
 	ssize_t copied;
@@ -1683,7 +1844,9 @@

 	lock_sock(sk);

-	if (sk->sk_state != TCP_ESTABLISHED) {
+	transport = vsk->transport;
+
+	if (!transport || sk->sk_state != TCP_ESTABLISHED) {
 		/* Recvmsg is supposed to return 0 if a peer performs an
 		 * orderly shutdown. Differentiate between that case and when a
 		 * peer has not connected or a local shutdown occured with the
@@ -1857,6 +2020,10 @@
 static int vsock_create(struct net *net, struct socket *sock,
 			int protocol, int kern)
 {
+	struct vsock_sock *vsk;
+	struct sock *sk;
+	int ret;
+
 	if (!sock)
 		return -EINVAL;

@@ -1876,7 +2043,23 @@

 	sock->state = SS_UNCONNECTED;

-	return __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern) ? 0 : -ENOMEM;
+	sk = __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern);
+	if (!sk)
+		return -ENOMEM;
+
+	vsk = vsock_sk(sk);
+
+	if (sock->type == SOCK_DGRAM) {
+		ret = vsock_assign_transport(vsk, NULL);
+		if (ret < 0) {
+			sock_put(sk);
+			return ret;
+		}
+	}
+
+	vsock_insert_unbound(vsk);
+
+	return 0;
 }

 static const struct net_proto_family vsock_family_ops = {
@@ -1889,11 +2072,20 @@
 			   unsigned int cmd, void __user *ptr)
 {
 	u32 __user *p = ptr;
+	u32 cid = VMADDR_CID_ANY;
 	int retval = 0;

 	switch (cmd) {
 	case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
-		if (put_user(transport->get_local_cid(), p) != 0)
+		/* To be compatible with the VMCI behavior, we prioritize the
+		 * guest CID instead of well-know host CID (VMADDR_CID_HOST).
+		 */
+		if (transport_g2h)
+			cid = transport_g2h->get_local_cid();
+		else if (transport_h2g)
+			cid = transport_h2g->get_local_cid();
+
+		if (put_user(cid, p) != 0)
 			retval = -EFAULT;
 		break;

@@ -1933,24 +2125,13 @@
 	.fops = &vsock_device_ops,
 };

-int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
+static int __init vsock_init(void)
 {
-	int err = mutex_lock_interruptible(&vsock_register_mutex);
+	int err = 0;

-	if (err)
-		return err;
+	vsock_init_tables();

-	if (transport) {
-		err = -EBUSY;
-		goto err_busy;
-	}
-
-	/* Transport must be the owner of the protocol so that it can't
-	 * unload while there are open sockets.
-	 */
-	vsock_proto.owner = owner;
-	transport = t;
-
+	vsock_proto.owner = THIS_MODULE;
 	vsock_device.minor = MISC_DYNAMIC_MINOR;
 	err = misc_register(&vsock_device);
 	if (err) {
@@ -1971,7 +2152,6 @@
 		goto err_unregister_proto;
 	}

-	mutex_unlock(&vsock_register_mutex);
 	return 0;

 err_unregister_proto:
@@ -1979,44 +2159,99 @@
 err_deregister_misc:
 	misc_deregister(&vsock_device);
 err_reset_transport:
-	transport = NULL;
+	return err;
+}
+
+static void __exit vsock_exit(void)
+{
+	misc_deregister(&vsock_device);
+	sock_unregister(AF_VSOCK);
+	proto_unregister(&vsock_proto);
+}
+
+const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk)
+{
+	return vsk->transport;
+}
+EXPORT_SYMBOL_GPL(vsock_core_get_transport);
+
+int vsock_core_register(const struct vsock_transport *t, int features)
+{
+	const struct vsock_transport *t_h2g, *t_g2h, *t_dgram, *t_local;
+	int err = mutex_lock_interruptible(&vsock_register_mutex);
+
+	if (err)
+		return err;
+
+	t_h2g = transport_h2g;
+	t_g2h = transport_g2h;
+	t_dgram = transport_dgram;
+	t_local = transport_local;
+
+	if (features & VSOCK_TRANSPORT_F_H2G) {
+		if (t_h2g) {
+			err = -EBUSY;
+			goto err_busy;
+		}
+		t_h2g = t;
+	}
+
+	if (features & VSOCK_TRANSPORT_F_G2H) {
+		if (t_g2h) {
+			err = -EBUSY;
+			goto err_busy;
+		}
+		t_g2h = t;
+	}
+
+	if (features & VSOCK_TRANSPORT_F_DGRAM) {
+		if (t_dgram) {
+			err = -EBUSY;
+			goto err_busy;
+		}
+		t_dgram = t;
+	}
+
+	if (features & VSOCK_TRANSPORT_F_LOCAL) {
+		if (t_local) {
+			err = -EBUSY;
+			goto err_busy;
+		}
+		t_local = t;
+	}
+
+	transport_h2g = t_h2g;
+	transport_g2h = t_g2h;
+	transport_dgram = t_dgram;
+	transport_local = t_local;
+
 err_busy:
 	mutex_unlock(&vsock_register_mutex);
 	return err;
 }
-EXPORT_SYMBOL_GPL(__vsock_core_init);
+EXPORT_SYMBOL_GPL(vsock_core_register);

-void vsock_core_exit(void)
+void vsock_core_unregister(const struct vsock_transport *t)
 {
 	mutex_lock(&vsock_register_mutex);

-	misc_deregister(&vsock_device);
-	sock_unregister(AF_VSOCK);
-	proto_unregister(&vsock_proto);
+	if (transport_h2g == t)
+		transport_h2g = NULL;

-	/* We do not want the assignment below re-ordered. */
-	mb();
-	transport = NULL;
+	if (transport_g2h == t)
+		transport_g2h = NULL;
+
+	if (transport_dgram == t)
+		transport_dgram = NULL;
+
+	if (transport_local == t)
+		transport_local = NULL;

 	mutex_unlock(&vsock_register_mutex);
 }
-EXPORT_SYMBOL_GPL(vsock_core_exit);
+EXPORT_SYMBOL_GPL(vsock_core_unregister);

-const struct vsock_transport *vsock_core_get_transport(void)
-{
-	/* vsock_register_mutex not taken since only the transport uses this
-	 * function and only while registered.
-	 */
-	return transport;
-}
-EXPORT_SYMBOL_GPL(vsock_core_get_transport);
-
-static void __exit vsock_exit(void)
-{
-	/* Do nothing. This function makes this module removable. */
-}
-
-module_init(vsock_init_tables);
+module_init(vsock_init);
 module_exit(vsock_exit);

 MODULE_AUTHOR("VMware, Inc.");
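
The hunks above replace the single global transport pointer with per-feature slots (h2g, g2h, dgram, local) and a registration API. The following sketch, which is illustrative only and not part of this patch, shows how a transport module would be expected to plug into that API. The flag names and struct fields are taken from this diff, but the module name, the CID value and the stubbed callbacks are hypothetical; a real transport must provide the full stream/dgram/notify callback set.

/* Hypothetical transport skeleton, for illustration only. */
#include <linux/module.h>
#include <net/af_vsock.h>

static u32 example_get_local_cid(void)
{
	return 52;		/* assumed CID of this endpoint */
}

static int example_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
{
	/* Allocate per-socket state; psk is the listener's vsock_sock when
	 * the core creates a child socket for an incoming connection.
	 */
	return 0;
}

static void example_sock_destruct(struct vsock_sock *vsk)
{
	/* Free whatever example_sock_init() allocated. */
}

static void example_sock_release(struct vsock_sock *vsk)
{
	/* Detach the socket from any transport queues. */
}

static const struct vsock_transport example_transport = {
	.module		= THIS_MODULE,	/* pinned per socket via try_module_get() */
	.get_local_cid	= example_get_local_cid,
	.init		= example_sock_init,
	.destruct	= example_sock_destruct,
	.release	= example_sock_release,
	/* stream, dgram and notify_* callbacks omitted for brevity */
};

static int __init example_transport_init(void)
{
	/* Claim the guest->host slot; returns -EBUSY if already taken. */
	return vsock_core_register(&example_transport, VSOCK_TRANSPORT_F_G2H);
}

static void __exit example_transport_exit(void)
{
	vsock_core_unregister(&example_transport);
}

module_init(example_transport_init);
module_exit(example_transport_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Illustrative vsock transport skeleton");

Because the core takes a module reference for every socket assigned to a transport (the try_module_get() call in vsock_assign_transport()), such a module can only be unloaded once all sockets bound to it have been released and vsock_core_unregister() has cleared its slot.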