2024-05-14 bedbef8ad3e75a304af6361af235302bcc61d06b
kernel/drivers/block/nbd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Network block device - make block devices work over TCP
  *
@@ -6,8 +7,6 @@
  *
  * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
  * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
- *
- * This file is released under GPLv2 or later.
  *
  * (part of code stolen from loop.c)
  */
@@ -27,6 +26,7 @@
 #include <linux/ioctl.h>
 #include <linux/mutex.h>
 #include <linux/compiler.h>
+#include <linux/completion.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -43,6 +43,9 @@
 #include <linux/nbd.h>
 #include <linux/nbd-netlink.h>
 #include <net/genetlink.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/nbd.h>
 
 static DEFINE_IDR(nbd_index_idr);
 static DEFINE_MUTEX(nbd_index_mutex);
@@ -69,14 +72,16 @@
	int index;
 };
 
-#define NBD_TIMEDOUT 0
+#define NBD_RT_TIMEDOUT 0
+#define NBD_RT_DISCONNECT_REQUESTED 1
+#define NBD_RT_DISCONNECTED 2
+#define NBD_RT_HAS_PID_FILE 3
+#define NBD_RT_HAS_CONFIG_REF 4
+#define NBD_RT_BOUND 5
+#define NBD_RT_DISCONNECT_ON_CLOSE 6
+
+#define NBD_DESTROY_ON_DISCONNECT 0
 #define NBD_DISCONNECT_REQUESTED 1
-#define NBD_DISCONNECTED 2
-#define NBD_HAS_PID_FILE 3
-#define NBD_HAS_CONFIG_REF 4
-#define NBD_BOUND 5
-#define NBD_DESTROY_ON_DISCONNECT 6
-#define NBD_DISCONNECT_ON_CLOSE 7
 
 struct nbd_config {
	u32 flags;
@@ -111,6 +116,9 @@
	struct list_head list;
	struct task_struct *task_recv;
	struct task_struct *task_setup;
+
+	struct completion *destroy_complete;
+	unsigned long flags;
 };
 
 #define NBD_CMD_REQUEUED 1
@@ -120,6 +128,7 @@
	struct mutex lock;
	int index;
	int cookie;
+	int retries;
	blk_status_t status;
	unsigned long flags;
	u32 cmd_cookie;
@@ -220,6 +229,16 @@
		disk->private_data = NULL;
		put_disk(disk);
	}
+
+	/*
+	 * Place this last, just before the nbd is freed, to
+	 * make sure that the disk and the related kobject are also
+	 * totally removed to avoid duplicate creation of the same
+	 * one.
+	 */
+	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
+		complete(nbd->destroy_complete);
+
	kfree(nbd);
 }
 
@@ -235,8 +254,8 @@
 
 static int nbd_disconnected(struct nbd_config *config)
 {
-	return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
-		test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+	return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
+		test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
 }
 
 static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
@@ -254,9 +273,9 @@
	if (!nsock->dead) {
		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
		if (atomic_dec_return(&nbd->config->live_connections) == 0) {
-			if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+			if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
					       &nbd->config->runtime_flags)) {
-				set_bit(NBD_DISCONNECTED,
+				set_bit(NBD_RT_DISCONNECTED,
					&nbd->config->runtime_flags);
				dev_info(nbd_to_dev(nbd),
					 "Disconnected due to user request.\n");
@@ -280,6 +299,7 @@
 {
	struct nbd_config *config = nbd->config;
	struct block_device *bdev = bdget_disk(nbd->disk, 0);
+	sector_t nr_sectors = config->bytesize >> 9;
 
	if (config->flags & NBD_FLAG_SEND_TRIM) {
		nbd->disk->queue->limits.discard_granularity = config->blksize;
@@ -288,14 +308,14 @@
	}
	blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
	blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
-	set_capacity(nbd->disk, config->bytesize >> 9);
+	set_capacity(nbd->disk, nr_sectors);
	if (bdev) {
		if (bdev->bd_disk) {
-			bd_set_size(bdev, config->bytesize);
+			bd_set_nr_sectors(bdev, nr_sectors);
			if (start)
				set_blocksize(bdev, config->blksize);
		} else
-			bdev->bd_invalidated = 1;
+			set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
		bdput(bdev);
	}
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
@@ -331,7 +351,7 @@
 
	if (config->num_connections == 0)
		return;
-	if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
+	if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
		return;
 
	for (i = 0; i < config->num_connections; i++) {
@@ -341,6 +361,22 @@
		mutex_unlock(&nsock->tx_lock);
	}
	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
+}
+
+static u32 req_to_nbd_cmd_type(struct request *req)
+{
+	switch (req_op(req)) {
+	case REQ_OP_DISCARD:
+		return NBD_CMD_TRIM;
+	case REQ_OP_FLUSH:
+		return NBD_CMD_FLUSH;
+	case REQ_OP_WRITE:
+		return NBD_CMD_WRITE;
+	case REQ_OP_READ:
+		return NBD_CMD_READ;
+	default:
+		return U32_MAX;
+	}
 }
 
 static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
@@ -360,16 +396,19 @@
	}
	config = nbd->config;
 
-	if (config->num_connections > 1) {
+	if (config->num_connections > 1 ||
+	    (config->num_connections == 1 && nbd->tag_set.timeout)) {
		dev_err_ratelimited(nbd_to_dev(nbd),
				    "Connection timed out, retrying (%d/%d alive)\n",
				    atomic_read(&config->live_connections),
				    config->num_connections);
		/*
		 * Hooray we have more connections, requeue this IO, the submit
-		 * path will put it on a real connection.
+		 * path will put it on a real connection. Or if only one
+		 * connection is configured, the submit path will wait until
+		 * a new connection is reconfigured or until the dead timeout.
		 */
-		if (config->socks && config->num_connections > 1) {
+		if (config->socks) {
			if (cmd->index < config->num_connections) {
				struct nbd_sock *nsock =
					config->socks[cmd->index];
@@ -389,11 +428,36 @@
			nbd_config_put(nbd);
			return BLK_EH_DONE;
		}
-	} else {
-		dev_err_ratelimited(nbd_to_dev(nbd),
-				    "Connection timed out\n");
	}
-	set_bit(NBD_TIMEDOUT, &config->runtime_flags);
+
+	if (!nbd->tag_set.timeout) {
+		/*
+		 * Userspace sets timeout=0 to disable socket disconnection,
+		 * so just warn and reset the timer.
+		 */
+		struct nbd_sock *nsock = config->socks[cmd->index];
+		cmd->retries++;
+		dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). Runtime %u seconds\n",
+			 req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)),
+			 (unsigned long long)blk_rq_pos(req) << 9,
+			 blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries);
+
+		mutex_lock(&nsock->tx_lock);
+		if (cmd->cookie != nsock->cookie) {
+			nbd_requeue_cmd(cmd);
+			mutex_unlock(&nsock->tx_lock);
+			mutex_unlock(&cmd->lock);
+			nbd_config_put(nbd);
+			return BLK_EH_DONE;
+		}
+		mutex_unlock(&nsock->tx_lock);
+		mutex_unlock(&cmd->lock);
+		nbd_config_put(nbd);
+		return BLK_EH_RESET_TIMER;
+	}
+
+	dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
+	set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
	cmd->status = BLK_STS_IOERR;
	mutex_unlock(&cmd->lock);
	sock_shutdown(nbd);
@@ -478,24 +542,11 @@
	u32 nbd_cmd_flags = 0;
	int sent = nsock->sent, skip = 0;
 
-	iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+	iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
 
-	switch (req_op(req)) {
-	case REQ_OP_DISCARD:
-		type = NBD_CMD_TRIM;
-		break;
-	case REQ_OP_FLUSH:
-		type = NBD_CMD_FLUSH;
-		break;
-	case REQ_OP_WRITE:
-		type = NBD_CMD_WRITE;
-		break;
-	case REQ_OP_READ:
-		type = NBD_CMD_READ;
-		break;
-	default:
+	type = req_to_nbd_cmd_type(req);
+	if (type == U32_MAX)
		return -EIO;
-	}
 
	if (rq_data_dir(req) == WRITE &&
	    (config->flags & NBD_FLAG_READ_ONLY)) {
@@ -514,6 +565,10 @@
	if (sent) {
		if (sent >= sizeof(request)) {
			skip = sent - sizeof(request);
+
+			/* initialize handle for tracing purposes */
+			handle = nbd_cmd_handle(cmd);
+
			goto send_pages;
		}
		iov_iter_advance(&from, sent);
@@ -522,6 +577,7 @@
	}
	cmd->index = index;
	cmd->cookie = nsock->cookie;
+	cmd->retries = 0;
	request.type = htonl(type | nbd_cmd_flags);
	if (type != NBD_CMD_FLUSH) {
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
@@ -530,11 +586,14 @@
	handle = nbd_cmd_handle(cmd);
	memcpy(request.handle, &handle, sizeof(handle));
 
+	trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));
+
	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
		req, nbdcmd_to_ascii(type),
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
	result = sock_xmit(nbd, index, 1, &from,
			   (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
+	trace_nbd_header_sent(req, handle);
	if (result <= 0) {
		if (was_interrupted(result)) {
			/* If we havne't sent anything we can just return BUSY,
@@ -569,8 +628,7 @@
 
		dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
			req, bvec.bv_len);
-		iov_iter_bvec(&from, ITER_BVEC | WRITE,
-			      &bvec, 1, bvec.bv_len);
+		iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
		if (skip) {
			if (skip >= iov_iter_count(&from)) {
				skip -= iov_iter_count(&from);
@@ -608,6 +666,7 @@
		bio = next;
	}
 out:
+	trace_nbd_payload_sent(req, handle);
	nsock->pending = NULL;
	nsock->sent = 0;
	return 0;
@@ -629,7 +688,7 @@
	int ret = 0;
 
	reply.magic = 0;
-	iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
+	iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
	if (result <= 0) {
		if (!nbd_disconnected(config))
@@ -655,6 +714,7 @@
			tag, req);
		return ERR_PTR(-ENOENT);
	}
+	trace_nbd_header_received(req, handle);
	cmd = blk_mq_rq_to_pdu(req);
 
	mutex_lock(&cmd->lock);
@@ -689,21 +749,18 @@
		struct bio_vec bvec;
 
		rq_for_each_segment(bvec, req, iter) {
-			iov_iter_bvec(&to, ITER_BVEC | READ,
-				      &bvec, 1, bvec.bv_len);
+			iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
			if (result <= 0) {
				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
					result);
				/*
-				 * If we've disconnected or we only have 1
-				 * connection then we need to make sure we
+				 * If we've disconnected, we need to make sure we
				 * complete this request, otherwise error out
				 * and let the timeout stuff handle resubmitting
				 * this request onto another connection.
				 */
-				if (nbd_disconnected(config) ||
-				    config->num_connections <= 1) {
+				if (nbd_disconnected(config)) {
					cmd->status = BLK_STS_IOERR;
					goto out;
				}
@@ -715,6 +772,7 @@
		}
	}
 out:
+	trace_nbd_payload_received(req, handle);
	mutex_unlock(&cmd->lock);
	return ret ? ERR_PTR(ret) : cmd;
 }
@@ -727,6 +785,7 @@
	struct nbd_device *nbd = args->nbd;
	struct nbd_config *config = nbd->config;
	struct nbd_cmd *cmd;
+	struct request *rq;
 
	while (1) {
		cmd = nbd_read_stat(nbd, args->index);
@@ -739,7 +798,9 @@
			break;
		}
 
-		blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
+		rq = blk_mq_rq_from_pdu(cmd);
+		if (likely(!blk_should_fake_timeout(rq->q)))
+			blk_mq_complete_request(rq);
	}
	nbd_config_put(nbd);
	atomic_dec(&config->recv_threads);
@@ -747,15 +808,20 @@
	kfree(args);
 }
 
-static void nbd_clear_req(struct request *req, void *data, bool reserved)
+static bool nbd_clear_req(struct request *req, void *data, bool reserved)
 {
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+
+	/* don't abort one completed request */
+	if (blk_mq_request_completed(req))
+		return true;
 
	mutex_lock(&cmd->lock);
	cmd->status = BLK_STS_IOERR;
	mutex_unlock(&cmd->lock);
 
	blk_mq_complete_request(req);
+	return true;
 }
 
 static void nbd_clear_que(struct nbd_device *nbd)
@@ -773,12 +839,12 @@
	struct nbd_sock *nsock = config->socks[index];
	int fallback = nsock->fallback_index;
 
-	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+	if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
		return new_index;
 
	if (config->num_connections <= 1) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
-				    "Attempted send on invalid socket\n");
+				    "Dead connection, failed to find a fallback\n");
		return new_index;
	}
 
@@ -814,11 +880,15 @@
	struct nbd_config *config = nbd->config;
	if (!config->dead_conn_timeout)
		return 0;
-	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+
+	if (!wait_event_timeout(config->conn_wait,
+				test_bit(NBD_RT_DISCONNECTED,
+					 &config->runtime_flags) ||
+				atomic_read(&config->live_connections) > 0,
+				config->dead_conn_timeout))
		return 0;
-	return wait_event_timeout(config->conn_wait,
-				  atomic_read(&config->live_connections) > 0,
-				  config->dead_conn_timeout) > 0;
+
+	return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
 }
 
 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@@ -973,12 +1043,12 @@
	blk_mq_freeze_queue(nbd->disk->queue);
 
	if (!netlink && !nbd->task_setup &&
-	    !test_bit(NBD_BOUND, &config->runtime_flags))
+	    !test_bit(NBD_RT_BOUND, &config->runtime_flags))
		nbd->task_setup = current;
 
	if (!netlink &&
	    (nbd->task_setup != current ||
-	     test_bit(NBD_BOUND, &config->runtime_flags))) {
+	     test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
		dev_err(disk_to_dev(nbd->disk),
			"Device being setup by another task");
		err = -EBUSY;
@@ -1065,7 +1135,7 @@
		mutex_unlock(&nsock->tx_lock);
		sockfd_put(old);
 
-		clear_bit(NBD_DISCONNECTED, &config->runtime_flags);
+		clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
 
		/* We take the tx_mutex in an error path in the recv_work, so we
		 * need to queue_work outside of the tx_mutex.
@@ -1085,7 +1155,7 @@
 {
	if (bdev->bd_openers > 1)
		return;
-	bd_set_size(bdev, 0);
+	bd_set_nr_sectors(bdev, 0);
 }
 
 static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1121,7 +1191,7 @@
	for (i = 0; i < config->num_connections; i++) {
		struct nbd_sock *nsock = config->socks[i];
 
-		iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+		iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
		mutex_lock(&nsock->tx_lock);
		ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
		if (ret <= 0)
@@ -1136,7 +1206,8 @@
	struct nbd_config *config = nbd->config;
 
	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
-	set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+	set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
+	set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
	send_disconnects(nbd);
	return 0;
 }
@@ -1155,7 +1226,7 @@
		struct nbd_config *config = nbd->config;
		nbd_dev_dbg_close(nbd);
		nbd_size_clear(nbd);
-		if (test_and_clear_bit(NBD_HAS_PID_FILE,
+		if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
				       &config->runtime_flags))
			device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
		nbd->task_recv = NULL;
@@ -1221,7 +1292,7 @@
		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
		return error;
	}
-	set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);
+	set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
 
	nbd_dev_dbg_init(nbd);
	for (i = 0; i < num_connections; i++) {
@@ -1267,20 +1338,22 @@
		return ret;
 
	if (max_part)
-		bdev->bd_invalidated = 1;
+		set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
	mutex_unlock(&nbd->config_lock);
	ret = wait_event_interruptible(config->recv_wq,
					 atomic_read(&config->recv_threads) == 0);
-	if (ret)
+	if (ret) {
		sock_shutdown(nbd);
-	flush_workqueue(nbd->recv_workq);
+		nbd_clear_que(nbd);
+	}
 
+	flush_workqueue(nbd->recv_workq);
	mutex_lock(&nbd->config_lock);
	nbd_bdev_reset(bdev);
	/* user requested, ignore socket errors */
-	if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
+	if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
		ret = 0;
-	if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
+	if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
		ret = -ETIMEDOUT;
	return ret;
 }
@@ -1288,10 +1361,10 @@
 static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
				 struct block_device *bdev)
 {
-	sock_shutdown(nbd);
+	nbd_clear_sock(nbd);
	__invalidate_device(bdev, true);
	nbd_bdev_reset(bdev);
-	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+	if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
			       &nbd->config->runtime_flags))
		nbd_config_put(nbd);
 }
@@ -1302,6 +1375,15 @@
	    blksize > PAGE_SIZE)
		return false;
	return true;
+}
+
+static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout)
+{
+	nbd->tag_set.timeout = timeout * HZ;
+	if (timeout)
+		blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
+	else
+		blk_queue_rq_timeout(nbd->disk->queue, 30 * HZ);
 }
 
 /* Must be called with config_lock held */
@@ -1334,10 +1416,7 @@
		nbd_size_set(nbd, config->blksize, arg);
		return 0;
	case NBD_SET_TIMEOUT:
-		if (arg) {
-			nbd->tag_set.timeout = arg * HZ;
-			blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
-		}
+		nbd_set_cmd_timeout(nbd, arg);
		return 0;
 
	case NBD_SET_FLAGS:
@@ -1382,7 +1461,7 @@
	/* Don't allow ioctl operations on a nbd device that was created with
	 * netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
	 */
-	if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+	if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
	    (cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
		error = __nbd_ioctl(bdev, nbd, cmd, arg);
	else
@@ -1395,15 +1474,20 @@
 {
	struct nbd_config *config;
 
+	if (!try_module_get(THIS_MODULE))
+		return ERR_PTR(-ENODEV);
+
	config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
-	if (!config)
-		return NULL;
+	if (!config) {
+		module_put(THIS_MODULE);
+		return ERR_PTR(-ENOMEM);
+	}
+
	atomic_set(&config->recv_threads, 0);
	init_waitqueue_head(&config->recv_wq);
	init_waitqueue_head(&config->conn_wait);
	config->blksize = NBD_DEF_BLKSIZE;
	atomic_set(&config->live_connections, 0);
-	try_module_get(THIS_MODULE);
	return config;
 }
 
@@ -1430,18 +1514,19 @@
			mutex_unlock(&nbd->config_lock);
			goto out;
		}
-		config = nbd->config = nbd_alloc_config();
-		if (!config) {
-			ret = -ENOMEM;
+		config = nbd_alloc_config();
+		if (IS_ERR(config)) {
+			ret = PTR_ERR(config);
			mutex_unlock(&nbd->config_lock);
			goto out;
		}
+		nbd->config = config;
		refcount_set(&nbd->config_refs, 1);
		refcount_inc(&nbd->refs);
		mutex_unlock(&nbd->config_lock);
-		bdev->bd_invalidated = 1;
+		set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
	} else if (nbd_disconnected(nbd->config)) {
-		bdev->bd_invalidated = 1;
+		set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
	}
 out:
	mutex_unlock(&nbd_index_mutex);
@@ -1453,7 +1538,7 @@
	struct nbd_device *nbd = disk->private_data;
	struct block_device *bdev = bdget_disk(disk, 0);
 
-	if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+	if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
			bdev->bd_openers == 0)
		nbd_disconnect_and_put(nbd);
	bdput(bdev);
@@ -1539,7 +1624,7 @@
		return -EIO;
 
	dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
-	if (!dir) {
+	if (IS_ERR(dir)) {
		dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
			nbd_name(nbd));
		return -EIO;
@@ -1565,7 +1650,7 @@
	struct dentry *dbg_dir;
 
	dbg_dir = debugfs_create_dir("nbd", NULL);
-	if (!dbg_dir)
+	if (IS_ERR(dbg_dir))
		return -EIO;
 
	nbd_dbg_dir = dbg_dir;
@@ -1638,7 +1723,8 @@
		if (err == -ENOSPC)
			err = -EEXIST;
	} else {
-		err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
+		err = idr_alloc(&nbd_index_idr, nbd, 0,
				(MINORMASK >> part_shift) + 1, GFP_KERNEL);
		if (err >= 0)
			index = err;
	}
@@ -1653,8 +1739,9 @@
	nbd->tag_set.numa_node = NUMA_NO_NODE;
	nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
-		BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
+		BLK_MQ_F_BLOCKING;
	nbd->tag_set.driver_data = nbd;
+	nbd->destroy_complete = NULL;
 
	err = blk_mq_alloc_tag_set(&nbd->tag_set);
	if (err)
@@ -1743,8 +1830,33 @@
	[NBD_DEVICE_CONNECTED] = { .type = NLA_U8 },
 };
 
+static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
+{
+	struct nbd_config *config = nbd->config;
+	u64 bsize = config->blksize;
+	u64 bytes = config->bytesize;
+
+	if (info->attrs[NBD_ATTR_SIZE_BYTES])
+		bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
+
+	if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
+		bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
+		if (!bsize)
+			bsize = NBD_DEF_BLKSIZE;
+		if (!nbd_is_valid_blksize(bsize)) {
+			printk(KERN_ERR "Invalid block size %llu\n", bsize);
+			return -EINVAL;
+		}
+	}
+
+	if (bytes != config->bytesize || bsize != config->blksize)
+		nbd_size_set(nbd, bsize, div64_u64(bytes, bsize));
+	return 0;
+}
+
 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 {
+	DECLARE_COMPLETION_ONSTACK(destroy_complete);
	struct nbd_device *nbd = NULL;
	struct nbd_config *config;
	int index = -1;
@@ -1754,8 +1866,19 @@
	if (!netlink_capable(skb, CAP_SYS_ADMIN))
		return -EPERM;
 
-	if (info->attrs[NBD_ATTR_INDEX])
+	if (info->attrs[NBD_ATTR_INDEX]) {
		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+
+		/*
+		 * A too-large first_minor can cause duplicate creation of
+		 * sysfs files/links, since index << part_shift might overflow, or
+		 * MKDEV() expects that the max bits of first_minor is 20.
+		 */
+		if (index < 0 || index > MINORMASK >> part_shift) {
+			printk(KERN_ERR "nbd: illegal input index %d\n", index);
+			return -EINVAL;
+		}
+	}
	if (!info->attrs[NBD_ATTR_SOCKETS]) {
		printk(KERN_ERR "nbd: must specify at least one socket\n");
		return -EINVAL;
@@ -1796,6 +1919,17 @@
		mutex_unlock(&nbd_index_mutex);
		return -EINVAL;
	}
+
+	if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
+	    test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
+		nbd->destroy_complete = &destroy_complete;
+		mutex_unlock(&nbd_index_mutex);
+
+		/* Wait until the nbd stuff is totally destroyed */
+		wait_for_completion(&destroy_complete);
+		goto again;
+	}
+
	if (!refcount_inc_not_zero(&nbd->refs)) {
		mutex_unlock(&nbd_index_mutex);
		if (index == -1)
@@ -1820,37 +1954,24 @@
		nbd_put(nbd);
		return -EINVAL;
	}
-	config = nbd->config = nbd_alloc_config();
-	if (!nbd->config) {
+	config = nbd_alloc_config();
+	if (IS_ERR(config)) {
		mutex_unlock(&nbd->config_lock);
		nbd_put(nbd);
		printk(KERN_ERR "nbd: couldn't allocate config\n");
-		return -ENOMEM;
+		return PTR_ERR(config);
	}
+	nbd->config = config;
	refcount_set(&nbd->config_refs, 1);
-	set_bit(NBD_BOUND, &config->runtime_flags);
+	set_bit(NBD_RT_BOUND, &config->runtime_flags);
 
-	if (info->attrs[NBD_ATTR_SIZE_BYTES]) {
-		u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
-		nbd_size_set(nbd, config->blksize,
-			     div64_u64(bytes, config->blksize));
-	}
-	if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
-		u64 bsize =
-			nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
-		if (!bsize)
-			bsize = NBD_DEF_BLKSIZE;
-		if (!nbd_is_valid_blksize(bsize)) {
-			ret = -EINVAL;
-			goto out;
-		}
-		nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize));
-	}
-	if (info->attrs[NBD_ATTR_TIMEOUT]) {
-		u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
-		nbd->tag_set.timeout = timeout * HZ;
-		blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
-	}
+	ret = nbd_genl_size_set(info, nbd);
+	if (ret)
+		goto out;
+
+	if (info->attrs[NBD_ATTR_TIMEOUT])
+		nbd_set_cmd_timeout(nbd,
+				    nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
	if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
		config->dead_conn_timeout =
			nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
@@ -1862,12 +1983,24 @@
	if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-			set_bit(NBD_DESTROY_ON_DISCONNECT,
-				&config->runtime_flags);
-			put_dev = true;
+			/*
+			 * We have 1 ref to keep the device around, and then 1
+			 * ref for our current operation here, which will be
+			 * inherited by the config. If we already have
+			 * DESTROY_ON_DISCONNECT set then we know we don't have
+			 * that extra ref already held so we don't need the
+			 * put_dev.
+			 */
+			if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
					      &nbd->flags))
				put_dev = true;
+		} else {
+			if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
					       &nbd->flags))
				refcount_inc(&nbd->refs);
		}
		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
-			set_bit(NBD_DISCONNECT_ON_CLOSE,
+			set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
				&config->runtime_flags);
		}
	}
@@ -1885,8 +2018,10 @@
			ret = -EINVAL;
			goto out;
		}
-		ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
-				       nbd_sock_policy, info->extack);
+		ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
						  attr,
						  nbd_sock_policy,
						  info->extack);
		if (ret != 0) {
			printk(KERN_ERR "nbd: error processing sock list\n");
			ret = -EINVAL;
@@ -1904,7 +2039,7 @@
 out:
	mutex_unlock(&nbd->config_lock);
	if (!ret) {
-		set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
+		set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
		refcount_inc(&nbd->config_refs);
		nbd_connect_reply(info, nbd->index);
	}
@@ -1918,15 +2053,21 @@
 {
	mutex_lock(&nbd->config_lock);
	nbd_disconnect(nbd);
-	nbd_clear_sock(nbd);
-	mutex_unlock(&nbd->config_lock);
+	sock_shutdown(nbd);
+	wake_up(&nbd->config->conn_wait);
	/*
	 * Make sure recv thread has finished, so it does not drop the last
	 * config ref and try to destroy the workqueue from inside the work
-	 * queue.
+	 * queue. And this also ensures that we can safely call nbd_clear_que()
+	 * to cancel the inflight I/Os.
	 */
-	flush_workqueue(nbd->recv_workq);
-	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+	if (nbd->recv_workq)
+		flush_workqueue(nbd->recv_workq);
+	nbd_clear_que(nbd);
+	nbd->task_setup = NULL;
+	mutex_unlock(&nbd->config_lock);
+
+	if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
			       &nbd->config->runtime_flags))
		nbd_config_put(nbd);
 }
@@ -2010,7 +2151,7 @@
 
	mutex_lock(&nbd->config_lock);
	config = nbd->config;
-	if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+	if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
	    !nbd->task_recv) {
		dev_err(nbd_to_dev(nbd),
			"not configured, cannot reconfigure\n");
@@ -2018,11 +2159,13 @@
		goto out;
	}
 
-	if (info->attrs[NBD_ATTR_TIMEOUT]) {
-		u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
-		nbd->tag_set.timeout = timeout * HZ;
-		blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
-	}
+	ret = nbd_genl_size_set(info, nbd);
+	if (ret)
+		goto out;
+
+	if (info->attrs[NBD_ATTR_TIMEOUT])
+		nbd_set_cmd_timeout(nbd,
+				    nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
	if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
		config->dead_conn_timeout =
			nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
@@ -2032,19 +2175,19 @@
		u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
		if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
			if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
-					      &config->runtime_flags))
+					      &nbd->flags))
				put_dev = true;
		} else {
			if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
-					       &config->runtime_flags))
+					       &nbd->flags))
				refcount_inc(&nbd->refs);
		}
 
		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
-			set_bit(NBD_DISCONNECT_ON_CLOSE,
+			set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
				&config->runtime_flags);
		} else {
-			clear_bit(NBD_DISCONNECT_ON_CLOSE,
+			clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
				  &config->runtime_flags);
		}
	}
@@ -2062,8 +2205,10 @@
			ret = -EINVAL;
			goto out;
		}
-		ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
-				       nbd_sock_policy, info->extack);
+		ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
						  attr,
						  nbd_sock_policy,
						  info->extack);
		if (ret != 0) {
			printk(KERN_ERR "nbd: error processing sock list\n");
			ret = -EINVAL;
@@ -2090,25 +2235,25 @@
	return ret;
 }
 
-static const struct genl_ops nbd_connect_genl_ops[] = {
+static const struct genl_small_ops nbd_connect_genl_ops[] = {
	{
		.cmd = NBD_CMD_CONNECT,
-		.policy = nbd_attr_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nbd_genl_connect,
	},
	{
		.cmd = NBD_CMD_DISCONNECT,
-		.policy = nbd_attr_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nbd_genl_disconnect,
	},
	{
		.cmd = NBD_CMD_RECONFIGURE,
-		.policy = nbd_attr_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nbd_genl_reconfigure,
	},
	{
		.cmd = NBD_CMD_STATUS,
-		.policy = nbd_attr_policy,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = nbd_genl_status,
	},
 };
@@ -2122,9 +2267,10 @@
	.name = NBD_GENL_FAMILY_NAME,
	.version = NBD_GENL_VERSION,
	.module = THIS_MODULE,
-	.ops = nbd_connect_genl_ops,
-	.n_ops = ARRAY_SIZE(nbd_connect_genl_ops),
+	.small_ops = nbd_connect_genl_ops,
+	.n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops),
	.maxattr = NBD_ATTR_MAX,
+	.policy = nbd_attr_policy,
	.mcgrps = nbd_mcast_grps,
	.n_mcgrps = ARRAY_SIZE(nbd_mcast_grps),
 };
@@ -2144,7 +2290,7 @@
	 */
	if (refcount_read(&nbd->config_refs))
		connected = 1;
-	dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM);
+	dev_opt = nla_nest_start_noflag(reply, NBD_DEVICE_ITEM);
	if (!dev_opt)
		return -EMSGSIZE;
	ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index);
@@ -2192,7 +2338,7 @@
		goto out;
	}
 
-	dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST);
+	dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
	if (index == -1) {
		ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
		if (ret) {
@@ -2212,8 +2358,7 @@
	}
	nla_nest_end(reply, dev_list);
	genlmsg_end(reply, reply_head);
-	genlmsg_reply(reply, info);
-	ret = 0;
+	ret = genlmsg_reply(reply, info);
 out:
	mutex_unlock(&nbd_index_mutex);
	return ret;
@@ -2337,6 +2482,12 @@
	struct nbd_device *nbd;
	LIST_HEAD(del_list);
 
+	/*
+	 * Unregister netlink interface prior to waiting
+	 * for the completion of netlink commands.
+	 */
+	genl_unregister_family(&nbd_genl_family);
+
	nbd_dbg_close();
 
	mutex_lock(&nbd_index_mutex);
@@ -2346,13 +2497,15 @@
	while (!list_empty(&del_list)) {
		nbd = list_first_entry(&del_list, struct nbd_device, list);
		list_del_init(&nbd->list);
+		if (refcount_read(&nbd->config_refs))
+			printk(KERN_ERR "nbd: possibly leaking nbd_config (ref %d)\n",
+			       refcount_read(&nbd->config_refs));
		if (refcount_read(&nbd->refs) != 1)
			printk(KERN_ERR "nbd: possibly leaking a device\n");
		nbd_put(nbd);
	}
 
	idr_destroy(&nbd_index_idr);
-	genl_unregister_family(&nbd_genl_family);
	unregister_blkdev(NBD_MAJOR, "nbd");
 }
 