From 072de836f53be56a70cecf70b43ae43b7ce17376 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 10:08:36 +0000
Subject: [PATCH] nbd: update runtime flags, timeout handling and netlink API
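
Update drivers/block/nbd.c with a collection of fixes and API updates:

- Rename the per-config runtime flags to NBD_RT_* and add device-level
  flags (NBD_DESTROY_ON_DISCONNECT, NBD_DISCONNECT_REQUESTED) on
  struct nbd_device.
- When DESTROY_ON_DISCONNECT is set, wait on a completion until the
  previous device instance is fully destroyed before reusing its index.
- Rework request timeout handling: requeue on a live connection when a
  command timeout is configured, warn and reset the timer when the
  timeout is disabled (timeout=0), and track per-command retries.
- Add nbd tracepoints for the request send/receive paths.
- Switch to bd_set_nr_sectors()/GD_NEED_PART_SCAN, the updated
  iov_iter_kvec()/iov_iter_bvec() calling conventions,
  nla_parse_nested_deprecated(), nla_nest_start_noflag() and
  genl_small_ops.
- Make nbd_alloc_config() return ERR_PTR() and take the module
  reference reliably; clear the request queue and skip already
  completed requests when tearing down sockets.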
---
kernel/drivers/block/nbd.c | 437 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 289 insertions(+), 148 deletions(-)
diff --git a/kernel/drivers/block/nbd.c b/kernel/drivers/block/nbd.c
index 81b9556..b0d3dad 100644
--- a/kernel/drivers/block/nbd.c
+++ b/kernel/drivers/block/nbd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Network block device - make block devices work over TCP
*
@@ -6,8 +7,6 @@
*
* Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
* Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
- *
- * This file is released under GPLv2 or later.
*
* (part of code stolen from loop.c)
*/
@@ -27,6 +26,7 @@
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
+#include <linux/completion.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -43,6 +43,9 @@
#include <linux/nbd.h>
#include <linux/nbd-netlink.h>
#include <net/genetlink.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/nbd.h>
static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
@@ -69,14 +72,16 @@
int index;
};
-#define NBD_TIMEDOUT 0
+#define NBD_RT_TIMEDOUT 0
+#define NBD_RT_DISCONNECT_REQUESTED 1
+#define NBD_RT_DISCONNECTED 2
+#define NBD_RT_HAS_PID_FILE 3
+#define NBD_RT_HAS_CONFIG_REF 4
+#define NBD_RT_BOUND 5
+#define NBD_RT_DISCONNECT_ON_CLOSE 6
+
+#define NBD_DESTROY_ON_DISCONNECT 0
#define NBD_DISCONNECT_REQUESTED 1
-#define NBD_DISCONNECTED 2
-#define NBD_HAS_PID_FILE 3
-#define NBD_HAS_CONFIG_REF 4
-#define NBD_BOUND 5
-#define NBD_DESTROY_ON_DISCONNECT 6
-#define NBD_DISCONNECT_ON_CLOSE 7
struct nbd_config {
u32 flags;
@@ -111,6 +116,9 @@
struct list_head list;
struct task_struct *task_recv;
struct task_struct *task_setup;
+
+ struct completion *destroy_complete;
+ unsigned long flags;
};
#define NBD_CMD_REQUEUED 1
@@ -120,6 +128,7 @@
struct mutex lock;
int index;
int cookie;
+ int retries;
blk_status_t status;
unsigned long flags;
u32 cmd_cookie;
@@ -220,6 +229,16 @@
disk->private_data = NULL;
put_disk(disk);
}
+
+ /*
+ * Do this last, just before the nbd is freed, to make
+ * sure that the disk and the related kobject are also
+ * completely removed, so the same one cannot be
+ * created twice.
+ */
+ if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) && nbd->destroy_complete)
+ complete(nbd->destroy_complete);
+
kfree(nbd);
}
@@ -235,8 +254,8 @@
static int nbd_disconnected(struct nbd_config *config)
{
- return test_bit(NBD_DISCONNECTED, &config->runtime_flags) ||
- test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+ return test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags) ||
+ test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
}
static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
@@ -254,9 +273,9 @@
if (!nsock->dead) {
kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
if (atomic_dec_return(&nbd->config->live_connections) == 0) {
- if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+ if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
&nbd->config->runtime_flags)) {
- set_bit(NBD_DISCONNECTED,
+ set_bit(NBD_RT_DISCONNECTED,
&nbd->config->runtime_flags);
dev_info(nbd_to_dev(nbd),
"Disconnected due to user request.\n");
@@ -280,6 +299,7 @@
{
struct nbd_config *config = nbd->config;
struct block_device *bdev = bdget_disk(nbd->disk, 0);
+ sector_t nr_sectors = config->bytesize >> 9;
if (config->flags & NBD_FLAG_SEND_TRIM) {
nbd->disk->queue->limits.discard_granularity = config->blksize;
@@ -288,14 +308,14 @@
}
blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
- set_capacity(nbd->disk, config->bytesize >> 9);
+ set_capacity(nbd->disk, nr_sectors);
if (bdev) {
if (bdev->bd_disk) {
- bd_set_size(bdev, config->bytesize);
+ bd_set_nr_sectors(bdev, nr_sectors);
if (start)
set_blocksize(bdev, config->blksize);
} else
- bdev->bd_invalidated = 1;
+ set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
bdput(bdev);
}
kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
@@ -331,7 +351,7 @@
if (config->num_connections == 0)
return;
- if (test_and_set_bit(NBD_DISCONNECTED, &config->runtime_flags))
+ if (test_and_set_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
return;
for (i = 0; i < config->num_connections; i++) {
@@ -341,6 +361,22 @@
mutex_unlock(&nsock->tx_lock);
}
dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
+}
+
+static u32 req_to_nbd_cmd_type(struct request *req)
+{
+ switch (req_op(req)) {
+ case REQ_OP_DISCARD:
+ return NBD_CMD_TRIM;
+ case REQ_OP_FLUSH:
+ return NBD_CMD_FLUSH;
+ case REQ_OP_WRITE:
+ return NBD_CMD_WRITE;
+ case REQ_OP_READ:
+ return NBD_CMD_READ;
+ default:
+ return U32_MAX;
+ }
}
static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
@@ -360,16 +396,19 @@
}
config = nbd->config;
- if (config->num_connections > 1) {
+ if (config->num_connections > 1 ||
+ (config->num_connections == 1 && nbd->tag_set.timeout)) {
dev_err_ratelimited(nbd_to_dev(nbd),
"Connection timed out, retrying (%d/%d alive)\n",
atomic_read(&config->live_connections),
config->num_connections);
/*
* Hooray we have more connections, requeue this IO, the submit
- * path will put it on a real connection.
+ * path will put it on a real connection. Or if only one
+ * connection is configured, the submit path will wait until
+ * a new connection is reconfigured or the dead timeout expires.
*/
- if (config->socks && config->num_connections > 1) {
+ if (config->socks) {
if (cmd->index < config->num_connections) {
struct nbd_sock *nsock =
config->socks[cmd->index];
@@ -389,11 +428,36 @@
nbd_config_put(nbd);
return BLK_EH_DONE;
}
- } else {
- dev_err_ratelimited(nbd_to_dev(nbd),
- "Connection timed out\n");
}
- set_bit(NBD_TIMEDOUT, &config->runtime_flags);
+
+ if (!nbd->tag_set.timeout) {
+ /*
+ * Userspace sets timeout=0 to disable socket disconnection,
+ * so just warn and reset the timer.
+ */
+ struct nbd_sock *nsock = config->socks[cmd->index];
+ cmd->retries++;
+ dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). Runtime %u seconds\n",
+ req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)),
+ (unsigned long long)blk_rq_pos(req) << 9,
+ blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries);
+
+ mutex_lock(&nsock->tx_lock);
+ if (cmd->cookie != nsock->cookie) {
+ nbd_requeue_cmd(cmd);
+ mutex_unlock(&nsock->tx_lock);
+ mutex_unlock(&cmd->lock);
+ nbd_config_put(nbd);
+ return BLK_EH_DONE;
+ }
+ mutex_unlock(&nsock->tx_lock);
+ mutex_unlock(&cmd->lock);
+ nbd_config_put(nbd);
+ return BLK_EH_RESET_TIMER;
+ }
+
+ dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
+ set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
sock_shutdown(nbd);
@@ -478,24 +542,11 @@
u32 nbd_cmd_flags = 0;
int sent = nsock->sent, skip = 0;
- iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+ iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
- switch (req_op(req)) {
- case REQ_OP_DISCARD:
- type = NBD_CMD_TRIM;
- break;
- case REQ_OP_FLUSH:
- type = NBD_CMD_FLUSH;
- break;
- case REQ_OP_WRITE:
- type = NBD_CMD_WRITE;
- break;
- case REQ_OP_READ:
- type = NBD_CMD_READ;
- break;
- default:
+ type = req_to_nbd_cmd_type(req);
+ if (type == U32_MAX)
return -EIO;
- }
if (rq_data_dir(req) == WRITE &&
(config->flags & NBD_FLAG_READ_ONLY)) {
@@ -514,6 +565,10 @@
if (sent) {
if (sent >= sizeof(request)) {
skip = sent - sizeof(request);
+
+ /* initialize handle for tracing purposes */
+ handle = nbd_cmd_handle(cmd);
+
goto send_pages;
}
iov_iter_advance(&from, sent);
@@ -522,6 +577,7 @@
}
cmd->index = index;
cmd->cookie = nsock->cookie;
+ cmd->retries = 0;
request.type = htonl(type | nbd_cmd_flags);
if (type != NBD_CMD_FLUSH) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
@@ -530,11 +586,14 @@
handle = nbd_cmd_handle(cmd);
memcpy(request.handle, &handle, sizeof(handle));
+ trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));
+
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
req, nbdcmd_to_ascii(type),
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
result = sock_xmit(nbd, index, 1, &from,
(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
+ trace_nbd_header_sent(req, handle);
if (result <= 0) {
if (was_interrupted(result)) {
/* If we havne't sent anything we can just return BUSY,
@@ -569,8 +628,7 @@
dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
req, bvec.bv_len);
- iov_iter_bvec(&from, ITER_BVEC | WRITE,
- &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
if (skip) {
if (skip >= iov_iter_count(&from)) {
skip -= iov_iter_count(&from);
@@ -608,6 +666,7 @@
bio = next;
}
out:
+ trace_nbd_payload_sent(req, handle);
nsock->pending = NULL;
nsock->sent = 0;
return 0;
@@ -629,7 +688,7 @@
int ret = 0;
reply.magic = 0;
- iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
+ iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result <= 0) {
if (!nbd_disconnected(config))
@@ -655,6 +714,7 @@
tag, req);
return ERR_PTR(-ENOENT);
}
+ trace_nbd_header_received(req, handle);
cmd = blk_mq_rq_to_pdu(req);
mutex_lock(&cmd->lock);
@@ -689,21 +749,18 @@
struct bio_vec bvec;
rq_for_each_segment(bvec, req, iter) {
- iov_iter_bvec(&to, ITER_BVEC | READ,
- &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
result);
/*
- * If we've disconnected or we only have 1
- * connection then we need to make sure we
+ * If we've disconnected, we need to make sure we
* complete this request, otherwise error out
* and let the timeout stuff handle resubmitting
* this request onto another connection.
*/
- if (nbd_disconnected(config) ||
- config->num_connections <= 1) {
+ if (nbd_disconnected(config)) {
cmd->status = BLK_STS_IOERR;
goto out;
}
@@ -715,6 +772,7 @@
}
}
out:
+ trace_nbd_payload_received(req, handle);
mutex_unlock(&cmd->lock);
return ret ? ERR_PTR(ret) : cmd;
}
@@ -727,6 +785,7 @@
struct nbd_device *nbd = args->nbd;
struct nbd_config *config = nbd->config;
struct nbd_cmd *cmd;
+ struct request *rq;
while (1) {
cmd = nbd_read_stat(nbd, args->index);
@@ -739,7 +798,9 @@
break;
}
- blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
+ rq = blk_mq_rq_from_pdu(cmd);
+ if (likely(!blk_should_fake_timeout(rq->q)))
+ blk_mq_complete_request(rq);
}
nbd_config_put(nbd);
atomic_dec(&config->recv_threads);
@@ -747,15 +808,20 @@
kfree(args);
}
-static void nbd_clear_req(struct request *req, void *data, bool reserved)
+static bool nbd_clear_req(struct request *req, void *data, bool reserved)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+
+ /* don't abort one completed request */
+ if (blk_mq_request_completed(req))
+ return true;
mutex_lock(&cmd->lock);
cmd->status = BLK_STS_IOERR;
mutex_unlock(&cmd->lock);
blk_mq_complete_request(req);
+ return true;
}
static void nbd_clear_que(struct nbd_device *nbd)
@@ -773,12 +839,12 @@
struct nbd_sock *nsock = config->socks[index];
int fallback = nsock->fallback_index;
- if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+ if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
return new_index;
if (config->num_connections <= 1) {
dev_err_ratelimited(disk_to_dev(nbd->disk),
- "Attempted send on invalid socket\n");
+ "Dead connection, failed to find a fallback\n");
return new_index;
}
@@ -814,11 +880,15 @@
struct nbd_config *config = nbd->config;
if (!config->dead_conn_timeout)
return 0;
- if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
+
+ if (!wait_event_timeout(config->conn_wait,
+ test_bit(NBD_RT_DISCONNECTED,
+ &config->runtime_flags) ||
+ atomic_read(&config->live_connections) > 0,
+ config->dead_conn_timeout))
return 0;
- return wait_event_timeout(config->conn_wait,
- atomic_read(&config->live_connections) > 0,
- config->dead_conn_timeout) > 0;
+
+ return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
}
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@@ -973,12 +1043,12 @@
blk_mq_freeze_queue(nbd->disk->queue);
if (!netlink && !nbd->task_setup &&
- !test_bit(NBD_BOUND, &config->runtime_flags))
+ !test_bit(NBD_RT_BOUND, &config->runtime_flags))
nbd->task_setup = current;
if (!netlink &&
(nbd->task_setup != current ||
- test_bit(NBD_BOUND, &config->runtime_flags))) {
+ test_bit(NBD_RT_BOUND, &config->runtime_flags))) {
dev_err(disk_to_dev(nbd->disk),
"Device being setup by another task");
err = -EBUSY;
@@ -1065,7 +1135,7 @@
mutex_unlock(&nsock->tx_lock);
sockfd_put(old);
- clear_bit(NBD_DISCONNECTED, &config->runtime_flags);
+ clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
/* We take the tx_mutex in an error path in the recv_work, so we
* need to queue_work outside of the tx_mutex.
@@ -1085,7 +1155,7 @@
{
if (bdev->bd_openers > 1)
return;
- bd_set_size(bdev, 0);
+ bd_set_nr_sectors(bdev, 0);
}
static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1121,7 +1191,7 @@
for (i = 0; i < config->num_connections; i++) {
struct nbd_sock *nsock = config->socks[i];
- iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+ iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
mutex_lock(&nsock->tx_lock);
ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
if (ret <= 0)
@@ -1136,7 +1206,8 @@
struct nbd_config *config = nbd->config;
dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
- set_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags);
+ set_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags);
+ set_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags);
send_disconnects(nbd);
return 0;
}
@@ -1155,7 +1226,7 @@
struct nbd_config *config = nbd->config;
nbd_dev_dbg_close(nbd);
nbd_size_clear(nbd);
- if (test_and_clear_bit(NBD_HAS_PID_FILE,
+ if (test_and_clear_bit(NBD_RT_HAS_PID_FILE,
&config->runtime_flags))
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
nbd->task_recv = NULL;
@@ -1221,7 +1292,7 @@
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
return error;
}
- set_bit(NBD_HAS_PID_FILE, &config->runtime_flags);
+ set_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags);
nbd_dev_dbg_init(nbd);
for (i = 0; i < num_connections; i++) {
@@ -1267,20 +1338,22 @@
return ret;
if (max_part)
- bdev->bd_invalidated = 1;
+ set_bit(GD_NEED_PART_SCAN, &nbd->disk->state);
mutex_unlock(&nbd->config_lock);
ret = wait_event_interruptible(config->recv_wq,
atomic_read(&config->recv_threads) == 0);
- if (ret)
+ if (ret) {
sock_shutdown(nbd);
- flush_workqueue(nbd->recv_workq);
+ nbd_clear_que(nbd);
+ }
+ flush_workqueue(nbd->recv_workq);
mutex_lock(&nbd->config_lock);
nbd_bdev_reset(bdev);
/* user requested, ignore socket errors */
- if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
+ if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
ret = 0;
- if (test_bit(NBD_TIMEDOUT, &config->runtime_flags))
+ if (test_bit(NBD_RT_TIMEDOUT, &config->runtime_flags))
ret = -ETIMEDOUT;
return ret;
}
@@ -1288,10 +1361,10 @@
static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
struct block_device *bdev)
{
- sock_shutdown(nbd);
+ nbd_clear_sock(nbd);
__invalidate_device(bdev, true);
nbd_bdev_reset(bdev);
- if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+ if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
}
@@ -1302,6 +1375,15 @@
blksize > PAGE_SIZE)
return false;
return true;
+}
+
+static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout)
+{
+ nbd->tag_set.timeout = timeout * HZ;
+ if (timeout)
+ blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
+ else
+ blk_queue_rq_timeout(nbd->disk->queue, 30 * HZ);
}
/* Must be called with config_lock held */
@@ -1334,10 +1416,7 @@
nbd_size_set(nbd, config->blksize, arg);
return 0;
case NBD_SET_TIMEOUT:
- if (arg) {
- nbd->tag_set.timeout = arg * HZ;
- blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
- }
+ nbd_set_cmd_timeout(nbd, arg);
return 0;
case NBD_SET_FLAGS:
@@ -1382,7 +1461,7 @@
/* Don't allow ioctl operations on a nbd device that was created with
* netlink, unless it's DISCONNECT or CLEAR_SOCK, which are fine.
*/
- if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+ if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
(cmd == NBD_DISCONNECT || cmd == NBD_CLEAR_SOCK))
error = __nbd_ioctl(bdev, nbd, cmd, arg);
else
@@ -1395,15 +1474,20 @@
{
struct nbd_config *config;
+ if (!try_module_get(THIS_MODULE))
+ return ERR_PTR(-ENODEV);
+
config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
- if (!config)
- return NULL;
+ if (!config) {
+ module_put(THIS_MODULE);
+ return ERR_PTR(-ENOMEM);
+ }
+
atomic_set(&config->recv_threads, 0);
init_waitqueue_head(&config->recv_wq);
init_waitqueue_head(&config->conn_wait);
config->blksize = NBD_DEF_BLKSIZE;
atomic_set(&config->live_connections, 0);
- try_module_get(THIS_MODULE);
return config;
}
@@ -1430,18 +1514,19 @@
mutex_unlock(&nbd->config_lock);
goto out;
}
- config = nbd->config = nbd_alloc_config();
- if (!config) {
- ret = -ENOMEM;
+ config = nbd_alloc_config();
+ if (IS_ERR(config)) {
+ ret = PTR_ERR(config);
mutex_unlock(&nbd->config_lock);
goto out;
}
+ nbd->config = config;
refcount_set(&nbd->config_refs, 1);
refcount_inc(&nbd->refs);
mutex_unlock(&nbd->config_lock);
- bdev->bd_invalidated = 1;
+ set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
} else if (nbd_disconnected(nbd->config)) {
- bdev->bd_invalidated = 1;
+ set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
}
out:
mutex_unlock(&nbd_index_mutex);
@@ -1453,7 +1538,7 @@
struct nbd_device *nbd = disk->private_data;
struct block_device *bdev = bdget_disk(disk, 0);
- if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+ if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
bdev->bd_openers == 0)
nbd_disconnect_and_put(nbd);
bdput(bdev);
@@ -1653,8 +1738,9 @@
nbd->tag_set.numa_node = NUMA_NO_NODE;
nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
- BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
+ BLK_MQ_F_BLOCKING;
nbd->tag_set.driver_data = nbd;
+ nbd->destroy_complete = NULL;
err = blk_mq_alloc_tag_set(&nbd->tag_set);
if (err)
@@ -1743,8 +1829,33 @@
[NBD_DEVICE_CONNECTED] = { .type = NLA_U8 },
};
+static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
+{
+ struct nbd_config *config = nbd->config;
+ u64 bsize = config->blksize;
+ u64 bytes = config->bytesize;
+
+ if (info->attrs[NBD_ATTR_SIZE_BYTES])
+ bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
+
+ if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
+ bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
+ if (!bsize)
+ bsize = NBD_DEF_BLKSIZE;
+ if (!nbd_is_valid_blksize(bsize)) {
+ printk(KERN_ERR "Invalid block size %llu\n", bsize);
+ return -EINVAL;
+ }
+ }
+
+ if (bytes != config->bytesize || bsize != config->blksize)
+ nbd_size_set(nbd, bsize, div64_u64(bytes, bsize));
+ return 0;
+}
+
static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
{
+ DECLARE_COMPLETION_ONSTACK(destroy_complete);
struct nbd_device *nbd = NULL;
struct nbd_config *config;
int index = -1;
@@ -1796,6 +1907,17 @@
mutex_unlock(&nbd_index_mutex);
return -EINVAL;
}
+
+ if (test_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags) &&
+ test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) {
+ nbd->destroy_complete = &destroy_complete;
+ mutex_unlock(&nbd_index_mutex);
+
+ /* Wait until the nbd stuff is totally destroyed */
+ wait_for_completion(&destroy_complete);
+ goto again;
+ }
+
if (!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
if (index == -1)
@@ -1820,37 +1942,24 @@
nbd_put(nbd);
return -EINVAL;
}
- config = nbd->config = nbd_alloc_config();
- if (!nbd->config) {
+ config = nbd_alloc_config();
+ if (IS_ERR(config)) {
mutex_unlock(&nbd->config_lock);
nbd_put(nbd);
printk(KERN_ERR "nbd: couldn't allocate config\n");
- return -ENOMEM;
+ return PTR_ERR(config);
}
+ nbd->config = config;
refcount_set(&nbd->config_refs, 1);
- set_bit(NBD_BOUND, &config->runtime_flags);
+ set_bit(NBD_RT_BOUND, &config->runtime_flags);
- if (info->attrs[NBD_ATTR_SIZE_BYTES]) {
- u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
- nbd_size_set(nbd, config->blksize,
- div64_u64(bytes, config->blksize));
- }
- if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
- u64 bsize =
- nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
- if (!bsize)
- bsize = NBD_DEF_BLKSIZE;
- if (!nbd_is_valid_blksize(bsize)) {
- ret = -EINVAL;
- goto out;
- }
- nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize));
- }
- if (info->attrs[NBD_ATTR_TIMEOUT]) {
- u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
- nbd->tag_set.timeout = timeout * HZ;
- blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
- }
+ ret = nbd_genl_size_set(info, nbd);
+ if (ret)
+ goto out;
+
+ if (info->attrs[NBD_ATTR_TIMEOUT])
+ nbd_set_cmd_timeout(nbd,
+ nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
config->dead_conn_timeout =
nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
@@ -1862,12 +1971,24 @@
if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
- set_bit(NBD_DESTROY_ON_DISCONNECT,
- &config->runtime_flags);
- put_dev = true;
+ /*
+ * We have 1 ref to keep the device around, and then 1
+ * ref for our current operation here, which will be
+ * inherited by the config. If we already have
+ * DESTROY_ON_DISCONNECT set then we know we don't have
+ * that extra ref already held so we don't need the
+ * put_dev.
+ */
+ if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+ &nbd->flags))
+ put_dev = true;
+ } else {
+ if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+ &nbd->flags))
+ refcount_inc(&nbd->refs);
}
if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
- set_bit(NBD_DISCONNECT_ON_CLOSE,
+ set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
}
}
@@ -1885,8 +2006,10 @@
ret = -EINVAL;
goto out;
}
- ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
- nbd_sock_policy, info->extack);
+ ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
+ attr,
+ nbd_sock_policy,
+ info->extack);
if (ret != 0) {
printk(KERN_ERR "nbd: error processing sock list\n");
ret = -EINVAL;
@@ -1904,7 +2027,7 @@
out:
mutex_unlock(&nbd->config_lock);
if (!ret) {
- set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags);
+ set_bit(NBD_RT_HAS_CONFIG_REF, &config->runtime_flags);
refcount_inc(&nbd->config_refs);
nbd_connect_reply(info, nbd->index);
}
@@ -1918,15 +2041,21 @@
{
mutex_lock(&nbd->config_lock);
nbd_disconnect(nbd);
- nbd_clear_sock(nbd);
- mutex_unlock(&nbd->config_lock);
+ sock_shutdown(nbd);
+ wake_up(&nbd->config->conn_wait);
/*
* Make sure recv thread has finished, so it does not drop the last
* config ref and try to destroy the workqueue from inside the work
- * queue.
+ * queue. This also ensures that we can safely call nbd_clear_que()
+ * to cancel the inflight I/Os.
*/
- flush_workqueue(nbd->recv_workq);
- if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+ if (nbd->recv_workq)
+ flush_workqueue(nbd->recv_workq);
+ nbd_clear_que(nbd);
+ nbd->task_setup = NULL;
+ mutex_unlock(&nbd->config_lock);
+
+ if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
}
@@ -2010,7 +2139,7 @@
mutex_lock(&nbd->config_lock);
config = nbd->config;
- if (!test_bit(NBD_BOUND, &config->runtime_flags) ||
+ if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
!nbd->task_recv) {
dev_err(nbd_to_dev(nbd),
"not configured, cannot reconfigure\n");
@@ -2018,11 +2147,13 @@
goto out;
}
- if (info->attrs[NBD_ATTR_TIMEOUT]) {
- u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
- nbd->tag_set.timeout = timeout * HZ;
- blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ);
- }
+ ret = nbd_genl_size_set(info, nbd);
+ if (ret)
+ goto out;
+
+ if (info->attrs[NBD_ATTR_TIMEOUT])
+ nbd_set_cmd_timeout(nbd,
+ nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]));
if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) {
config->dead_conn_timeout =
nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]);
@@ -2032,19 +2163,19 @@
u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
- &config->runtime_flags))
+ &nbd->flags))
put_dev = true;
} else {
if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
- &config->runtime_flags))
+ &nbd->flags))
refcount_inc(&nbd->refs);
}
if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
- set_bit(NBD_DISCONNECT_ON_CLOSE,
+ set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
} else {
- clear_bit(NBD_DISCONNECT_ON_CLOSE,
+ clear_bit(NBD_RT_DISCONNECT_ON_CLOSE,
&config->runtime_flags);
}
}
@@ -2062,8 +2193,10 @@
ret = -EINVAL;
goto out;
}
- ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr,
- nbd_sock_policy, info->extack);
+ ret = nla_parse_nested_deprecated(socks, NBD_SOCK_MAX,
+ attr,
+ nbd_sock_policy,
+ info->extack);
if (ret != 0) {
printk(KERN_ERR "nbd: error processing sock list\n");
ret = -EINVAL;
@@ -2090,25 +2223,25 @@
return ret;
}
-static const struct genl_ops nbd_connect_genl_ops[] = {
+static const struct genl_small_ops nbd_connect_genl_ops[] = {
{
.cmd = NBD_CMD_CONNECT,
- .policy = nbd_attr_policy,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nbd_genl_connect,
},
{
.cmd = NBD_CMD_DISCONNECT,
- .policy = nbd_attr_policy,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nbd_genl_disconnect,
},
{
.cmd = NBD_CMD_RECONFIGURE,
- .policy = nbd_attr_policy,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nbd_genl_reconfigure,
},
{
.cmd = NBD_CMD_STATUS,
- .policy = nbd_attr_policy,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nbd_genl_status,
},
};
@@ -2122,9 +2255,10 @@
.name = NBD_GENL_FAMILY_NAME,
.version = NBD_GENL_VERSION,
.module = THIS_MODULE,
- .ops = nbd_connect_genl_ops,
- .n_ops = ARRAY_SIZE(nbd_connect_genl_ops),
+ .small_ops = nbd_connect_genl_ops,
+ .n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops),
.maxattr = NBD_ATTR_MAX,
+ .policy = nbd_attr_policy,
.mcgrps = nbd_mcast_grps,
.n_mcgrps = ARRAY_SIZE(nbd_mcast_grps),
};
@@ -2144,7 +2278,7 @@
*/
if (refcount_read(&nbd->config_refs))
connected = 1;
- dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM);
+ dev_opt = nla_nest_start_noflag(reply, NBD_DEVICE_ITEM);
if (!dev_opt)
return -EMSGSIZE;
ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index);
@@ -2192,7 +2326,7 @@
goto out;
}
- dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST);
+ dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
if (index == -1) {
ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
if (ret) {
@@ -2212,8 +2346,7 @@
}
nla_nest_end(reply, dev_list);
genlmsg_end(reply, reply_head);
- genlmsg_reply(reply, info);
- ret = 0;
+ ret = genlmsg_reply(reply, info);
out:
mutex_unlock(&nbd_index_mutex);
return ret;
@@ -2337,6 +2470,12 @@
struct nbd_device *nbd;
LIST_HEAD(del_list);
+ /*
+ * Unregister netlink interface prior to waiting
+ * for the completion of netlink commands.
+ */
+ genl_unregister_family(&nbd_genl_family);
+
nbd_dbg_close();
mutex_lock(&nbd_index_mutex);
@@ -2346,13 +2485,15 @@
while (!list_empty(&del_list)) {
nbd = list_first_entry(&del_list, struct nbd_device, list);
list_del_init(&nbd->list);
+ if (refcount_read(&nbd->config_refs))
+ printk(KERN_ERR "nbd: possibly leaking nbd_config (ref %d)\n",
+ refcount_read(&nbd->config_refs));
if (refcount_read(&nbd->refs) != 1)
printk(KERN_ERR "nbd: possibly leaking a device\n");
nbd_put(nbd);
}
idr_destroy(&nbd_index_idr);
- genl_unregister_family(&nbd_genl_family);
unregister_blkdev(NBD_MAJOR, "nbd");
}
--
Gitblit v1.6.2