2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/drivers/nvme/host/fc.c
....@@ -1,18 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0
12 /*
23 * Copyright (c) 2016 Avago Technologies. All rights reserved.
3
- *
4
- * This program is free software; you can redistribute it and/or modify
5
- * it under the terms of version 2 of the GNU General Public License as
6
- * published by the Free Software Foundation.
7
- *
8
- * This program is distributed in the hope that it will be useful.
9
- * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
10
- * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
11
- * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
12
- * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
13
- * See the GNU General Public License for more details, a copy of which
14
- * can be found in the file COPYING included with this package
15
- *
164 */
175 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
186 #include <linux/module.h>
....@@ -20,12 +8,14 @@
208 #include <uapi/scsi/fc/fc_fs.h>
219 #include <uapi/scsi/fc/fc_els.h>
2210 #include <linux/delay.h>
11
+#include <linux/overflow.h>
2312
2413 #include "nvme.h"
2514 #include "fabrics.h"
2615 #include <linux/nvme-fc-driver.h>
2716 #include <linux/nvme-fc.h>
28
-
17
+#include "fc.h"
18
+#include <scsi/scsi_transport_fc.h>
2919
3020 /* *************************** Data Structures/Defines ****************** */
3121
....@@ -36,6 +26,10 @@
3626 };
3727
3828 #define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */
29
+#define NVME_FC_DEFAULT_RECONNECT_TMO 2 /* delay between reconnects
30
+ * when connected and a
31
+ * connection failure.
32
+ */
3933
4034 struct nvme_fc_queue {
4135 struct nvme_fc_ctrl *ctrl;
....@@ -72,6 +66,17 @@
7266 bool req_queued;
7367 };
7468
69
+struct nvmefc_ls_rcv_op {
70
+ struct nvme_fc_rport *rport;
71
+ struct nvmefc_ls_rsp *lsrsp;
72
+ union nvmefc_ls_requests *rqstbuf;
73
+ union nvmefc_ls_responses *rspbuf;
74
+ u16 rqstdatalen;
75
+ bool handled;
76
+ dma_addr_t rspdma;
77
+ struct list_head lsrcv_list; /* rport->ls_rcv_list */
78
+} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
79
+
7580 enum nvme_fcpop_state {
7681 FCPOP_STATE_UNINIT = 0,
7782 FCPOP_STATE_IDLE = 1,
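For reference, the LS send and receive paths added later in this patch allocate the op header and its request/response buffers (plus any LLDD private area) out of one kzalloc and carve pointers from the tail, which is why the header structs above carry __aligned(sizeof(u64)). A minimal sketch of that carving, with illustrative names and sizes (not part of the patch):

struct ls_op_hdr {
	void *rqstbuf;
	void *rspbuf;
	void *private;
} __aligned(sizeof(u64));	/* keeps the carved buffers 64-bit aligned */

static struct ls_op_hdr *ls_op_alloc(size_t rqst_sz, size_t rsp_sz,
				     size_t priv_sz)
{
	struct ls_op_hdr *hdr;

	hdr = kzalloc(sizeof(*hdr) + rqst_sz + rsp_sz + priv_sz, GFP_KERNEL);
	if (!hdr)
		return NULL;

	hdr->rqstbuf = &hdr[1];				/* request buffer follows the header */
	hdr->rspbuf = hdr->rqstbuf + rqst_sz;		/* response follows the request */
	hdr->private = priv_sz ? hdr->rspbuf + rsp_sz : NULL;	/* optional LLDD area */
	return hdr;
}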
....@@ -104,6 +109,12 @@
104109 struct nvme_fc_ersp_iu rsp_iu;
105110 };
106111
112
+struct nvme_fcp_op_w_sgl {
113
+ struct nvme_fc_fcp_op op;
114
+ struct scatterlist sgl[NVME_INLINE_SG_CNT];
115
+ uint8_t priv[];
116
+};
117
+
107118 struct nvme_fc_lport {
108119 struct nvme_fc_local_port localport;
109120
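Later in this patch the per-request cmd_size for this struct is computed with struct_size() from <linux/overflow.h> (included above). A sketch of what that evaluates to for the flexible priv[] array, using an illustrative helper:

#include <linux/overflow.h>

/* struct_size(p, member, n) == sizeof(*p) + n * sizeof(*p->member),
 * saturating to SIZE_MAX if the arithmetic would overflow. */
static size_t fcp_op_cmd_size(size_t fcprqst_priv_sz)
{
	return struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
			   fcprqst_priv_sz);
}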
....@@ -122,17 +133,21 @@
122133 struct list_head endp_list; /* for lport->endp_list */
123134 struct list_head ctrl_list;
124135 struct list_head ls_req_list;
136
+ struct list_head ls_rcv_list;
137
+ struct list_head disc_list;
125138 struct device *dev; /* physical device for dma */
126139 struct nvme_fc_lport *lport;
127140 spinlock_t lock;
128141 struct kref ref;
129142 atomic_t act_ctrl_cnt;
130143 unsigned long dev_loss_end;
144
+ struct work_struct lsrcv_work;
131145 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
132146
133
-enum nvme_fcctrl_flags {
134
- FCCTRL_TERMIO = (1 << 0),
135
-};
147
+/* fc_ctrl flags values - specified as bit positions */
148
+#define ASSOC_ACTIVE 0
149
+#define ASSOC_FAILED 1
150
+#define FCCTRL_TERMIO 2
136151
137152 struct nvme_fc_ctrl {
138153 spinlock_t lock;
....@@ -143,20 +158,19 @@
143158 u32 cnum;
144159
145160 bool ioq_live;
146
- bool assoc_active;
147
- atomic_t err_work_active;
148161 u64 association_id;
162
+ struct nvmefc_ls_rcv_op *rcv_disconn;
149163
150164 struct list_head ctrl_list; /* rport->ctrl_list */
151165
152166 struct blk_mq_tag_set admin_tag_set;
153167 struct blk_mq_tag_set tag_set;
154168
169
+ struct work_struct ioerr_work;
155170 struct delayed_work connect_work;
156
- struct work_struct err_work;
157171
158172 struct kref ref;
159
- u32 flags;
173
+ unsigned long flags;
160174 u32 iocnt;
161175 wait_queue_head_t ioabort_wait;
162176
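The flags word above becomes an unsigned long so it can be driven with the atomic bitops used throughout the rest of the patch; a sketch of the idiom (bit values are the defines added above, the function itself is illustrative):

#include <linux/bitops.h>

static void flags_idiom(unsigned long *flags)
{
	if (test_and_set_bit(ASSOC_ACTIVE, flags))
		return;				/* already active: bail out atomically */

	set_bit(FCCTRL_TERMIO, flags);		/* atomic read-modify-write */
	if (test_bit(ASSOC_FAILED, flags))	/* lockless test */
		clear_bit(FCCTRL_TERMIO, flags);
}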
....@@ -208,18 +222,24 @@
208222
209223 static struct workqueue_struct *nvme_fc_wq;
210224
225
+static bool nvme_fc_waiting_to_unload;
226
+static DECLARE_COMPLETION(nvme_fc_unload_proceed);
227
+
211228 /*
212229 * These items are short-term. They will eventually be moved into
213230 * a generic FC class. See comments in module init.
214231 */
215
-static struct class *fc_class;
216232 static struct device *fc_udev_device;
217233
234
+static void nvme_fc_complete_rq(struct request *rq);
218235
219236 /* *********************** FC-NVME Port Management ************************ */
220237
221238 static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
222239 struct nvme_fc_queue *, unsigned int);
240
+
241
+static void nvme_fc_handle_ls_rqst_work(struct work_struct *work);
242
+
223243
224244 static void
225245 nvme_fc_free_lport(struct kref *ref)
....@@ -234,6 +254,8 @@
234254 /* remove from transport list */
235255 spin_lock_irqsave(&nvme_fc_lock, flags);
236256 list_del(&lport->port_list);
257
+ if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
258
+ complete(&nvme_fc_unload_proceed);
237259 spin_unlock_irqrestore(&nvme_fc_lock, flags);
238260
239261 ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
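The complete() above pairs with the module unload path, which, in roughly the form sketched below (assumed shape, illustrative helper name), flags that it is waiting and then blocks until the last local port is freed:

#include <linux/completion.h>
#include <linux/spinlock.h>

static void nvme_fc_wait_for_lports(void)
{
	unsigned long flags;
	bool need_wait = false;

	spin_lock_irqsave(&nvme_fc_lock, flags);
	nvme_fc_waiting_to_unload = true;
	if (!list_empty(&nvme_fc_lport_list))
		need_wait = true;
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	if (need_wait)
		wait_for_completion(&nvme_fc_unload_proceed);
}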
....@@ -319,7 +341,7 @@
319341 * @template: LLDD entrypoints and operational parameters for the port
320342 * @dev: physical hardware device node port corresponds to. Will be
321343 * used for DMA mappings
322
- * @lport_p: pointer to a local port pointer. Upon success, the routine
344
+ * @portptr: pointer to a local port pointer. Upon success, the routine
323345 * will allocate a nvme_fc_local_port structure and place its
324346 * address in the local port pointer. Upon failure, local port
325347 * pointer will be set to 0.
....@@ -394,7 +416,10 @@
394416 newrec->ops = template;
395417 newrec->dev = dev;
396418 ida_init(&newrec->endp_cnt);
397
- newrec->localport.private = &newrec[1];
419
+ if (template->local_priv_sz)
420
+ newrec->localport.private = &newrec[1];
421
+ else
422
+ newrec->localport.private = NULL;
398423 newrec->localport.node_name = pinfo->node_name;
399424 newrec->localport.port_name = pinfo->port_name;
400425 newrec->localport.port_role = pinfo->port_role;
....@@ -427,8 +452,7 @@
427452 * nvme_fc_unregister_localport - transport entry point called by an
428453 * LLDD to deregister/remove a previously
429454 * registered a NVME host FC port.
430
- * @localport: pointer to the (registered) local port that is to be
431
- * deregistered.
455
+ * @portptr: pointer to the (registered) local port that is to be deregistered.
432456 *
433457 * Returns:
434458 * a completion status. Must be 0 upon success; a negative errno
....@@ -509,6 +533,7 @@
509533 list_del(&rport->endp_list);
510534 spin_unlock_irqrestore(&nvme_fc_lock, flags);
511535
536
+ WARN_ON(!list_empty(&rport->disc_list));
512537 ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
513538
514539 kfree(rport);
....@@ -633,7 +658,7 @@
633658 * @localport: pointer to the (registered) local port that the remote
634659 * subsystem port is connected to.
635660 * @pinfo: pointer to information about the port to be registered
636
- * @rport_p: pointer to a remote port pointer. Upon success, the routine
661
+ * @portptr: pointer to a remote port pointer. Upon success, the routine
637662 * will allocate a nvme_fc_remote_port structure and place its
638663 * address in the remote port pointer. Upon failure, remote port
639664 * pointer will be set to 0.
....@@ -696,13 +721,18 @@
696721 INIT_LIST_HEAD(&newrec->endp_list);
697722 INIT_LIST_HEAD(&newrec->ctrl_list);
698723 INIT_LIST_HEAD(&newrec->ls_req_list);
724
+ INIT_LIST_HEAD(&newrec->disc_list);
699725 kref_init(&newrec->ref);
700726 atomic_set(&newrec->act_ctrl_cnt, 0);
701727 spin_lock_init(&newrec->lock);
702728 newrec->remoteport.localport = &lport->localport;
729
+ INIT_LIST_HEAD(&newrec->ls_rcv_list);
703730 newrec->dev = lport->dev;
704731 newrec->lport = lport;
705
- newrec->remoteport.private = &newrec[1];
732
+ if (lport->ops->remote_priv_sz)
733
+ newrec->remoteport.private = &newrec[1];
734
+ else
735
+ newrec->remoteport.private = NULL;
706736 newrec->remoteport.port_role = pinfo->port_role;
707737 newrec->remoteport.node_name = pinfo->node_name;
708738 newrec->remoteport.port_name = pinfo->port_name;
....@@ -710,6 +740,7 @@
710740 newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
711741 newrec->remoteport.port_num = idx;
712742 __nvme_fc_set_dev_loss_tmo(newrec, pinfo);
743
+ INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work);
713744
714745 spin_lock_irqsave(&nvme_fc_lock, flags);
715746 list_add_tail(&newrec->endp_list, &lport->endp_list);
....@@ -799,6 +830,7 @@
799830 break;
800831
801832 case NVME_CTRL_DELETING:
833
+ case NVME_CTRL_DELETING_NOIO:
802834 default:
803835 /* no action to take - let it delete */
804836 break;
....@@ -809,8 +841,8 @@
809841 * nvme_fc_unregister_remoteport - transport entry point called by an
810842 * LLDD to deregister/remove a previously
811843 * registered a NVME subsystem FC port.
812
- * @remoteport: pointer to the (registered) remote port that is to be
813
- * deregistered.
844
+ * @portptr: pointer to the (registered) remote port that is to be
845
+ * deregistered.
814846 *
815847 * Returns:
816848 * a completion status. Must be 0 upon success; a negative errno
....@@ -999,6 +1031,7 @@
9991031 static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
10001032 static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
10011033
1034
+static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
10021035
10031036 static void
10041037 __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
....@@ -1139,41 +1172,6 @@
11391172 return __nvme_fc_send_ls_req(rport, lsop, done);
11401173 }
11411174
1142
-/* Validation Error indexes into the string table below */
1143
-enum {
1144
- VERR_NO_ERROR = 0,
1145
- VERR_LSACC = 1,
1146
- VERR_LSDESC_RQST = 2,
1147
- VERR_LSDESC_RQST_LEN = 3,
1148
- VERR_ASSOC_ID = 4,
1149
- VERR_ASSOC_ID_LEN = 5,
1150
- VERR_CONN_ID = 6,
1151
- VERR_CONN_ID_LEN = 7,
1152
- VERR_CR_ASSOC = 8,
1153
- VERR_CR_ASSOC_ACC_LEN = 9,
1154
- VERR_CR_CONN = 10,
1155
- VERR_CR_CONN_ACC_LEN = 11,
1156
- VERR_DISCONN = 12,
1157
- VERR_DISCONN_ACC_LEN = 13,
1158
-};
1159
-
1160
-static char *validation_errors[] = {
1161
- "OK",
1162
- "Not LS_ACC",
1163
- "Not LSDESC_RQST",
1164
- "Bad LSDESC_RQST Length",
1165
- "Not Association ID",
1166
- "Bad Association ID Length",
1167
- "Not Connection ID",
1168
- "Bad Connection ID Length",
1169
- "Not CR_ASSOC Rqst",
1170
- "Bad CR_ASSOC ACC Length",
1171
- "Not CR_CONN Rqst",
1172
- "Bad CR_CONN ACC Length",
1173
- "Not Disconnect Rqst",
1174
- "Bad Disconnect ACC Length",
1175
-};
1176
-
11771175 static int
11781176 nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
11791177 struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
....@@ -1182,21 +1180,27 @@
11821180 struct nvmefc_ls_req *lsreq;
11831181 struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
11841182 struct fcnvme_ls_cr_assoc_acc *assoc_acc;
1183
+ unsigned long flags;
11851184 int ret, fcret = 0;
11861185
11871186 lsop = kzalloc((sizeof(*lsop) +
1188
- ctrl->lport->ops->lsrqst_priv_sz +
1189
- sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL);
1187
+ sizeof(*assoc_rqst) + sizeof(*assoc_acc) +
1188
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
11901189 if (!lsop) {
1190
+ dev_info(ctrl->ctrl.device,
1191
+ "NVME-FC{%d}: send Create Association failed: ENOMEM\n",
1192
+ ctrl->cnum);
11911193 ret = -ENOMEM;
11921194 goto out_no_memory;
11931195 }
1194
- lsreq = &lsop->ls_req;
11951196
1196
- lsreq->private = (void *)&lsop[1];
1197
- assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)
1198
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
1197
+ assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1];
11991198 assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];
1199
+ lsreq = &lsop->ls_req;
1200
+ if (ctrl->lport->ops->lsrqst_priv_sz)
1201
+ lsreq->private = &assoc_acc[1];
1202
+ else
1203
+ lsreq->private = NULL;
12001204
12011205 assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
12021206 assoc_rqst->desc_list_len =
....@@ -1223,7 +1227,7 @@
12231227 lsreq->rqstlen = sizeof(*assoc_rqst);
12241228 lsreq->rspaddr = assoc_acc;
12251229 lsreq->rsplen = sizeof(*assoc_acc);
1226
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
1230
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
12271231
12281232 ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
12291233 if (ret)
....@@ -1263,14 +1267,16 @@
12631267 if (fcret) {
12641268 ret = -EBADF;
12651269 dev_err(ctrl->dev,
1266
- "q %d connect failed: %s\n",
1270
+ "q %d Create Association LS failed: %s\n",
12671271 queue->qnum, validation_errors[fcret]);
12681272 } else {
1273
+ spin_lock_irqsave(&ctrl->lock, flags);
12691274 ctrl->association_id =
12701275 be64_to_cpu(assoc_acc->associd.association_id);
12711276 queue->connection_id =
12721277 be64_to_cpu(assoc_acc->connectid.connection_id);
12731278 set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
1279
+ spin_unlock_irqrestore(&ctrl->lock, flags);
12741280 }
12751281
12761282 out_free_buffer:
....@@ -1294,18 +1300,23 @@
12941300 int ret, fcret = 0;
12951301
12961302 lsop = kzalloc((sizeof(*lsop) +
1297
- ctrl->lport->ops->lsrqst_priv_sz +
1298
- sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL);
1303
+ sizeof(*conn_rqst) + sizeof(*conn_acc) +
1304
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
12991305 if (!lsop) {
1306
+ dev_info(ctrl->ctrl.device,
1307
+ "NVME-FC{%d}: send Create Connection failed: ENOMEM\n",
1308
+ ctrl->cnum);
13001309 ret = -ENOMEM;
13011310 goto out_no_memory;
13021311 }
1303
- lsreq = &lsop->ls_req;
13041312
1305
- lsreq->private = (void *)&lsop[1];
1306
- conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)
1307
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
1313
+ conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1];
13081314 conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];
1315
+ lsreq = &lsop->ls_req;
1316
+ if (ctrl->lport->ops->lsrqst_priv_sz)
1317
+ lsreq->private = (void *)&conn_acc[1];
1318
+ else
1319
+ lsreq->private = NULL;
13091320
13101321 conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
13111322 conn_rqst->desc_list_len = cpu_to_be32(
....@@ -1331,7 +1342,7 @@
13311342 lsreq->rqstlen = sizeof(*conn_rqst);
13321343 lsreq->rspaddr = conn_acc;
13331344 lsreq->rsplen = sizeof(*conn_acc);
1334
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
1345
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
13351346
13361347 ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
13371348 if (ret)
....@@ -1362,7 +1373,7 @@
13621373 if (fcret) {
13631374 ret = -EBADF;
13641375 dev_err(ctrl->dev,
1365
- "q %d connect failed: %s\n",
1376
+ "q %d Create I/O Connection LS failed: %s\n",
13661377 queue->qnum, validation_errors[fcret]);
13671378 } else {
13681379 queue->connection_id =
....@@ -1375,7 +1386,7 @@
13751386 out_no_memory:
13761387 if (ret)
13771388 dev_err(ctrl->dev,
1378
- "queue %d connect command failed (%d).\n",
1389
+ "queue %d connect I/O queue failed (%d).\n",
13791390 queue->qnum, ret);
13801391 return ret;
13811392 }
....@@ -1387,7 +1398,7 @@
13871398
13881399 __nvme_fc_finish_ls_req(lsop);
13891400
1390
- /* fc-nvme iniator doesn't care about success or failure of cmd */
1401
+ /* fc-nvme initiator doesn't care about success or failure of cmd */
13911402
13921403 kfree(lsop);
13931404 }
....@@ -1412,66 +1423,392 @@
14121423 static void
14131424 nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
14141425 {
1415
- struct fcnvme_ls_disconnect_rqst *discon_rqst;
1416
- struct fcnvme_ls_disconnect_acc *discon_acc;
1426
+ struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
1427
+ struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
14171428 struct nvmefc_ls_req_op *lsop;
14181429 struct nvmefc_ls_req *lsreq;
14191430 int ret;
14201431
14211432 lsop = kzalloc((sizeof(*lsop) +
1422
- ctrl->lport->ops->lsrqst_priv_sz +
1423
- sizeof(*discon_rqst) + sizeof(*discon_acc)),
1424
- GFP_KERNEL);
1425
- if (!lsop)
1426
- /* couldn't sent it... too bad */
1433
+ sizeof(*discon_rqst) + sizeof(*discon_acc) +
1434
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
1435
+ if (!lsop) {
1436
+ dev_info(ctrl->ctrl.device,
1437
+ "NVME-FC{%d}: send Disconnect Association "
1438
+ "failed: ENOMEM\n",
1439
+ ctrl->cnum);
14271440 return;
1441
+ }
14281442
1443
+ discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1];
1444
+ discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
14291445 lsreq = &lsop->ls_req;
1446
+ if (ctrl->lport->ops->lsrqst_priv_sz)
1447
+ lsreq->private = (void *)&discon_acc[1];
1448
+ else
1449
+ lsreq->private = NULL;
14301450
1431
- lsreq->private = (void *)&lsop[1];
1432
- discon_rqst = (struct fcnvme_ls_disconnect_rqst *)
1433
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
1434
- discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1];
1435
-
1436
- discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
1437
- discon_rqst->desc_list_len = cpu_to_be32(
1438
- sizeof(struct fcnvme_lsdesc_assoc_id) +
1439
- sizeof(struct fcnvme_lsdesc_disconn_cmd));
1440
-
1441
- discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
1442
- discon_rqst->associd.desc_len =
1443
- fcnvme_lsdesc_len(
1444
- sizeof(struct fcnvme_lsdesc_assoc_id));
1445
-
1446
- discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
1447
-
1448
- discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
1449
- FCNVME_LSDESC_DISCONN_CMD);
1450
- discon_rqst->discon_cmd.desc_len =
1451
- fcnvme_lsdesc_len(
1452
- sizeof(struct fcnvme_lsdesc_disconn_cmd));
1453
- discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION;
1454
- discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id);
1455
-
1456
- lsreq->rqstaddr = discon_rqst;
1457
- lsreq->rqstlen = sizeof(*discon_rqst);
1458
- lsreq->rspaddr = discon_acc;
1459
- lsreq->rsplen = sizeof(*discon_acc);
1460
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
1451
+ nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
1452
+ ctrl->association_id);
14611453
14621454 ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
14631455 nvme_fc_disconnect_assoc_done);
14641456 if (ret)
14651457 kfree(lsop);
1466
-
1467
- /* only meaningful part to terminating the association */
1468
- ctrl->association_id = 0;
14691458 }
1459
+
1460
+static void
1461
+nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
1462
+{
1463
+ struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private;
1464
+ struct nvme_fc_rport *rport = lsop->rport;
1465
+ struct nvme_fc_lport *lport = rport->lport;
1466
+ unsigned long flags;
1467
+
1468
+ spin_lock_irqsave(&rport->lock, flags);
1469
+ list_del(&lsop->lsrcv_list);
1470
+ spin_unlock_irqrestore(&rport->lock, flags);
1471
+
1472
+ fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma,
1473
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
1474
+ fc_dma_unmap_single(lport->dev, lsop->rspdma,
1475
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
1476
+
1477
+ kfree(lsop);
1478
+
1479
+ nvme_fc_rport_put(rport);
1480
+}
1481
+
1482
+static void
1483
+nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop)
1484
+{
1485
+ struct nvme_fc_rport *rport = lsop->rport;
1486
+ struct nvme_fc_lport *lport = rport->lport;
1487
+ struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
1488
+ int ret;
1489
+
1490
+ fc_dma_sync_single_for_device(lport->dev, lsop->rspdma,
1491
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
1492
+
1493
+ ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport,
1494
+ lsop->lsrsp);
1495
+ if (ret) {
1496
+ dev_warn(lport->dev,
1497
+ "LLDD rejected LS RSP xmt: LS %d status %d\n",
1498
+ w0->ls_cmd, ret);
1499
+ nvme_fc_xmt_ls_rsp_done(lsop->lsrsp);
1500
+ return;
1501
+ }
1502
+}
1503
+
1504
+static struct nvme_fc_ctrl *
1505
+nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport,
1506
+ struct nvmefc_ls_rcv_op *lsop)
1507
+{
1508
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
1509
+ &lsop->rqstbuf->rq_dis_assoc;
1510
+ struct nvme_fc_ctrl *ctrl, *ret = NULL;
1511
+ struct nvmefc_ls_rcv_op *oldls = NULL;
1512
+ u64 association_id = be64_to_cpu(rqst->associd.association_id);
1513
+ unsigned long flags;
1514
+
1515
+ spin_lock_irqsave(&rport->lock, flags);
1516
+
1517
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
1518
+ if (!nvme_fc_ctrl_get(ctrl))
1519
+ continue;
1520
+ spin_lock(&ctrl->lock);
1521
+ if (association_id == ctrl->association_id) {
1522
+ oldls = ctrl->rcv_disconn;
1523
+ ctrl->rcv_disconn = lsop;
1524
+ ret = ctrl;
1525
+ }
1526
+ spin_unlock(&ctrl->lock);
1527
+ if (ret)
1528
+ /* leave the ctrl get reference */
1529
+ break;
1530
+ nvme_fc_ctrl_put(ctrl);
1531
+ }
1532
+
1533
+ spin_unlock_irqrestore(&rport->lock, flags);
1534
+
1535
+ /* transmit a response for anything that was pending */
1536
+ if (oldls) {
1537
+ dev_info(rport->lport->dev,
1538
+ "NVME-FC{%d}: Multiple Disconnect Association "
1539
+ "LS's received\n", ctrl->cnum);
1540
+ /* overwrite good response with bogus failure */
1541
+ oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf,
1542
+ sizeof(*oldls->rspbuf),
1543
+ rqst->w0.ls_cmd,
1544
+ FCNVME_RJT_RC_UNAB,
1545
+ FCNVME_RJT_EXP_NONE, 0);
1546
+ nvme_fc_xmt_ls_rsp(oldls);
1547
+ }
1548
+
1549
+ return ret;
1550
+}
1551
+
1552
+/*
1553
+ * returns true to mean LS handled and ls_rsp can be sent
1554
+ * returns false to defer ls_rsp xmt (will be done as part of
1555
+ * association termination)
1556
+ */
1557
+static bool
1558
+nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop)
1559
+{
1560
+ struct nvme_fc_rport *rport = lsop->rport;
1561
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
1562
+ &lsop->rqstbuf->rq_dis_assoc;
1563
+ struct fcnvme_ls_disconnect_assoc_acc *acc =
1564
+ &lsop->rspbuf->rsp_dis_assoc;
1565
+ struct nvme_fc_ctrl *ctrl = NULL;
1566
+ int ret = 0;
1567
+
1568
+ memset(acc, 0, sizeof(*acc));
1569
+
1570
+ ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst);
1571
+ if (!ret) {
1572
+ /* match an active association */
1573
+ ctrl = nvme_fc_match_disconn_ls(rport, lsop);
1574
+ if (!ctrl)
1575
+ ret = VERR_NO_ASSOC;
1576
+ }
1577
+
1578
+ if (ret) {
1579
+ dev_info(rport->lport->dev,
1580
+ "Disconnect LS failed: %s\n",
1581
+ validation_errors[ret]);
1582
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc,
1583
+ sizeof(*acc), rqst->w0.ls_cmd,
1584
+ (ret == VERR_NO_ASSOC) ?
1585
+ FCNVME_RJT_RC_INV_ASSOC :
1586
+ FCNVME_RJT_RC_LOGIC,
1587
+ FCNVME_RJT_EXP_NONE, 0);
1588
+ return true;
1589
+ }
1590
+
1591
+ /* format an ACCept response */
1592
+
1593
+ lsop->lsrsp->rsplen = sizeof(*acc);
1594
+
1595
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
1596
+ fcnvme_lsdesc_len(
1597
+ sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
1598
+ FCNVME_LS_DISCONNECT_ASSOC);
1599
+
1600
+ /*
1601
+ * the transmit of the response will occur after the exchanges
1602
+ * for the association have been ABTS'd by
1603
+ * nvme_fc_delete_association().
1604
+ */
1605
+
1606
+ /* fail the association */
1607
+ nvme_fc_error_recovery(ctrl, "Disconnect Association LS received");
1608
+
1609
+ /* release the reference taken by nvme_fc_match_disconn_ls() */
1610
+ nvme_fc_ctrl_put(ctrl);
1611
+
1612
+ return false;
1613
+}
1614
+
1615
+/*
1616
+ * Actual Processing routine for received FC-NVME LS Requests from the LLD
1617
+ * returns true if a response should be sent afterward, false if rsp will
1618
+ * be sent asynchronously.
1619
+ */
1620
+static bool
1621
+nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop)
1622
+{
1623
+ struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
1624
+ bool ret = true;
1625
+
1626
+ lsop->lsrsp->nvme_fc_private = lsop;
1627
+ lsop->lsrsp->rspbuf = lsop->rspbuf;
1628
+ lsop->lsrsp->rspdma = lsop->rspdma;
1629
+ lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done;
1630
+ /* Be preventative. handlers will later set to valid length */
1631
+ lsop->lsrsp->rsplen = 0;
1632
+
1633
+ /*
1634
+ * handlers:
1635
+ * parse request input, execute the request, and format the
1636
+ * LS response
1637
+ */
1638
+ switch (w0->ls_cmd) {
1639
+ case FCNVME_LS_DISCONNECT_ASSOC:
1640
+ ret = nvme_fc_ls_disconnect_assoc(lsop);
1641
+ break;
1642
+ case FCNVME_LS_DISCONNECT_CONN:
1643
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
1644
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
1645
+ FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0);
1646
+ break;
1647
+ case FCNVME_LS_CREATE_ASSOCIATION:
1648
+ case FCNVME_LS_CREATE_CONNECTION:
1649
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
1650
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
1651
+ FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0);
1652
+ break;
1653
+ default:
1654
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
1655
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
1656
+ FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
1657
+ break;
1658
+ }
1659
+
1660
+ return(ret);
1661
+}
1662
+
1663
+static void
1664
+nvme_fc_handle_ls_rqst_work(struct work_struct *work)
1665
+{
1666
+ struct nvme_fc_rport *rport =
1667
+ container_of(work, struct nvme_fc_rport, lsrcv_work);
1668
+ struct fcnvme_ls_rqst_w0 *w0;
1669
+ struct nvmefc_ls_rcv_op *lsop;
1670
+ unsigned long flags;
1671
+ bool sendrsp;
1672
+
1673
+restart:
1674
+ sendrsp = true;
1675
+ spin_lock_irqsave(&rport->lock, flags);
1676
+ list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) {
1677
+ if (lsop->handled)
1678
+ continue;
1679
+
1680
+ lsop->handled = true;
1681
+ if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
1682
+ spin_unlock_irqrestore(&rport->lock, flags);
1683
+ sendrsp = nvme_fc_handle_ls_rqst(lsop);
1684
+ } else {
1685
+ spin_unlock_irqrestore(&rport->lock, flags);
1686
+ w0 = &lsop->rqstbuf->w0;
1687
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(
1688
+ lsop->rspbuf,
1689
+ sizeof(*lsop->rspbuf),
1690
+ w0->ls_cmd,
1691
+ FCNVME_RJT_RC_UNAB,
1692
+ FCNVME_RJT_EXP_NONE, 0);
1693
+ }
1694
+ if (sendrsp)
1695
+ nvme_fc_xmt_ls_rsp(lsop);
1696
+ goto restart;
1697
+ }
1698
+ spin_unlock_irqrestore(&rport->lock, flags);
1699
+}
1700
+
1701
+/**
1702
+ * nvme_fc_rcv_ls_req - transport entry point called by an LLDD
1703
+ * upon the reception of a NVME LS request.
1704
+ *
1705
+ * The nvme-fc layer will copy payload to an internal structure for
1706
+ * processing. As such, upon completion of the routine, the LLDD may
1707
+ * immediately free/reuse the LS request buffer passed in the call.
1708
+ *
1709
+ * If this routine returns error, the LLDD should abort the exchange.
1710
+ *
1711
+ * @remoteport: pointer to the (registered) remote port that the LS
1712
+ * was received from. The remoteport is associated with
1713
+ * a specific localport.
1714
+ * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be
1715
+ * used to reference the exchange corresponding to the LS
1716
+ * when issuing an ls response.
1717
+ * @lsreqbuf: pointer to the buffer containing the LS Request
1718
+ * @lsreqbuf_len: length, in bytes, of the received LS request
1719
+ */
1720
+int
1721
+nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
1722
+ struct nvmefc_ls_rsp *lsrsp,
1723
+ void *lsreqbuf, u32 lsreqbuf_len)
1724
+{
1725
+ struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
1726
+ struct nvme_fc_lport *lport = rport->lport;
1727
+ struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf;
1728
+ struct nvmefc_ls_rcv_op *lsop;
1729
+ unsigned long flags;
1730
+ int ret;
1731
+
1732
+ nvme_fc_rport_get(rport);
1733
+
1734
+ /* validate there's a routine to transmit a response */
1735
+ if (!lport->ops->xmt_ls_rsp) {
1736
+ dev_info(lport->dev,
1737
+ "RCV %s LS failed: no LLDD xmt_ls_rsp\n",
1738
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1739
+ nvmefc_ls_names[w0->ls_cmd] : "");
1740
+ ret = -EINVAL;
1741
+ goto out_put;
1742
+ }
1743
+
1744
+ if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) {
1745
+ dev_info(lport->dev,
1746
+ "RCV %s LS failed: payload too large\n",
1747
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1748
+ nvmefc_ls_names[w0->ls_cmd] : "");
1749
+ ret = -E2BIG;
1750
+ goto out_put;
1751
+ }
1752
+
1753
+ lsop = kzalloc(sizeof(*lsop) +
1754
+ sizeof(union nvmefc_ls_requests) +
1755
+ sizeof(union nvmefc_ls_responses),
1756
+ GFP_KERNEL);
1757
+ if (!lsop) {
1758
+ dev_info(lport->dev,
1759
+ "RCV %s LS failed: No memory\n",
1760
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1761
+ nvmefc_ls_names[w0->ls_cmd] : "");
1762
+ ret = -ENOMEM;
1763
+ goto out_put;
1764
+ }
1765
+ lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1];
1766
+ lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1];
1767
+
1768
+ lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf,
1769
+ sizeof(*lsop->rspbuf),
1770
+ DMA_TO_DEVICE);
1771
+ if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) {
1772
+ dev_info(lport->dev,
1773
+ "RCV %s LS failed: DMA mapping failure\n",
1774
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1775
+ nvmefc_ls_names[w0->ls_cmd] : "");
1776
+ ret = -EFAULT;
1777
+ goto out_free;
1778
+ }
1779
+
1780
+ lsop->rport = rport;
1781
+ lsop->lsrsp = lsrsp;
1782
+
1783
+ memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len);
1784
+ lsop->rqstdatalen = lsreqbuf_len;
1785
+
1786
+ spin_lock_irqsave(&rport->lock, flags);
1787
+ if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) {
1788
+ spin_unlock_irqrestore(&rport->lock, flags);
1789
+ ret = -ENOTCONN;
1790
+ goto out_unmap;
1791
+ }
1792
+ list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list);
1793
+ spin_unlock_irqrestore(&rport->lock, flags);
1794
+
1795
+ schedule_work(&rport->lsrcv_work);
1796
+
1797
+ return 0;
1798
+
1799
+out_unmap:
1800
+ fc_dma_unmap_single(lport->dev, lsop->rspdma,
1801
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
1802
+out_free:
1803
+ kfree(lsop);
1804
+out_put:
1805
+ nvme_fc_rport_put(rport);
1806
+ return ret;
1807
+}
1808
+EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req);
14701809
14711810
14721811 /* *********************** NVME Ctrl Routines **************************** */
1473
-
1474
-static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
14751812
14761813 static void
14771814 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
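The list walk in nvme_fc_handle_ls_rqst_work() above follows a common claim-then-drop-the-lock pattern; a stripped-down sketch of the idiom (fields reuse the nvmefc_ls_rcv_op members defined earlier, the handler call is a placeholder):

static void drain_ls_rcv_list(spinlock_t *lock, struct list_head *head)
{
	struct nvmefc_ls_rcv_op *lsop;
	unsigned long flags;

restart:
	spin_lock_irqsave(lock, flags);
	list_for_each_entry(lsop, head, lsrcv_list) {
		if (lsop->handled)
			continue;
		lsop->handled = true;		/* claim the entry under the lock */
		spin_unlock_irqrestore(lock, flags);
		/* handle lsop here with the lock dropped (may sleep) */
		goto restart;			/* list may have changed; rescan */
	}
	spin_unlock_irqrestore(lock, flags);
}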
....@@ -1504,8 +1841,10 @@
15041841 opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
15051842 if (opstate != FCPOP_STATE_ACTIVE)
15061843 atomic_set(&op->state, opstate);
1507
- else if (ctrl->flags & FCCTRL_TERMIO)
1844
+ else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
1845
+ op->flags |= FCOP_FLAGS_TERMIO;
15081846 ctrl->iocnt++;
1847
+ }
15091848 spin_unlock_irqrestore(&ctrl->lock, flags);
15101849
15111850 if (opstate != FCPOP_STATE_ACTIVE)
....@@ -1541,12 +1880,22 @@
15411880
15421881 if (opstate == FCPOP_STATE_ABORTED) {
15431882 spin_lock_irqsave(&ctrl->lock, flags);
1544
- if (ctrl->flags & FCCTRL_TERMIO) {
1883
+ if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
1884
+ op->flags & FCOP_FLAGS_TERMIO) {
15451885 if (!--ctrl->iocnt)
15461886 wake_up(&ctrl->ioabort_wait);
15471887 }
15481888 spin_unlock_irqrestore(&ctrl->lock, flags);
15491889 }
1890
+}
1891
+
1892
+static void
1893
+nvme_fc_ctrl_ioerr_work(struct work_struct *work)
1894
+{
1895
+ struct nvme_fc_ctrl *ctrl =
1896
+ container_of(work, struct nvme_fc_ctrl, ioerr_work);
1897
+
1898
+ nvme_fc_error_recovery(ctrl, "transport detected io error");
15501899 }
15511900
15521901 static void
....@@ -1607,9 +1956,13 @@
16071956 sizeof(op->rsp_iu), DMA_FROM_DEVICE);
16081957
16091958 if (opstate == FCPOP_STATE_ABORTED)
1610
- status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
1611
- else if (freq->status)
1612
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
1959
+ status = cpu_to_le16(NVME_SC_HOST_ABORTED_CMD << 1);
1960
+ else if (freq->status) {
1961
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
1962
+ dev_info(ctrl->ctrl.device,
1963
+ "NVME-FC{%d}: io failed due to lldd error %d\n",
1964
+ ctrl->cnum, freq->status);
1965
+ }
16131966
16141967 /*
16151968 * For the linux implementation, if we have an unsuccessful
....@@ -1636,8 +1989,13 @@
16361989 * no payload in the CQE by the transport.
16371990 */
16381991 if (freq->transferred_length !=
1639
- be32_to_cpu(op->cmd_iu.data_len)) {
1640
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
1992
+ be32_to_cpu(op->cmd_iu.data_len)) {
1993
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
1994
+ dev_info(ctrl->ctrl.device,
1995
+ "NVME-FC{%d}: io failed due to bad transfer "
1996
+ "length: %d vs expected %d\n",
1997
+ ctrl->cnum, freq->transferred_length,
1998
+ be32_to_cpu(op->cmd_iu.data_len));
16411999 goto done;
16422000 }
16432001 result.u64 = 0;
....@@ -1652,9 +2010,19 @@
16522010 (freq->rcv_rsplen / 4) ||
16532011 be32_to_cpu(op->rsp_iu.xfrd_len) !=
16542012 freq->transferred_length ||
1655
- op->rsp_iu.status_code ||
2013
+ op->rsp_iu.ersp_result ||
16562014 sqe->common.command_id != cqe->command_id)) {
1657
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
2015
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
2016
+ dev_info(ctrl->ctrl.device,
2017
+ "NVME-FC{%d}: io failed due to bad NVMe_ERSP: "
2018
+ "iu len %d, xfr len %d vs %d, status code "
2019
+ "%d, cmdid %d vs %d\n",
2020
+ ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len),
2021
+ be32_to_cpu(op->rsp_iu.xfrd_len),
2022
+ freq->transferred_length,
2023
+ op->rsp_iu.ersp_result,
2024
+ sqe->common.command_id,
2025
+ cqe->command_id);
16582026 goto done;
16592027 }
16602028 result = cqe->result;
....@@ -1662,7 +2030,11 @@
16622030 break;
16632031
16642032 default:
1665
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
2033
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
2034
+ dev_info(ctrl->ctrl.device,
2035
+ "NVME-FC{%d}: io failed due to odd NVMe_xRSP iu "
2036
+ "len %d\n",
2037
+ ctrl->cnum, freq->rcv_rsplen);
16662038 goto done;
16672039 }
16682040
....@@ -1679,11 +2051,12 @@
16792051 }
16802052
16812053 __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
1682
- nvme_end_request(rq, status, result);
2054
+ if (!nvme_try_complete_req(rq, status, result))
2055
+ nvme_fc_complete_rq(rq);
16832056
16842057 check_error:
1685
- if (terminate_assoc)
1686
- nvme_fc_error_recovery(ctrl, "transport detected io error");
2058
+ if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
2059
+ queue_work(nvme_reset_wq, &ctrl->ioerr_work);
16872060 }
16882061
16892062 static int
....@@ -1691,6 +2064,8 @@
16912064 struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op,
16922065 struct request *rq, u32 rqno)
16932066 {
2067
+ struct nvme_fcp_op_w_sgl *op_w_sgl =
2068
+ container_of(op, typeof(*op_w_sgl), op);
16942069 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
16952070 int ret = 0;
16962071
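The container_of() above recovers the wrapper from the embedded op that the block layer hands back; equivalently, as a small illustrative helper:

#include <linux/kernel.h>

static inline struct nvme_fcp_op_w_sgl *
nvme_fcp_op_to_wrapper(struct nvme_fc_fcp_op *op)
{
	return container_of(op, struct nvme_fcp_op_w_sgl, op);
}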
....@@ -1700,16 +2075,19 @@
17002075 op->fcp_req.rspaddr = &op->rsp_iu;
17012076 op->fcp_req.rsplen = sizeof(op->rsp_iu);
17022077 op->fcp_req.done = nvme_fc_fcpio_done;
1703
- op->fcp_req.first_sgl = (struct scatterlist *)&op[1];
1704
- op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
17052078 op->ctrl = ctrl;
17062079 op->queue = queue;
17072080 op->rq = rq;
17082081 op->rqno = rqno;
17092082
1710
- cmdiu->scsi_id = NVME_CMD_SCSI_ID;
2083
+ cmdiu->format_id = NVME_CMD_FORMAT_ID;
17112084 cmdiu->fc_id = NVME_CMD_FC_ID;
17122085 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
2086
+ if (queue->qnum)
2087
+ cmdiu->rsv_cat = fccmnd_set_cat_css(0,
2088
+ (NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT));
2089
+ else
2090
+ cmdiu->rsv_cat = fccmnd_set_cat_admin(0);
17132091
17142092 op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
17152093 &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
....@@ -1739,12 +2117,18 @@
17392117 unsigned int hctx_idx, unsigned int numa_node)
17402118 {
17412119 struct nvme_fc_ctrl *ctrl = set->driver_data;
1742
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
2120
+ struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq);
17432121 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
17442122 struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
2123
+ int res;
17452124
2125
+ res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
2126
+ if (res)
2127
+ return res;
2128
+ op->op.fcp_req.first_sgl = op->sgl;
2129
+ op->op.fcp_req.private = &op->priv[0];
17462130 nvme_req(rq)->ctrl = &ctrl->ctrl;
1747
- return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
2131
+ return res;
17482132 }
17492133
17502134 static int
....@@ -1753,15 +2137,17 @@
17532137 struct nvme_fc_fcp_op *aen_op;
17542138 struct nvme_fc_cmd_iu *cmdiu;
17552139 struct nvme_command *sqe;
1756
- void *private;
2140
+ void *private = NULL;
17572141 int i, ret;
17582142
17592143 aen_op = ctrl->aen_ops;
17602144 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
1761
- private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
2145
+ if (ctrl->lport->ops->fcprqst_priv_sz) {
2146
+ private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
17622147 GFP_KERNEL);
1763
- if (!private)
1764
- return -ENOMEM;
2148
+ if (!private)
2149
+ return -ENOMEM;
2150
+ }
17652151
17662152 cmdiu = &aen_op->cmd_iu;
17672153 sqe = &cmdiu->sqe;
....@@ -1774,7 +2160,6 @@
17742160 }
17752161
17762162 aen_op->flags = FCOP_FLAGS_AEN;
1777
- aen_op->fcp_req.first_sgl = NULL; /* no sg list */
17782163 aen_op->fcp_req.private = private;
17792164
17802165 memset(sqe, 0, sizeof(*sqe));
....@@ -1794,9 +2179,6 @@
17942179 cancel_work_sync(&ctrl->ctrl.async_event_work);
17952180 aen_op = ctrl->aen_ops;
17962181 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
1797
- if (!aen_op->fcp_req.private)
1798
- continue;
1799
-
18002182 __nvme_fc_exit_request(ctrl, aen_op);
18012183
18022184 kfree(aen_op->fcp_req.private);
....@@ -1948,7 +2330,7 @@
19482330 return 0;
19492331
19502332 delete_queues:
1951
- for (; i >= 0; i--)
2333
+ for (; i > 0; i--)
19522334 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
19532335 return ret;
19542336 }
....@@ -1963,7 +2345,7 @@
19632345 (qsize / 5));
19642346 if (ret)
19652347 break;
1966
- ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
2348
+ ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false);
19672349 if (ret)
19682350 break;
19692351
....@@ -2001,6 +2383,7 @@
20012383
20022384 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
20032385 blk_cleanup_queue(ctrl->ctrl.admin_q);
2386
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
20042387 blk_mq_free_tag_set(&ctrl->admin_tag_set);
20052388
20062389 kfree(ctrl->queues);
....@@ -2040,24 +2423,112 @@
20402423 nvme_fc_ctrl_put(ctrl);
20412424 }
20422425
2426
+/*
2427
+ * This routine is used by the transport when it needs to find active
2428
+ * io on a queue that is to be terminated. The transport uses
2429
+ * blk_mq_tagset_busy_iter() to find the busy requests, which then invoke
2430
+ * this routine to kill them on a 1 by 1 basis.
2431
+ *
2432
+ * As FC allocates FC exchange for each io, the transport must contact
2433
+ * the LLDD to terminate the exchange, thus releasing the FC exchange.
2434
+ * After terminating the exchange the LLDD will call the transport's
2435
+ * normal io done path for the request, but it will have an aborted
2436
+ * status. The done path will return the io request back to the block
2437
+ * layer with an error status.
2438
+ */
2439
+static bool
2440
+nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
2441
+{
2442
+ struct nvme_ctrl *nctrl = data;
2443
+ struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
2444
+ struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
2445
+
2446
+ op->nreq.flags |= NVME_REQ_CANCELLED;
2447
+ __nvme_fc_abort_op(ctrl, op);
2448
+ return true;
2449
+}
2450
+
2451
+/*
2452
+ * This routine runs through all outstanding commands on the association
2453
+ * and aborts them. This routine is typically called by the
2454
+ * delete_association routine. It is also called due to an error during
2455
+ * reconnect. In that scenario, it is most likely a command that initializes
2456
+ * the controller, including fabric Connect commands on io queues, that
2457
+ * may have timed out or failed thus the io must be killed for the connect
2458
+ * thread to see the error.
2459
+ */
2460
+static void
2461
+__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
2462
+{
2463
+ int q;
2464
+
2465
+ /*
2466
+ * if aborting io, the queues are no longer good, mark them
2467
+ * all as not live.
2468
+ */
2469
+ if (ctrl->ctrl.queue_count > 1) {
2470
+ for (q = 1; q < ctrl->ctrl.queue_count; q++)
2471
+ clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[q].flags);
2472
+ }
2473
+ clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
2474
+
2475
+ /*
2476
+ * If io queues are present, stop them and terminate all outstanding
2477
+ * ios on them. As FC allocates FC exchange for each io, the
2478
+ * transport must contact the LLDD to terminate the exchange,
2479
+ * thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
2480
+ * to tell us what io's are busy and invoke a transport routine
2481
+ * to kill them with the LLDD. After terminating the exchange
2482
+ * the LLDD will call the transport's normal io done path, but it
2483
+ * will have an aborted status. The done path will return the
2484
+ * io requests back to the block layer as part of normal completions
2485
+ * (but with error status).
2486
+ */
2487
+ if (ctrl->ctrl.queue_count > 1) {
2488
+ nvme_stop_queues(&ctrl->ctrl);
2489
+ nvme_sync_io_queues(&ctrl->ctrl);
2490
+ blk_mq_tagset_busy_iter(&ctrl->tag_set,
2491
+ nvme_fc_terminate_exchange, &ctrl->ctrl);
2492
+ blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
2493
+ if (start_queues)
2494
+ nvme_start_queues(&ctrl->ctrl);
2495
+ }
2496
+
2497
+ /*
2498
+ * Other transports, which don't have link-level contexts bound
2499
+ * to sqe's, would try to gracefully shutdown the controller by
2500
+ * writing the registers for shutdown and polling (call
2501
+ * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
2502
+ * just aborted and we will wait on those contexts, and given
2503
+ * there was no indication of how live the controller is on the
2504
+ * link, don't send more io to create more contexts for the
2505
+ * shutdown. Let the controller fail via keepalive failure if
2506
+ * its still present.
2507
+ */
2508
+
2509
+ /*
2510
+ * clean up the admin queue. Same thing as above.
2511
+ */
2512
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
2513
+ blk_sync_queue(ctrl->ctrl.admin_q);
2514
+ blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
2515
+ nvme_fc_terminate_exchange, &ctrl->ctrl);
2516
+ blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
2517
+}
2518
+
20432519 static void
20442520 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
20452521 {
2046
- int active;
2047
-
20482522 /*
2049
- * if an error (io timeout, etc) while (re)connecting,
2050
- * it's an error on creating the new association.
2051
- * Start the error recovery thread if it hasn't already
2052
- * been started. It is expected there could be multiple
2053
- * ios hitting this path before things are cleaned up.
2523
+ * if an error (io timeout, etc) while (re)connecting, the remote
2524
+ * port requested terminating of the association (disconnect_ls)
2525
+ * or an error (timeout or abort) occurred on an io while creating
2526
+ * the controller. Abort any ios on the association and let the
2527
+ * create_association error path resolve things.
20542528 */
20552529 if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
2056
- active = atomic_xchg(&ctrl->err_work_active, 1);
2057
- if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) {
2058
- atomic_set(&ctrl->err_work_active, 0);
2059
- WARN_ON(1);
2060
- }
2530
+ __nvme_fc_abort_outstanding_ios(ctrl, true);
2531
+ set_bit(ASSOC_FAILED, &ctrl->flags);
20612532 return;
20622533 }
20632534
....@@ -2066,7 +2537,7 @@
20662537 return;
20672538
20682539 dev_warn(ctrl->ctrl.device,
2069
- "NVME-FC{%d}: transport association error detected: %s\n",
2540
+ "NVME-FC{%d}: transport association event: %s\n",
20702541 ctrl->cnum, errmsg);
20712542 dev_warn(ctrl->ctrl.device,
20722543 "NVME-FC{%d}: resetting controller\n", ctrl->cnum);
....@@ -2079,15 +2550,20 @@
20792550 {
20802551 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
20812552 struct nvme_fc_ctrl *ctrl = op->ctrl;
2553
+ struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
2554
+ struct nvme_command *sqe = &cmdiu->sqe;
20822555
20832556 /*
2084
- * we can't individually ABTS an io without affecting the queue,
2085
- * thus killing the queue, and thus the association.
2086
- * So resolve by performing a controller reset, which will stop
2087
- * the host/io stack, terminate the association on the link,
2088
- * and recreate an association on the link.
2557
+ * Attempt to abort the offending command. Command completion
2558
+ * will detect the aborted io and will fail the connection.
20892559 */
2090
- nvme_fc_error_recovery(ctrl, "io timeout error");
2560
+ dev_info(ctrl->ctrl.device,
2561
+ "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
2562
+ "x%08x/x%08x\n",
2563
+ ctrl->cnum, op->queue->qnum, sqe->common.opcode,
2564
+ sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
2565
+ if (__nvme_fc_abort_op(ctrl, op))
2566
+ nvme_fc_error_recovery(ctrl, "io timeout abort failed");
20912567
20922568 /*
20932569 * the io abort has been initiated. Have the reset timer
....@@ -2102,27 +2578,26 @@
21022578 struct nvme_fc_fcp_op *op)
21032579 {
21042580 struct nvmefc_fcp_req *freq = &op->fcp_req;
2105
- enum dma_data_direction dir;
21062581 int ret;
21072582
21082583 freq->sg_cnt = 0;
21092584
2110
- if (!blk_rq_payload_bytes(rq))
2585
+ if (!blk_rq_nr_phys_segments(rq))
21112586 return 0;
21122587
21132588 freq->sg_table.sgl = freq->first_sgl;
21142589 ret = sg_alloc_table_chained(&freq->sg_table,
2115
- blk_rq_nr_phys_segments(rq), freq->sg_table.sgl);
2590
+ blk_rq_nr_phys_segments(rq), freq->sg_table.sgl,
2591
+ NVME_INLINE_SG_CNT);
21162592 if (ret)
21172593 return -ENOMEM;
21182594
21192595 op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
21202596 WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
2121
- dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
21222597 freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
2123
- op->nents, dir);
2598
+ op->nents, rq_dma_dir(rq));
21242599 if (unlikely(freq->sg_cnt <= 0)) {
2125
- sg_free_table_chained(&freq->sg_table, true);
2600
+ sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
21262601 freq->sg_cnt = 0;
21272602 return -EFAULT;
21282603 }
....@@ -2143,12 +2618,9 @@
21432618 return;
21442619
21452620 fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
2146
- ((rq_data_dir(rq) == WRITE) ?
2147
- DMA_TO_DEVICE : DMA_FROM_DEVICE));
2621
+ rq_dma_dir(rq));
21482622
2149
- nvme_cleanup_cmd(rq);
2150
-
2151
- sg_free_table_chained(&freq->sg_table, true);
2623
+ sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
21522624
21532625 freq->sg_cnt = 0;
21542626 }
....@@ -2275,8 +2747,10 @@
22752747 opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
22762748 __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
22772749
2278
- if (!(op->flags & FCOP_FLAGS_AEN))
2750
+ if (!(op->flags & FCOP_FLAGS_AEN)) {
22792751 nvme_fc_unmap_data(ctrl, op->rq, op);
2752
+ nvme_cleanup_cmd(op->rq);
2753
+ }
22802754
22812755 nvme_fc_ctrl_put(ctrl);
22822756
....@@ -2314,46 +2788,25 @@
23142788 if (ret)
23152789 return ret;
23162790
2317
- data_len = blk_rq_payload_bytes(rq);
2318
- if (data_len)
2791
+ /*
2792
+ * nvme core doesn't quite treat the rq opaquely. Commands such
2793
+ * as WRITE ZEROES will return a non-zero rq payload_bytes yet
2794
+ * there is no actual payload to be transferred.
2795
+ * To get it right, key data transmission on there being 1 or
2796
+ * more physical segments in the sg list. If there is no
2797
+ * physical segments, there is no payload.
2798
+ */
2799
+ if (blk_rq_nr_phys_segments(rq)) {
2800
+ data_len = blk_rq_payload_bytes(rq);
23192801 io_dir = ((rq_data_dir(rq) == WRITE) ?
23202802 NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
2321
- else
2803
+ } else {
2804
+ data_len = 0;
23222805 io_dir = NVMEFC_FCP_NODATA;
2806
+ }
2807
+
23232808
23242809 return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
2325
-}
2326
-
2327
-static struct blk_mq_tags *
2328
-nvme_fc_tagset(struct nvme_fc_queue *queue)
2329
-{
2330
- if (queue->qnum == 0)
2331
- return queue->ctrl->admin_tag_set.tags[queue->qnum];
2332
-
2333
- return queue->ctrl->tag_set.tags[queue->qnum - 1];
2334
-}
2335
-
2336
-static int
2337
-nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
2338
-
2339
-{
2340
- struct nvme_fc_queue *queue = hctx->driver_data;
2341
- struct nvme_fc_ctrl *ctrl = queue->ctrl;
2342
- struct request *req;
2343
- struct nvme_fc_fcp_op *op;
2344
-
2345
- req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag);
2346
- if (!req)
2347
- return 0;
2348
-
2349
- op = blk_mq_rq_to_pdu(req);
2350
-
2351
- if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) &&
2352
- (ctrl->lport->ops->poll_queue))
2353
- ctrl->lport->ops->poll_queue(&ctrl->lport->localport,
2354
- queue->lldd_handle);
2355
-
2356
- return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE));
23572810 }
23582811
23592812 static void
....@@ -2361,16 +2814,9 @@
23612814 {
23622815 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
23632816 struct nvme_fc_fcp_op *aen_op;
2364
- unsigned long flags;
2365
- bool terminating = false;
23662817 blk_status_t ret;
23672818
2368
- spin_lock_irqsave(&ctrl->lock, flags);
2369
- if (ctrl->flags & FCCTRL_TERMIO)
2370
- terminating = true;
2371
- spin_unlock_irqrestore(&ctrl->lock, flags);
2372
-
2373
- if (terminating)
2819
+ if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
23742820 return;
23752821
23762822 aen_op = &ctrl->aen_ops[0];
....@@ -2389,33 +2835,11 @@
23892835 struct nvme_fc_ctrl *ctrl = op->ctrl;
23902836
23912837 atomic_set(&op->state, FCPOP_STATE_IDLE);
2838
+ op->flags &= ~FCOP_FLAGS_TERMIO;
23922839
23932840 nvme_fc_unmap_data(ctrl, rq, op);
23942841 nvme_complete_rq(rq);
23952842 nvme_fc_ctrl_put(ctrl);
2396
-}
2397
-
2398
-/*
2399
- * This routine is used by the transport when it needs to find active
2400
- * io on a queue that is to be terminated. The transport uses
2401
- * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke
2402
- * this routine to kill them on a 1 by 1 basis.
2403
- *
2404
- * As FC allocates FC exchange for each io, the transport must contact
2405
- * the LLDD to terminate the exchange, thus releasing the FC exchange.
2406
- * After terminating the exchange the LLDD will call the transport's
2407
- * normal io done path for the request, but it will have an aborted
2408
- * status. The done path will return the io request back to the block
2409
- * layer with an error status.
2410
- */
2411
-static void
2412
-nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
2413
-{
2414
- struct nvme_ctrl *nctrl = data;
2415
- struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
2416
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
2417
-
2418
- __nvme_fc_abort_op(ctrl, op);
24192843 }
24202844
24212845
....@@ -2425,7 +2849,6 @@
24252849 .init_request = nvme_fc_init_request,
24262850 .exit_request = nvme_fc_exit_request,
24272851 .init_hctx = nvme_fc_init_hctx,
2428
- .poll = nvme_fc_poll,
24292852 .timeout = nvme_fc_timeout,
24302853 };
24312854
....@@ -2455,12 +2878,11 @@
24552878 ctrl->tag_set.ops = &nvme_fc_mq_ops;
24562879 ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
24572880 ctrl->tag_set.reserved_tags = 1; /* fabric connect */
2458
- ctrl->tag_set.numa_node = NUMA_NO_NODE;
2881
+ ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
24592882 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
2460
- ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
2461
- (SG_CHUNK_SIZE *
2462
- sizeof(struct scatterlist)) +
2463
- ctrl->lport->ops->fcprqst_priv_sz;
2883
+ ctrl->tag_set.cmd_size =
2884
+ struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
2885
+ ctrl->lport->ops->fcprqst_priv_sz);
24642886 ctrl->tag_set.driver_data = ctrl;
24652887 ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
24662888 ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
....@@ -2507,6 +2929,7 @@
25072929 nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
25082930 {
25092931 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
2932
+ u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1;
25102933 unsigned int nr_io_queues;
25112934 int ret;
25122935
....@@ -2519,10 +2942,26 @@
25192942 return ret;
25202943 }
25212944
2945
+ if (!nr_io_queues && prior_ioq_cnt) {
2946
+ dev_info(ctrl->ctrl.device,
2947
+ "Fail Reconnect: At least 1 io queue "
2948
+ "required (was %d)\n", prior_ioq_cnt);
2949
+ return -ENOSPC;
2950
+ }
2951
+
25222952 ctrl->ctrl.queue_count = nr_io_queues + 1;
25232953 /* check for io queues existing */
25242954 if (ctrl->ctrl.queue_count == 1)
25252955 return 0;
2956
+
2957
+ if (prior_ioq_cnt != nr_io_queues) {
2958
+ dev_info(ctrl->ctrl.device,
2959
+ "reconnect: revising io queue count from %d to %d\n",
2960
+ prior_ioq_cnt, nr_io_queues);
2961
+ nvme_wait_freeze(&ctrl->ctrl);
2962
+ blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
2963
+ nvme_unfreeze(&ctrl->ctrl);
2964
+ }
25262965
25272966 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
25282967 if (ret)
....@@ -2531,8 +2970,6 @@
25312970 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
25322971 if (ret)
25332972 goto out_delete_hw_queues;
2534
-
2535
- blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
25362973
25372974 return 0;
25382975
....@@ -2568,10 +3005,9 @@
25683005 struct nvme_fc_rport *rport = ctrl->rport;
25693006 u32 cnt;
25703007
2571
- if (ctrl->assoc_active)
3008
+ if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags))
25723009 return 1;
25733010
2574
- ctrl->assoc_active = true;
25753011 cnt = atomic_inc_return(&rport->act_ctrl_cnt);
25763012 if (cnt == 1)
25773013 nvme_fc_rport_active_on_lport(rport);
....@@ -2586,7 +3022,7 @@
25863022 struct nvme_fc_lport *lport = rport->lport;
25873023 u32 cnt;
25883024
2589
- /* ctrl->assoc_active=false will be set independently */
3025
+ /* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */
25903026
25913027 cnt = atomic_dec_return(&rport->act_ctrl_cnt);
25923028 if (cnt == 0) {
....@@ -2606,6 +3042,8 @@
26063042 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
26073043 {
26083044 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
3045
+ struct nvmefc_ls_rcv_op *disls = NULL;
3046
+ unsigned long flags;
26093047 int ret;
26103048 bool changed;
26113049
....@@ -2616,6 +3054,14 @@
26163054
26173055 if (nvme_fc_ctlr_active_on_rport(ctrl))
26183056 return -ENOTUNIQ;
3057
+
3058
+ dev_info(ctrl->ctrl.device,
3059
+ "NVME-FC{%d}: create association : host wwpn 0x%016llx "
3060
+ " rport wwpn 0x%016llx: NQN \"%s\"\n",
3061
+ ctrl->cnum, ctrl->lport->localport.port_name,
3062
+ ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn);
3063
+
3064
+ clear_bit(ASSOC_FAILED, &ctrl->flags);
26193065
26203066 /*
26213067 * Create the admin queue
....@@ -2631,8 +3077,6 @@
26313077 if (ret)
26323078 goto out_delete_hw_queue;
26333079
2634
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
2635
-
26363080 ret = nvmf_connect_admin_queue(&ctrl->ctrl);
26373081 if (ret)
26383082 goto out_disconnect_admin_queue;
....@@ -2646,25 +3090,18 @@
26463090 * prior connection values
26473091 */
26483092
2649
- ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
2650
- if (ret) {
2651
- dev_err(ctrl->ctrl.device,
2652
- "prop_get NVME_REG_CAP failed\n");
2653
- goto out_disconnect_admin_queue;
2654
- }
2655
-
2656
- ctrl->ctrl.sqsize =
2657
- min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
2658
-
2659
- ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
2660
- if (ret)
3093
+ ret = nvme_enable_ctrl(&ctrl->ctrl);
3094
+ if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
26613095 goto out_disconnect_admin_queue;
26623096
2663
- ctrl->ctrl.max_hw_sectors =
2664
- (ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9);
3097
+ ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
3098
+ ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments <<
3099
+ (ilog2(SZ_4K) - 9);
3100
+
3101
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
26653102
26663103 ret = nvme_init_identify(&ctrl->ctrl);
2667
- if (ret)
3104
+ if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
26683105 goto out_disconnect_admin_queue;
26693106
26703107 /* sanity checks */
....@@ -2682,7 +3119,7 @@
26823119 /* warn if maxcmd is lower than queue_size */
26833120 dev_warn(ctrl->ctrl.device,
26843121 "queue_size %zu > ctrl maxcmd %u, reducing "
2685
- "to queue_size\n",
3122
+ "to maxcmd\n",
26863123 opts->queue_size, ctrl->ctrl.maxcmd);
26873124 opts->queue_size = ctrl->ctrl.maxcmd;
26883125 }
....@@ -2690,7 +3127,8 @@
26903127 if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
26913128 /* warn if sqsize is lower than queue_size */
26923129 dev_warn(ctrl->ctrl.device,
2693
- "queue_size %zu > ctrl sqsize %u, clamping down\n",
3130
+ "queue_size %zu > ctrl sqsize %u, reducing "
3131
+ "to sqsize\n",
26943132 opts->queue_size, ctrl->ctrl.sqsize + 1);
26953133 opts->queue_size = ctrl->ctrl.sqsize + 1;
26963134 }
....@@ -2708,9 +3146,9 @@
27083146 ret = nvme_fc_create_io_queues(ctrl);
27093147 else
27103148 ret = nvme_fc_recreate_io_queues(ctrl);
2711
- if (ret)
2712
- goto out_term_aen_ops;
27133149 }
3150
+ if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
3151
+ goto out_term_aen_ops;
27143152
27153153 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
27163154
....@@ -2726,15 +3164,23 @@
27263164 out_disconnect_admin_queue:
27273165 /* send a Disconnect(association) LS to fc-nvme target */
27283166 nvme_fc_xmt_disconnect_assoc(ctrl);
3167
+ spin_lock_irqsave(&ctrl->lock, flags);
3168
+ ctrl->association_id = 0;
3169
+ disls = ctrl->rcv_disconn;
3170
+ ctrl->rcv_disconn = NULL;
3171
+ spin_unlock_irqrestore(&ctrl->lock, flags);
3172
+ if (disls)
3173
+ nvme_fc_xmt_ls_rsp(disls);
27293174 out_delete_hw_queue:
27303175 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
27313176 out_free_queue:
27323177 nvme_fc_free_queue(&ctrl->queues[0]);
2733
- ctrl->assoc_active = false;
3178
+ clear_bit(ASSOC_ACTIVE, &ctrl->flags);
27343179 nvme_fc_ctlr_inactive_on_rport(ctrl);
27353180
27363181 return ret;
27373182 }
3183
+
27383184
27393185 /*
27403186 * This routine stops operation of the controller on the host side.
....@@ -2745,55 +3191,18 @@
27453191 static void
27463192 nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
27473193 {
3194
+ struct nvmefc_ls_rcv_op *disls = NULL;
27483195 unsigned long flags;
27493196
2750
- if (!ctrl->assoc_active)
3197
+ if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
27513198 return;
2752
- ctrl->assoc_active = false;
27533199
27543200 spin_lock_irqsave(&ctrl->lock, flags);
2755
- ctrl->flags |= FCCTRL_TERMIO;
3201
+ set_bit(FCCTRL_TERMIO, &ctrl->flags);
27563202 ctrl->iocnt = 0;
27573203 spin_unlock_irqrestore(&ctrl->lock, flags);
27583204
2759
- /*
2760
- * If io queues are present, stop them and terminate all outstanding
2761
- * ios on them. As FC allocates FC exchange for each io, the
2762
- * transport must contact the LLDD to terminate the exchange,
2763
- * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
2764
- * to tell us what io's are busy and invoke a transport routine
2765
- * to kill them with the LLDD. After terminating the exchange
2766
- * the LLDD will call the transport's normal io done path, but it
2767
- * will have an aborted status. The done path will return the
2768
- * io requests back to the block layer as part of normal completions
2769
- * (but with error status).
2770
- */
2771
- if (ctrl->ctrl.queue_count > 1) {
2772
- nvme_stop_queues(&ctrl->ctrl);
2773
- blk_mq_tagset_busy_iter(&ctrl->tag_set,
2774
- nvme_fc_terminate_exchange, &ctrl->ctrl);
2775
- }
2776
-
2777
- /*
2778
- * Other transports, which don't have link-level contexts bound
2779
- * to sqe's, would try to gracefully shutdown the controller by
2780
- * writing the registers for shutdown and polling (call
2781
- * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
2782
- * just aborted and we will wait on those contexts, and given
2783
- * there was no indication of how live the controlelr is on the
2784
- * link, don't send more io to create more contexts for the
2785
- * shutdown. Let the controller fail via keepalive failure if
2786
- * its still present.
2787
- */
2788
-
2789
- /*
2790
- * clean up the admin queue. Same thing as above.
2791
- * use blk_mq_tagset_busy_itr() and the transport routine to
2792
- * terminate the exchanges.
2793
- */
2794
- blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
2795
- blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
2796
- nvme_fc_terminate_exchange, &ctrl->ctrl);
3205
+ __nvme_fc_abort_outstanding_ios(ctrl, false);
27973206
27983207 /* kill the aens as they are a separate path */
27993208 nvme_fc_abort_aen_ops(ctrl);
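The removed comment block and per-queue termination code above are folded into a single __nvme_fc_abort_outstanding_ios() call. Going by the removed lines, the consolidated helper is expected to cover roughly this pattern (a sketch of the old flow, not the helper's literal body): stop the queues, then have the LLDD terminate the FC exchange behind each busy request so the requests complete back through the normal done path with an aborted status.

	if (ctrl->ctrl.queue_count > 1) {
		nvme_stop_queues(&ctrl->ctrl);
		blk_mq_tagset_busy_iter(&ctrl->tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);
	}
	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
			nvme_fc_terminate_exchange, &ctrl->ctrl);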
....@@ -2801,7 +3210,7 @@
28013210 /* wait for all io that had to be aborted */
28023211 spin_lock_irq(&ctrl->lock);
28033212 wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
2804
- ctrl->flags &= ~FCCTRL_TERMIO;
3213
+ clear_bit(FCCTRL_TERMIO, &ctrl->flags);
28053214 spin_unlock_irq(&ctrl->lock);
28063215
28073216 nvme_fc_term_aen_ops(ctrl);
....@@ -2814,6 +3223,18 @@
28143223 */
28153224 if (ctrl->association_id)
28163225 nvme_fc_xmt_disconnect_assoc(ctrl);
3226
+
3227
+ spin_lock_irqsave(&ctrl->lock, flags);
3228
+ ctrl->association_id = 0;
3229
+ disls = ctrl->rcv_disconn;
3230
+ ctrl->rcv_disconn = NULL;
3231
+ spin_unlock_irqrestore(&ctrl->lock, flags);
3232
+ if (disls)
3233
+ /*
3234
+ * if a Disconnect Request was waiting for a response, send
3235
+ * now that all ABTS's have been issued (and are complete).
3236
+ */
3237
+ nvme_fc_xmt_ls_rsp(disls);
28173238
28183239 if (ctrl->ctrl.tagset) {
28193240 nvme_fc_delete_hw_io_queues(ctrl);
....@@ -2837,7 +3258,7 @@
28373258 {
28383259 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
28393260
2840
- cancel_work_sync(&ctrl->err_work);
3261
+ cancel_work_sync(&ctrl->ioerr_work);
28413262 cancel_delayed_work_sync(&ctrl->connect_work);
28423263 /*
28433264 * kill the association on the link side. this will block
....@@ -2884,36 +3305,11 @@
28843305 dev_warn(ctrl->ctrl.device,
28853306 "NVME-FC{%d}: dev_loss_tmo (%d) expired "
28863307 "while waiting for remoteport connectivity.\n",
2887
- ctrl->cnum, portptr->dev_loss_tmo);
3308
+ ctrl->cnum, min_t(int, portptr->dev_loss_tmo,
3309
+ (ctrl->ctrl.opts->max_reconnects *
3310
+ ctrl->ctrl.opts->reconnect_delay)));
28883311 WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
28893312 }
2890
-}
2891
-
2892
-static void
2893
-__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
2894
-{
2895
- /*
2896
- * if state is connecting - the error occurred as part of a
2897
- * reconnect attempt. The create_association error paths will
2898
- * clean up any outstanding io.
2899
- *
2900
- * if it's a different state - ensure all pending io is
2901
- * terminated. Given this can delay while waiting for the
2902
- * aborted io to return, we recheck adapter state below
2903
- * before changing state.
2904
- */
2905
- if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
2906
- nvme_stop_keep_alive(&ctrl->ctrl);
2907
-
2908
- /* will block will waiting for io to terminate */
2909
- nvme_fc_delete_association(ctrl);
2910
- }
2911
-
2912
- if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
2913
- !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
2914
- dev_err(ctrl->ctrl.device,
2915
- "NVME-FC{%d}: error_recovery: Couldn't change state "
2916
- "to CONNECTING\n", ctrl->cnum);
29173313 }
29183314
29193315 static void
....@@ -2921,42 +3317,30 @@
29213317 {
29223318 struct nvme_fc_ctrl *ctrl =
29233319 container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
2924
- int ret;
2925
-
2926
- __nvme_fc_terminate_io(ctrl);
29273320
29283321 nvme_stop_ctrl(&ctrl->ctrl);
29293322
2930
- if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
2931
- ret = nvme_fc_create_association(ctrl);
2932
- else
2933
- ret = -ENOTCONN;
3323
+ /* will block while waiting for io to terminate */
3324
+ nvme_fc_delete_association(ctrl);
29343325
2935
- if (ret)
2936
- nvme_fc_reconnect_or_delete(ctrl, ret);
2937
- else
2938
- dev_info(ctrl->ctrl.device,
2939
- "NVME-FC{%d}: controller reset complete\n",
2940
- ctrl->cnum);
3326
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
3327
+ dev_err(ctrl->ctrl.device,
3328
+ "NVME-FC{%d}: error_recovery: Couldn't change state "
3329
+ "to CONNECTING\n", ctrl->cnum);
3330
+
3331
+ if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
3332
+ if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
3333
+ dev_err(ctrl->ctrl.device,
3334
+ "NVME-FC{%d}: failed to schedule connect "
3335
+ "after reset\n", ctrl->cnum);
3336
+ } else {
3337
+ flush_delayed_work(&ctrl->connect_work);
3338
+ }
3339
+ } else {
3340
+ nvme_fc_reconnect_or_delete(ctrl, -ENOTCONN);
3341
+ }
29413342 }
29423343
2943
-static void
2944
-nvme_fc_connect_err_work(struct work_struct *work)
2945
-{
2946
- struct nvme_fc_ctrl *ctrl =
2947
- container_of(work, struct nvme_fc_ctrl, err_work);
2948
-
2949
- __nvme_fc_terminate_io(ctrl);
2950
-
2951
- atomic_set(&ctrl->err_work_active, 0);
2952
-
2953
- /*
2954
- * Rescheduling the connection after recovering
2955
- * from the io error is left to the reconnect work
2956
- * item, which is what should have stalled waiting on
2957
- * the io that had the error that scheduled this work.
2958
- */
2959
-}
29603344
29613345 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
29623346 .name = "fc",
....@@ -3033,7 +3417,7 @@
30333417 {
30343418 struct nvme_fc_ctrl *ctrl;
30353419 unsigned long flags;
3036
- int ret, idx;
3420
+ int ret, idx, ctrl_loss_tmo;
30373421
30383422 if (!(rport->remoteport.port_role &
30393423 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
....@@ -3059,16 +3443,31 @@
30593443 goto out_free_ctrl;
30603444 }
30613445
3446
+ /*
3447
+ * if ctrl_loss_tmo is being enforced and the default reconnect delay
3448
+ * is being used, change to a shorter reconnect delay for FC.
3449
+ */
3450
+ if (opts->max_reconnects != -1 &&
3451
+ opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
3452
+ opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
3453
+ ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
3454
+ opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
3455
+ opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
3456
+ opts->reconnect_delay);
3457
+ }
3458
+
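	/*
	 * Worked example with illustrative defaults: a reconnect_delay of 10s
	 * and max_reconnects sized for a 600s ctrl_loss_tmo (60 retries)
	 * become, after the block above:
	 *   ctrl_loss_tmo   = 60 * 10 = 600
	 *   reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO (2s)
	 *   max_reconnects  = DIV_ROUND_UP(600, 2) = 300
	 * so the overall loss budget is preserved while retrying faster.
	 */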
30623459 ctrl->ctrl.opts = opts;
30633460 ctrl->ctrl.nr_reconnects = 0;
3461
+ if (lport->dev)
3462
+ ctrl->ctrl.numa_node = dev_to_node(lport->dev);
3463
+ else
3464
+ ctrl->ctrl.numa_node = NUMA_NO_NODE;
30643465 INIT_LIST_HEAD(&ctrl->ctrl_list);
30653466 ctrl->lport = lport;
30663467 ctrl->rport = rport;
30673468 ctrl->dev = lport->dev;
30683469 ctrl->cnum = idx;
30693470 ctrl->ioq_live = false;
3070
- ctrl->assoc_active = false;
3071
- atomic_set(&ctrl->err_work_active, 0);
30723471 init_waitqueue_head(&ctrl->ioabort_wait);
30733472
30743473 get_device(ctrl->dev);
....@@ -3076,7 +3475,7 @@
30763475
30773476 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
30783477 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
3079
- INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work);
3478
+ INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
30803479 spin_lock_init(&ctrl->lock);
30813480
30823481 /* io queue count */
....@@ -3101,11 +3500,10 @@
31013500 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
31023501 ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
31033502 ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
3104
- ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
3105
- ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
3106
- (SG_CHUNK_SIZE *
3107
- sizeof(struct scatterlist)) +
3108
- ctrl->lport->ops->fcprqst_priv_sz;
3503
+ ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
3504
+ ctrl->admin_tag_set.cmd_size =
3505
+ struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
3506
+ ctrl->lport->ops->fcprqst_priv_sz);
31093507 ctrl->admin_tag_set.driver_data = ctrl;
31103508 ctrl->admin_tag_set.nr_hw_queues = 1;
31113509 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
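The admin tag set's cmd_size is now derived with struct_size(): the fixed struct nvme_fcp_op_w_sgl plus fcprqst_priv_sz bytes for its trailing priv[] flexible array, with integer-overflow checking folded in, instead of the hand-summed op + scatterlist + private size. A minimal sketch of the idiom against a made-up container type:

#include <linux/types.h>
#include <linux/overflow.h>

struct demo_op {			/* hypothetical container */
	int	core;			/* fixed part */
	u8	priv[];			/* flexible array member */
};

static size_t demo_cmd_size(size_t priv_sz)
{
	/* sizeof(struct demo_op) + priv_sz bytes, overflow-checked */
	return struct_size((struct demo_op *)NULL, priv, priv_sz);
}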
....@@ -3116,10 +3514,16 @@
31163514 goto out_free_queues;
31173515 ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
31183516
3517
+ ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
3518
+ if (IS_ERR(ctrl->ctrl.fabrics_q)) {
3519
+ ret = PTR_ERR(ctrl->ctrl.fabrics_q);
3520
+ goto out_free_admin_tag_set;
3521
+ }
3522
+
31193523 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
31203524 if (IS_ERR(ctrl->ctrl.admin_q)) {
31213525 ret = PTR_ERR(ctrl->ctrl.admin_q);
3122
- goto out_free_admin_tag_set;
3526
+ goto out_cleanup_fabrics_q;
31233527 }
31243528
31253529 /*
....@@ -3146,10 +3550,7 @@
31463550 goto fail_ctrl;
31473551 }
31483552
3149
- nvme_get_ctrl(&ctrl->ctrl);
3150
-
31513553 if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
3152
- nvme_put_ctrl(&ctrl->ctrl);
31533554 dev_err(ctrl->ctrl.device,
31543555 "NVME-FC{%d}: failed to schedule initial connect\n",
31553556 ctrl->cnum);
....@@ -3166,8 +3567,8 @@
31663567
31673568 fail_ctrl:
31683569 nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
3570
+ cancel_work_sync(&ctrl->ioerr_work);
31693571 cancel_work_sync(&ctrl->ctrl.reset_work);
3170
- cancel_work_sync(&ctrl->err_work);
31713572 cancel_delayed_work_sync(&ctrl->connect_work);
31723573
31733574 ctrl->ctrl.opts = NULL;
....@@ -3191,6 +3592,8 @@
31913592
31923593 out_cleanup_admin_q:
31933594 blk_cleanup_queue(ctrl->ctrl.admin_q);
3595
+out_cleanup_fabrics_q:
3596
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
31943597 out_free_admin_tag_set:
31953598 blk_mq_free_tag_set(&ctrl->admin_tag_set);
31963599 out_free_queues:
....@@ -3235,7 +3638,7 @@
32353638 substring_t wwn = { name, &name[sizeof(name)-1] };
32363639 int nnoffset, pnoffset;
32373640
3238
- /* validate it string one of the 2 allowed formats */
3641
+ /* validate if string is one of the 2 allowed formats */
32393642 if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
32403643 !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
32413644 !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
....@@ -3332,6 +3735,90 @@
33323735 .create_ctrl = nvme_fc_create_ctrl,
33333736 };
33343737
3738
+/* Arbitrary cap on successive failures. With lots of subsystems it could be high */
3739
+#define DISCOVERY_MAX_FAIL 20
3740
+
3741
+static ssize_t nvme_fc_nvme_discovery_store(struct device *dev,
3742
+ struct device_attribute *attr, const char *buf, size_t count)
3743
+{
3744
+ unsigned long flags;
3745
+ LIST_HEAD(local_disc_list);
3746
+ struct nvme_fc_lport *lport;
3747
+ struct nvme_fc_rport *rport;
3748
+ int failcnt = 0;
3749
+
3750
+ spin_lock_irqsave(&nvme_fc_lock, flags);
3751
+restart:
3752
+ list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
3753
+ list_for_each_entry(rport, &lport->endp_list, endp_list) {
3754
+ if (!nvme_fc_lport_get(lport))
3755
+ continue;
3756
+ if (!nvme_fc_rport_get(rport)) {
3757
+ /*
3758
+ * This is a temporary condition. Upon restart
3759
+ * this rport will be gone from the list.
3760
+ *
3761
+ * Revert the lport put and retry. Anything
3762
+ * added to the list already will be skipped (as
3763
+ * they are no longer list_empty). Loops should
3764
+ * resume at rports that were not yet seen.
3765
+ */
3766
+ nvme_fc_lport_put(lport);
3767
+
3768
+ if (failcnt++ < DISCOVERY_MAX_FAIL)
3769
+ goto restart;
3770
+
3771
+ pr_err("nvme_discovery: too many reference "
3772
+ "failures\n");
3773
+ goto process_local_list;
3774
+ }
3775
+ if (list_empty(&rport->disc_list))
3776
+ list_add_tail(&rport->disc_list,
3777
+ &local_disc_list);
3778
+ }
3779
+ }
3780
+
3781
+process_local_list:
3782
+ while (!list_empty(&local_disc_list)) {
3783
+ rport = list_first_entry(&local_disc_list,
3784
+ struct nvme_fc_rport, disc_list);
3785
+ list_del_init(&rport->disc_list);
3786
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
3787
+
3788
+ lport = rport->lport;
3789
+ /* signal discovery. Won't hurt if it repeats */
3790
+ nvme_fc_signal_discovery_scan(lport, rport);
3791
+ nvme_fc_rport_put(rport);
3792
+ nvme_fc_lport_put(lport);
3793
+
3794
+ spin_lock_irqsave(&nvme_fc_lock, flags);
3795
+ }
3796
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
3797
+
3798
+ return count;
3799
+}
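/*
 * With the fc class and fc_udev_device registered below, this store routine
 * is expected to back /sys/class/fc/fc_udev_device/nvme_discovery: writing
 * any value re-signals discovery for every known lport/rport pair. The
 * local_disc_list two-pass walk lets nvme_fc_signal_discovery_scan() and the
 * final lport/rport reference drops run with nvme_fc_lock released.
 */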
3800
+static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
3801
+
3802
+static struct attribute *nvme_fc_attrs[] = {
3803
+ &dev_attr_nvme_discovery.attr,
3804
+ NULL
3805
+};
3806
+
3807
+static struct attribute_group nvme_fc_attr_group = {
3808
+ .attrs = nvme_fc_attrs,
3809
+};
3810
+
3811
+static const struct attribute_group *nvme_fc_attr_groups[] = {
3812
+ &nvme_fc_attr_group,
3813
+ NULL
3814
+};
3815
+
3816
+static struct class fc_class = {
3817
+ .name = "fc",
3818
+ .dev_groups = nvme_fc_attr_groups,
3819
+ .owner = THIS_MODULE,
3820
+};
3821
+
33353822 static int __init nvme_fc_init_module(void)
33363823 {
33373824 int ret;
....@@ -3354,17 +3841,16 @@
33543841 * put in place, this code will move to a more generic
33553842 * location for the class.
33563843 */
3357
- fc_class = class_create(THIS_MODULE, "fc");
3358
- if (IS_ERR(fc_class)) {
3844
+ ret = class_register(&fc_class);
3845
+ if (ret) {
33593846 pr_err("couldn't register class fc\n");
3360
- ret = PTR_ERR(fc_class);
33613847 goto out_destroy_wq;
33623848 }
33633849
33643850 /*
33653851 * Create a device for the FC-centric udev events
33663852 */
3367
- fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL,
3853
+ fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL,
33683854 "fc_udev_device");
33693855 if (IS_ERR(fc_udev_device)) {
33703856 pr_err("couldn't create fc_udev device!\n");
....@@ -3379,28 +3865,68 @@
33793865 return 0;
33803866
33813867 out_destroy_device:
3382
- device_destroy(fc_class, MKDEV(0, 0));
3868
+ device_destroy(&fc_class, MKDEV(0, 0));
33833869 out_destroy_class:
3384
- class_destroy(fc_class);
3870
+ class_unregister(&fc_class);
33853871 out_destroy_wq:
33863872 destroy_workqueue(nvme_fc_wq);
33873873
33883874 return ret;
33893875 }
33903876
3877
+static void
3878
+nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
3879
+{
3880
+ struct nvme_fc_ctrl *ctrl;
3881
+
3882
+ spin_lock(&rport->lock);
3883
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
3884
+ dev_warn(ctrl->ctrl.device,
3885
+ "NVME-FC{%d}: transport unloading: deleting ctrl\n",
3886
+ ctrl->cnum);
3887
+ nvme_delete_ctrl(&ctrl->ctrl);
3888
+ }
3889
+ spin_unlock(&rport->lock);
3890
+}
3891
+
3892
+static void
3893
+nvme_fc_cleanup_for_unload(void)
3894
+{
3895
+ struct nvme_fc_lport *lport;
3896
+ struct nvme_fc_rport *rport;
3897
+
3898
+ list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
3899
+ list_for_each_entry(rport, &lport->endp_list, endp_list) {
3900
+ nvme_fc_delete_controllers(rport);
3901
+ }
3902
+ }
3903
+}
3904
+
33913905 static void __exit nvme_fc_exit_module(void)
33923906 {
3393
- /* sanity check - all lports should be removed */
3394
- if (!list_empty(&nvme_fc_lport_list))
3395
- pr_warn("%s: localport list not empty\n", __func__);
3907
+ unsigned long flags;
3908
+ bool need_cleanup = false;
3909
+
3910
+ spin_lock_irqsave(&nvme_fc_lock, flags);
3911
+ nvme_fc_waiting_to_unload = true;
3912
+ if (!list_empty(&nvme_fc_lport_list)) {
3913
+ need_cleanup = true;
3914
+ nvme_fc_cleanup_for_unload();
3915
+ }
3916
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
3917
+ if (need_cleanup) {
3918
+ pr_info("%s: waiting for ctrl deletes\n", __func__);
3919
+ wait_for_completion(&nvme_fc_unload_proceed);
3920
+ pr_info("%s: ctrl deletes complete\n", __func__);
3921
+ }
33963922
33973923 nvmf_unregister_transport(&nvme_fc_transport);
33983924
33993925 ida_destroy(&nvme_fc_local_port_cnt);
34003926 ida_destroy(&nvme_fc_ctrl_cnt);
34013927
3402
- device_destroy(fc_class, MKDEV(0, 0));
3403
- class_destroy(fc_class);
3928
+ device_destroy(&fc_class, MKDEV(0, 0));
3929
+ class_unregister(&fc_class);
34043930 destroy_workqueue(nvme_fc_wq);
34053931 }
34063932