2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/drivers/nvme/host/fc.c
....@@ -1,18 +1,6 @@
1
+// SPDX-License-Identifier: GPL-2.0
12 /*
23 * Copyright (c) 2016 Avago Technologies. All rights reserved.
3
- *
4
- * This program is free software; you can redistribute it and/or modify
5
- * it under the terms of version 2 of the GNU General Public License as
6
- * published by the Free Software Foundation.
7
- *
8
- * This program is distributed in the hope that it will be useful.
9
- * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
10
- * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
11
- * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
12
- * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
13
- * See the GNU General Public License for more details, a copy of which
14
- * can be found in the file COPYING included with this package
15
- *
164 */
175 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
186 #include <linux/module.h>
....@@ -20,12 +8,14 @@
208 #include <uapi/scsi/fc/fc_fs.h>
219 #include <uapi/scsi/fc/fc_els.h>
2210 #include <linux/delay.h>
11
+#include <linux/overflow.h>
2312
2413 #include "nvme.h"
2514 #include "fabrics.h"
2615 #include <linux/nvme-fc-driver.h>
2716 #include <linux/nvme-fc.h>
28
-
17
+#include "fc.h"
18
+#include <scsi/scsi_transport_fc.h>
2919
3020 /* *************************** Data Structures/Defines ****************** */
3121
....@@ -36,6 +26,10 @@
3626 };
3727
3828 #define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */
29
+#define NVME_FC_DEFAULT_RECONNECT_TMO 2 /* delay between reconnects
30
+ * when connected and a
31
+ * connection failure.
32
+ */
3933
4034 struct nvme_fc_queue {
4135 struct nvme_fc_ctrl *ctrl;
....@@ -72,6 +66,17 @@
7266 bool req_queued;
7367 };
7468
69
+struct nvmefc_ls_rcv_op {
70
+ struct nvme_fc_rport *rport;
71
+ struct nvmefc_ls_rsp *lsrsp;
72
+ union nvmefc_ls_requests *rqstbuf;
73
+ union nvmefc_ls_responses *rspbuf;
74
+ u16 rqstdatalen;
75
+ bool handled;
76
+ dma_addr_t rspdma;
77
+ struct list_head lsrcv_list; /* rport->ls_rcv_list */
78
+} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
79
+
7580 enum nvme_fcpop_state {
7681 FCPOP_STATE_UNINIT = 0,
7782 FCPOP_STATE_IDLE = 1,
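For reference, the LS send and receive paths added later in this patch allocate the op header and its request/response buffers (plus any LLDD private area) out of one kzalloc and carve pointers from the tail, which is why the header structs above carry __aligned(sizeof(u64)). A minimal sketch of that carving, with illustrative names and sizes (not part of the patch):

struct ls_op_hdr {
	void *rqstbuf;
	void *rspbuf;
	void *private;
} __aligned(sizeof(u64));	/* keeps the carved buffers 64-bit aligned */

static struct ls_op_hdr *ls_op_alloc(size_t rqst_sz, size_t rsp_sz,
				     size_t priv_sz)
{
	struct ls_op_hdr *hdr;

	hdr = kzalloc(sizeof(*hdr) + rqst_sz + rsp_sz + priv_sz, GFP_KERNEL);
	if (!hdr)
		return NULL;

	hdr->rqstbuf = &hdr[1];				/* request buffer follows the header */
	hdr->rspbuf = hdr->rqstbuf + rqst_sz;		/* response follows the request */
	hdr->private = priv_sz ? hdr->rspbuf + rsp_sz : NULL;	/* optional LLDD area */
	return hdr;
}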
....@@ -104,6 +109,12 @@
104109 struct nvme_fc_ersp_iu rsp_iu;
105110 };
106111
112
+struct nvme_fcp_op_w_sgl {
113
+ struct nvme_fc_fcp_op op;
114
+ struct scatterlist sgl[NVME_INLINE_SG_CNT];
115
+ uint8_t priv[];
116
+};
117
+
107118 struct nvme_fc_lport {
108119 struct nvme_fc_local_port localport;
109120
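Later in this patch the per-request cmd_size for this struct is computed with struct_size() from <linux/overflow.h> (included above). A sketch of what that evaluates to for the flexible priv[] array, using an illustrative helper:

#include <linux/overflow.h>

/* struct_size(p, member, n) == sizeof(*p) + n * sizeof(*p->member),
 * saturating to SIZE_MAX if the arithmetic would overflow. */
static size_t fcp_op_cmd_size(size_t fcprqst_priv_sz)
{
	return struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
			   fcprqst_priv_sz);
}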
....@@ -122,17 +133,21 @@
122133 struct list_head endp_list; /* for lport->endp_list */
123134 struct list_head ctrl_list;
124135 struct list_head ls_req_list;
136
+ struct list_head ls_rcv_list;
137
+ struct list_head disc_list;
125138 struct device *dev; /* physical device for dma */
126139 struct nvme_fc_lport *lport;
127140 spinlock_t lock;
128141 struct kref ref;
129142 atomic_t act_ctrl_cnt;
130143 unsigned long dev_loss_end;
144
+ struct work_struct lsrcv_work;
131145 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
132146
133
-enum nvme_fcctrl_flags {
134
- FCCTRL_TERMIO = (1 << 0),
135
-};
147
+/* fc_ctrl flags values - specified as bit positions */
148
+#define ASSOC_ACTIVE 0
149
+#define ASSOC_FAILED 1
150
+#define FCCTRL_TERMIO 2
136151
137152 struct nvme_fc_ctrl {
138153 spinlock_t lock;
....@@ -143,20 +158,19 @@
143158 u32 cnum;
144159
145160 bool ioq_live;
146
- bool assoc_active;
147
- atomic_t err_work_active;
148161 u64 association_id;
162
+ struct nvmefc_ls_rcv_op *rcv_disconn;
149163
150164 struct list_head ctrl_list; /* rport->ctrl_list */
151165
152166 struct blk_mq_tag_set admin_tag_set;
153167 struct blk_mq_tag_set tag_set;
154168
169
+ struct work_struct ioerr_work;
155170 struct delayed_work connect_work;
156
- struct work_struct err_work;
157171
158172 struct kref ref;
159
- u32 flags;
173
+ unsigned long flags;
160174 u32 iocnt;
161175 wait_queue_head_t ioabort_wait;
162176
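The flags word above becomes an unsigned long so it can be driven with the atomic bitops used throughout the rest of the patch; a sketch of the idiom (bit values are the defines added above, the function itself is illustrative):

#include <linux/bitops.h>

static void flags_idiom(unsigned long *flags)
{
	if (test_and_set_bit(ASSOC_ACTIVE, flags))
		return;				/* already active: bail out atomically */

	set_bit(FCCTRL_TERMIO, flags);		/* atomic read-modify-write */
	if (test_bit(ASSOC_FAILED, flags))	/* lockless test */
		clear_bit(FCCTRL_TERMIO, flags);
}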
....@@ -208,18 +222,24 @@
208222
209223 static struct workqueue_struct *nvme_fc_wq;
210224
225
+static bool nvme_fc_waiting_to_unload;
226
+static DECLARE_COMPLETION(nvme_fc_unload_proceed);
227
+
211228 /*
212229 * These items are short-term. They will eventually be moved into
213230 * a generic FC class. See comments in module init.
214231 */
215
-static struct class *fc_class;
216232 static struct device *fc_udev_device;
217233
234
+static void nvme_fc_complete_rq(struct request *rq);
218235
219236 /* *********************** FC-NVME Port Management ************************ */
220237
221238 static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
222239 struct nvme_fc_queue *, unsigned int);
240
+
241
+static void nvme_fc_handle_ls_rqst_work(struct work_struct *work);
242
+
223243
224244 static void
225245 nvme_fc_free_lport(struct kref *ref)
....@@ -234,6 +254,8 @@
234254 /* remove from transport list */
235255 spin_lock_irqsave(&nvme_fc_lock, flags);
236256 list_del(&lport->port_list);
257
+ if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
258
+ complete(&nvme_fc_unload_proceed);
237259 spin_unlock_irqrestore(&nvme_fc_lock, flags);
238260
239261 ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
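The complete() above pairs with the module unload path, which, in roughly the form sketched below (assumed shape, illustrative helper name), flags that it is waiting and then blocks until the last local port is freed:

#include <linux/completion.h>
#include <linux/spinlock.h>

static void nvme_fc_wait_for_lports(void)
{
	unsigned long flags;
	bool need_wait = false;

	spin_lock_irqsave(&nvme_fc_lock, flags);
	nvme_fc_waiting_to_unload = true;
	if (!list_empty(&nvme_fc_lport_list))
		need_wait = true;
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	if (need_wait)
		wait_for_completion(&nvme_fc_unload_proceed);
}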
....@@ -319,7 +341,7 @@
319341 * @template: LLDD entrypoints and operational parameters for the port
320342 * @dev: physical hardware device node port corresponds to. Will be
321343 * used for DMA mappings
322
- * @lport_p: pointer to a local port pointer. Upon success, the routine
344
+ * @portptr: pointer to a local port pointer. Upon success, the routine
323345 * will allocate a nvme_fc_local_port structure and place its
324346 * address in the local port pointer. Upon failure, local port
325347 * pointer will be set to 0.
....@@ -394,7 +416,10 @@
394416 newrec->ops = template;
395417 newrec->dev = dev;
396418 ida_init(&newrec->endp_cnt);
397
- newrec->localport.private = &newrec[1];
419
+ if (template->local_priv_sz)
420
+ newrec->localport.private = &newrec[1];
421
+ else
422
+ newrec->localport.private = NULL;
398423 newrec->localport.node_name = pinfo->node_name;
399424 newrec->localport.port_name = pinfo->port_name;
400425 newrec->localport.port_role = pinfo->port_role;
....@@ -427,8 +452,7 @@
427452 * nvme_fc_unregister_localport - transport entry point called by an
428453 * LLDD to deregister/remove a previously
429454 * registered a NVME host FC port.
430
- * @localport: pointer to the (registered) local port that is to be
431
- * deregistered.
455
+ * @portptr: pointer to the (registered) local port that is to be deregistered.
432456 *
433457 * Returns:
434458 * a completion status. Must be 0 upon success; a negative errno
....@@ -509,6 +533,7 @@
509533 list_del(&rport->endp_list);
510534 spin_unlock_irqrestore(&nvme_fc_lock, flags);
511535
536
+ WARN_ON(!list_empty(&rport->disc_list));
512537 ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
513538
514539 kfree(rport);
....@@ -633,7 +658,7 @@
633658 * @localport: pointer to the (registered) local port that the remote
634659 * subsystem port is connected to.
635660 * @pinfo: pointer to information about the port to be registered
636
- * @rport_p: pointer to a remote port pointer. Upon success, the routine
661
+ * @portptr: pointer to a remote port pointer. Upon success, the routine
637662 * will allocate a nvme_fc_remote_port structure and place its
638663 * address in the remote port pointer. Upon failure, remote port
639664 * pointer will be set to 0.
....@@ -696,13 +721,18 @@
696721 INIT_LIST_HEAD(&newrec->endp_list);
697722 INIT_LIST_HEAD(&newrec->ctrl_list);
698723 INIT_LIST_HEAD(&newrec->ls_req_list);
724
+ INIT_LIST_HEAD(&newrec->disc_list);
699725 kref_init(&newrec->ref);
700726 atomic_set(&newrec->act_ctrl_cnt, 0);
701727 spin_lock_init(&newrec->lock);
702728 newrec->remoteport.localport = &lport->localport;
729
+ INIT_LIST_HEAD(&newrec->ls_rcv_list);
703730 newrec->dev = lport->dev;
704731 newrec->lport = lport;
705
- newrec->remoteport.private = &newrec[1];
732
+ if (lport->ops->remote_priv_sz)
733
+ newrec->remoteport.private = &newrec[1];
734
+ else
735
+ newrec->remoteport.private = NULL;
706736 newrec->remoteport.port_role = pinfo->port_role;
707737 newrec->remoteport.node_name = pinfo->node_name;
708738 newrec->remoteport.port_name = pinfo->port_name;
....@@ -710,6 +740,7 @@
710740 newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
711741 newrec->remoteport.port_num = idx;
712742 __nvme_fc_set_dev_loss_tmo(newrec, pinfo);
743
+ INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work);
713744
714745 spin_lock_irqsave(&nvme_fc_lock, flags);
715746 list_add_tail(&newrec->endp_list, &lport->endp_list);
....@@ -799,6 +830,7 @@
799830 break;
800831
801832 case NVME_CTRL_DELETING:
833
+ case NVME_CTRL_DELETING_NOIO:
802834 default:
803835 /* no action to take - let it delete */
804836 break;
....@@ -809,8 +841,8 @@
809841 * nvme_fc_unregister_remoteport - transport entry point called by an
810842 * LLDD to deregister/remove a previously
811843 * registered a NVME subsystem FC port.
812
- * @remoteport: pointer to the (registered) remote port that is to be
813
- * deregistered.
844
+ * @portptr: pointer to the (registered) remote port that is to be
845
+ * deregistered.
814846 *
815847 * Returns:
816848 * a completion status. Must be 0 upon success; a negative errno
....@@ -999,6 +1031,7 @@
9991031 static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
10001032 static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
10011033
1034
+static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
10021035
10031036 static void
10041037 __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
....@@ -1139,41 +1172,6 @@
11391172 return __nvme_fc_send_ls_req(rport, lsop, done);
11401173 }
11411174
1142
-/* Validation Error indexes into the string table below */
1143
-enum {
1144
- VERR_NO_ERROR = 0,
1145
- VERR_LSACC = 1,
1146
- VERR_LSDESC_RQST = 2,
1147
- VERR_LSDESC_RQST_LEN = 3,
1148
- VERR_ASSOC_ID = 4,
1149
- VERR_ASSOC_ID_LEN = 5,
1150
- VERR_CONN_ID = 6,
1151
- VERR_CONN_ID_LEN = 7,
1152
- VERR_CR_ASSOC = 8,
1153
- VERR_CR_ASSOC_ACC_LEN = 9,
1154
- VERR_CR_CONN = 10,
1155
- VERR_CR_CONN_ACC_LEN = 11,
1156
- VERR_DISCONN = 12,
1157
- VERR_DISCONN_ACC_LEN = 13,
1158
-};
1159
-
1160
-static char *validation_errors[] = {
1161
- "OK",
1162
- "Not LS_ACC",
1163
- "Not LSDESC_RQST",
1164
- "Bad LSDESC_RQST Length",
1165
- "Not Association ID",
1166
- "Bad Association ID Length",
1167
- "Not Connection ID",
1168
- "Bad Connection ID Length",
1169
- "Not CR_ASSOC Rqst",
1170
- "Bad CR_ASSOC ACC Length",
1171
- "Not CR_CONN Rqst",
1172
- "Bad CR_CONN ACC Length",
1173
- "Not Disconnect Rqst",
1174
- "Bad Disconnect ACC Length",
1175
-};
1176
-
11771175 static int
11781176 nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
11791177 struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
....@@ -1182,21 +1180,27 @@
11821180 struct nvmefc_ls_req *lsreq;
11831181 struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
11841182 struct fcnvme_ls_cr_assoc_acc *assoc_acc;
1183
+ unsigned long flags;
11851184 int ret, fcret = 0;
11861185
11871186 lsop = kzalloc((sizeof(*lsop) +
1188
- ctrl->lport->ops->lsrqst_priv_sz +
1189
- sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL);
1187
+ sizeof(*assoc_rqst) + sizeof(*assoc_acc) +
1188
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
11901189 if (!lsop) {
1190
+ dev_info(ctrl->ctrl.device,
1191
+ "NVME-FC{%d}: send Create Association failed: ENOMEM\n",
1192
+ ctrl->cnum);
11911193 ret = -ENOMEM;
11921194 goto out_no_memory;
11931195 }
1194
- lsreq = &lsop->ls_req;
11951196
1196
- lsreq->private = (void *)&lsop[1];
1197
- assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)
1198
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
1197
+ assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1];
11991198 assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];
1199
+ lsreq = &lsop->ls_req;
1200
+ if (ctrl->lport->ops->lsrqst_priv_sz)
1201
+ lsreq->private = &assoc_acc[1];
1202
+ else
1203
+ lsreq->private = NULL;
12001204
12011205 assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
12021206 assoc_rqst->desc_list_len =
....@@ -1223,7 +1227,7 @@
12231227 lsreq->rqstlen = sizeof(*assoc_rqst);
12241228 lsreq->rspaddr = assoc_acc;
12251229 lsreq->rsplen = sizeof(*assoc_acc);
1226
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
1230
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
12271231
12281232 ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
12291233 if (ret)
....@@ -1263,14 +1267,16 @@
12631267 if (fcret) {
12641268 ret = -EBADF;
12651269 dev_err(ctrl->dev,
1266
- "q %d connect failed: %s\n",
1270
+ "q %d Create Association LS failed: %s\n",
12671271 queue->qnum, validation_errors[fcret]);
12681272 } else {
1273
+ spin_lock_irqsave(&ctrl->lock, flags);
12691274 ctrl->association_id =
12701275 be64_to_cpu(assoc_acc->associd.association_id);
12711276 queue->connection_id =
12721277 be64_to_cpu(assoc_acc->connectid.connection_id);
12731278 set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
1279
+ spin_unlock_irqrestore(&ctrl->lock, flags);
12741280 }
12751281
12761282 out_free_buffer:
....@@ -1294,18 +1300,23 @@
12941300 int ret, fcret = 0;
12951301
12961302 lsop = kzalloc((sizeof(*lsop) +
1297
- ctrl->lport->ops->lsrqst_priv_sz +
1298
- sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL);
1303
+ sizeof(*conn_rqst) + sizeof(*conn_acc) +
1304
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
12991305 if (!lsop) {
1306
+ dev_info(ctrl->ctrl.device,
1307
+ "NVME-FC{%d}: send Create Connection failed: ENOMEM\n",
1308
+ ctrl->cnum);
13001309 ret = -ENOMEM;
13011310 goto out_no_memory;
13021311 }
1303
- lsreq = &lsop->ls_req;
13041312
1305
- lsreq->private = (void *)&lsop[1];
1306
- conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)
1307
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
1313
+ conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1];
13081314 conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];
1315
+ lsreq = &lsop->ls_req;
1316
+ if (ctrl->lport->ops->lsrqst_priv_sz)
1317
+ lsreq->private = (void *)&conn_acc[1];
1318
+ else
1319
+ lsreq->private = NULL;
13091320
13101321 conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
13111322 conn_rqst->desc_list_len = cpu_to_be32(
....@@ -1331,7 +1342,7 @@
13311342 lsreq->rqstlen = sizeof(*conn_rqst);
13321343 lsreq->rspaddr = conn_acc;
13331344 lsreq->rsplen = sizeof(*conn_acc);
1334
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
1345
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
13351346
13361347 ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
13371348 if (ret)
....@@ -1362,7 +1373,7 @@
13621373 if (fcret) {
13631374 ret = -EBADF;
13641375 dev_err(ctrl->dev,
1365
- "q %d connect failed: %s\n",
1376
+ "q %d Create I/O Connection LS failed: %s\n",
13661377 queue->qnum, validation_errors[fcret]);
13671378 } else {
13681379 queue->connection_id =
....@@ -1375,7 +1386,7 @@
13751386 out_no_memory:
13761387 if (ret)
13771388 dev_err(ctrl->dev,
1378
- "queue %d connect command failed (%d).\n",
1389
+ "queue %d connect I/O queue failed (%d).\n",
13791390 queue->qnum, ret);
13801391 return ret;
13811392 }
....@@ -1387,7 +1398,7 @@
13871398
13881399 __nvme_fc_finish_ls_req(lsop);
13891400
1390
- /* fc-nvme iniator doesn't care about success or failure of cmd */
1401
+ /* fc-nvme initiator doesn't care about success or failure of cmd */
13911402
13921403 kfree(lsop);
13931404 }
....@@ -1412,66 +1423,392 @@
14121423 static void
14131424 nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
14141425 {
1415
- struct fcnvme_ls_disconnect_rqst *discon_rqst;
1416
- struct fcnvme_ls_disconnect_acc *discon_acc;
1426
+ struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
1427
+ struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
14171428 struct nvmefc_ls_req_op *lsop;
14181429 struct nvmefc_ls_req *lsreq;
14191430 int ret;
14201431
14211432 lsop = kzalloc((sizeof(*lsop) +
1422
- ctrl->lport->ops->lsrqst_priv_sz +
1423
- sizeof(*discon_rqst) + sizeof(*discon_acc)),
1424
- GFP_KERNEL);
1425
- if (!lsop)
1426
- /* couldn't sent it... too bad */
1433
+ sizeof(*discon_rqst) + sizeof(*discon_acc) +
1434
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
1435
+ if (!lsop) {
1436
+ dev_info(ctrl->ctrl.device,
1437
+ "NVME-FC{%d}: send Disconnect Association "
1438
+ "failed: ENOMEM\n",
1439
+ ctrl->cnum);
14271440 return;
1441
+ }
14281442
1443
+ discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1];
1444
+ discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
14291445 lsreq = &lsop->ls_req;
1446
+ if (ctrl->lport->ops->lsrqst_priv_sz)
1447
+ lsreq->private = (void *)&discon_acc[1];
1448
+ else
1449
+ lsreq->private = NULL;
14301450
1431
- lsreq->private = (void *)&lsop[1];
1432
- discon_rqst = (struct fcnvme_ls_disconnect_rqst *)
1433
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
1434
- discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1];
1435
-
1436
- discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
1437
- discon_rqst->desc_list_len = cpu_to_be32(
1438
- sizeof(struct fcnvme_lsdesc_assoc_id) +
1439
- sizeof(struct fcnvme_lsdesc_disconn_cmd));
1440
-
1441
- discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
1442
- discon_rqst->associd.desc_len =
1443
- fcnvme_lsdesc_len(
1444
- sizeof(struct fcnvme_lsdesc_assoc_id));
1445
-
1446
- discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
1447
-
1448
- discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
1449
- FCNVME_LSDESC_DISCONN_CMD);
1450
- discon_rqst->discon_cmd.desc_len =
1451
- fcnvme_lsdesc_len(
1452
- sizeof(struct fcnvme_lsdesc_disconn_cmd));
1453
- discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION;
1454
- discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id);
1455
-
1456
- lsreq->rqstaddr = discon_rqst;
1457
- lsreq->rqstlen = sizeof(*discon_rqst);
1458
- lsreq->rspaddr = discon_acc;
1459
- lsreq->rsplen = sizeof(*discon_acc);
1460
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
1451
+ nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
1452
+ ctrl->association_id);
14611453
14621454 ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
14631455 nvme_fc_disconnect_assoc_done);
14641456 if (ret)
14651457 kfree(lsop);
1466
-
1467
- /* only meaningful part to terminating the association */
1468
- ctrl->association_id = 0;
14691458 }
1459
+
1460
+static void
1461
+nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
1462
+{
1463
+ struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private;
1464
+ struct nvme_fc_rport *rport = lsop->rport;
1465
+ struct nvme_fc_lport *lport = rport->lport;
1466
+ unsigned long flags;
1467
+
1468
+ spin_lock_irqsave(&rport->lock, flags);
1469
+ list_del(&lsop->lsrcv_list);
1470
+ spin_unlock_irqrestore(&rport->lock, flags);
1471
+
1472
+ fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma,
1473
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
1474
+ fc_dma_unmap_single(lport->dev, lsop->rspdma,
1475
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
1476
+
1477
+ kfree(lsop);
1478
+
1479
+ nvme_fc_rport_put(rport);
1480
+}
1481
+
1482
+static void
1483
+nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop)
1484
+{
1485
+ struct nvme_fc_rport *rport = lsop->rport;
1486
+ struct nvme_fc_lport *lport = rport->lport;
1487
+ struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
1488
+ int ret;
1489
+
1490
+ fc_dma_sync_single_for_device(lport->dev, lsop->rspdma,
1491
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
1492
+
1493
+ ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport,
1494
+ lsop->lsrsp);
1495
+ if (ret) {
1496
+ dev_warn(lport->dev,
1497
+ "LLDD rejected LS RSP xmt: LS %d status %d\n",
1498
+ w0->ls_cmd, ret);
1499
+ nvme_fc_xmt_ls_rsp_done(lsop->lsrsp);
1500
+ return;
1501
+ }
1502
+}
1503
+
1504
+static struct nvme_fc_ctrl *
1505
+nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport,
1506
+ struct nvmefc_ls_rcv_op *lsop)
1507
+{
1508
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
1509
+ &lsop->rqstbuf->rq_dis_assoc;
1510
+ struct nvme_fc_ctrl *ctrl, *ret = NULL;
1511
+ struct nvmefc_ls_rcv_op *oldls = NULL;
1512
+ u64 association_id = be64_to_cpu(rqst->associd.association_id);
1513
+ unsigned long flags;
1514
+
1515
+ spin_lock_irqsave(&rport->lock, flags);
1516
+
1517
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
1518
+ if (!nvme_fc_ctrl_get(ctrl))
1519
+ continue;
1520
+ spin_lock(&ctrl->lock);
1521
+ if (association_id == ctrl->association_id) {
1522
+ oldls = ctrl->rcv_disconn;
1523
+ ctrl->rcv_disconn = lsop;
1524
+ ret = ctrl;
1525
+ }
1526
+ spin_unlock(&ctrl->lock);
1527
+ if (ret)
1528
+ /* leave the ctrl get reference */
1529
+ break;
1530
+ nvme_fc_ctrl_put(ctrl);
1531
+ }
1532
+
1533
+ spin_unlock_irqrestore(&rport->lock, flags);
1534
+
1535
+ /* transmit a response for anything that was pending */
1536
+ if (oldls) {
1537
+ dev_info(rport->lport->dev,
1538
+ "NVME-FC{%d}: Multiple Disconnect Association "
1539
+ "LS's received\n", ctrl->cnum);
1540
+ /* overwrite good response with bogus failure */
1541
+ oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf,
1542
+ sizeof(*oldls->rspbuf),
1543
+ rqst->w0.ls_cmd,
1544
+ FCNVME_RJT_RC_UNAB,
1545
+ FCNVME_RJT_EXP_NONE, 0);
1546
+ nvme_fc_xmt_ls_rsp(oldls);
1547
+ }
1548
+
1549
+ return ret;
1550
+}
1551
+
1552
+/*
1553
+ * returns true to mean LS handled and ls_rsp can be sent
1554
+ * returns false to defer ls_rsp xmt (will be done as part of
1555
+ * association termination)
1556
+ */
1557
+static bool
1558
+nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop)
1559
+{
1560
+ struct nvme_fc_rport *rport = lsop->rport;
1561
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
1562
+ &lsop->rqstbuf->rq_dis_assoc;
1563
+ struct fcnvme_ls_disconnect_assoc_acc *acc =
1564
+ &lsop->rspbuf->rsp_dis_assoc;
1565
+ struct nvme_fc_ctrl *ctrl = NULL;
1566
+ int ret = 0;
1567
+
1568
+ memset(acc, 0, sizeof(*acc));
1569
+
1570
+ ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst);
1571
+ if (!ret) {
1572
+ /* match an active association */
1573
+ ctrl = nvme_fc_match_disconn_ls(rport, lsop);
1574
+ if (!ctrl)
1575
+ ret = VERR_NO_ASSOC;
1576
+ }
1577
+
1578
+ if (ret) {
1579
+ dev_info(rport->lport->dev,
1580
+ "Disconnect LS failed: %s\n",
1581
+ validation_errors[ret]);
1582
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc,
1583
+ sizeof(*acc), rqst->w0.ls_cmd,
1584
+ (ret == VERR_NO_ASSOC) ?
1585
+ FCNVME_RJT_RC_INV_ASSOC :
1586
+ FCNVME_RJT_RC_LOGIC,
1587
+ FCNVME_RJT_EXP_NONE, 0);
1588
+ return true;
1589
+ }
1590
+
1591
+ /* format an ACCept response */
1592
+
1593
+ lsop->lsrsp->rsplen = sizeof(*acc);
1594
+
1595
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
1596
+ fcnvme_lsdesc_len(
1597
+ sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
1598
+ FCNVME_LS_DISCONNECT_ASSOC);
1599
+
1600
+ /*
1601
+ * the transmit of the response will occur after the exchanges
1602
+ * for the association have been ABTS'd by
1603
+ * nvme_fc_delete_association().
1604
+ */
1605
+
1606
+ /* fail the association */
1607
+ nvme_fc_error_recovery(ctrl, "Disconnect Association LS received");
1608
+
1609
+ /* release the reference taken by nvme_fc_match_disconn_ls() */
1610
+ nvme_fc_ctrl_put(ctrl);
1611
+
1612
+ return false;
1613
+}
1614
+
1615
+/*
1616
+ * Actual Processing routine for received FC-NVME LS Requests from the LLD
1617
+ * returns true if a response should be sent afterward, false if rsp will
1618
+ * be sent asynchronously.
1619
+ */
1620
+static bool
1621
+nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop)
1622
+{
1623
+ struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
1624
+ bool ret = true;
1625
+
1626
+ lsop->lsrsp->nvme_fc_private = lsop;
1627
+ lsop->lsrsp->rspbuf = lsop->rspbuf;
1628
+ lsop->lsrsp->rspdma = lsop->rspdma;
1629
+ lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done;
1630
+ /* Be preventative. handlers will later set to valid length */
1631
+ lsop->lsrsp->rsplen = 0;
1632
+
1633
+ /*
1634
+ * handlers:
1635
+ * parse request input, execute the request, and format the
1636
+ * LS response
1637
+ */
1638
+ switch (w0->ls_cmd) {
1639
+ case FCNVME_LS_DISCONNECT_ASSOC:
1640
+ ret = nvme_fc_ls_disconnect_assoc(lsop);
1641
+ break;
1642
+ case FCNVME_LS_DISCONNECT_CONN:
1643
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
1644
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
1645
+ FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0);
1646
+ break;
1647
+ case FCNVME_LS_CREATE_ASSOCIATION:
1648
+ case FCNVME_LS_CREATE_CONNECTION:
1649
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
1650
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
1651
+ FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0);
1652
+ break;
1653
+ default:
1654
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
1655
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
1656
+ FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
1657
+ break;
1658
+ }
1659
+
1660
+ return(ret);
1661
+}
1662
+
1663
+static void
1664
+nvme_fc_handle_ls_rqst_work(struct work_struct *work)
1665
+{
1666
+ struct nvme_fc_rport *rport =
1667
+ container_of(work, struct nvme_fc_rport, lsrcv_work);
1668
+ struct fcnvme_ls_rqst_w0 *w0;
1669
+ struct nvmefc_ls_rcv_op *lsop;
1670
+ unsigned long flags;
1671
+ bool sendrsp;
1672
+
1673
+restart:
1674
+ sendrsp = true;
1675
+ spin_lock_irqsave(&rport->lock, flags);
1676
+ list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) {
1677
+ if (lsop->handled)
1678
+ continue;
1679
+
1680
+ lsop->handled = true;
1681
+ if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
1682
+ spin_unlock_irqrestore(&rport->lock, flags);
1683
+ sendrsp = nvme_fc_handle_ls_rqst(lsop);
1684
+ } else {
1685
+ spin_unlock_irqrestore(&rport->lock, flags);
1686
+ w0 = &lsop->rqstbuf->w0;
1687
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(
1688
+ lsop->rspbuf,
1689
+ sizeof(*lsop->rspbuf),
1690
+ w0->ls_cmd,
1691
+ FCNVME_RJT_RC_UNAB,
1692
+ FCNVME_RJT_EXP_NONE, 0);
1693
+ }
1694
+ if (sendrsp)
1695
+ nvme_fc_xmt_ls_rsp(lsop);
1696
+ goto restart;
1697
+ }
1698
+ spin_unlock_irqrestore(&rport->lock, flags);
1699
+}
1700
+
1701
+/**
1702
+ * nvme_fc_rcv_ls_req - transport entry point called by an LLDD
1703
+ * upon the reception of a NVME LS request.
1704
+ *
1705
+ * The nvme-fc layer will copy payload to an internal structure for
1706
+ * processing. As such, upon completion of the routine, the LLDD may
1707
+ * immediately free/reuse the LS request buffer passed in the call.
1708
+ *
1709
+ * If this routine returns error, the LLDD should abort the exchange.
1710
+ *
1711
+ * @remoteport: pointer to the (registered) remote port that the LS
1712
+ * was received from. The remoteport is associated with
1713
+ * a specific localport.
1714
+ * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be
1715
+ * used to reference the exchange corresponding to the LS
1716
+ * when issuing an ls response.
1717
+ * @lsreqbuf: pointer to the buffer containing the LS Request
1718
+ * @lsreqbuf_len: length, in bytes, of the received LS request
1719
+ */
1720
+int
1721
+nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
1722
+ struct nvmefc_ls_rsp *lsrsp,
1723
+ void *lsreqbuf, u32 lsreqbuf_len)
1724
+{
1725
+ struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
1726
+ struct nvme_fc_lport *lport = rport->lport;
1727
+ struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf;
1728
+ struct nvmefc_ls_rcv_op *lsop;
1729
+ unsigned long flags;
1730
+ int ret;
1731
+
1732
+ nvme_fc_rport_get(rport);
1733
+
1734
+ /* validate there's a routine to transmit a response */
1735
+ if (!lport->ops->xmt_ls_rsp) {
1736
+ dev_info(lport->dev,
1737
+ "RCV %s LS failed: no LLDD xmt_ls_rsp\n",
1738
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1739
+ nvmefc_ls_names[w0->ls_cmd] : "");
1740
+ ret = -EINVAL;
1741
+ goto out_put;
1742
+ }
1743
+
1744
+ if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) {
1745
+ dev_info(lport->dev,
1746
+ "RCV %s LS failed: payload too large\n",
1747
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1748
+ nvmefc_ls_names[w0->ls_cmd] : "");
1749
+ ret = -E2BIG;
1750
+ goto out_put;
1751
+ }
1752
+
1753
+ lsop = kzalloc(sizeof(*lsop) +
1754
+ sizeof(union nvmefc_ls_requests) +
1755
+ sizeof(union nvmefc_ls_responses),
1756
+ GFP_KERNEL);
1757
+ if (!lsop) {
1758
+ dev_info(lport->dev,
1759
+ "RCV %s LS failed: No memory\n",
1760
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1761
+ nvmefc_ls_names[w0->ls_cmd] : "");
1762
+ ret = -ENOMEM;
1763
+ goto out_put;
1764
+ }
1765
+ lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1];
1766
+ lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1];
1767
+
1768
+ lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf,
1769
+ sizeof(*lsop->rspbuf),
1770
+ DMA_TO_DEVICE);
1771
+ if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) {
1772
+ dev_info(lport->dev,
1773
+ "RCV %s LS failed: DMA mapping failure\n",
1774
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
1775
+ nvmefc_ls_names[w0->ls_cmd] : "");
1776
+ ret = -EFAULT;
1777
+ goto out_free;
1778
+ }
1779
+
1780
+ lsop->rport = rport;
1781
+ lsop->lsrsp = lsrsp;
1782
+
1783
+ memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len);
1784
+ lsop->rqstdatalen = lsreqbuf_len;
1785
+
1786
+ spin_lock_irqsave(&rport->lock, flags);
1787
+ if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) {
1788
+ spin_unlock_irqrestore(&rport->lock, flags);
1789
+ ret = -ENOTCONN;
1790
+ goto out_unmap;
1791
+ }
1792
+ list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list);
1793
+ spin_unlock_irqrestore(&rport->lock, flags);
1794
+
1795
+ schedule_work(&rport->lsrcv_work);
1796
+
1797
+ return 0;
1798
+
1799
+out_unmap:
1800
+ fc_dma_unmap_single(lport->dev, lsop->rspdma,
1801
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
1802
+out_free:
1803
+ kfree(lsop);
1804
+out_put:
1805
+ nvme_fc_rport_put(rport);
1806
+ return ret;
1807
+}
1808
+EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req);
14701809
14711810
14721811 /* *********************** NVME Ctrl Routines **************************** */
1473
-
1474
-static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
14751812
14761813 static void
14771814 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
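The list walk in nvme_fc_handle_ls_rqst_work() above follows a common claim-then-drop-the-lock pattern; a stripped-down sketch of the idiom (fields reuse the nvmefc_ls_rcv_op members defined earlier, the handler call is a placeholder):

static void drain_ls_rcv_list(spinlock_t *lock, struct list_head *head)
{
	struct nvmefc_ls_rcv_op *lsop;
	unsigned long flags;

restart:
	spin_lock_irqsave(lock, flags);
	list_for_each_entry(lsop, head, lsrcv_list) {
		if (lsop->handled)
			continue;
		lsop->handled = true;		/* claim the entry under the lock */
		spin_unlock_irqrestore(lock, flags);
		/* handle lsop here with the lock dropped (may sleep) */
		goto restart;			/* list may have changed; rescan */
	}
	spin_unlock_irqrestore(lock, flags);
}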
....@@ -1504,8 +1841,10 @@
15041841 opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
15051842 if (opstate != FCPOP_STATE_ACTIVE)
15061843 atomic_set(&op->state, opstate);
1507
- else if (ctrl->flags & FCCTRL_TERMIO)
1844
+ else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
1845
+ op->flags |= FCOP_FLAGS_TERMIO;
15081846 ctrl->iocnt++;
1847
+ }
15091848 spin_unlock_irqrestore(&ctrl->lock, flags);
15101849
15111850 if (opstate != FCPOP_STATE_ACTIVE)
....@@ -1541,12 +1880,22 @@
15411880
15421881 if (opstate == FCPOP_STATE_ABORTED) {
15431882 spin_lock_irqsave(&ctrl->lock, flags);
1544
- if (ctrl->flags & FCCTRL_TERMIO) {
1883
+ if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
1884
+ op->flags & FCOP_FLAGS_TERMIO) {
15451885 if (!--ctrl->iocnt)
15461886 wake_up(&ctrl->ioabort_wait);
15471887 }
15481888 spin_unlock_irqrestore(&ctrl->lock, flags);
15491889 }
1890
+}
1891
+
1892
+static void
1893
+nvme_fc_ctrl_ioerr_work(struct work_struct *work)
1894
+{
1895
+ struct nvme_fc_ctrl *ctrl =
1896
+ container_of(work, struct nvme_fc_ctrl, ioerr_work);
1897
+
1898
+ nvme_fc_error_recovery(ctrl, "transport detected io error");
15501899 }
15511900
15521901 static void
....@@ -1607,9 +1956,13 @@
16071956 sizeof(op->rsp_iu), DMA_FROM_DEVICE);
16081957
16091958 if (opstate == FCPOP_STATE_ABORTED)
1610
- status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
1611
- else if (freq->status)
1612
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
1959
+ status = cpu_to_le16(NVME_SC_HOST_ABORTED_CMD << 1);
1960
+ else if (freq->status) {
1961
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
1962
+ dev_info(ctrl->ctrl.device,
1963
+ "NVME-FC{%d}: io failed due to lldd error %d\n",
1964
+ ctrl->cnum, freq->status);
1965
+ }
16131966
16141967 /*
16151968 * For the linux implementation, if we have an unsuccessful
....@@ -1636,8 +1989,13 @@
16361989 * no payload in the CQE by the transport.
16371990 */
16381991 if (freq->transferred_length !=
1639
- be32_to_cpu(op->cmd_iu.data_len)) {
1640
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
1992
+ be32_to_cpu(op->cmd_iu.data_len)) {
1993
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
1994
+ dev_info(ctrl->ctrl.device,
1995
+ "NVME-FC{%d}: io failed due to bad transfer "
1996
+ "length: %d vs expected %d\n",
1997
+ ctrl->cnum, freq->transferred_length,
1998
+ be32_to_cpu(op->cmd_iu.data_len));
16411999 goto done;
16422000 }
16432001 result.u64 = 0;
....@@ -1652,9 +2010,19 @@
16522010 (freq->rcv_rsplen / 4) ||
16532011 be32_to_cpu(op->rsp_iu.xfrd_len) !=
16542012 freq->transferred_length ||
1655
- op->rsp_iu.status_code ||
2013
+ op->rsp_iu.ersp_result ||
16562014 sqe->common.command_id != cqe->command_id)) {
1657
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
2015
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
2016
+ dev_info(ctrl->ctrl.device,
2017
+ "NVME-FC{%d}: io failed due to bad NVMe_ERSP: "
2018
+ "iu len %d, xfr len %d vs %d, status code "
2019
+ "%d, cmdid %d vs %d\n",
2020
+ ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len),
2021
+ be32_to_cpu(op->rsp_iu.xfrd_len),
2022
+ freq->transferred_length,
2023
+ op->rsp_iu.ersp_result,
2024
+ sqe->common.command_id,
2025
+ cqe->command_id);
16582026 goto done;
16592027 }
16602028 result = cqe->result;
....@@ -1662,7 +2030,11 @@
16622030 break;
16632031
16642032 default:
1665
- status = cpu_to_le16(NVME_SC_INTERNAL << 1);
2033
+ status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
2034
+ dev_info(ctrl->ctrl.device,
2035
+ "NVME-FC{%d}: io failed due to odd NVMe_xRSP iu "
2036
+ "len %d\n",
2037
+ ctrl->cnum, freq->rcv_rsplen);
16662038 goto done;
16672039 }
16682040
....@@ -1679,11 +2051,12 @@
16792051 }
16802052
16812053 __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
1682
- nvme_end_request(rq, status, result);
2054
+ if (!nvme_try_complete_req(rq, status, result))
2055
+ nvme_fc_complete_rq(rq);
16832056
16842057 check_error:
1685
- if (terminate_assoc)
1686
- nvme_fc_error_recovery(ctrl, "transport detected io error");
2058
+ if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
2059
+ queue_work(nvme_reset_wq, &ctrl->ioerr_work);
16872060 }
16882061
16892062 static int
....@@ -1691,6 +2064,8 @@
16912064 struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op,
16922065 struct request *rq, u32 rqno)
16932066 {
2067
+ struct nvme_fcp_op_w_sgl *op_w_sgl =
2068
+ container_of(op, typeof(*op_w_sgl), op);
16942069 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
16952070 int ret = 0;
16962071
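The container_of() above recovers the wrapper from the embedded op that the block layer hands back; equivalently, as a small illustrative helper:

#include <linux/kernel.h>

static inline struct nvme_fcp_op_w_sgl *
nvme_fcp_op_to_wrapper(struct nvme_fc_fcp_op *op)
{
	return container_of(op, struct nvme_fcp_op_w_sgl, op);
}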
....@@ -1700,16 +2075,19 @@
17002075 op->fcp_req.rspaddr = &op->rsp_iu;
17012076 op->fcp_req.rsplen = sizeof(op->rsp_iu);
17022077 op->fcp_req.done = nvme_fc_fcpio_done;
1703
- op->fcp_req.first_sgl = (struct scatterlist *)&op[1];
1704
- op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
17052078 op->ctrl = ctrl;
17062079 op->queue = queue;
17072080 op->rq = rq;
17082081 op->rqno = rqno;
17092082
1710
- cmdiu->scsi_id = NVME_CMD_SCSI_ID;
2083
+ cmdiu->format_id = NVME_CMD_FORMAT_ID;
17112084 cmdiu->fc_id = NVME_CMD_FC_ID;
17122085 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
2086
+ if (queue->qnum)
2087
+ cmdiu->rsv_cat = fccmnd_set_cat_css(0,
2088
+ (NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT));
2089
+ else
2090
+ cmdiu->rsv_cat = fccmnd_set_cat_admin(0);
17132091
17142092 op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
17152093 &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
....@@ -1739,12 +2117,18 @@
17392117 unsigned int hctx_idx, unsigned int numa_node)
17402118 {
17412119 struct nvme_fc_ctrl *ctrl = set->driver_data;
1742
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
2120
+ struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq);
17432121 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
17442122 struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
2123
+ int res;
17452124
2125
+ res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
2126
+ if (res)
2127
+ return res;
2128
+ op->op.fcp_req.first_sgl = op->sgl;
2129
+ op->op.fcp_req.private = &op->priv[0];
17462130 nvme_req(rq)->ctrl = &ctrl->ctrl;
1747
- return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
2131
+ return res;
17482132 }
17492133
17502134 static int
....@@ -1753,15 +2137,17 @@
17532137 struct nvme_fc_fcp_op *aen_op;
17542138 struct nvme_fc_cmd_iu *cmdiu;
17552139 struct nvme_command *sqe;
1756
- void *private;
2140
+ void *private = NULL;
17572141 int i, ret;
17582142
17592143 aen_op = ctrl->aen_ops;
17602144 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
1761
- private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
2145
+ if (ctrl->lport->ops->fcprqst_priv_sz) {
2146
+ private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
17622147 GFP_KERNEL);
1763
- if (!private)
1764
- return -ENOMEM;
2148
+ if (!private)
2149
+ return -ENOMEM;
2150
+ }
17652151
17662152 cmdiu = &aen_op->cmd_iu;
17672153 sqe = &cmdiu->sqe;
....@@ -1774,7 +2160,6 @@
17742160 }
17752161
17762162 aen_op->flags = FCOP_FLAGS_AEN;
1777
- aen_op->fcp_req.first_sgl = NULL; /* no sg list */
17782163 aen_op->fcp_req.private = private;
17792164
17802165 memset(sqe, 0, sizeof(*sqe));
....@@ -1794,9 +2179,6 @@
17942179 cancel_work_sync(&ctrl->ctrl.async_event_work);
17952180 aen_op = ctrl->aen_ops;
17962181 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
1797
- if (!aen_op->fcp_req.private)
1798
- continue;
1799
-
18002182 __nvme_fc_exit_request(ctrl, aen_op);
18012183
18022184 kfree(aen_op->fcp_req.private);
....@@ -1948,7 +2330,7 @@
19482330 return 0;
19492331
19502332 delete_queues:
1951
- for (; i >= 0; i--)
2333
+ for (; i > 0; i--)
19522334 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
19532335 return ret;
19542336 }
....@@ -1963,7 +2345,7 @@
19632345 (qsize / 5));
19642346 if (ret)
19652347 break;
1966
- ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
2348
+ ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false);
19672349 if (ret)
19682350 break;
19692351
....@@ -2001,6 +2383,7 @@
20012383
20022384 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
20032385 blk_cleanup_queue(ctrl->ctrl.admin_q);
2386
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
20042387 blk_mq_free_tag_set(&ctrl->admin_tag_set);
20052388
20062389 kfree(ctrl->queues);
....@@ -2040,24 +2423,112 @@
20402423 nvme_fc_ctrl_put(ctrl);
20412424 }
20422425
2426
+/*
2427
+ * This routine is used by the transport when it needs to find active
2428
+ * io on a queue that is to be terminated. The transport uses
2429
+ * blk_mq_tagset_busy_iter() to find the busy requests, which then invoke
2430
+ * this routine to kill them on a 1 by 1 basis.
2431
+ *
2432
+ * As FC allocates FC exchange for each io, the transport must contact
2433
+ * the LLDD to terminate the exchange, thus releasing the FC exchange.
2434
+ * After terminating the exchange the LLDD will call the transport's
2435
+ * normal io done path for the request, but it will have an aborted
2436
+ * status. The done path will return the io request back to the block
2437
+ * layer with an error status.
2438
+ */
2439
+static bool
2440
+nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
2441
+{
2442
+ struct nvme_ctrl *nctrl = data;
2443
+ struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
2444
+ struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
2445
+
2446
+ op->nreq.flags |= NVME_REQ_CANCELLED;
2447
+ __nvme_fc_abort_op(ctrl, op);
2448
+ return true;
2449
+}
2450
+
2451
+/*
2452
+ * This routine runs through all outstanding commands on the association
2453
+ * and aborts them. This routine is typically called by the
2454
+ * delete_association routine. It is also called due to an error during
2455
+ * reconnect. In that scenario, it is most likely a command that initializes
2456
+ * the controller, including fabric Connect commands on io queues, that
2457
+ * may have timed out or failed thus the io must be killed for the connect
2458
+ * thread to see the error.
2459
+ */
2460
+static void
2461
+__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
2462
+{
2463
+ int q;
2464
+
2465
+ /*
2466
+ * if aborting io, the queues are no longer good, mark them
2467
+ * all as not live.
2468
+ */
2469
+ if (ctrl->ctrl.queue_count > 1) {
2470
+ for (q = 1; q < ctrl->ctrl.queue_count; q++)
2471
+ clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[q].flags);
2472
+ }
2473
+ clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
2474
+
2475
+ /*
2476
+ * If io queues are present, stop them and terminate all outstanding
2477
+ * ios on them. As FC allocates FC exchange for each io, the
2478
+ * transport must contact the LLDD to terminate the exchange,
2479
+ * thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
2480
+ * to tell us what io's are busy and invoke a transport routine
2481
+ * to kill them with the LLDD. After terminating the exchange
2482
+ * the LLDD will call the transport's normal io done path, but it
2483
+ * will have an aborted status. The done path will return the
2484
+ * io requests back to the block layer as part of normal completions
2485
+ * (but with error status).
2486
+ */
2487
+ if (ctrl->ctrl.queue_count > 1) {
2488
+ nvme_stop_queues(&ctrl->ctrl);
2489
+ nvme_sync_io_queues(&ctrl->ctrl);
2490
+ blk_mq_tagset_busy_iter(&ctrl->tag_set,
2491
+ nvme_fc_terminate_exchange, &ctrl->ctrl);
2492
+ blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
2493
+ if (start_queues)
2494
+ nvme_start_queues(&ctrl->ctrl);
2495
+ }
2496
+
2497
+ /*
2498
+ * Other transports, which don't have link-level contexts bound
2499
+ * to sqe's, would try to gracefully shutdown the controller by
2500
+ * writing the registers for shutdown and polling (call
2501
+ * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
2502
+ * just aborted and we will wait on those contexts, and given
2503
+ * there was no indication of how live the controller is on the
2504
+ * link, don't send more io to create more contexts for the
2505
+ * shutdown. Let the controller fail via keepalive failure if
2506
+ * its still present.
2507
+ */
2508
+
2509
+ /*
2510
+ * clean up the admin queue. Same thing as above.
2511
+ */
2512
+ blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
2513
+ blk_sync_queue(ctrl->ctrl.admin_q);
2514
+ blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
2515
+ nvme_fc_terminate_exchange, &ctrl->ctrl);
2516
+ blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
2517
+}
2518
+
20432519 static void
20442520 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
20452521 {
2046
- int active;
2047
-
20482522 /*
2049
- * if an error (io timeout, etc) while (re)connecting,
2050
- * it's an error on creating the new association.
2051
- * Start the error recovery thread if it hasn't already
2052
- * been started. It is expected there could be multiple
2053
- * ios hitting this path before things are cleaned up.
2523
+ * if an error (io timeout, etc) while (re)connecting, the remote
2524
+ * port requested terminating of the association (disconnect_ls)
2525
+ * or an error (timeout or abort) occurred on an io while creating
2526
+ * the controller. Abort any ios on the association and let the
2527
+ * create_association error path resolve things.
20542528 */
20552529 if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
2056
- active = atomic_xchg(&ctrl->err_work_active, 1);
2057
- if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) {
2058
- atomic_set(&ctrl->err_work_active, 0);
2059
- WARN_ON(1);
2060
- }
2530
+ __nvme_fc_abort_outstanding_ios(ctrl, true);
2531
+ set_bit(ASSOC_FAILED, &ctrl->flags);
20612532 return;
20622533 }
20632534
....@@ -2066,7 +2537,7 @@
20662537 return;
20672538
20682539 dev_warn(ctrl->ctrl.device,
2069
- "NVME-FC{%d}: transport association error detected: %s\n",
2540
+ "NVME-FC{%d}: transport association event: %s\n",
20702541 ctrl->cnum, errmsg);
20712542 dev_warn(ctrl->ctrl.device,
20722543 "NVME-FC{%d}: resetting controller\n", ctrl->cnum);
....@@ -2079,15 +2550,20 @@
20792550 {
20802551 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
20812552 struct nvme_fc_ctrl *ctrl = op->ctrl;
2553
+ struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
2554
+ struct nvme_command *sqe = &cmdiu->sqe;
20822555
20832556 /*
2084
- * we can't individually ABTS an io without affecting the queue,
2085
- * thus killing the queue, and thus the association.
2086
- * So resolve by performing a controller reset, which will stop
2087
- * the host/io stack, terminate the association on the link,
2088
- * and recreate an association on the link.
2557
+ * Attempt to abort the offending command. Command completion
2558
+ * will detect the aborted io and will fail the connection.
20892559 */
2090
- nvme_fc_error_recovery(ctrl, "io timeout error");
2560
+ dev_info(ctrl->ctrl.device,
2561
+ "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
2562
+ "x%08x/x%08x\n",
2563
+ ctrl->cnum, op->queue->qnum, sqe->common.opcode,
2564
+ sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
2565
+ if (__nvme_fc_abort_op(ctrl, op))
2566
+ nvme_fc_error_recovery(ctrl, "io timeout abort failed");
20912567
20922568 /*
20932569 * the io abort has been initiated. Have the reset timer
....@@ -2102,27 +2578,26 @@
21022578 struct nvme_fc_fcp_op *op)
21032579 {
21042580 struct nvmefc_fcp_req *freq = &op->fcp_req;
2105
- enum dma_data_direction dir;
21062581 int ret;
21072582
21082583 freq->sg_cnt = 0;
21092584
2110
- if (!blk_rq_payload_bytes(rq))
2585
+ if (!blk_rq_nr_phys_segments(rq))
21112586 return 0;
21122587
21132588 freq->sg_table.sgl = freq->first_sgl;
21142589 ret = sg_alloc_table_chained(&freq->sg_table,
2115
- blk_rq_nr_phys_segments(rq), freq->sg_table.sgl);
2590
+ blk_rq_nr_phys_segments(rq), freq->sg_table.sgl,
2591
+ NVME_INLINE_SG_CNT);
21162592 if (ret)
21172593 return -ENOMEM;
21182594
21192595 op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
21202596 WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
2121
- dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
21222597 freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
2123
- op->nents, dir);
2598
+ op->nents, rq_dma_dir(rq));
21242599 if (unlikely(freq->sg_cnt <= 0)) {
2125
- sg_free_table_chained(&freq->sg_table, true);
2600
+ sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
21262601 freq->sg_cnt = 0;
21272602 return -EFAULT;
21282603 }
....@@ -2143,12 +2618,9 @@
21432618 return;
21442619
21452620 fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
2146
- ((rq_data_dir(rq) == WRITE) ?
2147
- DMA_TO_DEVICE : DMA_FROM_DEVICE));
2621
+ rq_dma_dir(rq));
21482622
2149
- nvme_cleanup_cmd(rq);
2150
-
2151
- sg_free_table_chained(&freq->sg_table, true);
2623
+ sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
21522624
21532625 freq->sg_cnt = 0;
21542626 }
....@@ -2275,8 +2747,10 @@
22752747 opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
22762748 __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
22772749
2278
- if (!(op->flags & FCOP_FLAGS_AEN))
2750
+ if (!(op->flags & FCOP_FLAGS_AEN)) {
22792751 nvme_fc_unmap_data(ctrl, op->rq, op);
2752
+ nvme_cleanup_cmd(op->rq);
2753
+ }
22802754
22812755 nvme_fc_ctrl_put(ctrl);
22822756
....@@ -2314,46 +2788,25 @@
23142788 if (ret)
23152789 return ret;
23162790
2317
- data_len = blk_rq_payload_bytes(rq);
2318
- if (data_len)
2791
+ /*
2792
+ * nvme core doesn't quite treat the rq opaquely. Commands such
2793
+ * as WRITE ZEROES will return a non-zero rq payload_bytes yet
2794
+ * there is no actual payload to be transferred.
2795
+ * To get it right, key data transmission on there being 1 or
2796
+ * more physical segments in the sg list. If there is no
2797
+ * physical segments, there is no payload.
2798
+ */
2799
+ if (blk_rq_nr_phys_segments(rq)) {
2800
+ data_len = blk_rq_payload_bytes(rq);
23192801 io_dir = ((rq_data_dir(rq) == WRITE) ?
23202802 NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
2321
- else
2803
+ } else {
2804
+ data_len = 0;
23222805 io_dir = NVMEFC_FCP_NODATA;
2806
+ }
2807
+
23232808
23242809 return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
2325
-}
2326
-
2327
-static struct blk_mq_tags *
2328
-nvme_fc_tagset(struct nvme_fc_queue *queue)
2329
-{
2330
- if (queue->qnum == 0)
2331
- return queue->ctrl->admin_tag_set.tags[queue->qnum];
2332
-
2333
- return queue->ctrl->tag_set.tags[queue->qnum - 1];
2334
-}
2335
-
2336
-static int
2337
-nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
2338
-
2339
-{
2340
- struct nvme_fc_queue *queue = hctx->driver_data;
2341
- struct nvme_fc_ctrl *ctrl = queue->ctrl;
2342
- struct request *req;
2343
- struct nvme_fc_fcp_op *op;
2344
-
2345
- req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag);
2346
- if (!req)
2347
- return 0;
2348
-
2349
- op = blk_mq_rq_to_pdu(req);
2350
-
2351
- if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) &&
2352
- (ctrl->lport->ops->poll_queue))
2353
- ctrl->lport->ops->poll_queue(&ctrl->lport->localport,
2354
- queue->lldd_handle);
2355
-
2356
- return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE));
23572810 }
23582811
23592812 static void
....@@ -2361,16 +2814,9 @@
23612814 {
23622815 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
23632816 struct nvme_fc_fcp_op *aen_op;
2364
- unsigned long flags;
2365
- bool terminating = false;
23662817 blk_status_t ret;
23672818
2368
- spin_lock_irqsave(&ctrl->lock, flags);
2369
- if (ctrl->flags & FCCTRL_TERMIO)
2370
- terminating = true;
2371
- spin_unlock_irqrestore(&ctrl->lock, flags);
2372
-
2373
- if (terminating)
2819
+ if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
23742820 return;
23752821
23762822 aen_op = &ctrl->aen_ops[0];
....@@ -2389,33 +2835,11 @@
23892835 struct nvme_fc_ctrl *ctrl = op->ctrl;
23902836
23912837 atomic_set(&op->state, FCPOP_STATE_IDLE);
2838
+ op->flags &= ~FCOP_FLAGS_TERMIO;
23922839
23932840 nvme_fc_unmap_data(ctrl, rq, op);
23942841 nvme_complete_rq(rq);
23952842 nvme_fc_ctrl_put(ctrl);
2396
-}
2397
-
2398
-/*
2399
- * This routine is used by the transport when it needs to find active
2400
- * io on a queue that is to be terminated. The transport uses
2401
- * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke
2402
- * this routine to kill them on a 1 by 1 basis.
2403
- *
2404
- * As FC allocates FC exchange for each io, the transport must contact
2405
- * the LLDD to terminate the exchange, thus releasing the FC exchange.
2406
- * After terminating the exchange the LLDD will call the transport's
2407
- * normal io done path for the request, but it will have an aborted
2408
- * status. The done path will return the io request back to the block
2409
- * layer with an error status.
2410
- */
2411
-static void
2412
-nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
2413
-{
2414
- struct nvme_ctrl *nctrl = data;
2415
- struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
2416
- struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
2417
-
2418
- __nvme_fc_abort_op(ctrl, op);
24192843 }
24202844
24212845
....@@ -2425,7 +2849,6 @@
24252849 .init_request = nvme_fc_init_request,
24262850 .exit_request = nvme_fc_exit_request,
24272851 .init_hctx = nvme_fc_init_hctx,
2428
- .poll = nvme_fc_poll,
24292852 .timeout = nvme_fc_timeout,
24302853 };
24312854
....@@ -2455,12 +2878,11 @@
24552878 ctrl->tag_set.ops = &nvme_fc_mq_ops;
24562879 ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
24572880 ctrl->tag_set.reserved_tags = 1; /* fabric connect */
2458
- ctrl->tag_set.numa_node = NUMA_NO_NODE;
2881
+ ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
24592882 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
2460
- ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
2461
- (SG_CHUNK_SIZE *
2462
- sizeof(struct scatterlist)) +
2463
- ctrl->lport->ops->fcprqst_priv_sz;
2883
+ ctrl->tag_set.cmd_size =
2884
+ struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
2885
+ ctrl->lport->ops->fcprqst_priv_sz);
24642886 ctrl->tag_set.driver_data = ctrl;
24652887 ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
24662888 ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
....@@ -2507,6 +2929,7 @@
25072929 nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
25082930 {
25092931 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
2932
+ u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1;
25102933 unsigned int nr_io_queues;
25112934 int ret;
25122935
....@@ -2519,10 +2942,26 @@
25192942 return ret;
25202943 }
25212944
2945
+ if (!nr_io_queues && prior_ioq_cnt) {
2946
+ dev_info(ctrl->ctrl.device,
2947
+ "Fail Reconnect: At least 1 io queue "
2948
+ "required (was %d)\n", prior_ioq_cnt);
2949
+ return -ENOSPC;
2950
+ }
2951
+
25222952 ctrl->ctrl.queue_count = nr_io_queues + 1;
25232953 /* check for io queues existing */
25242954 if (ctrl->ctrl.queue_count == 1)
25252955 return 0;
2956
+
2957
+ if (prior_ioq_cnt != nr_io_queues) {
2958
+ dev_info(ctrl->ctrl.device,
2959
+ "reconnect: revising io queue count from %d to %d\n",
2960
+ prior_ioq_cnt, nr_io_queues);
2961
+ nvme_wait_freeze(&ctrl->ctrl);
2962
+ blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
2963
+ nvme_unfreeze(&ctrl->ctrl);
2964
+ }
25262965
25272966 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
25282967 if (ret)
....@@ -2531,8 +2970,6 @@
25312970 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
25322971 if (ret)
25332972 goto out_delete_hw_queues;
2534
-
2535
- blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
25362973
25372974 return 0;
25382975
....@@ -2568,10 +3005,9 @@
25683005 struct nvme_fc_rport *rport = ctrl->rport;
25693006 u32 cnt;
25703007
2571
- if (ctrl->assoc_active)
3008
+ if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags))
25723009 return 1;
25733010
2574
- ctrl->assoc_active = true;
25753011 cnt = atomic_inc_return(&rport->act_ctrl_cnt);
25763012 if (cnt == 1)
25773013 nvme_fc_rport_active_on_lport(rport);
....@@ -2586,7 +3022,7 @@
25863022 struct nvme_fc_lport *lport = rport->lport;
25873023 u32 cnt;
25883024
2589
- /* ctrl->assoc_active=false will be set independently */
3025
+ /* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */
25903026
25913027 cnt = atomic_dec_return(&rport->act_ctrl_cnt);
25923028 if (cnt == 0) {
....@@ -2606,6 +3042,8 @@
26063042 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
26073043 {
26083044 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
3045
+ struct nvmefc_ls_rcv_op *disls = NULL;
3046
+ unsigned long flags;
26093047 int ret;
26103048 bool changed;
26113049
....@@ -2616,6 +3054,14 @@
26163054
26173055 if (nvme_fc_ctlr_active_on_rport(ctrl))
26183056 return -ENOTUNIQ;
3057
+
3058
+ dev_info(ctrl->ctrl.device,
3059
+ "NVME-FC{%d}: create association : host wwpn 0x%016llx "
3060
+ " rport wwpn 0x%016llx: NQN \"%s\"\n",
3061
+ ctrl->cnum, ctrl->lport->localport.port_name,
3062
+ ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn);
3063
+
3064
+ clear_bit(ASSOC_FAILED, &ctrl->flags);
26193065
26203066 /*
26213067 * Create the admin queue
....@@ -2631,8 +3077,6 @@
26313077 if (ret)
26323078 goto out_delete_hw_queue;
26333079
2634
- blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
2635
-
26363080 ret = nvmf_connect_admin_queue(&ctrl->ctrl);
26373081 if (ret)
26383082 goto out_disconnect_admin_queue;
....@@ -2646,25 +3090,18 @@
26463090 * prior connection values
26473091 */
26483092
2649
- ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
2650
- if (ret) {
2651
- dev_err(ctrl->ctrl.device,
2652
- "prop_get NVME_REG_CAP failed\n");
2653
- goto out_disconnect_admin_queue;
2654
- }
2655
-
2656
- ctrl->ctrl.sqsize =
2657
- min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
2658
-
2659
- ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
2660
- if (ret)
3093
+ ret = nvme_enable_ctrl(&ctrl->ctrl);
3094
+ if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
26613095 goto out_disconnect_admin_queue;
26623096
2663
- ctrl->ctrl.max_hw_sectors =
2664
- (ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9);
3097
+ ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
3098
+ ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments <<
3099
+ (ilog2(SZ_4K) - 9);
3100
+
3101
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
26653102
26663103 ret = nvme_init_identify(&ctrl->ctrl);
2667
- if (ret)
3104
+ if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
26683105 goto out_disconnect_admin_queue;
26693106
26703107 /* sanity checks */
....@@ -2682,7 +3119,7 @@
26823119 /* warn if maxcmd is lower than queue_size */
26833120 dev_warn(ctrl->ctrl.device,
26843121 "queue_size %zu > ctrl maxcmd %u, reducing "
2685
- "to queue_size\n",
3122
+ "to maxcmd\n",
26863123 opts->queue_size, ctrl->ctrl.maxcmd);
26873124 opts->queue_size = ctrl->ctrl.maxcmd;
26883125 }
....@@ -2690,7 +3127,8 @@
26903127 if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
26913128 /* warn if sqsize is lower than queue_size */
26923129 dev_warn(ctrl->ctrl.device,
2693
- "queue_size %zu > ctrl sqsize %u, clamping down\n",
3130
+ "queue_size %zu > ctrl sqsize %u, reducing "
3131
+ "to sqsize\n",
26943132 opts->queue_size, ctrl->ctrl.sqsize + 1);
26953133 opts->queue_size = ctrl->ctrl.sqsize + 1;
26963134 }
....@@ -2708,9 +3146,9 @@
27083146 ret = nvme_fc_create_io_queues(ctrl);
27093147 else
27103148 ret = nvme_fc_recreate_io_queues(ctrl);
2711
- if (ret)
2712
- goto out_term_aen_ops;
27133149 }
3150
+ if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
3151
+ goto out_term_aen_ops;
27143152
27153153 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
27163154
....@@ -2726,15 +3164,23 @@
27263164 out_disconnect_admin_queue:
27273165 /* send a Disconnect(association) LS to fc-nvme target */
27283166 nvme_fc_xmt_disconnect_assoc(ctrl);
3167
+ spin_lock_irqsave(&ctrl->lock, flags);
3168
+ ctrl->association_id = 0;
3169
+ disls = ctrl->rcv_disconn;
3170
+ ctrl->rcv_disconn = NULL;
3171
+ spin_unlock_irqrestore(&ctrl->lock, flags);
3172
+ if (disls)
3173
+ nvme_fc_xmt_ls_rsp(disls);
27293174 out_delete_hw_queue:
27303175 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
27313176 out_free_queue:
27323177 nvme_fc_free_queue(&ctrl->queues[0]);
2733
- ctrl->assoc_active = false;
3178
+ clear_bit(ASSOC_ACTIVE, &ctrl->flags);
27343179 nvme_fc_ctlr_inactive_on_rport(ctrl);
27353180
27363181 return ret;
27373182 }
3183
+
27383184
27393185 /*
27403186 * This routine stops operation of the controller on the host side.
....@@ -2745,55 +3191,18 @@
27453191 static void
27463192 nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
27473193 {
3194
+ struct nvmefc_ls_rcv_op *disls = NULL;
27483195 unsigned long flags;
27493196
2750
- if (!ctrl->assoc_active)
3197
+ if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
27513198 return;
2752
- ctrl->assoc_active = false;
27533199
27543200 spin_lock_irqsave(&ctrl->lock, flags);
2755
- ctrl->flags |= FCCTRL_TERMIO;
3201
+ set_bit(FCCTRL_TERMIO, &ctrl->flags);
27563202 ctrl->iocnt = 0;
27573203 spin_unlock_irqrestore(&ctrl->lock, flags);
27583204
2759
- /*
2760
- * If io queues are present, stop them and terminate all outstanding
2761
- * ios on them. As FC allocates FC exchange for each io, the
2762
- * transport must contact the LLDD to terminate the exchange,
2763
- * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
2764
- * to tell us what io's are busy and invoke a transport routine
2765
- * to kill them with the LLDD. After terminating the exchange
2766
- * the LLDD will call the transport's normal io done path, but it
2767
- * will have an aborted status. The done path will return the
2768
- * io requests back to the block layer as part of normal completions
2769
- * (but with error status).
2770
- */
2771
- if (ctrl->ctrl.queue_count > 1) {
2772
- nvme_stop_queues(&ctrl->ctrl);
2773
- blk_mq_tagset_busy_iter(&ctrl->tag_set,
2774
- nvme_fc_terminate_exchange, &ctrl->ctrl);
2775
- }
2776
-
2777
- /*
2778
- * Other transports, which don't have link-level contexts bound
2779
- * to sqe's, would try to gracefully shutdown the controller by
2780
- * writing the registers for shutdown and polling (call
2781
- * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
2782
- * just aborted and we will wait on those contexts, and given
2783
- * there was no indication of how live the controlelr is on the
2784
- * link, don't send more io to create more contexts for the
2785
- * shutdown. Let the controller fail via keepalive failure if
2786
- * its still present.
2787
- */
2788
-
2789
- /*
2790
- * clean up the admin queue. Same thing as above.
2791
- * use blk_mq_tagset_busy_itr() and the transport routine to
2792
- * terminate the exchanges.
2793
- */
2794
- blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
2795
- blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
2796
- nvme_fc_terminate_exchange, &ctrl->ctrl);
3205
+ __nvme_fc_abort_outstanding_ios(ctrl, false);
27973206
27983207 /* kill the aens as they are a separate path */
27993208 nvme_fc_abort_aen_ops(ctrl);
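The removed comment block and per-queue termination code above are folded into a single __nvme_fc_abort_outstanding_ios() call. Going by the removed lines, the consolidated helper is expected to cover roughly this pattern (a sketch of the old flow, not the helper's literal body): stop the queues, then have the LLDD terminate the FC exchange behind each busy request so the requests complete back through the normal done path with an aborted status.

	if (ctrl->ctrl.queue_count > 1) {
		nvme_stop_queues(&ctrl->ctrl);
		blk_mq_tagset_busy_iter(&ctrl->tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);
	}
	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
			nvme_fc_terminate_exchange, &ctrl->ctrl);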
....@@ -2801,7 +3210,7 @@
28013210 /* wait for all io that had to be aborted */
28023211 spin_lock_irq(&ctrl->lock);
28033212 wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
2804
- ctrl->flags &= ~FCCTRL_TERMIO;
3213
+ clear_bit(FCCTRL_TERMIO, &ctrl->flags);
28053214 spin_unlock_irq(&ctrl->lock);
28063215
28073216 nvme_fc_term_aen_ops(ctrl);
....@@ -2814,6 +3223,18 @@
28143223 */
28153224 if (ctrl->association_id)
28163225 nvme_fc_xmt_disconnect_assoc(ctrl);
3226
+
3227
+ spin_lock_irqsave(&ctrl->lock, flags);
3228
+ ctrl->association_id = 0;
3229
+ disls = ctrl->rcv_disconn;
3230
+ ctrl->rcv_disconn = NULL;
3231
+ spin_unlock_irqrestore(&ctrl->lock, flags);
3232
+ if (disls)
3233
+ /*
3234
+ * if a Disconnect Request was waiting for a response, send
3235
+ * now that all ABTS's have been issued (and are complete).
3236
+ */
3237
+ nvme_fc_xmt_ls_rsp(disls);
28173238
28183239 if (ctrl->ctrl.tagset) {
28193240 nvme_fc_delete_hw_io_queues(ctrl);
....@@ -2837,7 +3258,7 @@
28373258 {
28383259 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
28393260
2840
- cancel_work_sync(&ctrl->err_work);
3261
+ cancel_work_sync(&ctrl->ioerr_work);
28413262 cancel_delayed_work_sync(&ctrl->connect_work);
28423263 /*
28433264 * kill the association on the link side. this will block
....@@ -2884,36 +3305,11 @@
28843305 dev_warn(ctrl->ctrl.device,
28853306 "NVME-FC{%d}: dev_loss_tmo (%d) expired "
28863307 "while waiting for remoteport connectivity.\n",
2887
- ctrl->cnum, portptr->dev_loss_tmo);
3308
+ ctrl->cnum, min_t(int, portptr->dev_loss_tmo,
3309
+ (ctrl->ctrl.opts->max_reconnects *
3310
+ ctrl->ctrl.opts->reconnect_delay)));
28883311 WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
28893312 }
2890
-}
2891
-
2892
-static void
2893
-__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
2894
-{
2895
- /*
2896
- * if state is connecting - the error occurred as part of a
2897
- * reconnect attempt. The create_association error paths will
2898
- * clean up any outstanding io.
2899
- *
2900
- * if it's a different state - ensure all pending io is
2901
- * terminated. Given this can delay while waiting for the
2902
- * aborted io to return, we recheck adapter state below
2903
- * before changing state.
2904
- */
2905
- if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
2906
- nvme_stop_keep_alive(&ctrl->ctrl);
2907
-
2908
- /* will block will waiting for io to terminate */
2909
- nvme_fc_delete_association(ctrl);
2910
- }
2911
-
2912
- if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
2913
- !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
2914
- dev_err(ctrl->ctrl.device,
2915
- "NVME-FC{%d}: error_recovery: Couldn't change state "
2916
- "to CONNECTING\n", ctrl->cnum);
29173313 }
29183314
29193315 static void
....@@ -2921,42 +3317,30 @@
29213317 {
29223318 struct nvme_fc_ctrl *ctrl =
29233319 container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
2924
- int ret;
2925
-
2926
- __nvme_fc_terminate_io(ctrl);
29273320
29283321 nvme_stop_ctrl(&ctrl->ctrl);
29293322
2930
- if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
2931
- ret = nvme_fc_create_association(ctrl);
2932
- else
2933
- ret = -ENOTCONN;
3323
+ /* will block while waiting for io to terminate */
3324
+ nvme_fc_delete_association(ctrl);
29343325
2935
- if (ret)
2936
- nvme_fc_reconnect_or_delete(ctrl, ret);
2937
- else
2938
- dev_info(ctrl->ctrl.device,
2939
- "NVME-FC{%d}: controller reset complete\n",
2940
- ctrl->cnum);
3326
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
3327
+ dev_err(ctrl->ctrl.device,
3328
+ "NVME-FC{%d}: error_recovery: Couldn't change state "
3329
+ "to CONNECTING\n", ctrl->cnum);
3330
+
3331
+ if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
3332
+ if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
3333
+ dev_err(ctrl->ctrl.device,
3334
+ "NVME-FC{%d}: failed to schedule connect "
3335
+ "after reset\n", ctrl->cnum);
3336
+ } else {
3337
+ flush_delayed_work(&ctrl->connect_work);
3338
+ }
3339
+ } else {
3340
+ nvme_fc_reconnect_or_delete(ctrl, -ENOTCONN);
3341
+ }
29413342 }
29423343
2943
-static void
2944
-nvme_fc_connect_err_work(struct work_struct *work)
2945
-{
2946
- struct nvme_fc_ctrl *ctrl =
2947
- container_of(work, struct nvme_fc_ctrl, err_work);
2948
-
2949
- __nvme_fc_terminate_io(ctrl);
2950
-
2951
- atomic_set(&ctrl->err_work_active, 0);
2952
-
2953
- /*
2954
- * Rescheduling the connection after recovering
2955
- * from the io error is left to the reconnect work
2956
- * item, which is what should have stalled waiting on
2957
- * the io that had the error that scheduled this work.
2958
- */
2959
-}
29603344
29613345 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
29623346 .name = "fc",
....@@ -3033,7 +3417,7 @@
30333417 {
30343418 struct nvme_fc_ctrl *ctrl;
30353419 unsigned long flags;
3036
- int ret, idx;
3420
+ int ret, idx, ctrl_loss_tmo;
30373421
30383422 if (!(rport->remoteport.port_role &
30393423 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
....@@ -3059,16 +3443,31 @@
30593443 goto out_free_ctrl;
30603444 }
30613445
3446
+ /*
3447
+ * if ctrl_loss_tmo is being enforced and the default reconnect delay
3448
+ * is being used, change to a shorter reconnect delay for FC.
3449
+ */
3450
+ if (opts->max_reconnects != -1 &&
3451
+ opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
3452
+ opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
3453
+ ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
3454
+ opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
3455
+ opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
3456
+ opts->reconnect_delay);
3457
+ }
3458
+
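	/*
	 * Worked example with illustrative defaults: a reconnect_delay of 10s
	 * and max_reconnects sized for a 600s ctrl_loss_tmo (60 retries)
	 * become, after the block above:
	 *   ctrl_loss_tmo   = 60 * 10 = 600
	 *   reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO (2s)
	 *   max_reconnects  = DIV_ROUND_UP(600, 2) = 300
	 * so the overall loss budget is preserved while retrying faster.
	 */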
30623459 ctrl->ctrl.opts = opts;
30633460 ctrl->ctrl.nr_reconnects = 0;
3461
+ if (lport->dev)
3462
+ ctrl->ctrl.numa_node = dev_to_node(lport->dev);
3463
+ else
3464
+ ctrl->ctrl.numa_node = NUMA_NO_NODE;
30643465 INIT_LIST_HEAD(&ctrl->ctrl_list);
30653466 ctrl->lport = lport;
30663467 ctrl->rport = rport;
30673468 ctrl->dev = lport->dev;
30683469 ctrl->cnum = idx;
30693470 ctrl->ioq_live = false;
3070
- ctrl->assoc_active = false;
3071
- atomic_set(&ctrl->err_work_active, 0);
30723471 init_waitqueue_head(&ctrl->ioabort_wait);
30733472
30743473 get_device(ctrl->dev);
....@@ -3076,7 +3475,7 @@
30763475
30773476 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
30783477 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
3079
- INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work);
3478
+ INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work);
30803479 spin_lock_init(&ctrl->lock);
30813480
30823481 /* io queue count */
....@@ -3101,11 +3500,10 @@
31013500 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
31023501 ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
31033502 ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
3104
- ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
3105
- ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
3106
- (SG_CHUNK_SIZE *
3107
- sizeof(struct scatterlist)) +
3108
- ctrl->lport->ops->fcprqst_priv_sz;
3503
+ ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
3504
+ ctrl->admin_tag_set.cmd_size =
3505
+ struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
3506
+ ctrl->lport->ops->fcprqst_priv_sz);
31093507 ctrl->admin_tag_set.driver_data = ctrl;
31103508 ctrl->admin_tag_set.nr_hw_queues = 1;
31113509 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
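The admin tag set's cmd_size is now derived with struct_size(): the fixed struct nvme_fcp_op_w_sgl plus fcprqst_priv_sz bytes for its trailing priv[] flexible array, with integer-overflow checking folded in, instead of the hand-summed op + scatterlist + private size. A minimal sketch of the idiom against a made-up container type:

#include <linux/types.h>
#include <linux/overflow.h>

struct demo_op {			/* hypothetical container */
	int	core;			/* fixed part */
	u8	priv[];			/* flexible array member */
};

static size_t demo_cmd_size(size_t priv_sz)
{
	/* sizeof(struct demo_op) + priv_sz bytes, overflow-checked */
	return struct_size((struct demo_op *)NULL, priv, priv_sz);
}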
....@@ -3116,10 +3514,16 @@
31163514 goto out_free_queues;
31173515 ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
31183516
3517
+ ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
3518
+ if (IS_ERR(ctrl->ctrl.fabrics_q)) {
3519
+ ret = PTR_ERR(ctrl->ctrl.fabrics_q);
3520
+ goto out_free_admin_tag_set;
3521
+ }
3522
+
31193523 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
31203524 if (IS_ERR(ctrl->ctrl.admin_q)) {
31213525 ret = PTR_ERR(ctrl->ctrl.admin_q);
3122
- goto out_free_admin_tag_set;
3526
+ goto out_cleanup_fabrics_q;
31233527 }
31243528
31253529 /*
....@@ -3146,10 +3550,7 @@
31463550 goto fail_ctrl;
31473551 }
31483552
3149
- nvme_get_ctrl(&ctrl->ctrl);
3150
-
31513553 if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
3152
- nvme_put_ctrl(&ctrl->ctrl);
31533554 dev_err(ctrl->ctrl.device,
31543555 "NVME-FC{%d}: failed to schedule initial connect\n",
31553556 ctrl->cnum);
....@@ -3166,8 +3567,8 @@
31663567
31673568 fail_ctrl:
31683569 nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
3570
+ cancel_work_sync(&ctrl->ioerr_work);
31693571 cancel_work_sync(&ctrl->ctrl.reset_work);
3170
- cancel_work_sync(&ctrl->err_work);
31713572 cancel_delayed_work_sync(&ctrl->connect_work);
31723573
31733574 ctrl->ctrl.opts = NULL;
....@@ -3191,6 +3592,8 @@
31913592
31923593 out_cleanup_admin_q:
31933594 blk_cleanup_queue(ctrl->ctrl.admin_q);
3595
+out_cleanup_fabrics_q:
3596
+ blk_cleanup_queue(ctrl->ctrl.fabrics_q);
31943597 out_free_admin_tag_set:
31953598 blk_mq_free_tag_set(&ctrl->admin_tag_set);
31963599 out_free_queues:
....@@ -3235,7 +3638,7 @@
32353638 substring_t wwn = { name, &name[sizeof(name)-1] };
32363639 int nnoffset, pnoffset;
32373640
3238
- /* validate it string one of the 2 allowed formats */
3641
+ /* validate if string is one of the 2 allowed formats */
32393642 if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
32403643 !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
32413644 !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
....@@ -3332,6 +3735,90 @@
33323735 .create_ctrl = nvme_fc_create_ctrl,
33333736 };
33343737
3738
+/* Arbitrary cap on successive failures. With lots of subsystems it could be high */
3739
+#define DISCOVERY_MAX_FAIL 20
3740
+
3741
+static ssize_t nvme_fc_nvme_discovery_store(struct device *dev,
3742
+ struct device_attribute *attr, const char *buf, size_t count)
3743
+{
3744
+ unsigned long flags;
3745
+ LIST_HEAD(local_disc_list);
3746
+ struct nvme_fc_lport *lport;
3747
+ struct nvme_fc_rport *rport;
3748
+ int failcnt = 0;
3749
+
3750
+ spin_lock_irqsave(&nvme_fc_lock, flags);
3751
+restart:
3752
+ list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
3753
+ list_for_each_entry(rport, &lport->endp_list, endp_list) {
3754
+ if (!nvme_fc_lport_get(lport))
3755
+ continue;
3756
+ if (!nvme_fc_rport_get(rport)) {
3757
+ /*
3758
+ * This is a temporary condition. Upon restart
3759
+ * this rport will be gone from the list.
3760
+ *
3761
+ * Revert the lport put and retry. Anything
3762
+ * added to the list already will be skipped (as
3763
+ * they are no longer list_empty). Loops should
3764
+ * resume at rports that were not yet seen.
3765
+ */
3766
+ nvme_fc_lport_put(lport);
3767
+
3768
+ if (failcnt++ < DISCOVERY_MAX_FAIL)
3769
+ goto restart;
3770
+
3771
+ pr_err("nvme_discovery: too many reference "
3772
+ "failures\n");
3773
+ goto process_local_list;
3774
+ }
3775
+ if (list_empty(&rport->disc_list))
3776
+ list_add_tail(&rport->disc_list,
3777
+ &local_disc_list);
3778
+ }
3779
+ }
3780
+
3781
+process_local_list:
3782
+ while (!list_empty(&local_disc_list)) {
3783
+ rport = list_first_entry(&local_disc_list,
3784
+ struct nvme_fc_rport, disc_list);
3785
+ list_del_init(&rport->disc_list);
3786
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
3787
+
3788
+ lport = rport->lport;
3789
+ /* signal discovery. Won't hurt if it repeats */
3790
+ nvme_fc_signal_discovery_scan(lport, rport);
3791
+ nvme_fc_rport_put(rport);
3792
+ nvme_fc_lport_put(lport);
3793
+
3794
+ spin_lock_irqsave(&nvme_fc_lock, flags);
3795
+ }
3796
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
3797
+
3798
+ return count;
3799
+}
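/*
 * With the fc class and fc_udev_device registered below, this store routine
 * is expected to back /sys/class/fc/fc_udev_device/nvme_discovery: writing
 * any value re-signals discovery for every known lport/rport pair. The
 * local_disc_list two-pass walk lets nvme_fc_signal_discovery_scan() and the
 * final lport/rport reference drops run with nvme_fc_lock released.
 */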
3800
+static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);
3801
+
3802
+static struct attribute *nvme_fc_attrs[] = {
3803
+ &dev_attr_nvme_discovery.attr,
3804
+ NULL
3805
+};
3806
+
3807
+static struct attribute_group nvme_fc_attr_group = {
3808
+ .attrs = nvme_fc_attrs,
3809
+};
3810
+
3811
+static const struct attribute_group *nvme_fc_attr_groups[] = {
3812
+ &nvme_fc_attr_group,
3813
+ NULL
3814
+};
3815
+
3816
+static struct class fc_class = {
3817
+ .name = "fc",
3818
+ .dev_groups = nvme_fc_attr_groups,
3819
+ .owner = THIS_MODULE,
3820
+};
3821
+
33353822 static int __init nvme_fc_init_module(void)
33363823 {
33373824 int ret;
....@@ -3354,17 +3841,16 @@
33543841 * put in place, this code will move to a more generic
33553842 * location for the class.
33563843 */
3357
- fc_class = class_create(THIS_MODULE, "fc");
3358
- if (IS_ERR(fc_class)) {
3844
+ ret = class_register(&fc_class);
3845
+ if (ret) {
33593846 pr_err("couldn't register class fc\n");
3360
- ret = PTR_ERR(fc_class);
33613847 goto out_destroy_wq;
33623848 }
33633849
33643850 /*
33653851 * Create a device for the FC-centric udev events
33663852 */
3367
- fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL,
3853
+ fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL,
33683854 "fc_udev_device");
33693855 if (IS_ERR(fc_udev_device)) {
33703856 pr_err("couldn't create fc_udev device!\n");
....@@ -3379,28 +3865,68 @@
33793865 return 0;
33803866
33813867 out_destroy_device:
3382
- device_destroy(fc_class, MKDEV(0, 0));
3868
+ device_destroy(&fc_class, MKDEV(0, 0));
33833869 out_destroy_class:
3384
- class_destroy(fc_class);
3870
+ class_unregister(&fc_class);
33853871 out_destroy_wq:
33863872 destroy_workqueue(nvme_fc_wq);
33873873
33883874 return ret;
33893875 }
33903876
3877
+static void
3878
+nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
3879
+{
3880
+ struct nvme_fc_ctrl *ctrl;
3881
+
3882
+ spin_lock(&rport->lock);
3883
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
3884
+ dev_warn(ctrl->ctrl.device,
3885
+ "NVME-FC{%d}: transport unloading: deleting ctrl\n",
3886
+ ctrl->cnum);
3887
+ nvme_delete_ctrl(&ctrl->ctrl);
3888
+ }
3889
+ spin_unlock(&rport->lock);
3890
+}
3891
+
3892
+static void
3893
+nvme_fc_cleanup_for_unload(void)
3894
+{
3895
+ struct nvme_fc_lport *lport;
3896
+ struct nvme_fc_rport *rport;
3897
+
3898
+ list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
3899
+ list_for_each_entry(rport, &lport->endp_list, endp_list) {
3900
+ nvme_fc_delete_controllers(rport);
3901
+ }
3902
+ }
3903
+}
3904
+
33913905 static void __exit nvme_fc_exit_module(void)
33923906 {
3393
- /* sanity check - all lports should be removed */
3394
- if (!list_empty(&nvme_fc_lport_list))
3395
- pr_warn("%s: localport list not empty\n", __func__);
3907
+ unsigned long flags;
3908
+ bool need_cleanup = false;
3909
+
3910
+ spin_lock_irqsave(&nvme_fc_lock, flags);
3911
+ nvme_fc_waiting_to_unload = true;
3912
+ if (!list_empty(&nvme_fc_lport_list)) {
3913
+ need_cleanup = true;
3914
+ nvme_fc_cleanup_for_unload();
3915
+ }
3916
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
3917
+ if (need_cleanup) {
3918
+ pr_info("%s: waiting for ctrl deletes\n", __func__);
3919
+ wait_for_completion(&nvme_fc_unload_proceed);
3920
+ pr_info("%s: ctrl deletes complete\n", __func__);
3921
+ }
33963922
33973923 nvmf_unregister_transport(&nvme_fc_transport);
33983924
33993925 ida_destroy(&nvme_fc_local_port_cnt);
34003926 ida_destroy(&nvme_fc_ctrl_cnt);
34013927
3402
- device_destroy(fc_class, MKDEV(0, 0));
3403
- class_destroy(fc_class);
3928
+ device_destroy(&fc_class, MKDEV(0, 0));
3929
+ class_unregister(&fc_class);
34043930 destroy_workqueue(nvme_fc_wq);
34053931 }
34063932