hc
2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/drivers/scsi/scsi_error.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * scsi_error.c Copyright (C) 1997 Eric Youngdale
34 *
....@@ -115,6 +116,14 @@
115116 return 1;
116117 }
117118
119
+static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd)
120
+{
121
+ if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
122
+ return true;
123
+
124
+ return ++cmd->retries <= cmd->allowed;
125
+}
126
+
118127 /**
119128 * scmd_eh_abort_handler - Handle command aborts
120129 * @work: command to be aborted.
....@@ -150,7 +159,7 @@
150159 "eh timeout, not retrying "
151160 "aborted command\n"));
152161 } else if (!scsi_noretry_cmd(scmd) &&
153
- (++scmd->retries <= scmd->allowed)) {
162
+ scsi_cmd_retry_allowed(scmd)) {
154163 SCSI_LOG_ERROR_RECOVERY(3,
155164 scmd_printk(KERN_WARNING, scmd,
156165 "retry aborted command\n"));
....@@ -297,19 +306,19 @@
297306
298307 if (rtn == BLK_EH_DONE) {
299308 /*
300
- * For blk-mq, we must set the request state to complete now
301
- * before sending the request to the scsi error handler. This
302
- * will prevent a use-after-free in the event the LLD manages
303
- * to complete the request before the error handler finishes
304
- * processing this timed out request.
309
+ * Set the command to complete first in order to prevent a real
310
+ * completion from releasing the command while error handling
311
+ * is using it. If the command was already completed, then the
312
+ * lower level driver beat the timeout handler, and it is safe
313
+ * to return without escalating error recovery.
305314 *
306
- * If the request was already completed, then the LLD beat the
307
- * time out handler from transferring the request to the scsi
308
- * error handler. In that case we can return immediately as no
309
- * further action is required.
315
+ * If timeout handling lost the race to a real completion, the
316
+ * block layer may ignore that due to a fake timeout injection,
317
+ * so return RESET_TIMER to allow error handling another shot
318
+ * at this command.
310319 */
311
- if (req->q->mq_ops && !blk_mq_mark_complete(req))
312
- return rtn;
320
+ if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state))
321
+ return BLK_EH_RESET_TIMER;
313322 if (scsi_abort_command(scmd) != SUCCESS) {
314323 set_host_byte(scmd, DID_TIME_OUT);
315324 scsi_eh_scmd_add(scmd);
....@@ -337,9 +346,6 @@
337346 wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
338347
339348 online = scsi_device_online(sdev);
340
-
341
- SCSI_LOG_ERROR_RECOVERY(5, sdev_printk(KERN_INFO, sdev,
342
- "%s: rtn: %d\n", __func__, online));
343349
344350 return online;
345351 }
....@@ -601,7 +607,7 @@
601607 set_host_byte(scmd, DID_ALLOC_FAILURE);
602608 return SUCCESS;
603609 }
604
- /* FALLTHROUGH */
610
+ fallthrough;
605611 case COPY_ABORTED:
606612 case VOLUME_OVERFLOW:
607613 case MISCOMPARE:
....@@ -623,7 +629,7 @@
623629 return ADD_TO_MLQUEUE;
624630 else
625631 set_host_byte(scmd, DID_TARGET_FAILURE);
626
- /* FALLTHROUGH */
632
+ fallthrough;
627633
628634 case ILLEGAL_REQUEST:
629635 if (sshdr.asc == 0x20 || /* Invalid command operation code */
....@@ -736,7 +742,7 @@
736742 switch (status_byte(scmd->result)) {
737743 case GOOD:
738744 scsi_handle_queue_ramp_up(scmd->device);
739
- /* FALLTHROUGH */
745
+ fallthrough;
740746 case COMMAND_TERMINATED:
741747 return SUCCESS;
742748 case CHECK_CONDITION:
....@@ -757,7 +763,7 @@
757763 return FAILED;
758764 case QUEUE_FULL:
759765 scsi_handle_queue_full(scmd->device);
760
- /* fall through */
766
+ fallthrough;
761767 case BUSY:
762768 return NEEDS_RETRY;
763769 default:
....@@ -968,7 +974,6 @@
968974 ses->cmnd = scmd->cmnd;
969975 ses->data_direction = scmd->sc_data_direction;
970976 ses->sdb = scmd->sdb;
971
- ses->next_rq = scmd->request->next_rq;
972977 ses->result = scmd->result;
973978 ses->resid_len = scmd->req.resid_len;
974979 ses->underflow = scmd->underflow;
....@@ -980,7 +985,6 @@
980985 scmd->cmnd = ses->eh_cmnd;
981986 memset(scmd->cmnd, 0, BLK_MAX_CDB);
982987 memset(&scmd->sdb, 0, sizeof(scmd->sdb));
983
- scmd->request->next_rq = NULL;
984988 scmd->result = 0;
985989 scmd->req.resid_len = 0;
986990
....@@ -1034,7 +1038,6 @@
10341038 scmd->cmnd = ses->cmnd;
10351039 scmd->sc_data_direction = ses->data_direction;
10361040 scmd->sdb = ses->sdb;
1037
- scmd->request->next_rq = ses->next_rq;
10381041 scmd->result = ses->result;
10391042 scmd->req.resid_len = ses->resid_len;
10401043 scmd->underflow = ses->underflow;
....@@ -1063,7 +1066,7 @@
10631066 struct scsi_device *sdev = scmd->device;
10641067 struct Scsi_Host *shost = sdev->host;
10651068 DECLARE_COMPLETION_ONSTACK(done);
1066
- unsigned long timeleft = timeout;
1069
+ unsigned long timeleft = timeout, delay;
10671070 struct scsi_eh_save ses;
10681071 const unsigned long stall_for = msecs_to_jiffies(100);
10691072 int rtn;
....@@ -1074,7 +1077,29 @@
10741077
10751078 scsi_log_send(scmd);
10761079 scmd->scsi_done = scsi_eh_done;
1077
- rtn = shost->hostt->queuecommand(shost, scmd);
1080
+
1081
+ /*
1082
+ * Lock sdev->state_mutex to avoid that scsi_device_quiesce() can
1083
+ * change the SCSI device state after we have examined it and before
1084
+ * .queuecommand() is called.
1085
+ */
1086
+ mutex_lock(&sdev->state_mutex);
1087
+ while (sdev->sdev_state == SDEV_BLOCK && timeleft > 0) {
1088
+ mutex_unlock(&sdev->state_mutex);
1089
+ SCSI_LOG_ERROR_RECOVERY(5, sdev_printk(KERN_DEBUG, sdev,
1090
+ "%s: state %d <> %d\n", __func__, sdev->sdev_state,
1091
+ SDEV_BLOCK));
1092
+ delay = min(timeleft, stall_for);
1093
+ timeleft -= delay;
1094
+ msleep(jiffies_to_msecs(delay));
1095
+ mutex_lock(&sdev->state_mutex);
1096
+ }
1097
+ if (sdev->sdev_state != SDEV_BLOCK)
1098
+ rtn = shost->hostt->queuecommand(shost, scmd);
1099
+ else
1100
+ rtn = SCSI_MLQUEUE_DEVICE_BUSY;
1101
+ mutex_unlock(&sdev->state_mutex);
1102
+
10781103 if (rtn) {
10791104 if (timeleft > stall_for) {
10801105 scsi_eh_restore_cmnd(scmd, &ses);
....@@ -1247,11 +1272,18 @@
12471272 * upper level.
12481273 */
12491274 if (rtn == SUCCESS)
1250
- /* we don't want this command reissued, just
1251
- * finished with the sense data, so set
1252
- * retries to the max allowed to ensure it
1253
- * won't get reissued */
1254
- scmd->retries = scmd->allowed;
1275
+ /*
1276
+ * We don't want this command reissued, just finished
1277
+ * with the sense data, so set retries to the max
1278
+ * allowed to ensure it won't get reissued. If the user
1279
+ * has requested infinite retries, we also want to
1280
+ * finish this command, so force completion by setting
1281
+ * retries and allowed to the same value.
1282
+ */
1283
+ if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
1284
+ scmd->retries = scmd->allowed = 1;
1285
+ else
1286
+ scmd->retries = scmd->allowed;
12551287 else if (rtn != NEEDS_RETRY)
12561288 continue;
12571289
....@@ -1285,7 +1317,7 @@
12851317 case NEEDS_RETRY:
12861318 if (retry_cnt--)
12871319 goto retry_tur;
1288
- /*FALLTHRU*/
1320
+ fallthrough;
12891321 case SUCCESS:
12901322 return 0;
12911323 default:
....@@ -1395,6 +1427,7 @@
13951427 sdev_printk(KERN_INFO, sdev,
13961428 "%s: skip START_UNIT, past eh deadline\n",
13971429 current->comm));
1430
+ scsi_device_put(sdev);
13981431 break;
13991432 }
14001433 stu_scmd = NULL;
....@@ -1461,6 +1494,7 @@
14611494 sdev_printk(KERN_INFO, sdev,
14621495 "%s: skip BDR, past eh deadline\n",
14631496 current->comm));
1497
+ scsi_device_put(sdev);
14641498 break;
14651499 }
14661500 bdr_scmd = NULL;
....@@ -1720,7 +1754,7 @@
17201754 if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
17211755 status_byte(scmd->result) == RESERVATION_CONFLICT)
17221756 return 0;
1723
- /* fall through */
1757
+ fallthrough;
17241758 case DID_SOFT_ERROR:
17251759 return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER);
17261760 }
....@@ -1736,8 +1770,8 @@
17361770 if (scmd->request->cmd_flags & REQ_FAILFAST_DEV ||
17371771 blk_rq_is_passthrough(scmd->request))
17381772 return 1;
1739
- else
1740
- return 0;
1773
+
1774
+ return 0;
17411775 }
17421776
17431777 /**
....@@ -1791,7 +1825,7 @@
17911825 set_host_byte(scmd, DID_TIME_OUT);
17921826 return SUCCESS;
17931827 }
1794
- /* FALLTHROUGH */
1828
+ fallthrough;
17951829 case DID_NO_CONNECT:
17961830 case DID_BAD_TARGET:
17971831 /*
....@@ -1835,7 +1869,7 @@
18351869 * lower down
18361870 */
18371871 break;
1838
- /* fallthrough */
1872
+ fallthrough;
18391873 case DID_BUS_BUSY:
18401874 case DID_PARITY:
18411875 goto maybe_retry;
....@@ -1873,7 +1907,7 @@
18731907 * the case of trying to send too many commands to a
18741908 * tagged queueing device.
18751909 */
1876
- /* FALLTHROUGH */
1910
+ fallthrough;
18771911 case BUSY:
18781912 /*
18791913 * device can't talk to us at the moment. Should only
....@@ -1886,7 +1920,7 @@
18861920 if (scmd->cmnd[0] == REPORT_LUNS)
18871921 scmd->device->sdev_target->expecting_lun_change = 0;
18881922 scsi_handle_queue_ramp_up(scmd->device);
1889
- /* FALLTHROUGH */
1923
+ fallthrough;
18901924 case COMMAND_TERMINATED:
18911925 return SUCCESS;
18921926 case TASK_ABORTED:
....@@ -1925,8 +1959,7 @@
19251959 * the request was not marked fast fail. Note that above,
19261960 * even if the request is marked fast fail, we still requeue
19271961 * for queue congestion conditions (QUEUE_FULL or BUSY) */
1928
- if ((++scmd->retries) <= scmd->allowed
1929
- && !scsi_noretry_cmd(scmd)) {
1962
+ if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) {
19301963 return NEEDS_RETRY;
19311964 } else {
19321965 /*
....@@ -1938,7 +1971,7 @@
19381971
19391972 static void eh_lock_door_done(struct request *req, blk_status_t status)
19401973 {
1941
- __blk_put_request(req->q, req);
1974
+ blk_put_request(req);
19421975 }
19431976
19441977 /**
....@@ -2072,8 +2105,7 @@
20722105 list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
20732106 list_del_init(&scmd->eh_entry);
20742107 if (scsi_device_online(scmd->device) &&
2075
- !scsi_noretry_cmd(scmd) &&
2076
- (++scmd->retries <= scmd->allowed)) {
2108
+ !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd)) {
20772109 SCSI_LOG_ERROR_RECOVERY(3,
20782110 scmd_printk(KERN_INFO, scmd,
20792111 "%s: flush retry cmd\n",
....@@ -2357,22 +2389,22 @@
23572389 rtn = scsi_try_bus_device_reset(scmd);
23582390 if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE))
23592391 break;
2360
- /* FALLTHROUGH */
2392
+ fallthrough;
23612393 case SG_SCSI_RESET_TARGET:
23622394 rtn = scsi_try_target_reset(scmd);
23632395 if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE))
23642396 break;
2365
- /* FALLTHROUGH */
2397
+ fallthrough;
23662398 case SG_SCSI_RESET_BUS:
23672399 rtn = scsi_try_bus_reset(scmd);
23682400 if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE))
23692401 break;
2370
- /* FALLTHROUGH */
2402
+ fallthrough;
23712403 case SG_SCSI_RESET_HOST:
23722404 rtn = scsi_try_host_reset(scmd);
23732405 if (rtn == SUCCESS)
23742406 break;
2375
- /* FALLTHROUGH */
2407
+ fallthrough;
23762408 default:
23772409 rtn = FAILED;
23782410 break;
....@@ -2395,14 +2427,12 @@
23952427 wake_up(&shost->host_wait);
23962428 scsi_run_host_queues(shost);
23972429
2398
- scsi_put_command(scmd);
23992430 kfree(rq);
24002431
24012432 out_put_autopm_host:
24022433 scsi_autopm_put_host(shost);
24032434 return error;
24042435 }
2405
-EXPORT_SYMBOL(scsi_ioctl_reset);
24062436
24072437 bool scsi_command_normalize_sense(const struct scsi_cmnd *cmd,
24082438 struct scsi_sense_hdr *sshdr)