2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/drivers/scsi/qla2xxx/qla_os.c
@@ -879,8 +879,8 @@
 		goto qc24_fail_command;
 	}
 
-	if (!fcport) {
-		cmd->result = DID_NO_CONNECT << 16;
+	if (!fcport || fcport->deleted) {
+		cmd->result = DID_IMM_RETRY << 16;
 		goto qc24_fail_command;
 	}
 
@@ -961,8 +961,15 @@
 		goto qc24_fail_command;
 	}
 
-	if (!fcport) {
+	if (!qpair->online) {
+		ql_dbg(ql_dbg_io, vha, 0x3077,
+		    "qpair not online. eeh_busy=%d.\n", ha->flags.eeh_busy);
 		cmd->result = DID_NO_CONNECT << 16;
+		goto qc24_fail_command;
+	}
+
+	if (!fcport || fcport->deleted) {
+		cmd->result = DID_IMM_RETRY << 16;
 		goto qc24_fail_command;
 	}
 
@@ -1190,35 +1197,6 @@
 	return return_status;
 }
 
-#define ISP_REG_DISCONNECT 0xffffffffU
-/**************************************************************************
-* qla2x00_isp_reg_stat
-*
-* Description:
-*     Read the host status register of ISP before aborting the command.
-*
-* Input:
-*	ha = pointer to host adapter structure.
-*
-*
-* Returns:
-*	Either true or false.
-*
-* Note:	Return true if there is register disconnect.
-**************************************************************************/
-static inline
-uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha)
-{
-	struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
-	struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82;
-
-	if (IS_P3P_TYPE(ha))
-		return ((rd_reg_dword(&reg82->host_int)) == ISP_REG_DISCONNECT);
-	else
-		return ((rd_reg_dword(&reg->host_status)) ==
-			ISP_REG_DISCONNECT);
-}
-
 /**************************************************************************
 * qla2xxx_eh_abort
 *
@@ -1253,6 +1231,7 @@
 	if (qla2x00_isp_reg_stat(ha)) {
 		ql_log(ql_log_info, vha, 0x8042,
 		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
 		return FAILED;
 	}
 
@@ -1444,6 +1423,7 @@
 	if (qla2x00_isp_reg_stat(ha)) {
 		ql_log(ql_log_info, vha, 0x803e,
 		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
 		return FAILED;
 	}
 
@@ -1460,6 +1440,7 @@
 	if (qla2x00_isp_reg_stat(ha)) {
 		ql_log(ql_log_info, vha, 0x803f,
 		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
 		return FAILED;
 	}
 
@@ -1495,6 +1476,7 @@
 	if (qla2x00_isp_reg_stat(ha)) {
 		ql_log(ql_log_info, vha, 0x8040,
 		    "PCI/Register disconnect, exiting.\n");
+		qla_pci_set_eeh_busy(vha);
 		return FAILED;
 	}
 
@@ -1572,7 +1554,7 @@
 	if (qla2x00_isp_reg_stat(ha)) {
 		ql_log(ql_log_info, vha, 0x8041,
 		    "PCI/Register disconnect, exiting.\n");
-		schedule_work(&ha->board_disable);
+		qla_pci_set_eeh_busy(vha);
 		return SUCCESS;
 	}
 
@@ -1762,6 +1744,17 @@
 	for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
 		sp = req->outstanding_cmds[cnt];
 		if (sp) {
+			/*
+			 * perform lockless completion during driver unload
+			 */
+			if (qla2x00_chip_is_down(vha)) {
+				req->outstanding_cmds[cnt] = NULL;
+				spin_unlock_irqrestore(qp->qp_lock_ptr, flags);
+				sp->done(sp, res);
+				spin_lock_irqsave(qp->qp_lock_ptr, flags);
+				continue;
+			}
+
 			switch (sp->cmd_type) {
 			case TYPE_SRB:
 				qla2x00_abort_srb(qp, sp, res, &flags);
@@ -2855,7 +2848,6 @@
 	ha->max_exchg = FW_MAX_EXCHANGES_CNT;
 	atomic_set(&ha->num_pend_mbx_stage1, 0);
 	atomic_set(&ha->num_pend_mbx_stage2, 0);
-	atomic_set(&ha->num_pend_mbx_stage3, 0);
 	atomic_set(&ha->zio_threshold, DEFAULT_ZIO_THRESHOLD);
 	ha->last_zio_threshold = DEFAULT_ZIO_THRESHOLD;
 
@@ -3130,6 +3122,13 @@
 	host->max_id = ha->max_fibre_devices;
 	host->cmd_per_lun = 3;
 	host->unique_id = host->host_no;
+
+	if (ql2xenabledif && ql2xenabledif != 2) {
+		ql_log(ql_log_warn, base_vha, 0x302d,
+		    "Invalid value for ql2xenabledif, resetting it to default (2)\n");
+		ql2xenabledif = 2;
+	}
+
 	if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif)
 		host->max_cmd_len = 32;
 	else
@@ -3362,8 +3361,6 @@
 			base_vha->flags.difdix_supported = 1;
 			ql_dbg(ql_dbg_init, base_vha, 0x00f1,
 			    "Registering for DIF/DIX type 1 and 3 protection.\n");
-			if (ql2xenabledif == 1)
-				prot = SHOST_DIX_TYPE0_PROTECTION;
 			if (ql2xprotmask)
 				scsi_host_set_prot(host, ql2xprotmask);
 			else
@@ -4866,7 +4863,8 @@
 	}
 	INIT_DELAYED_WORK(&vha->scan.scan_work, qla_scan_work_fn);
 
-	sprintf(vha->host_str, "%s_%lu", QLA2XXX_DRIVER_NAME, vha->host_no);
+	snprintf(vha->host_str, sizeof(vha->host_str), "%s_%lu",
+		 QLA2XXX_DRIVER_NAME, vha->host_no);
 	ql_dbg(ql_dbg_init, vha, 0x0041,
 	    "Allocated the host=%p hw=%p vha=%p dev_name=%s",
 	    vha->host, vha->hw, vha,
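The sprintf() to snprintf() change above bounds the write to the size of the destination buffer when formatting the per-host name string. A minimal standalone sketch of the same pattern follows; the buffer name and size here are illustrative and do not reflect the driver's actual vha->host_str layout.

```c
#include <stdio.h>

int main(void)
{
	/* Illustrative buffer; the driver formats "<driver-name>_<host_no>"
	 * into vha->host_str in the same way. */
	char host_str[16];
	unsigned long host_no = 4294967295UL;

	/* snprintf() never writes more than sizeof(host_str) bytes,
	 * including the terminating NUL; sprintf() has no such bound. */
	snprintf(host_str, sizeof(host_str), "%s_%lu", "qla2xxx", host_no);

	printf("%s\n", host_str); /* truncated if the result would not fit */
	return 0;
}
```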
@@ -6660,6 +6658,9 @@
 
 		schedule();
 
+		if (test_and_clear_bit(DO_EEH_RECOVERY, &base_vha->dpc_flags))
+			qla_pci_set_eeh_busy(base_vha);
+
 		if (!base_vha->flags.init_done || ha->flags.mbox_busy)
 			goto end_loop;
 
@@ -6899,9 +6900,12 @@
 			}
 		}
loop_resync_check:
-		if (test_and_clear_bit(LOOP_RESYNC_NEEDED,
+		if (!qla2x00_reset_active(base_vha) &&
+		    test_and_clear_bit(LOOP_RESYNC_NEEDED,
 			&base_vha->dpc_flags)) {
-
+			/*
+			 * Allow abort_isp to complete before moving on to scanning.
+			 */
 			ql_dbg(ql_dbg_dpc, base_vha, 0x400f,
 			    "Loop resync scheduled.\n");
 
@@ -6953,26 +6957,21 @@
 			mutex_unlock(&ha->mq_lock);
 		}
 
-		if (test_and_clear_bit(SET_NVME_ZIO_THRESHOLD_NEEDED,
-		    &base_vha->dpc_flags)) {
-			ql_log(ql_log_info, base_vha, 0xffffff,
-			    "nvme: SET ZIO Activity exchange threshold to %d.\n",
-			    ha->nvme_last_rptd_aen);
-			if (qla27xx_set_zio_threshold(base_vha,
-			    ha->nvme_last_rptd_aen)) {
-				ql_log(ql_log_info, base_vha, 0xffffff,
-				    "nvme: Unable to SET ZIO Activity exchange threshold to %d.\n",
-				    ha->nvme_last_rptd_aen);
-			}
-		}
-
 		if (test_and_clear_bit(SET_ZIO_THRESHOLD_NEEDED,
-		    &base_vha->dpc_flags)) {
+				       &base_vha->dpc_flags)) {
+			u16 threshold = ha->nvme_last_rptd_aen + ha->last_zio_threshold;
+
+			if (threshold > ha->orig_fw_xcb_count)
+				threshold = ha->orig_fw_xcb_count;
+
 			ql_log(ql_log_info, base_vha, 0xffffff,
-			    "SET ZIO Activity exchange threshold to %d.\n",
-			    ha->last_zio_threshold);
-			qla27xx_set_zio_threshold(base_vha,
-			    ha->last_zio_threshold);
+			       "SET ZIO Activity exchange threshold to %d.\n",
+			       threshold);
+			if (qla27xx_set_zio_threshold(base_vha, threshold)) {
+				ql_log(ql_log_info, base_vha, 0xffffff,
+				       "Unable to SET ZIO Activity exchange threshold to %d.\n",
+				       threshold);
+			}
 		}
 
 		if (!IS_QLAFX00(ha))
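The reworked SET_ZIO_THRESHOLD_NEEDED handler above derives a single threshold from the last reported NVMe AEN count plus the last FCP ZIO threshold, then clamps it to the firmware exchange count before programming it. A small standalone sketch of that arithmetic, using made-up values (in the driver the real values come from struct qla_hw_data):

```c
#include <stdio.h>

int main(void)
{
	/* Made-up example values; the driver reads these from qla_hw_data. */
	unsigned short nvme_last_rptd_aen = 48;
	unsigned short last_zio_threshold = 64;
	unsigned short orig_fw_xcb_count  = 100;	/* firmware exchange count */

	unsigned short threshold = nvme_last_rptd_aen + last_zio_threshold;

	/* Never ask the firmware for more exchanges than it reported. */
	if (threshold > orig_fw_xcb_count)
		threshold = orig_fw_xcb_count;

	/* Prints 100 here: 48 + 64 = 112 is clamped to the firmware limit. */
	printf("SET ZIO Activity exchange threshold to %u\n", threshold);
	return 0;
}
```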
@@ -7145,7 +7144,7 @@
 
 		/* if the loop has been down for 4 minutes, reinit adapter */
 		if (atomic_dec_and_test(&vha->loop_down_timer) != 0) {
-			if (!(vha->device_flags & DFLG_NO_CABLE)) {
+			if (!(vha->device_flags & DFLG_NO_CABLE) && !vha->vp_idx) {
 				ql_log(ql_log_warn, vha, 0x6009,
 				    "Loop down - aborting ISP.\n");
 
@@ -7190,14 +7189,13 @@
 	index = atomic_read(&ha->nvme_active_aen_cnt);
 	if (!vha->vp_idx &&
 	    (index != ha->nvme_last_rptd_aen) &&
-	    (index >= DEFAULT_ZIO_THRESHOLD) &&
 	    ha->zio_mode == QLA_ZIO_MODE_6 &&
 	    !ha->flags.host_shutting_down) {
+		ha->nvme_last_rptd_aen = atomic_read(&ha->nvme_active_aen_cnt);
 		ql_log(ql_log_info, vha, 0x3002,
 		    "nvme: Sched: Set ZIO exchange threshold to %d.\n",
 		    ha->nvme_last_rptd_aen);
-		ha->nvme_last_rptd_aen = atomic_read(&ha->nvme_active_aen_cnt);
-		set_bit(SET_NVME_ZIO_THRESHOLD_NEEDED, &vha->dpc_flags);
+		set_bit(SET_ZIO_THRESHOLD_NEEDED, &vha->dpc_flags);
 		start_dpc++;
 	}
 
@@ -7370,6 +7368,8 @@
 	int i;
 	unsigned long flags;
 
+	ql_dbg(ql_dbg_aer, vha, 0x9000,
+	       "%s\n", __func__);
 	ha->chip_reset++;
 
 	ha->base_qpair->chip_reset = ha->chip_reset;
@@ -7379,28 +7379,16 @@
 				ha->base_qpair->chip_reset;
 	}
 
-	/* purge MBox commands */
-	if (atomic_read(&ha->num_pend_mbx_stage3)) {
-		clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
-		complete(&ha->mbx_intr_comp);
-	}
-
-	i = 0;
-
-	while (atomic_read(&ha->num_pend_mbx_stage3) ||
-	    atomic_read(&ha->num_pend_mbx_stage2) ||
-	    atomic_read(&ha->num_pend_mbx_stage1)) {
-		msleep(20);
-		i++;
-		if (i > 50)
-			break;
-	}
-
-	ha->flags.purge_mbox = 0;
+	/*
+	 * purge mailbox might take a while. Slot Reset/chip reset
+	 * will take care of the purge
+	 */
 
 	mutex_lock(&ha->mq_lock);
+	ha->base_qpair->online = 0;
 	list_for_each_entry(qpair, &base_vha->qp_list, qp_list_elem)
 		qpair->online = 0;
+	wmb();
 	mutex_unlock(&ha->mq_lock);
 
 	qla2x00_mark_all_devices_lost(vha);
@@ -7437,14 +7425,17 @@
 {
 	scsi_qla_host_t *vha = pci_get_drvdata(pdev);
 	struct qla_hw_data *ha = vha->hw;
+	pci_ers_result_t ret = PCI_ERS_RESULT_NEED_RESET;
 
-	ql_dbg(ql_dbg_aer, vha, 0x9000,
-	    "PCI error detected, state %x.\n", state);
+	ql_log(ql_log_warn, vha, 0x9000,
+	       "PCI error detected, state %x.\n", state);
+	ha->pci_error_state = QLA_PCI_ERR_DETECTED;
 
 	if (!atomic_read(&pdev->enable_cnt)) {
 		ql_log(ql_log_info, vha, 0xffff,
 		    "PCI device is disabled,state %x\n", state);
-		return PCI_ERS_RESULT_NEED_RESET;
+		ret = PCI_ERS_RESULT_NEED_RESET;
+		goto out;
 	}
 
 	switch (state) {
@@ -7454,11 +7445,12 @@
 			set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags);
 			qla2xxx_wake_dpc(vha);
 		}
-		return PCI_ERS_RESULT_CAN_RECOVER;
+		ret = PCI_ERS_RESULT_CAN_RECOVER;
+		break;
 	case pci_channel_io_frozen:
-		ha->flags.eeh_busy = 1;
-		qla_pci_error_cleanup(vha);
-		return PCI_ERS_RESULT_NEED_RESET;
+		qla_pci_set_eeh_busy(vha);
+		ret = PCI_ERS_RESULT_NEED_RESET;
+		break;
 	case pci_channel_io_perm_failure:
 		ha->flags.pci_channel_io_perm_failure = 1;
 		qla2x00_abort_all_cmds(vha, DID_NO_CONNECT << 16);
@@ -7466,9 +7458,12 @@
 			set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags);
 			qla2xxx_wake_dpc(vha);
 		}
-		return PCI_ERS_RESULT_DISCONNECT;
+		ret = PCI_ERS_RESULT_DISCONNECT;
 	}
-	return PCI_ERS_RESULT_NEED_RESET;
+out:
+	ql_dbg(ql_dbg_aer, vha, 0x600d,
+	       "PCI error detected returning [%x].\n", ret);
+	return ret;
 }
 
 static pci_ers_result_t
@@ -7482,6 +7477,10 @@
 	struct device_reg_2xxx __iomem *reg = &ha->iobase->isp;
 	struct device_reg_24xx __iomem *reg24 = &ha->iobase->isp24;
 
+	ql_log(ql_log_warn, base_vha, 0x9000,
+	       "mmio enabled\n");
+
+	ha->pci_error_state = QLA_PCI_MMIO_ENABLED;
 	if (IS_QLA82XX(ha))
 		return PCI_ERS_RESULT_RECOVERED;
 
@@ -7505,10 +7504,11 @@
 		ql_log(ql_log_info, base_vha, 0x9003,
 		    "RISC paused -- mmio_enabled, Dumping firmware.\n");
 		qla2xxx_dump_fw(base_vha);
-
-		return PCI_ERS_RESULT_NEED_RESET;
-	} else
-		return PCI_ERS_RESULT_RECOVERED;
+	}
+	/* set PCI_ERS_RESULT_NEED_RESET to trigger call to qla2xxx_pci_slot_reset */
+	ql_dbg(ql_dbg_aer, base_vha, 0x600d,
+	       "mmio enabled returning.\n");
+	return PCI_ERS_RESULT_NEED_RESET;
 }
 
 static pci_ers_result_t
@@ -7520,9 +7520,10 @@
 	int rc;
 	struct qla_qpair *qpair = NULL;
 
-	ql_dbg(ql_dbg_aer, base_vha, 0x9004,
-	    "Slot Reset.\n");
+	ql_log(ql_log_warn, base_vha, 0x9004,
+	       "Slot Reset.\n");
 
+	ha->pci_error_state = QLA_PCI_SLOT_RESET;
 	/* Workaround: qla2xxx driver which access hardware earlier
 	 * needs error state to be pci_channel_io_online.
 	 * Otherwise mailbox command timesout.
@@ -7556,16 +7557,24 @@
 		qpair->online = 1;
 	mutex_unlock(&ha->mq_lock);
 
+	ha->flags.eeh_busy = 0;
 	base_vha->flags.online = 1;
 	set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
-	if (ha->isp_ops->abort_isp(base_vha) == QLA_SUCCESS)
-		ret = PCI_ERS_RESULT_RECOVERED;
+	ha->isp_ops->abort_isp(base_vha);
 	clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
 
+	if (qla2x00_isp_reg_stat(ha)) {
+		ha->flags.eeh_busy = 1;
+		qla_pci_error_cleanup(base_vha);
+		ql_log(ql_log_warn, base_vha, 0x9005,
+		       "Device unable to recover from PCI error.\n");
+	} else {
+		ret = PCI_ERS_RESULT_RECOVERED;
+	}
 
 exit_slot_reset:
 	ql_dbg(ql_dbg_aer, base_vha, 0x900e,
-	    "slot_reset return %x.\n", ret);
+	    "Slot Reset returning %x.\n", ret);
 
 	return ret;
 }
@@ -7577,16 +7586,55 @@
 	struct qla_hw_data *ha = base_vha->hw;
 	int ret;
 
-	ql_dbg(ql_dbg_aer, base_vha, 0x900f,
-	    "pci_resume.\n");
+	ql_log(ql_log_warn, base_vha, 0x900f,
+	       "Pci Resume.\n");
 
-	ha->flags.eeh_busy = 0;
 
 	ret = qla2x00_wait_for_hba_online(base_vha);
 	if (ret != QLA_SUCCESS) {
 		ql_log(ql_log_fatal, base_vha, 0x9002,
 		    "The device failed to resume I/O from slot/link_reset.\n");
 	}
+	ha->pci_error_state = QLA_PCI_RESUME;
+	ql_dbg(ql_dbg_aer, base_vha, 0x600d,
+	       "Pci Resume returning.\n");
+}
+
+void qla_pci_set_eeh_busy(struct scsi_qla_host *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
+	bool do_cleanup = false;
+	unsigned long flags;
+
+	if (ha->flags.eeh_busy)
+		return;
+
+	spin_lock_irqsave(&base_vha->work_lock, flags);
+	if (!ha->flags.eeh_busy) {
+		ha->flags.eeh_busy = 1;
+		do_cleanup = true;
+	}
+	spin_unlock_irqrestore(&base_vha->work_lock, flags);
+
+	if (do_cleanup)
+		qla_pci_error_cleanup(base_vha);
+}
+
+/*
+ * this routine will schedule a task to pause IO from interrupt context
+ * if caller sees a PCIE error event (register read = 0xf's)
+ */
+void qla_schedule_eeh_work(struct scsi_qla_host *vha)
+{
+	struct qla_hw_data *ha = vha->hw;
+	struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev);
+
+	if (ha->flags.eeh_busy)
+		return;
+
+	set_bit(DO_EEH_RECOVERY, &base_vha->dpc_flags);
+	qla2xxx_wake_dpc(base_vha);
 }
 
 static void
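The last hunk adds two helpers: qla_pci_set_eeh_busy() marks the adapter EEH-busy exactly once (the flag is re-checked under work_lock so only one caller performs qla_pci_error_cleanup()), while qla_schedule_eeh_work() is the interrupt-safe variant that only sets DO_EEH_RECOVERY and wakes the DPC thread, which then calls qla_pci_set_eeh_busy() (see the DPC hunk at line 6658 above). The sketch below illustrates the intended usage from a register-read path; it is not code from this patch, and the wrapper name qla_rd_reg_checked() and its placement are assumptions for illustration only.

```c
/*
 * Illustrative only -- not part of this patch. A read path that sees an
 * all-ones readback (the typical PCIe error signature) defers EEH handling
 * to the DPC thread via qla_schedule_eeh_work(), which is safe from
 * interrupt context because it merely sets a dpc flag and wakes the DPC
 * thread; the heavier qla_pci_set_eeh_busy() then runs in process context.
 */
static inline uint32_t
qla_rd_reg_checked(struct scsi_qla_host *vha, uint32_t __iomem *addr)
{
	uint32_t val = rd_reg_dword(addr);	/* existing driver accessor */

	if (unlikely(val == 0xffffffffU))
		qla_schedule_eeh_work(vha);	/* new helper from this patch */

	return val;
}
```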