2024-01-05 071106ecf68c401173c58808b1cf5f68cc50d390
kernel/drivers/ntb/ntb_transport.c
@@ -93,6 +93,12 @@
 module_param(use_dma, bool, 0644);
 MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
 
+static bool use_msi;
+#ifdef CONFIG_NTB_MSI
+module_param(use_msi, bool, 0644);
+MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
+#endif
+
 static struct dentry *nt_debugfs_dir;
 
 /* Only two-ports NTB devices are supported */
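Note: use_msi is declared unconditionally, but the module parameter is only registered when the kernel is built with CONFIG_NTB_MSI; otherwise the flag stays false and the transport keeps using doorbells. On an NTB_MSI kernel it is an opt-in switch, e.g. loading with `modprobe ntb_transport use_msi=1`.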
@@ -144,7 +150,9 @@
     struct list_head tx_free_q;
     spinlock_t ntb_tx_free_q_lock;
     void __iomem *tx_mw;
-    dma_addr_t tx_mw_phys;
+    phys_addr_t tx_mw_phys;
+    size_t tx_mw_size;
+    dma_addr_t tx_mw_dma_addr;
     unsigned int tx_index;
     unsigned int tx_max_entry;
     unsigned int tx_max_frame;
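Note: tx_mw_phys is retyped from dma_addr_t to phys_addr_t because it holds the raw physical address of the peer memory window (MMIO), which is not in general a valid bus address for a DMA engine. The new tx_mw_dma_addr carries the address the DMA channel actually uses (obtained via dma_map_resource() later in this patch), and tx_mw_size records the window span so the whole region can be mapped and unmapped.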
@@ -186,6 +194,11 @@
     u64 tx_err_no_buf;
     u64 tx_memcpy;
     u64 tx_async;
+
+    bool use_msi;
+    int msi_irq;
+    struct ntb_msi_desc msi_desc;
+    struct ntb_msi_desc peer_msi_desc;
 };
 
 struct ntb_transport_mw {
@@ -194,6 +207,8 @@
     void __iomem *vbase;
     size_t xlat_size;
     size_t buff_size;
+    size_t alloc_size;
+    void *alloc_addr;
     void *virt_addr;
     dma_addr_t dma_addr;
 };
@@ -216,6 +231,10 @@
     unsigned int qp_count;
     u64 qp_bitmap;
     u64 qp_bitmap_free;
+
+    bool use_msi;
+    unsigned int msi_spad_offset;
+    u64 msi_db_mask;
 
     bool link_is_up;
     struct delayed_work link_work;
@@ -273,7 +292,7 @@
 static int ntb_transport_bus_probe(struct device *dev)
 {
     const struct ntb_transport_client *client;
-    int rc = -EINVAL;
+    int rc;
 
     get_device(dev);
 
@@ -393,7 +412,7 @@
 
     rc = device_register(dev);
     if (rc) {
-        kfree(client_dev);
+        put_device(dev);
         goto err;
     }
 
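Note: this follows the documented device_register() error convention: once device_register() has been called, the caller must not kfree() the device directly, even when registration fails, but must drop the reference with put_device() and let the release callback free the structure. The old kfree(client_dev) could free memory the driver core still referenced.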
@@ -462,70 +481,70 @@
         return -ENOMEM;
 
     out_offset = 0;
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "\nNTB QP stats:\n\n");
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_bytes - \t%llu\n", qp->rx_bytes);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_pkts - \t%llu\n", qp->rx_pkts);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_memcpy - \t%llu\n", qp->rx_memcpy);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_async - \t%llu\n", qp->rx_async);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_ring_empty - %llu\n", qp->rx_ring_empty);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_err_oflow - \t%llu\n", qp->rx_err_oflow);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_err_ver - \t%llu\n", qp->rx_err_ver);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_buff - \t0x%p\n", qp->rx_buff);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_index - \t%u\n", qp->rx_index);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_max_entry - \t%u\n", qp->rx_max_entry);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
 
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "tx_bytes - \t%llu\n", qp->tx_bytes);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "tx_pkts - \t%llu\n", qp->tx_pkts);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "tx_memcpy - \t%llu\n", qp->tx_memcpy);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "tx_async - \t%llu\n", qp->tx_async);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "tx_ring_full - \t%llu\n", qp->tx_ring_full);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "tx_mw - \t0x%p\n", qp->tx_mw);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "tx_index (H) - \t%u\n", qp->tx_index);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "RRI (T) - \t%u\n",
                qp->remote_rx_info->entry);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "tx_max_entry - \t%u\n", qp->tx_max_entry);
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "free tx - \t%u\n",
                ntb_transport_tx_free_entry(qp));
 
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "\n");
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "Using TX DMA - \t%s\n",
                qp->tx_dma_chan ? "Yes" : "No");
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "Using RX DMA - \t%s\n",
                qp->rx_dma_chan ? "Yes" : "No");
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "QP Link - \t%s\n",
                qp->link_is_up ? "Up" : "Down");
-    out_offset += snprintf(buf + out_offset, out_count - out_offset,
+    out_offset += scnprintf(buf + out_offset, out_count - out_offset,
                "\n");
 
     if (out_offset > out_count)
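Note: snprintf() returns the length the output would have had, which may exceed the space remaining, so the `out_offset += snprintf(...)` chain could advance out_offset past out_count and turn the next `out_count - out_offset` size argument into a bogus value. scnprintf() returns the number of characters actually written, excluding the trailing NUL, so the running offset can never pass the end of the buffer. A minimal sketch of the difference (kernel context, scnprintf() comes from linux/kernel.h):

    char buf[8];
    int a, b;

    a = snprintf(buf, sizeof(buf), "0123456789");  /* a == 10, the would-be length */
    b = scnprintf(buf, sizeof(buf), "0123456789"); /* b == 7, bytes stored in buf  */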
@@ -663,6 +682,114 @@
     return 0;
 }
 
+static irqreturn_t ntb_transport_isr(int irq, void *dev)
+{
+    struct ntb_transport_qp *qp = dev;
+
+    tasklet_schedule(&qp->rxc_db_work);
+
+    return IRQ_HANDLED;
+}
+
+static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt,
+                        unsigned int qp_num)
+{
+    struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+    int spad = qp_num * 2 + nt->msi_spad_offset;
+
+    if (!nt->use_msi)
+        return;
+
+    if (spad >= ntb_spad_count(nt->ndev))
+        return;
+
+    qp->peer_msi_desc.addr_offset =
+        ntb_peer_spad_read(qp->ndev, PIDX, spad);
+    qp->peer_msi_desc.data =
+        ntb_peer_spad_read(qp->ndev, PIDX, spad + 1);
+
+    dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n",
+        qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data);
+
+    if (qp->peer_msi_desc.addr_offset) {
+        qp->use_msi = true;
+        dev_info(&qp->ndev->pdev->dev,
+             "Using MSI interrupts for QP%d\n", qp_num);
+    }
+}
+
+static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt,
+                       unsigned int qp_num)
+{
+    struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+    int spad = qp_num * 2 + nt->msi_spad_offset;
+    int rc;
+
+    if (!nt->use_msi)
+        return;
+
+    if (spad >= ntb_spad_count(nt->ndev)) {
+        dev_warn_once(&qp->ndev->pdev->dev,
+                  "Not enough SPADS to use MSI interrupts\n");
+        return;
+    }
+
+    ntb_spad_write(qp->ndev, spad, 0);
+    ntb_spad_write(qp->ndev, spad + 1, 0);
+
+    if (!qp->msi_irq) {
+        qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr,
+                           KBUILD_MODNAME, qp,
+                           &qp->msi_desc);
+        if (qp->msi_irq < 0) {
+            dev_warn(&qp->ndev->pdev->dev,
+                 "Unable to allocate MSI interrupt for qp%d\n",
+                 qp_num);
+            return;
+        }
+    }
+
+    rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset);
+    if (rc)
+        goto err_free_interrupt;
+
+    rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data);
+    if (rc)
+        goto err_free_interrupt;
+
+    dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n",
+        qp_num, qp->msi_irq, qp->msi_desc.addr_offset,
+        qp->msi_desc.data);
+
+    return;
+
+err_free_interrupt:
+    devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp);
+}
+
+static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt)
+{
+    int i;
+
+    dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed");
+
+    for (i = 0; i < nt->qp_count; i++)
+        ntb_transport_setup_qp_peer_msi(nt, i);
+}
+
+static void ntb_transport_msi_desc_changed(void *data)
+{
+    struct ntb_transport_ctx *nt = data;
+    int i;
+
+    dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed");
+
+    for (i = 0; i < nt->qp_count; i++)
+        ntb_transport_setup_qp_msi(nt, i);
+
+    ntb_peer_db_set(nt->ndev, nt->msi_db_mask);
+}
+
 static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
 {
     struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
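Note on the MSI handshake added here: each queue pair owns two scratchpad registers starting at msi_spad_offset, in which it publishes its local MSI descriptor, and it learns the peer's descriptor by reading the mirrored peer scratchpads:

    spad = qp_num * 2 + nt->msi_spad_offset;
    /* spad + 0: this QP's msi_desc.addr_offset (0 means "no MSI, use doorbells") */
    /* spad + 1: this QP's msi_desc.data                                          */

Because an addr_offset of zero is the "not available" sentinel, ntb_transport_setup_qp_msi() zeroes both scratchpads first and only writes real values once the interrupt has been allocated; a peer reading in between simply keeps using doorbells until the descriptor-changed doorbell (see the end of this patch) tells it to re-read.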
@@ -672,11 +799,57 @@
         return;
 
     ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
-    dma_free_coherent(&pdev->dev, mw->buff_size,
-              mw->virt_addr, mw->dma_addr);
+    dma_free_coherent(&pdev->dev, mw->alloc_size,
+              mw->alloc_addr, mw->dma_addr);
     mw->xlat_size = 0;
     mw->buff_size = 0;
+    mw->alloc_size = 0;
+    mw->alloc_addr = NULL;
     mw->virt_addr = NULL;
+}
+
+static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw,
+                   struct device *dma_dev, size_t align)
+{
+    dma_addr_t dma_addr;
+    void *alloc_addr, *virt_addr;
+    int rc;
+
+    alloc_addr = dma_alloc_coherent(dma_dev, mw->alloc_size,
+                    &dma_addr, GFP_KERNEL);
+    if (!alloc_addr) {
+        dev_err(dma_dev, "Unable to alloc MW buff of size %zu\n",
+            mw->alloc_size);
+        return -ENOMEM;
+    }
+    virt_addr = alloc_addr;
+
+    /*
+     * we must ensure that the memory address allocated is BAR size
+     * aligned in order for the XLAT register to take the value. This
+     * is a requirement of the hardware. It is recommended to setup CMA
+     * for BAR sizes equal or greater than 4MB.
+     */
+    if (!IS_ALIGNED(dma_addr, align)) {
+        if (mw->alloc_size > mw->buff_size) {
+            virt_addr = PTR_ALIGN(alloc_addr, align);
+            dma_addr = ALIGN(dma_addr, align);
+        } else {
+            rc = -ENOMEM;
+            goto err;
+        }
+    }
+
+    mw->alloc_addr = alloc_addr;
+    mw->virt_addr = virt_addr;
+    mw->dma_addr = dma_addr;
+
+    return 0;
+
+err:
+    dma_free_coherent(dma_dev, mw->alloc_size, alloc_addr, dma_addr);
+
+    return rc;
 }
 
 static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
@@ -710,28 +883,20 @@
     /* Alloc memory for receiving data. Must be aligned */
     mw->xlat_size = xlat_size;
     mw->buff_size = buff_size;
+    mw->alloc_size = buff_size;
 
-    mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size,
-                       &mw->dma_addr, GFP_KERNEL);
-    if (!mw->virt_addr) {
-        mw->xlat_size = 0;
-        mw->buff_size = 0;
-        dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n",
-            buff_size);
-        return -ENOMEM;
-    }
-
-    /*
-     * we must ensure that the memory address allocated is BAR size
-     * aligned in order for the XLAT register to take the value. This
-     * is a requirement of the hardware. It is recommended to setup CMA
-     * for BAR sizes equal or greater than 4MB.
-     */
-    if (!IS_ALIGNED(mw->dma_addr, xlat_align)) {
-        dev_err(&pdev->dev, "DMA memory %pad is not aligned\n",
-            &mw->dma_addr);
-        ntb_free_mw(nt, num_mw);
-        return -ENOMEM;
+    rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+    if (rc) {
+        mw->alloc_size *= 2;
+        rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+        if (rc) {
+            dev_err(&pdev->dev,
+                "Unable to alloc aligned MW buff\n");
+            mw->xlat_size = 0;
+            mw->buff_size = 0;
+            mw->alloc_size = 0;
+            return rc;
+        }
     }
 
     /* Notify HW the memory location of the receive buffer */
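Note: the allocation strategy is to try dma_alloc_coherent() at exactly buff_size and, if the returned address does not meet the XLAT alignment, retry with alloc_size doubled. On the retry alloc_size > buff_size holds, so ntb_alloc_mw_buffer() may round virt_addr/dma_addr up with PTR_ALIGN()/ALIGN() and a full buff_size window still fits inside the allocation. Worked example, assuming a 4 MB window with 4 MB alignment: if the first 4 MB allocation comes back at ...0x500000 it is rejected; the 8 MB retry, wherever it lands, loses at most 4 MB - 1 bytes to the round-up and is still left with an aligned 4 MB region. The previous code simply failed with -ENOMEM in that case.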
@@ -746,7 +911,7 @@
     return 0;
 }
 
-static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+static void ntb_qp_link_context_reset(struct ntb_transport_qp *qp)
 {
     qp->link_is_up = false;
     qp->active = false;
@@ -767,6 +932,13 @@
     qp->tx_err_no_buf = 0;
     qp->tx_memcpy = 0;
     qp->tx_async = 0;
+}
+
+static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+{
+    ntb_qp_link_context_reset(qp);
+    if (qp->remote_rx_info)
+        qp->remote_rx_info->entry = qp->rx_max_entry - 1;
 }
 
 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
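Note: the reset is split because queue initialization runs before the rx ring exists, when remote_rx_info is not yet valid, so it uses ntb_qp_link_context_reset() alone; a genuine link-down additionally parks remote_rx_info->entry at rx_max_entry - 1, the "ring empty, maximum entries free" position that ntb_transport_tx_free_entry() (fixed later in this patch to use tail >= head) depends on.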
@@ -822,6 +994,9 @@
     if (!nt->link_is_up)
         cancel_delayed_work_sync(&nt->link_work);
 
+    for (i = 0; i < nt->mw_count; i++)
+        ntb_free_mw(nt, i);
+
     /* The scratchpad registers keep the values if the remote side
      * goes down, blast them now to give them a sane value the next
      * time they are accessed
@@ -860,6 +1035,20 @@
     int rc = 0, i, spad;
 
     /* send the local info, in the opposite order of the way we read it */
+
+    if (nt->use_msi) {
+        rc = ntb_msi_setup_mws(ndev);
+        if (rc) {
+            dev_warn(&pdev->dev,
+                 "Failed to register MSI memory window: %d\n",
+                 rc);
+            nt->use_msi = false;
+        }
+    }
+
+    for (i = 0; i < nt->qp_count; i++)
+        ntb_transport_setup_qp_msi(nt, i);
+
     for (i = 0; i < nt->mw_count; i++) {
         size = nt->mw_vec[i].phys_size;
 
@@ -917,6 +1106,7 @@
         struct ntb_transport_qp *qp = &nt->qp_vec[i];
 
         ntb_transport_setup_qp_mw(nt, i);
+        ntb_transport_setup_qp_peer_msi(nt, i);
 
         if (qp->client_ready)
             schedule_delayed_work(&qp->link_work, 0);
@@ -993,7 +1183,7 @@
     qp->ndev = nt->ndev;
     qp->client_ready = false;
     qp->event_handler = NULL;
-    ntb_qp_link_down_reset(qp);
+    ntb_qp_link_context_reset(qp);
 
     if (mw_num < qp_count % mw_count)
         num_qps_mw = qp_count / mw_count + 1;
@@ -1009,6 +1199,7 @@
     tx_size = (unsigned int)mw_size / num_qps_mw;
     qp_offset = tx_size * (qp_num / mw_count);
 
+    qp->tx_mw_size = tx_size;
     qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
     if (!qp->tx_mw)
         return -EINVAL;
@@ -1089,6 +1280,19 @@
         return -ENOMEM;
 
     nt->ndev = ndev;
+
+    /*
+     * If we are using MSI, and have at least one extra memory window,
+     * we will reserve the last MW for the MSI window.
+     */
+    if (use_msi && mw_count > 1) {
+        rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed);
+        if (!rc) {
+            mw_count -= 1;
+            nt->use_msi = true;
+        }
+    }
+
     spad_count = ntb_spad_count(ndev);
 
     /* Limit the MW's based on the availability of scratchpads */
@@ -1101,6 +1305,8 @@
 
     max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2;
     nt->mw_count = min(mw_count, max_mw_count_for_spads);
+
+    nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH;
 
     nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec),
                   GFP_KERNEL, node);
@@ -1132,6 +1338,12 @@
     qp_bitmap = ntb_db_valid_mask(ndev);
 
     qp_count = ilog2(qp_bitmap);
+    if (nt->use_msi) {
+        qp_count -= 1;
+        nt->msi_db_mask = 1 << qp_count;
+        ntb_db_clear_mask(ndev, nt->msi_db_mask);
+    }
+
     if (max_num_clients && max_num_clients < qp_count)
         qp_count = max_num_clients;
     else if (nt->mw_count < qp_count)
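Note: when MSI is active the transport steals the highest doorbell bit (msi_db_mask) and supports one fewer queue pair. That bit is no longer a data doorbell: ntb_transport_msi_desc_changed() rings it on the peer after republishing descriptors, and the doorbell vector handler at the end of this patch re-reads the peer descriptors when it sees the bit set.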
@@ -1278,6 +1490,7 @@
     case DMA_TRANS_READ_FAILED:
     case DMA_TRANS_WRITE_FAILED:
         entry->errors++;
+        fallthrough;
     case DMA_TRANS_ABORTED:
     {
         struct ntb_transport_qp *qp = entry->qp;
@@ -1533,6 +1746,7 @@
     case DMA_TRANS_READ_FAILED:
     case DMA_TRANS_WRITE_FAILED:
         entry->errors++;
+        fallthrough;
     case DMA_TRANS_ABORTED:
     {
         void __iomem *offset =
@@ -1553,7 +1767,10 @@
 
     iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
 
-    ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
+    if (qp->use_msi)
+        ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc);
+    else
+        ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
 
     /* The entry length can only be zero if the packet is intended to be a
      * "link down" or similar. Since no payload is being sent in these
@@ -1602,7 +1819,7 @@
     dma_cookie_t cookie;
 
     device = chan->device;
-    dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
+    dest = qp->tx_mw_dma_addr + qp->tx_max_frame * entry->tx_index;
     buff_off = (size_t)buf & ~PAGE_MASK;
     dest_off = (size_t)dest & ~PAGE_MASK;
 
@@ -1821,6 +2038,19 @@
         qp->rx_dma_chan = NULL;
     }
 
+    qp->tx_mw_dma_addr = 0;
+    if (qp->tx_dma_chan) {
+        qp->tx_mw_dma_addr =
+            dma_map_resource(qp->tx_dma_chan->device->dev,
+                     qp->tx_mw_phys, qp->tx_mw_size,
+                     DMA_FROM_DEVICE, 0);
+        if (dma_mapping_error(qp->tx_dma_chan->device->dev,
+                      qp->tx_mw_dma_addr)) {
+            qp->tx_mw_dma_addr = 0;
+            goto err1;
+        }
+    }
+
     dev_dbg(&pdev->dev, "Using %s memcpy for TX\n",
         qp->tx_dma_chan ? "DMA" : "CPU");
 
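Note: dma_map_resource() is what produces the tx_mw_dma_addr consumed in ntb_async_tx_submit() above. The TX memory window is MMIO (a physical resource with no struct page behind it), so it cannot be handed to the DMA engine as-is on systems with an IOMMU; mapping it against the DMA channel's device yields a bus address valid for that channel. The mapping is created once per queue here and released both in the error path and in the queue teardown below.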
@@ -1862,6 +2092,10 @@
     qp->rx_alloc_entry = 0;
     while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
         kfree(entry);
+    if (qp->tx_mw_dma_addr)
+        dma_unmap_resource(qp->tx_dma_chan->device->dev,
+                   qp->tx_mw_dma_addr, qp->tx_mw_size,
+                   DMA_FROM_DEVICE, 0);
     if (qp->tx_dma_chan)
         dma_release_channel(qp->tx_dma_chan);
     if (qp->rx_dma_chan)
@@ -1903,6 +2137,11 @@
          */
         dma_sync_wait(chan, qp->last_cookie);
         dmaengine_terminate_all(chan);
+
+        dma_unmap_resource(chan->device->dev,
+                   qp->tx_mw_dma_addr, qp->tx_mw_size,
+                   DMA_FROM_DEVICE, 0);
+
         dma_release_channel(chan);
     }
 
@@ -2046,8 +2285,12 @@
     struct ntb_queue_entry *entry;
     int rc;
 
-    if (!qp || !qp->link_is_up || !len)
+    if (!qp || !len)
         return -EINVAL;
+
+    /* If the qp link is down already, just ignore. */
+    if (!qp->link_is_up)
+        return 0;
 
     entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
     if (!entry) {
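Note: previously a client calling tx_enqueue on a link that had just dropped got -EINVAL, indistinguishable from a programming error. Treating it as "ignore, return success" matches what would have happened had the link dropped a moment later, since the link-down reset rewinds the ring anyway and the peers re-handshake on link-up.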
@@ -2188,7 +2431,7 @@
     unsigned int head = qp->tx_index;
     unsigned int tail = qp->remote_rx_info->entry;
 
-    return tail > head ? tail - head : qp->tx_max_entry + tail - head;
+    return tail >= head ? tail - head : qp->tx_max_entry + tail - head;
 }
 EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry);
 
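Note: an off-by-one, with a concrete example. Free entries are counted from head (tx_index, the next slot to fill) to tail (remote_rx_info->entry, the last slot the peer freed). With tx_max_entry == 4, the ring starts at head == 0, tail == 3 (3 free; one slot is sacrificed to tell full from empty) and is completely full at head == tail. At that point the old `tail > head` test fell through to `4 + tail - head == 4` and reported a full ring as having 4 free entries, allowing unread entries to be overwritten; `tail >= head` correctly reports 0.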
@@ -2199,6 +2442,11 @@
     u64 db_bits;
     unsigned int qp_num;
 
+    if (ntb_db_read(nt->ndev) & nt->msi_db_mask) {
+        ntb_transport_msi_peer_desc_changed(nt);
+        ntb_db_clear(nt->ndev, nt->msi_db_mask);
+    }
+
     db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free &
            ntb_db_vector_mask(nt->ndev, vector));
 