.. | .. |
---|
93 | 93 | module_param(use_dma, bool, 0644); |
---|
94 | 94 | MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy"); |
---|
95 | 95 | |
---|
| 96 | +static bool use_msi; |
---|
| 97 | +#ifdef CONFIG_NTB_MSI |
---|
| 98 | +module_param(use_msi, bool, 0644); |
---|
| 99 | +MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells"); |
---|
| 100 | +#endif |
---|
| 101 | + |
---|
96 | 102 | static struct dentry *nt_debugfs_dir; |
---|
97 | 103 | |
---|
98 | 104 | /* Only two-ports NTB devices are supported */ |
---|
.. | .. |
---|
144 | 150 | struct list_head tx_free_q; |
---|
145 | 151 | spinlock_t ntb_tx_free_q_lock; |
---|
146 | 152 | void __iomem *tx_mw; |
---|
147 | | - dma_addr_t tx_mw_phys; |
---|
| 153 | + phys_addr_t tx_mw_phys; |
---|
| 154 | + size_t tx_mw_size; |
---|
| 155 | + dma_addr_t tx_mw_dma_addr; |
---|
148 | 156 | unsigned int tx_index; |
---|
149 | 157 | unsigned int tx_max_entry; |
---|
150 | 158 | unsigned int tx_max_frame; |
---|
.. | .. |
---|
186 | 194 | u64 tx_err_no_buf; |
---|
187 | 195 | u64 tx_memcpy; |
---|
188 | 196 | u64 tx_async; |
---|
| 197 | + |
---|
| 198 | + bool use_msi; |
---|
| 199 | + int msi_irq; |
---|
| 200 | + struct ntb_msi_desc msi_desc; |
---|
| 201 | + struct ntb_msi_desc peer_msi_desc; |
---|
189 | 202 | }; |
---|
190 | 203 | |
---|
191 | 204 | struct ntb_transport_mw { |
---|
.. | .. |
---|
194 | 207 | void __iomem *vbase; |
---|
195 | 208 | size_t xlat_size; |
---|
196 | 209 | size_t buff_size; |
---|
| 210 | + size_t alloc_size; |
---|
| 211 | + void *alloc_addr; |
---|
197 | 212 | void *virt_addr; |
---|
198 | 213 | dma_addr_t dma_addr; |
---|
199 | 214 | }; |
---|
.. | .. |
---|
216 | 231 | unsigned int qp_count; |
---|
217 | 232 | u64 qp_bitmap; |
---|
218 | 233 | u64 qp_bitmap_free; |
---|
| 234 | + |
---|
| 235 | + bool use_msi; |
---|
| 236 | + unsigned int msi_spad_offset; |
---|
| 237 | + u64 msi_db_mask; |
---|
219 | 238 | |
---|
220 | 239 | bool link_is_up; |
---|
221 | 240 | struct delayed_work link_work; |
---|
.. | .. |
---|
273 | 292 | static int ntb_transport_bus_probe(struct device *dev) |
---|
274 | 293 | { |
---|
275 | 294 | const struct ntb_transport_client *client; |
---|
276 | | - int rc = -EINVAL; |
---|
| 295 | + int rc; |
---|
277 | 296 | |
---|
278 | 297 | get_device(dev); |
---|
279 | 298 | |
---|
.. | .. |
---|
462 | 481 | return -ENOMEM; |
---|
463 | 482 | |
---|
464 | 483 | out_offset = 0; |
---|
465 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 484 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
466 | 485 | "\nNTB QP stats:\n\n"); |
---|
467 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 486 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
468 | 487 | "rx_bytes - \t%llu\n", qp->rx_bytes); |
---|
469 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 488 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
470 | 489 | "rx_pkts - \t%llu\n", qp->rx_pkts); |
---|
471 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 490 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
472 | 491 | "rx_memcpy - \t%llu\n", qp->rx_memcpy); |
---|
473 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 492 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
474 | 493 | "rx_async - \t%llu\n", qp->rx_async); |
---|
475 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 494 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
476 | 495 | "rx_ring_empty - %llu\n", qp->rx_ring_empty); |
---|
477 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 496 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
478 | 497 | "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); |
---|
479 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 498 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
480 | 499 | "rx_err_oflow - \t%llu\n", qp->rx_err_oflow); |
---|
481 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 500 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
482 | 501 | "rx_err_ver - \t%llu\n", qp->rx_err_ver); |
---|
483 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 502 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
484 | 503 | "rx_buff - \t0x%p\n", qp->rx_buff); |
---|
485 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 504 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
486 | 505 | "rx_index - \t%u\n", qp->rx_index); |
---|
487 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 506 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
488 | 507 | "rx_max_entry - \t%u\n", qp->rx_max_entry); |
---|
489 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 508 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
490 | 509 | "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); |
---|
491 | 510 | |
---|
492 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 511 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
493 | 512 | "tx_bytes - \t%llu\n", qp->tx_bytes); |
---|
494 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 513 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
495 | 514 | "tx_pkts - \t%llu\n", qp->tx_pkts); |
---|
496 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 515 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
497 | 516 | "tx_memcpy - \t%llu\n", qp->tx_memcpy); |
---|
498 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 517 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
499 | 518 | "tx_async - \t%llu\n", qp->tx_async); |
---|
500 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 519 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
501 | 520 | "tx_ring_full - \t%llu\n", qp->tx_ring_full); |
---|
502 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 521 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
503 | 522 | "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); |
---|
504 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 523 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
505 | 524 | "tx_mw - \t0x%p\n", qp->tx_mw); |
---|
506 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 525 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
507 | 526 | "tx_index (H) - \t%u\n", qp->tx_index); |
---|
508 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 527 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
509 | 528 | "RRI (T) - \t%u\n", |
---|
510 | 529 | qp->remote_rx_info->entry); |
---|
511 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 530 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
512 | 531 | "tx_max_entry - \t%u\n", qp->tx_max_entry); |
---|
513 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 532 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
514 | 533 | "free tx - \t%u\n", |
---|
515 | 534 | ntb_transport_tx_free_entry(qp)); |
---|
516 | 535 | |
---|
517 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 536 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
518 | 537 | "\n"); |
---|
519 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 538 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
520 | 539 | "Using TX DMA - \t%s\n", |
---|
521 | 540 | qp->tx_dma_chan ? "Yes" : "No"); |
---|
522 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 541 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
523 | 542 | "Using RX DMA - \t%s\n", |
---|
524 | 543 | qp->rx_dma_chan ? "Yes" : "No"); |
---|
525 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 544 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
526 | 545 | "QP Link - \t%s\n", |
---|
527 | 546 | qp->link_is_up ? "Up" : "Down"); |
---|
528 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
---|
| 547 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
---|
529 | 548 | "\n"); |
---|
530 | 549 | |
---|
531 | 550 | if (out_offset > out_count) |
---|
.. | .. |
---|
663 | 682 | return 0; |
---|
664 | 683 | } |
---|
665 | 684 | |
---|
| 685 | +static irqreturn_t ntb_transport_isr(int irq, void *dev) |
---|
| 686 | +{ |
---|
| 687 | + struct ntb_transport_qp *qp = dev; |
---|
| 688 | + |
---|
| 689 | + tasklet_schedule(&qp->rxc_db_work); |
---|
| 690 | + |
---|
| 691 | + return IRQ_HANDLED; |
---|
| 692 | +} |
---|
| 693 | + |
---|
| 694 | +static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt, |
---|
| 695 | + unsigned int qp_num) |
---|
| 696 | +{ |
---|
| 697 | + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; |
---|
| 698 | + int spad = qp_num * 2 + nt->msi_spad_offset; |
---|
| 699 | + |
---|
| 700 | + if (!nt->use_msi) |
---|
| 701 | + return; |
---|
| 702 | + |
---|
| 703 | + if (spad >= ntb_spad_count(nt->ndev)) |
---|
| 704 | + return; |
---|
| 705 | + |
---|
| 706 | + qp->peer_msi_desc.addr_offset = |
---|
| 707 | + ntb_peer_spad_read(qp->ndev, PIDX, spad); |
---|
| 708 | + qp->peer_msi_desc.data = |
---|
| 709 | + ntb_peer_spad_read(qp->ndev, PIDX, spad + 1); |
---|
| 710 | + |
---|
| 711 | + dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n", |
---|
| 712 | + qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data); |
---|
| 713 | + |
---|
| 714 | + if (qp->peer_msi_desc.addr_offset) { |
---|
| 715 | + qp->use_msi = true; |
---|
| 716 | + dev_info(&qp->ndev->pdev->dev, |
---|
| 717 | + "Using MSI interrupts for QP%d\n", qp_num); |
---|
| 718 | + } |
---|
| 719 | +} |
---|
| 720 | + |
---|
| 721 | +static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt, |
---|
| 722 | + unsigned int qp_num) |
---|
| 723 | +{ |
---|
| 724 | + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; |
---|
| 725 | + int spad = qp_num * 2 + nt->msi_spad_offset; |
---|
| 726 | + int rc; |
---|
| 727 | + |
---|
| 728 | + if (!nt->use_msi) |
---|
| 729 | + return; |
---|
| 730 | + |
---|
| 731 | + if (spad >= ntb_spad_count(nt->ndev)) { |
---|
| 732 | + dev_warn_once(&qp->ndev->pdev->dev, |
---|
| 733 | + "Not enough SPADS to use MSI interrupts\n"); |
---|
| 734 | + return; |
---|
| 735 | + } |
---|
| 736 | + |
---|
| 737 | + ntb_spad_write(qp->ndev, spad, 0); |
---|
| 738 | + ntb_spad_write(qp->ndev, spad + 1, 0); |
---|
| 739 | + |
---|
| 740 | + if (!qp->msi_irq) { |
---|
| 741 | + qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr, |
---|
| 742 | + KBUILD_MODNAME, qp, |
---|
| 743 | + &qp->msi_desc); |
---|
| 744 | + if (qp->msi_irq < 0) { |
---|
| 745 | + dev_warn(&qp->ndev->pdev->dev, |
---|
| 746 | + "Unable to allocate MSI interrupt for qp%d\n", |
---|
| 747 | + qp_num); |
---|
| 748 | + return; |
---|
| 749 | + } |
---|
| 750 | + } |
---|
| 751 | + |
---|
| 752 | + rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset); |
---|
| 753 | + if (rc) |
---|
| 754 | + goto err_free_interrupt; |
---|
| 755 | + |
---|
| 756 | + rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data); |
---|
| 757 | + if (rc) |
---|
| 758 | + goto err_free_interrupt; |
---|
| 759 | + |
---|
| 760 | + dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n", |
---|
| 761 | + qp_num, qp->msi_irq, qp->msi_desc.addr_offset, |
---|
| 762 | + qp->msi_desc.data); |
---|
| 763 | + |
---|
| 764 | + return; |
---|
| 765 | + |
---|
| 766 | +err_free_interrupt: |
---|
| 767 | + devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp); |
---|
| 768 | +} |
---|
| 769 | + |
---|
| 770 | +static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt) |
---|
| 771 | +{ |
---|
| 772 | + int i; |
---|
| 773 | + |
---|
| 774 | + dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed"); |
---|
| 775 | + |
---|
| 776 | + for (i = 0; i < nt->qp_count; i++) |
---|
| 777 | + ntb_transport_setup_qp_peer_msi(nt, i); |
---|
| 778 | +} |
---|
| 779 | + |
---|
| 780 | +static void ntb_transport_msi_desc_changed(void *data) |
---|
| 781 | +{ |
---|
| 782 | + struct ntb_transport_ctx *nt = data; |
---|
| 783 | + int i; |
---|
| 784 | + |
---|
| 785 | + dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed"); |
---|
| 786 | + |
---|
| 787 | + for (i = 0; i < nt->qp_count; i++) |
---|
| 788 | + ntb_transport_setup_qp_msi(nt, i); |
---|
| 789 | + |
---|
| 790 | + ntb_peer_db_set(nt->ndev, nt->msi_db_mask); |
---|
| 791 | +} |
---|
| 792 | + |
---|
666 | 793 | static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) |
---|
667 | 794 | { |
---|
668 | 795 | struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; |
---|
.. | .. |
---|
672 | 799 | return; |
---|
673 | 800 | |
---|
674 | 801 | ntb_mw_clear_trans(nt->ndev, PIDX, num_mw); |
---|
675 | | - dma_free_coherent(&pdev->dev, mw->buff_size, |
---|
676 | | - mw->virt_addr, mw->dma_addr); |
---|
| 802 | + dma_free_coherent(&pdev->dev, mw->alloc_size, |
---|
| 803 | + mw->alloc_addr, mw->dma_addr); |
---|
677 | 804 | mw->xlat_size = 0; |
---|
678 | 805 | mw->buff_size = 0; |
---|
| 806 | + mw->alloc_size = 0; |
---|
| 807 | + mw->alloc_addr = NULL; |
---|
679 | 808 | mw->virt_addr = NULL; |
---|
| 809 | +} |
---|
| 810 | + |
---|
| 811 | +static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw, |
---|
| 812 | + struct device *dma_dev, size_t align) |
---|
| 813 | +{ |
---|
| 814 | + dma_addr_t dma_addr; |
---|
| 815 | + void *alloc_addr, *virt_addr; |
---|
| 816 | + int rc; |
---|
| 817 | + |
---|
| 818 | + alloc_addr = dma_alloc_coherent(dma_dev, mw->alloc_size, |
---|
| 819 | + &dma_addr, GFP_KERNEL); |
---|
| 820 | + if (!alloc_addr) { |
---|
| 821 | + dev_err(dma_dev, "Unable to alloc MW buff of size %zu\n", |
---|
| 822 | + mw->alloc_size); |
---|
| 823 | + return -ENOMEM; |
---|
| 824 | + } |
---|
| 825 | + virt_addr = alloc_addr; |
---|
| 826 | + |
---|
| 827 | + /* |
---|
| 828 | + * we must ensure that the memory address allocated is BAR size |
---|
| 829 | + * aligned in order for the XLAT register to take the value. This |
---|
| 830 | + * is a requirement of the hardware. It is recommended to setup CMA |
---|
| 831 | + * for BAR sizes equal or greater than 4MB. |
---|
| 832 | + */ |
---|
| 833 | + if (!IS_ALIGNED(dma_addr, align)) { |
---|
| 834 | + if (mw->alloc_size > mw->buff_size) { |
---|
| 835 | + virt_addr = PTR_ALIGN(alloc_addr, align); |
---|
| 836 | + dma_addr = ALIGN(dma_addr, align); |
---|
| 837 | + } else { |
---|
| 838 | + rc = -ENOMEM; |
---|
| 839 | + goto err; |
---|
| 840 | + } |
---|
| 841 | + } |
---|
| 842 | + |
---|
| 843 | + mw->alloc_addr = alloc_addr; |
---|
| 844 | + mw->virt_addr = virt_addr; |
---|
| 845 | + mw->dma_addr = dma_addr; |
---|
| 846 | + |
---|
| 847 | + return 0; |
---|
| 848 | + |
---|
| 849 | +err: |
---|
| 850 | + dma_free_coherent(dma_dev, mw->alloc_size, alloc_addr, dma_addr); |
---|
| 851 | + |
---|
| 852 | + return rc; |
---|
680 | 853 | } |
---|
681 | 854 | |
---|
682 | 855 | static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, |
---|
.. | .. |
---|
710 | 883 | /* Alloc memory for receiving data. Must be aligned */ |
---|
711 | 884 | mw->xlat_size = xlat_size; |
---|
712 | 885 | mw->buff_size = buff_size; |
---|
| 886 | + mw->alloc_size = buff_size; |
---|
713 | 887 | |
---|
714 | | - mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size, |
---|
715 | | - &mw->dma_addr, GFP_KERNEL); |
---|
716 | | - if (!mw->virt_addr) { |
---|
717 | | - mw->xlat_size = 0; |
---|
718 | | - mw->buff_size = 0; |
---|
719 | | - dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n", |
---|
720 | | - buff_size); |
---|
721 | | - return -ENOMEM; |
---|
722 | | - } |
---|
723 | | - |
---|
724 | | - /* |
---|
725 | | - * we must ensure that the memory address allocated is BAR size |
---|
726 | | - * aligned in order for the XLAT register to take the value. This |
---|
727 | | - * is a requirement of the hardware. It is recommended to setup CMA |
---|
728 | | - * for BAR sizes equal or greater than 4MB. |
---|
729 | | - */ |
---|
730 | | - if (!IS_ALIGNED(mw->dma_addr, xlat_align)) { |
---|
731 | | - dev_err(&pdev->dev, "DMA memory %pad is not aligned\n", |
---|
732 | | - &mw->dma_addr); |
---|
733 | | - ntb_free_mw(nt, num_mw); |
---|
734 | | - return -ENOMEM; |
---|
| 888 | + rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); |
---|
| 889 | + if (rc) { |
---|
| 890 | + mw->alloc_size *= 2; |
---|
| 891 | + rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); |
---|
| 892 | + if (rc) { |
---|
| 893 | + dev_err(&pdev->dev, |
---|
| 894 | + "Unable to alloc aligned MW buff\n"); |
---|
| 895 | + mw->xlat_size = 0; |
---|
| 896 | + mw->buff_size = 0; |
---|
| 897 | + mw->alloc_size = 0; |
---|
| 898 | + return rc; |
---|
| 899 | + } |
---|
735 | 900 | } |
---|
736 | 901 | |
---|
737 | 902 | /* Notify HW the memory location of the receive buffer */ |
---|
.. | .. |
---|
822 | 987 | if (!nt->link_is_up) |
---|
823 | 988 | cancel_delayed_work_sync(&nt->link_work); |
---|
824 | 989 | |
---|
| 990 | + for (i = 0; i < nt->mw_count; i++) |
---|
| 991 | + ntb_free_mw(nt, i); |
---|
| 992 | + |
---|
825 | 993 | /* The scratchpad registers keep the values if the remote side |
---|
826 | 994 | * goes down, blast them now to give them a sane value the next |
---|
827 | 995 | * time they are accessed |
---|
.. | .. |
---|
860 | 1028 | int rc = 0, i, spad; |
---|
861 | 1029 | |
---|
862 | 1030 | /* send the local info, in the opposite order of the way we read it */ |
---|
| 1031 | + |
---|
| 1032 | + if (nt->use_msi) { |
---|
| 1033 | + rc = ntb_msi_setup_mws(ndev); |
---|
| 1034 | + if (rc) { |
---|
| 1035 | + dev_warn(&pdev->dev, |
---|
| 1036 | + "Failed to register MSI memory window: %d\n", |
---|
| 1037 | + rc); |
---|
| 1038 | + nt->use_msi = false; |
---|
| 1039 | + } |
---|
| 1040 | + } |
---|
| 1041 | + |
---|
| 1042 | + for (i = 0; i < nt->qp_count; i++) |
---|
| 1043 | + ntb_transport_setup_qp_msi(nt, i); |
---|
| 1044 | + |
---|
863 | 1045 | for (i = 0; i < nt->mw_count; i++) { |
---|
864 | 1046 | size = nt->mw_vec[i].phys_size; |
---|
865 | 1047 | |
---|
.. | .. |
---|
917 | 1099 | struct ntb_transport_qp *qp = &nt->qp_vec[i]; |
---|
918 | 1100 | |
---|
919 | 1101 | ntb_transport_setup_qp_mw(nt, i); |
---|
| 1102 | + ntb_transport_setup_qp_peer_msi(nt, i); |
---|
920 | 1103 | |
---|
921 | 1104 | if (qp->client_ready) |
---|
922 | 1105 | schedule_delayed_work(&qp->link_work, 0); |
---|
.. | .. |
---|
1009 | 1192 | tx_size = (unsigned int)mw_size / num_qps_mw; |
---|
1010 | 1193 | qp_offset = tx_size * (qp_num / mw_count); |
---|
1011 | 1194 | |
---|
| 1195 | + qp->tx_mw_size = tx_size; |
---|
1012 | 1196 | qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset; |
---|
1013 | 1197 | if (!qp->tx_mw) |
---|
1014 | 1198 | return -EINVAL; |
---|
.. | .. |
---|
1089 | 1273 | return -ENOMEM; |
---|
1090 | 1274 | |
---|
1091 | 1275 | nt->ndev = ndev; |
---|
| 1276 | + |
---|
| 1277 | + /* |
---|
| 1278 | + * If we are using MSI, and have at least one extra memory window, |
---|
| 1279 | + * we will reserve the last MW for the MSI window. |
---|
| 1280 | + */ |
---|
| 1281 | + if (use_msi && mw_count > 1) { |
---|
| 1282 | + rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed); |
---|
| 1283 | + if (!rc) { |
---|
| 1284 | + mw_count -= 1; |
---|
| 1285 | + nt->use_msi = true; |
---|
| 1286 | + } |
---|
| 1287 | + } |
---|
| 1288 | + |
---|
1092 | 1289 | spad_count = ntb_spad_count(ndev); |
---|
1093 | 1290 | |
---|
1094 | 1291 | /* Limit the MW's based on the availability of scratchpads */ |
---|
.. | .. |
---|
1101 | 1298 | |
---|
1102 | 1299 | max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2; |
---|
1103 | 1300 | nt->mw_count = min(mw_count, max_mw_count_for_spads); |
---|
| 1301 | + |
---|
| 1302 | + nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH; |
---|
1104 | 1303 | |
---|
1105 | 1304 | nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec), |
---|
1106 | 1305 | GFP_KERNEL, node); |
---|
.. | .. |
---|
1132 | 1331 | qp_bitmap = ntb_db_valid_mask(ndev); |
---|
1133 | 1332 | |
---|
1134 | 1333 | qp_count = ilog2(qp_bitmap); |
---|
| 1334 | + if (nt->use_msi) { |
---|
| 1335 | + qp_count -= 1; |
---|
| 1336 | + nt->msi_db_mask = 1 << qp_count; |
---|
| 1337 | + ntb_db_clear_mask(ndev, nt->msi_db_mask); |
---|
| 1338 | + } |
---|
| 1339 | + |
---|
1135 | 1340 | if (max_num_clients && max_num_clients < qp_count) |
---|
1136 | 1341 | qp_count = max_num_clients; |
---|
1137 | 1342 | else if (nt->mw_count < qp_count) |
---|
.. | .. |
---|
1278 | 1483 | case DMA_TRANS_READ_FAILED: |
---|
1279 | 1484 | case DMA_TRANS_WRITE_FAILED: |
---|
1280 | 1485 | entry->errors++; |
---|
| 1486 | + fallthrough; |
---|
1281 | 1487 | case DMA_TRANS_ABORTED: |
---|
1282 | 1488 | { |
---|
1283 | 1489 | struct ntb_transport_qp *qp = entry->qp; |
---|
.. | .. |
---|
1533 | 1739 | case DMA_TRANS_READ_FAILED: |
---|
1534 | 1740 | case DMA_TRANS_WRITE_FAILED: |
---|
1535 | 1741 | entry->errors++; |
---|
| 1742 | + fallthrough; |
---|
1536 | 1743 | case DMA_TRANS_ABORTED: |
---|
1537 | 1744 | { |
---|
1538 | 1745 | void __iomem *offset = |
---|
.. | .. |
---|
1553 | 1760 | |
---|
1554 | 1761 | iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); |
---|
1555 | 1762 | |
---|
1556 | | - ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); |
---|
| 1763 | + if (qp->use_msi) |
---|
| 1764 | + ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc); |
---|
| 1765 | + else |
---|
| 1766 | + ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); |
---|
1557 | 1767 | |
---|
1558 | 1768 | /* The entry length can only be zero if the packet is intended to be a |
---|
1559 | 1769 | * "link down" or similar. Since no payload is being sent in these |
---|
.. | .. |
---|
1602 | 1812 | dma_cookie_t cookie; |
---|
1603 | 1813 | |
---|
1604 | 1814 | device = chan->device; |
---|
1605 | | - dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index; |
---|
| 1815 | + dest = qp->tx_mw_dma_addr + qp->tx_max_frame * entry->tx_index; |
---|
1606 | 1816 | buff_off = (size_t)buf & ~PAGE_MASK; |
---|
1607 | 1817 | dest_off = (size_t)dest & ~PAGE_MASK; |
---|
1608 | 1818 | |
---|
.. | .. |
---|
1821 | 2031 | qp->rx_dma_chan = NULL; |
---|
1822 | 2032 | } |
---|
1823 | 2033 | |
---|
| 2034 | + qp->tx_mw_dma_addr = 0; |
---|
| 2035 | + if (qp->tx_dma_chan) { |
---|
| 2036 | + qp->tx_mw_dma_addr = |
---|
| 2037 | + dma_map_resource(qp->tx_dma_chan->device->dev, |
---|
| 2038 | + qp->tx_mw_phys, qp->tx_mw_size, |
---|
| 2039 | + DMA_FROM_DEVICE, 0); |
---|
| 2040 | + if (dma_mapping_error(qp->tx_dma_chan->device->dev, |
---|
| 2041 | + qp->tx_mw_dma_addr)) { |
---|
| 2042 | + qp->tx_mw_dma_addr = 0; |
---|
| 2043 | + goto err1; |
---|
| 2044 | + } |
---|
| 2045 | + } |
---|
| 2046 | + |
---|
1824 | 2047 | dev_dbg(&pdev->dev, "Using %s memcpy for TX\n", |
---|
1825 | 2048 | qp->tx_dma_chan ? "DMA" : "CPU"); |
---|
1826 | 2049 | |
---|
.. | .. |
---|
1862 | 2085 | qp->rx_alloc_entry = 0; |
---|
1863 | 2086 | while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) |
---|
1864 | 2087 | kfree(entry); |
---|
| 2088 | + if (qp->tx_mw_dma_addr) |
---|
| 2089 | + dma_unmap_resource(qp->tx_dma_chan->device->dev, |
---|
| 2090 | + qp->tx_mw_dma_addr, qp->tx_mw_size, |
---|
| 2091 | + DMA_FROM_DEVICE, 0); |
---|
1865 | 2092 | if (qp->tx_dma_chan) |
---|
1866 | 2093 | dma_release_channel(qp->tx_dma_chan); |
---|
1867 | 2094 | if (qp->rx_dma_chan) |
---|
.. | .. |
---|
1903 | 2130 | */ |
---|
1904 | 2131 | dma_sync_wait(chan, qp->last_cookie); |
---|
1905 | 2132 | dmaengine_terminate_all(chan); |
---|
| 2133 | + |
---|
| 2134 | + dma_unmap_resource(chan->device->dev, |
---|
| 2135 | + qp->tx_mw_dma_addr, qp->tx_mw_size, |
---|
| 2136 | + DMA_FROM_DEVICE, 0); |
---|
| 2137 | + |
---|
1906 | 2138 | dma_release_channel(chan); |
---|
1907 | 2139 | } |
---|
1908 | 2140 | |
---|
.. | .. |
---|
2199 | 2431 | u64 db_bits; |
---|
2200 | 2432 | unsigned int qp_num; |
---|
2201 | 2433 | |
---|
| 2434 | + if (ntb_db_read(nt->ndev) & nt->msi_db_mask) { |
---|
| 2435 | + ntb_transport_msi_peer_desc_changed(nt); |
---|
| 2436 | + ntb_db_clear(nt->ndev, nt->msi_db_mask); |
---|
| 2437 | + } |
---|
| 2438 | + |
---|
2202 | 2439 | db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free & |
---|
2203 | 2440 | ntb_db_vector_mask(nt->ndev, vector)); |
---|
2204 | 2441 | |
---|