| .. | .. |
| 93 | 93 | module_param(use_dma, bool, 0644); |
| 94 | 94 | MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy"); |
| 95 | 95 | |
| 96 | +static bool use_msi; |
| 97 | +#ifdef CONFIG_NTB_MSI |
| 98 | +module_param(use_msi, bool, 0644); |
| 99 | +MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells"); |
| 100 | +#endif |
| 101 | + |
| 96 | 102 | static struct dentry *nt_debugfs_dir; |
| 97 | 103 | |
| 98 | 104 | /* Only two-ports NTB devices are supported */ |
| .. | .. |
| 144 | 150 | struct list_head tx_free_q; |
| 145 | 151 | spinlock_t ntb_tx_free_q_lock; |
| 146 | 152 | void __iomem *tx_mw; |
| 147 | | - dma_addr_t tx_mw_phys; |
| 153 | + phys_addr_t tx_mw_phys; |
| 154 | + size_t tx_mw_size; |
| 155 | + dma_addr_t tx_mw_dma_addr; |
| 148 | 156 | unsigned int tx_index; |
| 149 | 157 | unsigned int tx_max_entry; |
| 150 | 158 | unsigned int tx_max_frame; |
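A note on the hunk above: the single `tx_mw_phys` field is split into three distinct views of the transmit memory window, which the DMA-mapping hunks further down rely on. The summary below is my own annotation, not text from the patch:

```c
/*
 * Sketch of the three views of the TX memory window after this change:
 *
 *   tx_mw          - void __iomem *: CPU-side mapping, used for CPU copies
 *   tx_mw_phys     - phys_addr_t:    BAR physical address of the window
 *   tx_mw_dma_addr - dma_addr_t:     IOVA returned by dma_map_resource(),
 *                                    the address the DMA engine is given
 */
```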
| .. | .. |
| 186 | 194 | u64 tx_err_no_buf; |
| 187 | 195 | u64 tx_memcpy; |
| 188 | 196 | u64 tx_async; |
| 197 | + |
| 198 | + bool use_msi; |
| 199 | + int msi_irq; |
| 200 | + struct ntb_msi_desc msi_desc; |
| 201 | + struct ntb_msi_desc peer_msi_desc; |
| 189 | 202 | }; |
| 190 | 203 | |
| 191 | 204 | struct ntb_transport_mw { |
| .. | .. |
| 194 | 207 | void __iomem *vbase; |
| 195 | 208 | size_t xlat_size; |
| 196 | 209 | size_t buff_size; |
| 210 | + size_t alloc_size; |
| 211 | + void *alloc_addr; |
| 197 | 212 | void *virt_addr; |
| 198 | 213 | dma_addr_t dma_addr; |
| 199 | 214 | }; |
| .. | .. |
| 216 | 231 | unsigned int qp_count; |
| 217 | 232 | u64 qp_bitmap; |
| 218 | 233 | u64 qp_bitmap_free; |
| 234 | + |
| 235 | + bool use_msi; |
| 236 | + unsigned int msi_spad_offset; |
| 237 | + u64 msi_db_mask; |
| 219 | 238 | |
| 220 | 239 | bool link_is_up; |
| 221 | 240 | struct delayed_work link_work; |
| .. | .. |
| 273 | 292 | static int ntb_transport_bus_probe(struct device *dev) |
| 274 | 293 | { |
| 275 | 294 | const struct ntb_transport_client *client; |
| 276 | | - int rc = -EINVAL; |
| 295 | + int rc; |
| 277 | 296 | |
| 278 | 297 | get_device(dev); |
| 279 | 298 | |
| .. | .. |
| 462 | 481 | return -ENOMEM; |
| 463 | 482 | |
| 464 | 483 | out_offset = 0; |
| 465 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 484 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 466 | 485 | "\nNTB QP stats:\n\n"); |
| 467 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 486 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 468 | 487 | "rx_bytes - \t%llu\n", qp->rx_bytes); |
| 469 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 488 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 470 | 489 | "rx_pkts - \t%llu\n", qp->rx_pkts); |
| 471 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 490 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 472 | 491 | "rx_memcpy - \t%llu\n", qp->rx_memcpy); |
| 473 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 492 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 474 | 493 | "rx_async - \t%llu\n", qp->rx_async); |
| 475 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 494 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 476 | 495 | "rx_ring_empty - %llu\n", qp->rx_ring_empty); |
| 477 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 496 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 478 | 497 | "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); |
| 479 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 498 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 480 | 499 | "rx_err_oflow - \t%llu\n", qp->rx_err_oflow); |
| 481 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 500 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 482 | 501 | "rx_err_ver - \t%llu\n", qp->rx_err_ver); |
| 483 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 502 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 484 | 503 | "rx_buff - \t0x%p\n", qp->rx_buff); |
| 485 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 504 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 486 | 505 | "rx_index - \t%u\n", qp->rx_index); |
| 487 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 506 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 488 | 507 | "rx_max_entry - \t%u\n", qp->rx_max_entry); |
| 489 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 508 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 490 | 509 | "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); |
| 491 | 510 | |
| 492 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 511 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 493 | 512 | "tx_bytes - \t%llu\n", qp->tx_bytes); |
| 494 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 513 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 495 | 514 | "tx_pkts - \t%llu\n", qp->tx_pkts); |
| 496 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 515 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 497 | 516 | "tx_memcpy - \t%llu\n", qp->tx_memcpy); |
| 498 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 517 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 499 | 518 | "tx_async - \t%llu\n", qp->tx_async); |
| 500 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 519 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 501 | 520 | "tx_ring_full - \t%llu\n", qp->tx_ring_full); |
| 502 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 521 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 503 | 522 | "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); |
| 504 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 523 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 505 | 524 | "tx_mw - \t0x%p\n", qp->tx_mw); |
| 506 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 525 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 507 | 526 | "tx_index (H) - \t%u\n", qp->tx_index); |
| 508 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 527 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 509 | 528 | "RRI (T) - \t%u\n", |
| 510 | 529 | qp->remote_rx_info->entry); |
| 511 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 530 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 512 | 531 | "tx_max_entry - \t%u\n", qp->tx_max_entry); |
| 513 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 532 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 514 | 533 | "free tx - \t%u\n", |
| 515 | 534 | ntb_transport_tx_free_entry(qp)); |
| 516 | 535 | |
| 517 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 536 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 518 | 537 | "\n"); |
| 519 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 538 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 520 | 539 | "Using TX DMA - \t%s\n", |
| 521 | 540 | qp->tx_dma_chan ? "Yes" : "No"); |
| 522 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 541 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 523 | 542 | "Using RX DMA - \t%s\n", |
| 524 | 543 | qp->rx_dma_chan ? "Yes" : "No"); |
| 525 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 544 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 526 | 545 | "QP Link - \t%s\n", |
| 527 | 546 | qp->link_is_up ? "Up" : "Down"); |
| 528 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
| 547 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
| 529 | 548 | "\n"); |
| 530 | 549 | |
| 531 | 550 | if (out_offset > out_count) |
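The hunk above switches every call in the debugfs stats dump from snprintf() to scnprintf(). The difference matters once the buffer fills: snprintf() returns the length the output *would* have needed, so the accumulated `out_offset` can grow past `out_count` and the next `out_count - out_offset` underflows; scnprintf() returns the number of characters actually stored, so the offset stays bounded. A minimal sketch of the pattern, with hypothetical values:

```c
/* Hypothetical stand-alone illustration of the accumulation pattern. */
char buf[32];
size_t off = 0, count = sizeof(buf);
u64 rx_bytes = 123456789, rx_pkts = 42;

/*
 * scnprintf() returns the bytes actually written (excluding the NUL), so
 * even if the first line is truncated, off never exceeds count and the
 * second "count - off" cannot underflow. With snprintf() it could.
 */
off += scnprintf(buf + off, count - off, "rx_bytes - \t%llu\n", rx_bytes);
off += scnprintf(buf + off, count - off, "rx_pkts - \t%llu\n", rx_pkts);
```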
| .. | .. |
| 663 | 682 | return 0; |
| 664 | 683 | } |
| 665 | 684 | |
| 685 | +static irqreturn_t ntb_transport_isr(int irq, void *dev) |
| 686 | +{ |
| 687 | + struct ntb_transport_qp *qp = dev; |
| 688 | + |
| 689 | + tasklet_schedule(&qp->rxc_db_work); |
| 690 | + |
| 691 | + return IRQ_HANDLED; |
| 692 | +} |
| 693 | + |
| 694 | +static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt, |
| 695 | + unsigned int qp_num) |
| 696 | +{ |
| 697 | + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; |
| 698 | + int spad = qp_num * 2 + nt->msi_spad_offset; |
| 699 | + |
| 700 | + if (!nt->use_msi) |
| 701 | + return; |
| 702 | + |
| 703 | + if (spad >= ntb_spad_count(nt->ndev)) |
| 704 | + return; |
| 705 | + |
| 706 | + qp->peer_msi_desc.addr_offset = |
| 707 | + ntb_peer_spad_read(qp->ndev, PIDX, spad); |
| 708 | + qp->peer_msi_desc.data = |
| 709 | + ntb_peer_spad_read(qp->ndev, PIDX, spad + 1); |
| 710 | + |
| 711 | + dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n", |
| 712 | + qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data); |
| 713 | + |
| 714 | + if (qp->peer_msi_desc.addr_offset) { |
| 715 | + qp->use_msi = true; |
| 716 | + dev_info(&qp->ndev->pdev->dev, |
| 717 | + "Using MSI interrupts for QP%d\n", qp_num); |
| 718 | + } |
| 719 | +} |
| 720 | + |
| 721 | +static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt, |
| 722 | + unsigned int qp_num) |
| 723 | +{ |
| 724 | + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; |
| 725 | + int spad = qp_num * 2 + nt->msi_spad_offset; |
| 726 | + int rc; |
| 727 | + |
| 728 | + if (!nt->use_msi) |
| 729 | + return; |
| 730 | + |
| 731 | + if (spad >= ntb_spad_count(nt->ndev)) { |
| 732 | + dev_warn_once(&qp->ndev->pdev->dev, |
| 733 | + "Not enough SPADS to use MSI interrupts\n"); |
| 734 | + return; |
| 735 | + } |
| 736 | + |
| 737 | + ntb_spad_write(qp->ndev, spad, 0); |
| 738 | + ntb_spad_write(qp->ndev, spad + 1, 0); |
| 739 | + |
| 740 | + if (!qp->msi_irq) { |
| 741 | + qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr, |
| 742 | + KBUILD_MODNAME, qp, |
| 743 | + &qp->msi_desc); |
| 744 | + if (qp->msi_irq < 0) { |
| 745 | + dev_warn(&qp->ndev->pdev->dev, |
| 746 | + "Unable to allocate MSI interrupt for qp%d\n", |
| 747 | + qp_num); |
| 748 | + return; |
| 749 | + } |
| 750 | + } |
| 751 | + |
| 752 | + rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset); |
| 753 | + if (rc) |
| 754 | + goto err_free_interrupt; |
| 755 | + |
| 756 | + rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data); |
| 757 | + if (rc) |
| 758 | + goto err_free_interrupt; |
| 759 | + |
| 760 | + dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n", |
| 761 | + qp_num, qp->msi_irq, qp->msi_desc.addr_offset, |
| 762 | + qp->msi_desc.data); |
| 763 | + |
| 764 | + return; |
| 765 | + |
| 766 | +err_free_interrupt: |
| 767 | + devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp); |
| 768 | +} |
| 769 | + |
| 770 | +static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt) |
| 771 | +{ |
| 772 | + int i; |
| 773 | + |
| 774 | + dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed"); |
| 775 | + |
| 776 | + for (i = 0; i < nt->qp_count; i++) |
| 777 | + ntb_transport_setup_qp_peer_msi(nt, i); |
| 778 | +} |
| 779 | + |
| 780 | +static void ntb_transport_msi_desc_changed(void *data) |
| 781 | +{ |
| 782 | + struct ntb_transport_ctx *nt = data; |
| 783 | + int i; |
| 784 | + |
| 785 | + dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed"); |
| 786 | + |
| 787 | + for (i = 0; i < nt->qp_count; i++) |
| 788 | + ntb_transport_setup_qp_msi(nt, i); |
| 789 | + |
| 790 | + ntb_peer_db_set(nt->ndev, nt->msi_db_mask); |
| 791 | +} |
| 792 | + |
| 666 | 793 | static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) |
| 667 | 794 | { |
| 668 | 795 | struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; |
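The new helpers above implement a small descriptor-exchange protocol over scratchpads: ntb_transport_setup_qp_msi() allocates an interrupt with ntbm_msi_request_irq() and publishes the resulting descriptor, while the peer's ntb_transport_setup_qp_peer_msi() reads it back and only flips `qp->use_msi` when a non-zero address offset was published. The layout as I read it (the helper below is hypothetical, not part of the patch):

```c
/*
 * Scratchpad layout assumed by the two setup helpers above:
 *
 *   spad[nt->msi_spad_offset + qp_num * 2]     = local msi_desc.addr_offset
 *   spad[nt->msi_spad_offset + qp_num * 2 + 1] = local msi_desc.data
 *
 * An addr_offset of 0 read back by the peer means "no MSI for this QP,
 * keep using the doorbell".
 */
static inline int qp_msi_spad(struct ntb_transport_ctx *nt, unsigned int qp_num)
{
	return qp_num * 2 + nt->msi_spad_offset;	/* hypothetical helper */
}
```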
| .. | .. |
| 672 | 799 | return; |
| 673 | 800 | |
| 674 | 801 | ntb_mw_clear_trans(nt->ndev, PIDX, num_mw); |
| 675 | | - dma_free_coherent(&pdev->dev, mw->buff_size, |
| 676 | | - mw->virt_addr, mw->dma_addr); |
| 802 | + dma_free_coherent(&pdev->dev, mw->alloc_size, |
| 803 | + mw->alloc_addr, mw->dma_addr); |
| 677 | 804 | mw->xlat_size = 0; |
| 678 | 805 | mw->buff_size = 0; |
| 806 | + mw->alloc_size = 0; |
| 807 | + mw->alloc_addr = NULL; |
| 679 | 808 | mw->virt_addr = NULL; |
| 809 | +} |
| 810 | + |
| 811 | +static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw, |
| 812 | + struct device *dma_dev, size_t align) |
| 813 | +{ |
| 814 | + dma_addr_t dma_addr; |
| 815 | + void *alloc_addr, *virt_addr; |
| 816 | + int rc; |
| 817 | + |
| 818 | + alloc_addr = dma_alloc_coherent(dma_dev, mw->alloc_size, |
| 819 | + &dma_addr, GFP_KERNEL); |
| 820 | + if (!alloc_addr) { |
| 821 | + dev_err(dma_dev, "Unable to alloc MW buff of size %zu\n", |
| 822 | + mw->alloc_size); |
| 823 | + return -ENOMEM; |
| 824 | + } |
| 825 | + virt_addr = alloc_addr; |
| 826 | + |
| 827 | + /* |
| 828 | + * we must ensure that the memory address allocated is BAR size |
| 829 | + * aligned in order for the XLAT register to take the value. This |
| 830 | + * is a requirement of the hardware. It is recommended to setup CMA |
| 831 | + * for BAR sizes equal or greater than 4MB. |
| 832 | + */ |
| 833 | + if (!IS_ALIGNED(dma_addr, align)) { |
| 834 | + if (mw->alloc_size > mw->buff_size) { |
| 835 | + virt_addr = PTR_ALIGN(alloc_addr, align); |
| 836 | + dma_addr = ALIGN(dma_addr, align); |
| 837 | + } else { |
| 838 | + rc = -ENOMEM; |
| 839 | + goto err; |
| 840 | + } |
| 841 | + } |
| 842 | + |
| 843 | + mw->alloc_addr = alloc_addr; |
| 844 | + mw->virt_addr = virt_addr; |
| 845 | + mw->dma_addr = dma_addr; |
| 846 | + |
| 847 | + return 0; |
| 848 | + |
| 849 | +err: |
| 850 | + dma_free_coherent(dma_dev, mw->alloc_size, alloc_addr, dma_addr); |
| 851 | + |
| 852 | + return rc; |
| 680 | 853 | } |
| 681 | 854 | |
| 682 | 855 | static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, |
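ntb_alloc_mw_buffer() now tracks both the raw allocation (alloc_addr/alloc_size) and the possibly-advanced, aligned view handed to the hardware (virt_addr/dma_addr); that is why ntb_free_mw() above must pass alloc_size and alloc_addr to dma_free_coherent(), not the aligned values. A condensed sketch of the carving step, with the error handling and size check dropped (my simplification, not the patch's code):

```c
/* Hypothetical condensed view of the alignment carving done above. */
void *alloc_addr = dma_alloc_coherent(dma_dev, mw->alloc_size,
				      &dma_addr, GFP_KERNEL);

mw->alloc_addr = alloc_addr;                   /* what must be freed later */
mw->virt_addr  = PTR_ALIGN(alloc_addr, align); /* what the CPU uses        */
mw->dma_addr   = ALIGN(dma_addr, align);       /* what goes into XLAT      */
```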
| .. | .. |
| 710 | 883 | /* Alloc memory for receiving data. Must be aligned */ |
| 711 | 884 | mw->xlat_size = xlat_size; |
| 712 | 885 | mw->buff_size = buff_size; |
| 886 | + mw->alloc_size = buff_size; |
| 713 | 887 | |
| 714 | | - mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size, |
| 715 | | - &mw->dma_addr, GFP_KERNEL); |
| 716 | | - if (!mw->virt_addr) { |
| 717 | | - mw->xlat_size = 0; |
| 718 | | - mw->buff_size = 0; |
| 719 | | - dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n", |
| 720 | | - buff_size); |
| 721 | | - return -ENOMEM; |
| 722 | | - } |
| 723 | | - |
| 724 | | - /* |
| 725 | | - * we must ensure that the memory address allocated is BAR size |
| 726 | | - * aligned in order for the XLAT register to take the value. This |
| 727 | | - * is a requirement of the hardware. It is recommended to setup CMA |
| 728 | | - * for BAR sizes equal or greater than 4MB. |
| 729 | | - */ |
| 730 | | - if (!IS_ALIGNED(mw->dma_addr, xlat_align)) { |
| 731 | | - dev_err(&pdev->dev, "DMA memory %pad is not aligned\n", |
| 732 | | - &mw->dma_addr); |
| 733 | | - ntb_free_mw(nt, num_mw); |
| 734 | | - return -ENOMEM; |
| 888 | + rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); |
| 889 | + if (rc) { |
| 890 | + mw->alloc_size *= 2; |
| 891 | + rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); |
| 892 | + if (rc) { |
| 893 | + dev_err(&pdev->dev, |
| 894 | + "Unable to alloc aligned MW buff\n"); |
| 895 | + mw->xlat_size = 0; |
| 896 | + mw->buff_size = 0; |
| 897 | + mw->alloc_size = 0; |
| 898 | + return rc; |
| 899 | + } |
| 735 | 900 | } |
| 736 | 901 | |
| 737 | 902 | /* Notify HW the memory location of the receive buffer */ |
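ntb_set_mw() above first tries an allocation of exactly buff_size; only when the returned DMA address fails the alignment check does it double alloc_size and retry, because a buffer of twice the size is guaranteed to contain a fully aligned buff_size window. A worked example with made-up numbers:

```c
/*
 * Made-up numbers: buff_size = 4 MiB, the BAR requires 4 MiB alignment.
 *
 * 1st try: alloc_size = 4 MiB, dma_addr = 0x10500000  -> not aligned, freed
 * 2nd try: alloc_size = 8 MiB, dma_addr = 0x10500000
 *
 *   mw->dma_addr = ALIGN(0x10500000, 0x400000) = 0x10800000
 *   aligned window end = 0x10800000 + 0x400000 = 0x10c00000
 *   allocation end     = 0x10500000 + 0x800000 = 0x10d00000
 *
 * so the aligned 4 MiB window always fits inside the doubled allocation.
 */
```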
| .. | .. |
| 822 | 987 | if (!nt->link_is_up) |
| 823 | 988 | cancel_delayed_work_sync(&nt->link_work); |
| 824 | 989 | |
| 990 | + for (i = 0; i < nt->mw_count; i++) |
| 991 | + ntb_free_mw(nt, i); |
| 992 | + |
| 825 | 993 | /* The scratchpad registers keep the values if the remote side |
| 826 | 994 | * goes down, blast them now to give them a sane value the next |
| 827 | 995 | * time they are accessed |
| .. | .. |
| 860 | 1028 | int rc = 0, i, spad; |
| 861 | 1029 | |
| 862 | 1030 | /* send the local info, in the opposite order of the way we read it */ |
| 1031 | + |
| 1032 | + if (nt->use_msi) { |
| 1033 | + rc = ntb_msi_setup_mws(ndev); |
| 1034 | + if (rc) { |
| 1035 | + dev_warn(&pdev->dev, |
| 1036 | + "Failed to register MSI memory window: %d\n", |
| 1037 | + rc); |
| 1038 | + nt->use_msi = false; |
| 1039 | + } |
| 1040 | + } |
| 1041 | + |
| 1042 | + for (i = 0; i < nt->qp_count; i++) |
| 1043 | + ntb_transport_setup_qp_msi(nt, i); |
| 1044 | + |
| 863 | 1045 | for (i = 0; i < nt->mw_count; i++) { |
| 864 | 1046 | size = nt->mw_vec[i].phys_size; |
| 865 | 1047 | |
| .. | .. |
| 917 | 1099 | struct ntb_transport_qp *qp = &nt->qp_vec[i]; |
| 918 | 1100 | |
| 919 | 1101 | ntb_transport_setup_qp_mw(nt, i); |
| 1102 | + ntb_transport_setup_qp_peer_msi(nt, i); |
| 920 | 1103 | |
| 921 | 1104 | if (qp->client_ready) |
| 922 | 1105 | schedule_delayed_work(&qp->link_work, 0); |
| .. | .. |
| 1009 | 1192 | tx_size = (unsigned int)mw_size / num_qps_mw; |
| 1010 | 1193 | qp_offset = tx_size * (qp_num / mw_count); |
| 1011 | 1194 | |
| 1195 | + qp->tx_mw_size = tx_size; |
| 1012 | 1196 | qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset; |
| 1013 | 1197 | if (!qp->tx_mw) |
| 1014 | 1198 | return -EINVAL; |
| .. | .. |
| 1089 | 1273 | return -ENOMEM; |
| 1090 | 1274 | |
| 1091 | 1275 | nt->ndev = ndev; |
| 1276 | + |
| 1277 | + /* |
| 1278 | + * If we are using MSI, and have at least one extra memory window, |
| 1279 | + * we will reserve the last MW for the MSI window. |
| 1280 | + */ |
| 1281 | + if (use_msi && mw_count > 1) { |
| 1282 | + rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed); |
| 1283 | + if (!rc) { |
| 1284 | + mw_count -= 1; |
| 1285 | + nt->use_msi = true; |
| 1286 | + } |
| 1287 | + } |
| 1288 | + |
| 1092 | 1289 | spad_count = ntb_spad_count(ndev); |
| 1093 | 1290 | |
| 1094 | 1291 | /* Limit the MW's based on the availability of scratchpads */ |
| .. | .. |
| 1101 | 1298 | |
| 1102 | 1299 | max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2; |
| 1103 | 1300 | nt->mw_count = min(mw_count, max_mw_count_for_spads); |
| 1301 | + |
| 1302 | + nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH; |
| 1104 | 1303 | |
| 1105 | 1304 | nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec), |
| 1106 | 1305 | GFP_KERNEL, node); |
| .. | .. |
| 1132 | 1331 | qp_bitmap = ntb_db_valid_mask(ndev); |
| 1133 | 1332 | |
| 1134 | 1333 | qp_count = ilog2(qp_bitmap); |
| 1334 | + if (nt->use_msi) { |
| 1335 | + qp_count -= 1; |
| 1336 | + nt->msi_db_mask = 1 << qp_count; |
| 1337 | + ntb_db_clear_mask(ndev, nt->msi_db_mask); |
| 1338 | + } |
| 1339 | + |
| 1135 | 1340 | if (max_num_clients && max_num_clients < qp_count) |
| 1136 | 1341 | qp_count = max_num_clients; |
| 1137 | 1342 | else if (nt->mw_count < qp_count) |
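Taken together, the probe-time hunks above carve MSI resources out of the existing pools: with use_msi set and more than one memory window, ntb_msi_init() claims the last MW (mw_count is decremented so the transport no longer hands it to a queue pair), the MSI descriptor scratchpads start right after the MW size scratchpads, and one doorbell bit is pulled out of the pool to become msi_db_mask, which a peer rings whenever it re-publishes its descriptors. A sketch with assumed hardware numbers:

```c
/*
 * Assumed example hardware: 4 memory windows, 64 doorbell bits.
 *
 *   MW 0..2  -> transport rx buffers             (nt->mw_count ends up 3)
 *   MW 3     -> claimed by ntb_msi_init() for the MSI window
 *
 *   msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH   (2 spads per QP follow)
 *
 *   qp_count = ilog2(db_valid_mask) - 1           (one QP fewer than before)
 *   DB bit 'qp_count' -> nt->msi_db_mask, unmasked locally and rung by the
 *                        peer via ntb_peer_db_set() on descriptor changes
 */
```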
| .. | .. |
| 1278 | 1483 | case DMA_TRANS_READ_FAILED: |
| 1279 | 1484 | case DMA_TRANS_WRITE_FAILED: |
| 1280 | 1485 | entry->errors++; |
| 1486 | + fallthrough; |
| 1281 | 1487 | case DMA_TRANS_ABORTED: |
| 1282 | 1488 | { |
| 1283 | 1489 | struct ntb_transport_qp *qp = entry->qp; |
| .. | .. |
| 1533 | 1739 | case DMA_TRANS_READ_FAILED: |
| 1534 | 1740 | case DMA_TRANS_WRITE_FAILED: |
| 1535 | 1741 | entry->errors++; |
| 1742 | + fallthrough; |
| 1536 | 1743 | case DMA_TRANS_ABORTED: |
| 1537 | 1744 | { |
| 1538 | 1745 | void __iomem *offset = |
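Both DMA completion callbacks (the two hunks above) already fell through from the *_FAILED cases into DMA_TRANS_ABORTED; the new `fallthrough;` statement documents that the fall-through is intentional and keeps compilers that warn about implicit fall-through quiet. The generic shape, illustrative only:

```c
/* Illustrative shape only; mirrors the two callbacks patched above. */
switch (res->result) {
case DMA_TRANS_READ_FAILED:
case DMA_TRANS_WRITE_FAILED:
	entry->errors++;
	fallthrough;	/* failed transfers take the same path as aborts */
case DMA_TRANS_ABORTED:
	/* handle the transfer as not completed by DMA */
	break;
case DMA_TRANS_NOERROR:
default:
	break;
}
```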
| .. | .. |
| 1553 | 1760 | |
| 1554 | 1761 | iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); |
| 1555 | 1762 | |
| 1556 | | - ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); |
| 1763 | + if (qp->use_msi) |
| 1764 | + ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc); |
| 1765 | + else |
| 1766 | + ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); |
| 1557 | 1767 | |
| 1558 | 1768 | /* The entry length can only be zero if the packet is intended to be a |
| 1559 | 1769 | * "link down" or similar. Since no payload is being sent in these |
| .. | .. |
| 1602 | 1812 | dma_cookie_t cookie; |
| 1603 | 1813 | |
| 1604 | 1814 | device = chan->device; |
| 1605 | | - dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index; |
| 1815 | + dest = qp->tx_mw_dma_addr + qp->tx_max_frame * entry->tx_index; |
| 1606 | 1816 | buff_off = (size_t)buf & ~PAGE_MASK; |
| 1607 | 1817 | dest_off = (size_t)dest & ~PAGE_MASK; |
| 1608 | 1818 | |
| .. | .. |
| 1821 | 2031 | qp->rx_dma_chan = NULL; |
| 1822 | 2032 | } |
| 1823 | 2033 | |
| 2034 | + qp->tx_mw_dma_addr = 0; |
| 2035 | + if (qp->tx_dma_chan) { |
| 2036 | + qp->tx_mw_dma_addr = |
| 2037 | + dma_map_resource(qp->tx_dma_chan->device->dev, |
| 2038 | + qp->tx_mw_phys, qp->tx_mw_size, |
| 2039 | + DMA_FROM_DEVICE, 0); |
| 2040 | + if (dma_mapping_error(qp->tx_dma_chan->device->dev, |
| 2041 | + qp->tx_mw_dma_addr)) { |
| 2042 | + qp->tx_mw_dma_addr = 0; |
| 2043 | + goto err1; |
| 2044 | + } |
| 2045 | + } |
| 2046 | + |
| 1824 | 2047 | dev_dbg(&pdev->dev, "Using %s memcpy for TX\n", |
| 1825 | 2048 | qp->tx_dma_chan ? "DMA" : "CPU"); |
| 1826 | 2049 | |
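Because the TX window is MMIO behind a PCI BAR rather than RAM, the hunk above maps it for the dmaengine channel with dma_map_resource() and keeps the returned IOVA in tx_mw_dma_addr; with an IOMMU in the path, the raw BAR physical address that the send routine used to program into the descriptor would not be reachable by the DMA engine. The general mapping pattern, using hypothetical local variables:

```c
/* Hypothetical variables; mirrors the mapping pattern used above. */
struct dma_chan *chan = qp->tx_dma_chan;
dma_addr_t iova;

iova = dma_map_resource(chan->device->dev, qp->tx_mw_phys, qp->tx_mw_size,
			DMA_FROM_DEVICE, 0);
if (dma_mapping_error(chan->device->dev, iova))
	iova = 0;	/* fall back to CPU copies into the __iomem mapping */

/* ... and on teardown, only if the mapping was created ... */
if (iova)
	dma_unmap_resource(chan->device->dev, iova, qp->tx_mw_size,
			   DMA_FROM_DEVICE, 0);
```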
| .. | .. |
| 1862 | 2085 | qp->rx_alloc_entry = 0; |
| 1863 | 2086 | while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) |
| 1864 | 2087 | kfree(entry); |
| 2088 | + if (qp->tx_mw_dma_addr) |
| 2089 | + dma_unmap_resource(qp->tx_dma_chan->device->dev, |
| 2090 | + qp->tx_mw_dma_addr, qp->tx_mw_size, |
| 2091 | + DMA_FROM_DEVICE, 0); |
| 1865 | 2092 | if (qp->tx_dma_chan) |
| 1866 | 2093 | dma_release_channel(qp->tx_dma_chan); |
| 1867 | 2094 | if (qp->rx_dma_chan) |
| .. | .. |
| 1903 | 2130 | */ |
| 1904 | 2131 | dma_sync_wait(chan, qp->last_cookie); |
| 1905 | 2132 | dmaengine_terminate_all(chan); |
| 2133 | + |
| 2134 | + dma_unmap_resource(chan->device->dev, |
| 2135 | + qp->tx_mw_dma_addr, qp->tx_mw_size, |
| 2136 | + DMA_FROM_DEVICE, 0); |
| 2137 | + |
| 1906 | 2138 | dma_release_channel(chan); |
| 1907 | 2139 | } |
| 1908 | 2140 | |
| .. | .. |
| 2199 | 2431 | u64 db_bits; |
| 2200 | 2432 | unsigned int qp_num; |
| 2201 | 2433 | |
| 2434 | + if (ntb_db_read(nt->ndev) & nt->msi_db_mask) { |
| 2435 | + ntb_transport_msi_peer_desc_changed(nt); |
| 2436 | + ntb_db_clear(nt->ndev, nt->msi_db_mask); |
| 2437 | + } |
| 2438 | + |
| 2202 | 2439 | db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free & |
| 2203 | 2440 | ntb_db_vector_mask(nt->ndev, vector)); |
| 2204 | 2441 | |
|---|