| .. | .. |
|---|
| 93 | 93 | module_param(use_dma, bool, 0644); |
|---|
| 94 | 94 | MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy"); |
|---|
| 95 | 95 | |
|---|
| 96 | +static bool use_msi; |
|---|
| 97 | +#ifdef CONFIG_NTB_MSI |
|---|
| 98 | +module_param(use_msi, bool, 0644); |
|---|
| 99 | +MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells"); |
|---|
| 100 | +#endif |
|---|
| 101 | + |
|---|
| 96 | 102 | static struct dentry *nt_debugfs_dir; |
|---|
| 97 | 103 | |
|---|
| 98 | 104 | /* Only two-ports NTB devices are supported */ |
|---|
| .. | .. |
|---|
| 144 | 150 | struct list_head tx_free_q; |
|---|
| 145 | 151 | spinlock_t ntb_tx_free_q_lock; |
|---|
| 146 | 152 | void __iomem *tx_mw; |
|---|
| 147 | | - dma_addr_t tx_mw_phys; |
|---|
| 153 | + phys_addr_t tx_mw_phys; |
|---|
| 154 | + size_t tx_mw_size; |
|---|
| 155 | + dma_addr_t tx_mw_dma_addr; |
|---|
| 148 | 156 | unsigned int tx_index; |
|---|
| 149 | 157 | unsigned int tx_max_entry; |
|---|
| 150 | 158 | unsigned int tx_max_frame; |
|---|
| .. | .. |
|---|
| 186 | 194 | u64 tx_err_no_buf; |
|---|
| 187 | 195 | u64 tx_memcpy; |
|---|
| 188 | 196 | u64 tx_async; |
|---|
| 197 | + |
|---|
| 198 | + bool use_msi; |
|---|
| 199 | + int msi_irq; |
|---|
| 200 | + struct ntb_msi_desc msi_desc; |
|---|
| 201 | + struct ntb_msi_desc peer_msi_desc; |
|---|
| 189 | 202 | }; |
|---|
| 190 | 203 | |
|---|
| 191 | 204 | struct ntb_transport_mw { |
|---|
| .. | .. |
|---|
| 194 | 207 | void __iomem *vbase; |
|---|
| 195 | 208 | size_t xlat_size; |
|---|
| 196 | 209 | size_t buff_size; |
|---|
| 210 | + size_t alloc_size; |
|---|
| 211 | + void *alloc_addr; |
|---|
| 197 | 212 | void *virt_addr; |
|---|
| 198 | 213 | dma_addr_t dma_addr; |
|---|
| 199 | 214 | }; |
|---|
| .. | .. |
|---|
| 216 | 231 | unsigned int qp_count; |
|---|
| 217 | 232 | u64 qp_bitmap; |
|---|
| 218 | 233 | u64 qp_bitmap_free; |
|---|
| 234 | + |
|---|
| 235 | + bool use_msi; |
|---|
| 236 | + unsigned int msi_spad_offset; |
|---|
| 237 | + u64 msi_db_mask; |
|---|
| 219 | 238 | |
|---|
| 220 | 239 | bool link_is_up; |
|---|
| 221 | 240 | struct delayed_work link_work; |
|---|
| .. | .. |
|---|
| 273 | 292 | static int ntb_transport_bus_probe(struct device *dev) |
|---|
| 274 | 293 | { |
|---|
| 275 | 294 | const struct ntb_transport_client *client; |
|---|
| 276 | | - int rc = -EINVAL; |
|---|
| 295 | + int rc; |
|---|
| 277 | 296 | |
|---|
| 278 | 297 | get_device(dev); |
|---|
| 279 | 298 | |
|---|
| .. | .. |
|---|
| 393 | 412 | |
|---|
| 394 | 413 | rc = device_register(dev); |
|---|
| 395 | 414 | if (rc) { |
|---|
| 396 | | - kfree(client_dev); |
|---|
| 415 | + put_device(dev); |
|---|
| 397 | 416 | goto err; |
|---|
| 398 | 417 | } |
|---|
| 399 | 418 | |
|---|
| .. | .. |
|---|
| 462 | 481 | return -ENOMEM; |
|---|
| 463 | 482 | |
|---|
| 464 | 483 | out_offset = 0; |
|---|
| 465 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 484 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 466 | 485 | "\nNTB QP stats:\n\n"); |
|---|
| 467 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 486 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 468 | 487 | "rx_bytes - \t%llu\n", qp->rx_bytes); |
|---|
| 469 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 488 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 470 | 489 | "rx_pkts - \t%llu\n", qp->rx_pkts); |
|---|
| 471 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 490 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 472 | 491 | "rx_memcpy - \t%llu\n", qp->rx_memcpy); |
|---|
| 473 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 492 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 474 | 493 | "rx_async - \t%llu\n", qp->rx_async); |
|---|
| 475 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 494 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 476 | 495 | "rx_ring_empty - %llu\n", qp->rx_ring_empty); |
|---|
| 477 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 496 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 478 | 497 | "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); |
|---|
| 479 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 498 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 480 | 499 | "rx_err_oflow - \t%llu\n", qp->rx_err_oflow); |
|---|
| 481 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 500 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 482 | 501 | "rx_err_ver - \t%llu\n", qp->rx_err_ver); |
|---|
| 483 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 502 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 484 | 503 | "rx_buff - \t0x%p\n", qp->rx_buff); |
|---|
| 485 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 504 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 486 | 505 | "rx_index - \t%u\n", qp->rx_index); |
|---|
| 487 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 506 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 488 | 507 | "rx_max_entry - \t%u\n", qp->rx_max_entry); |
|---|
| 489 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 508 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 490 | 509 | "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); |
|---|
| 491 | 510 | |
|---|
| 492 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 511 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 493 | 512 | "tx_bytes - \t%llu\n", qp->tx_bytes); |
|---|
| 494 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 513 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 495 | 514 | "tx_pkts - \t%llu\n", qp->tx_pkts); |
|---|
| 496 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 515 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 497 | 516 | "tx_memcpy - \t%llu\n", qp->tx_memcpy); |
|---|
| 498 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 517 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 499 | 518 | "tx_async - \t%llu\n", qp->tx_async); |
|---|
| 500 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 519 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 501 | 520 | "tx_ring_full - \t%llu\n", qp->tx_ring_full); |
|---|
| 502 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 521 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 503 | 522 | "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); |
|---|
| 504 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 523 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 505 | 524 | "tx_mw - \t0x%p\n", qp->tx_mw); |
|---|
| 506 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 525 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 507 | 526 | "tx_index (H) - \t%u\n", qp->tx_index); |
|---|
| 508 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 527 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 509 | 528 | "RRI (T) - \t%u\n", |
|---|
| 510 | 529 | qp->remote_rx_info->entry); |
|---|
| 511 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 530 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 512 | 531 | "tx_max_entry - \t%u\n", qp->tx_max_entry); |
|---|
| 513 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 532 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 514 | 533 | "free tx - \t%u\n", |
|---|
| 515 | 534 | ntb_transport_tx_free_entry(qp)); |
|---|
| 516 | 535 | |
|---|
| 517 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 536 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 518 | 537 | "\n"); |
|---|
| 519 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 538 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 520 | 539 | "Using TX DMA - \t%s\n", |
|---|
| 521 | 540 | qp->tx_dma_chan ? "Yes" : "No"); |
|---|
| 522 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 541 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 523 | 542 | "Using RX DMA - \t%s\n", |
|---|
| 524 | 543 | qp->rx_dma_chan ? "Yes" : "No"); |
|---|
| 525 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 544 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 526 | 545 | "QP Link - \t%s\n", |
|---|
| 527 | 546 | qp->link_is_up ? "Up" : "Down"); |
|---|
| 528 | | - out_offset += snprintf(buf + out_offset, out_count - out_offset, |
|---|
| 547 | + out_offset += scnprintf(buf + out_offset, out_count - out_offset, |
|---|
| 529 | 548 | "\n"); |
|---|
| 530 | 549 | |
|---|
| 531 | 550 | if (out_offset > out_count) |
|---|
| .. | .. |
|---|
| 663 | 682 | return 0; |
|---|
| 664 | 683 | } |
|---|
| 665 | 684 | |
|---|
| 685 | +static irqreturn_t ntb_transport_isr(int irq, void *dev) |
|---|
| 686 | +{ |
|---|
| 687 | + struct ntb_transport_qp *qp = dev; |
|---|
| 688 | + |
|---|
| 689 | + tasklet_schedule(&qp->rxc_db_work); |
|---|
| 690 | + |
|---|
| 691 | + return IRQ_HANDLED; |
|---|
| 692 | +} |
|---|
| 693 | + |
|---|
/*
 * Read the peer's MSI descriptor for one queue pair from the peer
 * scratchpad registers and, if the peer published a non-zero descriptor,
 * switch this QP over to MSI signalling.
 *
 * Each QP consumes two scratchpads starting at nt->msi_spad_offset:
 * [spad] holds addr_offset, [spad + 1] holds the MSI data payload.
 */
static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt,
					    unsigned int qp_num)
{
	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
	int spad = qp_num * 2 + nt->msi_spad_offset;

	if (!nt->use_msi)
		return;

	/* Not enough scratchpads to carry this QP's descriptor pair. */
	if (spad >= ntb_spad_count(nt->ndev))
		return;

	qp->peer_msi_desc.addr_offset =
		ntb_peer_spad_read(qp->ndev, PIDX, spad);
	qp->peer_msi_desc.data =
		ntb_peer_spad_read(qp->ndev, PIDX, spad + 1);

	dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n",
		qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data);

	/* A zero addr_offset means the peer has not set up MSI for this QP. */
	if (qp->peer_msi_desc.addr_offset) {
		qp->use_msi = true;
		dev_info(&qp->ndev->pdev->dev,
			 "Using MSI interrupts for QP%d\n", qp_num);
	}
}
|---|
| 720 | + |
|---|
/*
 * Allocate an MSI interrupt for one queue pair and publish its descriptor
 * (addr_offset, data) to the peer through two local scratchpad registers,
 * so the peer can trigger it instead of ringing a doorbell.
 *
 * Best effort: on any failure the scratchpads are left zeroed and the QP
 * silently keeps using doorbell signalling.
 */
static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt,
				       unsigned int qp_num)
{
	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
	int spad = qp_num * 2 + nt->msi_spad_offset;
	int rc;

	if (!nt->use_msi)
		return;

	if (spad >= ntb_spad_count(nt->ndev)) {
		dev_warn_once(&qp->ndev->pdev->dev,
			      "Not enough SPADS to use MSI interrupts\n");
		return;
	}

	/* Zero first so the peer never reads a stale or partial descriptor. */
	ntb_spad_write(qp->ndev, spad, 0);
	ntb_spad_write(qp->ndev, spad + 1, 0);

	/* Request the IRQ only once; later calls just republish the spads. */
	if (!qp->msi_irq) {
		qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr,
						   KBUILD_MODNAME, qp,
						   &qp->msi_desc);
		if (qp->msi_irq < 0) {
			dev_warn(&qp->ndev->pdev->dev,
				 "Unable to allocate MSI interrupt for qp%d\n",
				 qp_num);
			return;
		}
	}

	rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset);
	if (rc)
		goto err_free_interrupt;

	rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data);
	if (rc)
		goto err_free_interrupt;

	dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n",
		qp_num, qp->msi_irq, qp->msi_desc.addr_offset,
		qp->msi_desc.data);

	return;

err_free_interrupt:
	/*
	 * NOTE(review): qp->msi_irq is not reset to 0 after the free, so a
	 * subsequent call would skip re-requesting and republish a freed
	 * IRQ's descriptor — confirm this path cannot recur.
	 */
	devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp);
}
|---|
| 769 | + |
|---|
| 770 | +static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt) |
|---|
| 771 | +{ |
|---|
| 772 | + int i; |
|---|
| 773 | + |
|---|
| 774 | + dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed"); |
|---|
| 775 | + |
|---|
| 776 | + for (i = 0; i < nt->qp_count; i++) |
|---|
| 777 | + ntb_transport_setup_qp_peer_msi(nt, i); |
|---|
| 778 | +} |
|---|
| 779 | + |
|---|
| 780 | +static void ntb_transport_msi_desc_changed(void *data) |
|---|
| 781 | +{ |
|---|
| 782 | + struct ntb_transport_ctx *nt = data; |
|---|
| 783 | + int i; |
|---|
| 784 | + |
|---|
| 785 | + dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed"); |
|---|
| 786 | + |
|---|
| 787 | + for (i = 0; i < nt->qp_count; i++) |
|---|
| 788 | + ntb_transport_setup_qp_msi(nt, i); |
|---|
| 789 | + |
|---|
| 790 | + ntb_peer_db_set(nt->ndev, nt->msi_db_mask); |
|---|
| 791 | +} |
|---|
| 792 | + |
|---|
| 666 | 793 | static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) |
|---|
| 667 | 794 | { |
|---|
| 668 | 795 | struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; |
|---|
| .. | .. |
|---|
| 672 | 799 | return; |
|---|
| 673 | 800 | |
|---|
| 674 | 801 | ntb_mw_clear_trans(nt->ndev, PIDX, num_mw); |
|---|
| 675 | | - dma_free_coherent(&pdev->dev, mw->buff_size, |
|---|
| 676 | | - mw->virt_addr, mw->dma_addr); |
|---|
| 802 | + dma_free_coherent(&pdev->dev, mw->alloc_size, |
|---|
| 803 | + mw->alloc_addr, mw->dma_addr); |
|---|
| 677 | 804 | mw->xlat_size = 0; |
|---|
| 678 | 805 | mw->buff_size = 0; |
|---|
| 806 | + mw->alloc_size = 0; |
|---|
| 807 | + mw->alloc_addr = NULL; |
|---|
| 679 | 808 | mw->virt_addr = NULL; |
|---|
| 809 | +} |
|---|
| 810 | + |
|---|
/*
 * Allocate the coherent DMA buffer backing a memory window.
 *
 * The XLAT register requires the buffer's DMA address to be aligned to the
 * BAR size (@align).  If the allocator returns an unaligned buffer, an
 * aligned sub-range can be carved out only when the caller over-allocated
 * (mw->alloc_size > mw->buff_size); otherwise -ENOMEM is returned so the
 * caller can retry with a doubled allocation.
 *
 * On success, mw->alloc_addr holds the raw allocation while mw->virt_addr
 * and mw->dma_addr point at the (possibly rounded-up) aligned region.
 *
 * NOTE(review): when alignment is applied, the stored mw->dma_addr differs
 * from the handle dma_alloc_coherent() returned; the eventual free pairs
 * mw->alloc_addr with that adjusted handle — confirm this pairing is
 * acceptable for the DMA API on all supported platforms.
 */
static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw,
			       struct device *dma_dev, size_t align)
{
	dma_addr_t dma_addr;
	void *alloc_addr, *virt_addr;
	int rc;

	alloc_addr = dma_alloc_coherent(dma_dev, mw->alloc_size,
					&dma_addr, GFP_KERNEL);
	if (!alloc_addr) {
		dev_err(dma_dev, "Unable to alloc MW buff of size %zu\n",
			mw->alloc_size);
		return -ENOMEM;
	}
	virt_addr = alloc_addr;

	/*
	 * we must ensure that the memory address allocated is BAR size
	 * aligned in order for the XLAT register to take the value. This
	 * is a requirement of the hardware. It is recommended to setup CMA
	 * for BAR sizes equal or greater than 4MB.
	 */
	if (!IS_ALIGNED(dma_addr, align)) {
		if (mw->alloc_size > mw->buff_size) {
			/* Round both views up to the next aligned boundary. */
			virt_addr = PTR_ALIGN(alloc_addr, align);
			dma_addr = ALIGN(dma_addr, align);
		} else {
			rc = -ENOMEM;
			goto err;
		}
	}

	mw->alloc_addr = alloc_addr;
	mw->virt_addr = virt_addr;
	mw->dma_addr = dma_addr;

	return 0;

err:
	/* dma_addr was not adjusted on this path, so the pair is the raw one. */
	dma_free_coherent(dma_dev, mw->alloc_size, alloc_addr, dma_addr);

	return rc;
}
|---|
| 681 | 854 | |
|---|
| 682 | 855 | static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, |
|---|
| .. | .. |
|---|
| 710 | 883 | /* Alloc memory for receiving data. Must be aligned */ |
|---|
| 711 | 884 | mw->xlat_size = xlat_size; |
|---|
| 712 | 885 | mw->buff_size = buff_size; |
|---|
| 886 | + mw->alloc_size = buff_size; |
|---|
| 713 | 887 | |
|---|
| 714 | | - mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size, |
|---|
| 715 | | - &mw->dma_addr, GFP_KERNEL); |
|---|
| 716 | | - if (!mw->virt_addr) { |
|---|
| 717 | | - mw->xlat_size = 0; |
|---|
| 718 | | - mw->buff_size = 0; |
|---|
| 719 | | - dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n", |
|---|
| 720 | | - buff_size); |
|---|
| 721 | | - return -ENOMEM; |
|---|
| 722 | | - } |
|---|
| 723 | | - |
|---|
| 724 | | - /* |
|---|
| 725 | | - * we must ensure that the memory address allocated is BAR size |
|---|
| 726 | | - * aligned in order for the XLAT register to take the value. This |
|---|
| 727 | | - * is a requirement of the hardware. It is recommended to setup CMA |
|---|
| 728 | | - * for BAR sizes equal or greater than 4MB. |
|---|
| 729 | | - */ |
|---|
| 730 | | - if (!IS_ALIGNED(mw->dma_addr, xlat_align)) { |
|---|
| 731 | | - dev_err(&pdev->dev, "DMA memory %pad is not aligned\n", |
|---|
| 732 | | - &mw->dma_addr); |
|---|
| 733 | | - ntb_free_mw(nt, num_mw); |
|---|
| 734 | | - return -ENOMEM; |
|---|
| 888 | + rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); |
|---|
| 889 | + if (rc) { |
|---|
| 890 | + mw->alloc_size *= 2; |
|---|
| 891 | + rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align); |
|---|
| 892 | + if (rc) { |
|---|
| 893 | + dev_err(&pdev->dev, |
|---|
| 894 | + "Unable to alloc aligned MW buff\n"); |
|---|
| 895 | + mw->xlat_size = 0; |
|---|
| 896 | + mw->buff_size = 0; |
|---|
| 897 | + mw->alloc_size = 0; |
|---|
| 898 | + return rc; |
|---|
| 899 | + } |
|---|
| 735 | 900 | } |
|---|
| 736 | 901 | |
|---|
| 737 | 902 | /* Notify HW the memory location of the receive buffer */ |
|---|
| .. | .. |
|---|
| 746 | 911 | return 0; |
|---|
| 747 | 912 | } |
|---|
| 748 | 913 | |
|---|
| 749 | | -static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp) |
|---|
| 914 | +static void ntb_qp_link_context_reset(struct ntb_transport_qp *qp) |
|---|
| 750 | 915 | { |
|---|
| 751 | 916 | qp->link_is_up = false; |
|---|
| 752 | 917 | qp->active = false; |
|---|
| .. | .. |
|---|
| 767 | 932 | qp->tx_err_no_buf = 0; |
|---|
| 768 | 933 | qp->tx_memcpy = 0; |
|---|
| 769 | 934 | qp->tx_async = 0; |
|---|
| 935 | +} |
|---|
| 936 | + |
|---|
| 937 | +static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp) |
|---|
| 938 | +{ |
|---|
| 939 | + ntb_qp_link_context_reset(qp); |
|---|
| 940 | + if (qp->remote_rx_info) |
|---|
| 941 | + qp->remote_rx_info->entry = qp->rx_max_entry - 1; |
|---|
| 770 | 942 | } |
|---|
| 771 | 943 | |
|---|
| 772 | 944 | static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp) |
|---|
| .. | .. |
|---|
| 822 | 994 | if (!nt->link_is_up) |
|---|
| 823 | 995 | cancel_delayed_work_sync(&nt->link_work); |
|---|
| 824 | 996 | |
|---|
| 997 | + for (i = 0; i < nt->mw_count; i++) |
|---|
| 998 | + ntb_free_mw(nt, i); |
|---|
| 999 | + |
|---|
| 825 | 1000 | /* The scratchpad registers keep the values if the remote side |
|---|
| 826 | 1001 | * goes down, blast them now to give them a sane value the next |
|---|
| 827 | 1002 | * time they are accessed |
|---|
| .. | .. |
|---|
| 860 | 1035 | int rc = 0, i, spad; |
|---|
| 861 | 1036 | |
|---|
| 862 | 1037 | /* send the local info, in the opposite order of the way we read it */ |
|---|
| 1038 | + |
|---|
| 1039 | + if (nt->use_msi) { |
|---|
| 1040 | + rc = ntb_msi_setup_mws(ndev); |
|---|
| 1041 | + if (rc) { |
|---|
| 1042 | + dev_warn(&pdev->dev, |
|---|
| 1043 | + "Failed to register MSI memory window: %d\n", |
|---|
| 1044 | + rc); |
|---|
| 1045 | + nt->use_msi = false; |
|---|
| 1046 | + } |
|---|
| 1047 | + } |
|---|
| 1048 | + |
|---|
| 1049 | + for (i = 0; i < nt->qp_count; i++) |
|---|
| 1050 | + ntb_transport_setup_qp_msi(nt, i); |
|---|
| 1051 | + |
|---|
| 863 | 1052 | for (i = 0; i < nt->mw_count; i++) { |
|---|
| 864 | 1053 | size = nt->mw_vec[i].phys_size; |
|---|
| 865 | 1054 | |
|---|
| .. | .. |
|---|
| 917 | 1106 | struct ntb_transport_qp *qp = &nt->qp_vec[i]; |
|---|
| 918 | 1107 | |
|---|
| 919 | 1108 | ntb_transport_setup_qp_mw(nt, i); |
|---|
| 1109 | + ntb_transport_setup_qp_peer_msi(nt, i); |
|---|
| 920 | 1110 | |
|---|
| 921 | 1111 | if (qp->client_ready) |
|---|
| 922 | 1112 | schedule_delayed_work(&qp->link_work, 0); |
|---|
| .. | .. |
|---|
| 993 | 1183 | qp->ndev = nt->ndev; |
|---|
| 994 | 1184 | qp->client_ready = false; |
|---|
| 995 | 1185 | qp->event_handler = NULL; |
|---|
| 996 | | - ntb_qp_link_down_reset(qp); |
|---|
| 1186 | + ntb_qp_link_context_reset(qp); |
|---|
| 997 | 1187 | |
|---|
| 998 | 1188 | if (mw_num < qp_count % mw_count) |
|---|
| 999 | 1189 | num_qps_mw = qp_count / mw_count + 1; |
|---|
| .. | .. |
|---|
| 1009 | 1199 | tx_size = (unsigned int)mw_size / num_qps_mw; |
|---|
| 1010 | 1200 | qp_offset = tx_size * (qp_num / mw_count); |
|---|
| 1011 | 1201 | |
|---|
| 1202 | + qp->tx_mw_size = tx_size; |
|---|
| 1012 | 1203 | qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset; |
|---|
| 1013 | 1204 | if (!qp->tx_mw) |
|---|
| 1014 | 1205 | return -EINVAL; |
|---|
| .. | .. |
|---|
| 1089 | 1280 | return -ENOMEM; |
|---|
| 1090 | 1281 | |
|---|
| 1091 | 1282 | nt->ndev = ndev; |
|---|
| 1283 | + |
|---|
| 1284 | + /* |
|---|
| 1285 | + * If we are using MSI, and have at least one extra memory window, |
|---|
| 1286 | + * we will reserve the last MW for the MSI window. |
|---|
| 1287 | + */ |
|---|
| 1288 | + if (use_msi && mw_count > 1) { |
|---|
| 1289 | + rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed); |
|---|
| 1290 | + if (!rc) { |
|---|
| 1291 | + mw_count -= 1; |
|---|
| 1292 | + nt->use_msi = true; |
|---|
| 1293 | + } |
|---|
| 1294 | + } |
|---|
| 1295 | + |
|---|
| 1092 | 1296 | spad_count = ntb_spad_count(ndev); |
|---|
| 1093 | 1297 | |
|---|
| 1094 | 1298 | /* Limit the MW's based on the availability of scratchpads */ |
|---|
| .. | .. |
|---|
| 1101 | 1305 | |
|---|
| 1102 | 1306 | max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2; |
|---|
| 1103 | 1307 | nt->mw_count = min(mw_count, max_mw_count_for_spads); |
|---|
| 1308 | + |
|---|
| 1309 | + nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH; |
|---|
| 1104 | 1310 | |
|---|
| 1105 | 1311 | nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec), |
|---|
| 1106 | 1312 | GFP_KERNEL, node); |
|---|
| .. | .. |
|---|
| 1132 | 1338 | qp_bitmap = ntb_db_valid_mask(ndev); |
|---|
| 1133 | 1339 | |
|---|
| 1134 | 1340 | qp_count = ilog2(qp_bitmap); |
|---|
| 1341 | + if (nt->use_msi) { |
|---|
| 1342 | + qp_count -= 1; |
|---|
| 1343 | + nt->msi_db_mask = 1 << qp_count; |
|---|
| 1344 | + ntb_db_clear_mask(ndev, nt->msi_db_mask); |
|---|
| 1345 | + } |
|---|
| 1346 | + |
|---|
| 1135 | 1347 | if (max_num_clients && max_num_clients < qp_count) |
|---|
| 1136 | 1348 | qp_count = max_num_clients; |
|---|
| 1137 | 1349 | else if (nt->mw_count < qp_count) |
|---|
| .. | .. |
|---|
| 1278 | 1490 | case DMA_TRANS_READ_FAILED: |
|---|
| 1279 | 1491 | case DMA_TRANS_WRITE_FAILED: |
|---|
| 1280 | 1492 | entry->errors++; |
|---|
| 1493 | + fallthrough; |
|---|
| 1281 | 1494 | case DMA_TRANS_ABORTED: |
|---|
| 1282 | 1495 | { |
|---|
| 1283 | 1496 | struct ntb_transport_qp *qp = entry->qp; |
|---|
| .. | .. |
|---|
| 1533 | 1746 | case DMA_TRANS_READ_FAILED: |
|---|
| 1534 | 1747 | case DMA_TRANS_WRITE_FAILED: |
|---|
| 1535 | 1748 | entry->errors++; |
|---|
| 1749 | + fallthrough; |
|---|
| 1536 | 1750 | case DMA_TRANS_ABORTED: |
|---|
| 1537 | 1751 | { |
|---|
| 1538 | 1752 | void __iomem *offset = |
|---|
| .. | .. |
|---|
| 1553 | 1767 | |
|---|
| 1554 | 1768 | iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); |
|---|
| 1555 | 1769 | |
|---|
| 1556 | | - ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); |
|---|
| 1770 | + if (qp->use_msi) |
|---|
| 1771 | + ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc); |
|---|
| 1772 | + else |
|---|
| 1773 | + ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); |
|---|
| 1557 | 1774 | |
|---|
| 1558 | 1775 | /* The entry length can only be zero if the packet is intended to be a |
|---|
| 1559 | 1776 | * "link down" or similar. Since no payload is being sent in these |
|---|
| .. | .. |
|---|
| 1602 | 1819 | dma_cookie_t cookie; |
|---|
| 1603 | 1820 | |
|---|
| 1604 | 1821 | device = chan->device; |
|---|
| 1605 | | - dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index; |
|---|
| 1822 | + dest = qp->tx_mw_dma_addr + qp->tx_max_frame * entry->tx_index; |
|---|
| 1606 | 1823 | buff_off = (size_t)buf & ~PAGE_MASK; |
|---|
| 1607 | 1824 | dest_off = (size_t)dest & ~PAGE_MASK; |
|---|
| 1608 | 1825 | |
|---|
| .. | .. |
|---|
| 1821 | 2038 | qp->rx_dma_chan = NULL; |
|---|
| 1822 | 2039 | } |
|---|
| 1823 | 2040 | |
|---|
| 2041 | + qp->tx_mw_dma_addr = 0; |
|---|
| 2042 | + if (qp->tx_dma_chan) { |
|---|
| 2043 | + qp->tx_mw_dma_addr = |
|---|
| 2044 | + dma_map_resource(qp->tx_dma_chan->device->dev, |
|---|
| 2045 | + qp->tx_mw_phys, qp->tx_mw_size, |
|---|
| 2046 | + DMA_FROM_DEVICE, 0); |
|---|
| 2047 | + if (dma_mapping_error(qp->tx_dma_chan->device->dev, |
|---|
| 2048 | + qp->tx_mw_dma_addr)) { |
|---|
| 2049 | + qp->tx_mw_dma_addr = 0; |
|---|
| 2050 | + goto err1; |
|---|
| 2051 | + } |
|---|
| 2052 | + } |
|---|
| 2053 | + |
|---|
| 1824 | 2054 | dev_dbg(&pdev->dev, "Using %s memcpy for TX\n", |
|---|
| 1825 | 2055 | qp->tx_dma_chan ? "DMA" : "CPU"); |
|---|
| 1826 | 2056 | |
|---|
| .. | .. |
|---|
| 1862 | 2092 | qp->rx_alloc_entry = 0; |
|---|
| 1863 | 2093 | while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) |
|---|
| 1864 | 2094 | kfree(entry); |
|---|
| 2095 | + if (qp->tx_mw_dma_addr) |
|---|
| 2096 | + dma_unmap_resource(qp->tx_dma_chan->device->dev, |
|---|
| 2097 | + qp->tx_mw_dma_addr, qp->tx_mw_size, |
|---|
| 2098 | + DMA_FROM_DEVICE, 0); |
|---|
| 1865 | 2099 | if (qp->tx_dma_chan) |
|---|
| 1866 | 2100 | dma_release_channel(qp->tx_dma_chan); |
|---|
| 1867 | 2101 | if (qp->rx_dma_chan) |
|---|
| .. | .. |
|---|
| 1903 | 2137 | */ |
|---|
| 1904 | 2138 | dma_sync_wait(chan, qp->last_cookie); |
|---|
| 1905 | 2139 | dmaengine_terminate_all(chan); |
|---|
| 2140 | + |
|---|
| 2141 | + dma_unmap_resource(chan->device->dev, |
|---|
| 2142 | + qp->tx_mw_dma_addr, qp->tx_mw_size, |
|---|
| 2143 | + DMA_FROM_DEVICE, 0); |
|---|
| 2144 | + |
|---|
| 1906 | 2145 | dma_release_channel(chan); |
|---|
| 1907 | 2146 | } |
|---|
| 1908 | 2147 | |
|---|
| .. | .. |
|---|
| 2046 | 2285 | struct ntb_queue_entry *entry; |
|---|
| 2047 | 2286 | int rc; |
|---|
| 2048 | 2287 | |
|---|
| 2049 | | - if (!qp || !qp->link_is_up || !len) |
|---|
| 2288 | + if (!qp || !len) |
|---|
| 2050 | 2289 | return -EINVAL; |
|---|
| 2290 | + |
|---|
| 2291 | + /* If the qp link is down already, just ignore. */ |
|---|
| 2292 | + if (!qp->link_is_up) |
|---|
| 2293 | + return 0; |
|---|
| 2051 | 2294 | |
|---|
| 2052 | 2295 | entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); |
|---|
| 2053 | 2296 | if (!entry) { |
|---|
| .. | .. |
|---|
| 2188 | 2431 | unsigned int head = qp->tx_index; |
|---|
| 2189 | 2432 | unsigned int tail = qp->remote_rx_info->entry; |
|---|
| 2190 | 2433 | |
|---|
| 2191 | | - return tail > head ? tail - head : qp->tx_max_entry + tail - head; |
|---|
| 2434 | + return tail >= head ? tail - head : qp->tx_max_entry + tail - head; |
|---|
| 2192 | 2435 | } |
|---|
| 2193 | 2436 | EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry); |
|---|
| 2194 | 2437 | |
|---|
| .. | .. |
|---|
| 2199 | 2442 | u64 db_bits; |
|---|
| 2200 | 2443 | unsigned int qp_num; |
|---|
| 2201 | 2444 | |
|---|
| 2445 | + if (ntb_db_read(nt->ndev) & nt->msi_db_mask) { |
|---|
| 2446 | + ntb_transport_msi_peer_desc_changed(nt); |
|---|
| 2447 | + ntb_db_clear(nt->ndev, nt->msi_db_mask); |
|---|
| 2448 | + } |
|---|
| 2449 | + |
|---|
| 2202 | 2450 | db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free & |
|---|
| 2203 | 2451 | ntb_db_vector_mask(nt->ndev, vector)); |
|---|
| 2204 | 2452 | |
|---|