| .. | .. |
|---|
| 1 | 1 | /* |
|---|
| 2 | | - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. |
|---|
| 2 | + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. |
|---|
| 3 | 3 | * |
|---|
| 4 | 4 | * This software is available to you under a choice of one of two |
|---|
| 5 | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
|---|
| .. | .. |
|---|
| 61 | 61 | recv->r_wr.num_sge = RDS_IB_RECV_SGE; |
|---|
| 62 | 62 | |
|---|
| 63 | 63 | sge = &recv->r_sge[0]; |
|---|
| 64 | | - sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); |
|---|
| 64 | + sge->addr = ic->i_recv_hdrs_dma[i]; |
|---|
| 65 | 65 | sge->length = sizeof(struct rds_header); |
|---|
| 66 | 66 | sge->lkey = ic->i_pd->local_dma_lkey; |
|---|
| 67 | 67 | |
|---|
| .. | .. |
|---|
| 168 | 168 | list_del(&inc->ii_cache_entry); |
|---|
| 169 | 169 | WARN_ON(!list_empty(&inc->ii_frags)); |
|---|
| 170 | 170 | kmem_cache_free(rds_ib_incoming_slab, inc); |
|---|
| 171 | + atomic_dec(&rds_ib_allocation); |
|---|
| 171 | 172 | } |
|---|
| 172 | 173 | |
|---|
| 173 | 174 | rds_ib_cache_xfer_to_ready(&ic->i_cache_frags); |
|---|
| .. | .. |
|---|
| 309 | 310 | struct rds_ib_connection *ic = conn->c_transport_data; |
|---|
| 310 | 311 | struct ib_sge *sge; |
|---|
| 311 | 312 | int ret = -ENOMEM; |
|---|
| 312 | | - gfp_t slab_mask = GFP_NOWAIT; |
|---|
| 313 | | - gfp_t page_mask = GFP_NOWAIT; |
|---|
| 313 | + gfp_t slab_mask = gfp; |
|---|
| 314 | + gfp_t page_mask = gfp; |
|---|
| 314 | 315 | |
|---|
| 315 | 316 | if (gfp & __GFP_DIRECT_RECLAIM) { |
|---|
| 316 | 317 | slab_mask = GFP_KERNEL; |
|---|
| .. | .. |
|---|
| 342 | 343 | WARN_ON(ret != 1); |
|---|
| 343 | 344 | |
|---|
| 344 | 345 | sge = &recv->r_sge[0]; |
|---|
| 345 | | - sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); |
|---|
| 346 | + sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; |
|---|
| 346 | 347 | sge->length = sizeof(struct rds_header); |
|---|
| 347 | 348 | |
|---|
| 348 | 349 | sge = &recv->r_sge[1]; |
|---|
| 349 | | - sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg); |
|---|
| 350 | | - sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg); |
|---|
| 350 | + sge->addr = sg_dma_address(&recv->r_frag->f_sg); |
|---|
| 351 | + sge->length = sg_dma_len(&recv->r_frag->f_sg); |
|---|
| 351 | 352 | |
|---|
| 352 | 353 | ret = 0; |
|---|
| 353 | 354 | out: |
|---|
| .. | .. |
|---|
| 362 | 363 | static void release_refill(struct rds_connection *conn) |
|---|
| 363 | 364 | { |
|---|
| 364 | 365 | clear_bit(RDS_RECV_REFILL, &conn->c_flags); |
|---|
| 366 | + smp_mb__after_atomic(); |
|---|
| 365 | 367 | |
|---|
| 366 | 368 | /* We don't use wait_on_bit()/wake_up_bit() because our waking is in a |
|---|
| 367 | 369 | * hot path and finding waiters is very rare. We don't want to walk |
|---|
| .. | .. |
|---|
| 384 | 386 | unsigned int posted = 0; |
|---|
| 385 | 387 | int ret = 0; |
|---|
| 386 | 388 | bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM); |
|---|
| 389 | + bool must_wake = false; |
|---|
| 387 | 390 | u32 pos; |
|---|
| 388 | 391 | |
|---|
| 389 | 392 | /* the goal here is to just make sure that someone, somewhere |
|---|
| .. | .. |
|---|
| 404 | 407 | recv = &ic->i_recvs[pos]; |
|---|
| 405 | 408 | ret = rds_ib_recv_refill_one(conn, recv, gfp); |
|---|
| 406 | 409 | if (ret) { |
|---|
| 410 | + must_wake = true; |
|---|
| 407 | 411 | break; |
|---|
| 408 | 412 | } |
|---|
| 409 | 413 | |
|---|
| 410 | 414 | rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv, |
|---|
| 411 | 415 | recv->r_ibinc, sg_page(&recv->r_frag->f_sg), |
|---|
| 412 | | - (long) ib_sg_dma_address( |
|---|
| 413 | | - ic->i_cm_id->device, |
|---|
| 414 | | - &recv->r_frag->f_sg)); |
|---|
| 416 | + (long)sg_dma_address(&recv->r_frag->f_sg)); |
|---|
| 415 | 417 | |
|---|
| 416 | 418 | /* XXX when can this fail? */ |
|---|
| 417 | 419 | ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL); |
|---|
| .. | .. |
|---|
| 424 | 426 | } |
|---|
| 425 | 427 | |
|---|
| 426 | 428 | posted++; |
|---|
| 429 | + |
|---|
| 430 | + if ((posted > 128 && need_resched()) || posted > 8192) { |
|---|
| 431 | + must_wake = true; |
|---|
| 432 | + break; |
|---|
| 433 | + } |
|---|
| 427 | 434 | } |
|---|
| 428 | 435 | |
|---|
| 429 | 436 | /* We're doing flow control - update the window. */ |
|---|
| .. | .. |
|---|
| 446 | 453 | * if we should requeue. |
|---|
| 447 | 454 | */ |
|---|
| 448 | 455 | if (rds_conn_up(conn) && |
|---|
| 449 | | - ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) || |
|---|
| 456 | + (must_wake || |
|---|
| 457 | + (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) || |
|---|
| 450 | 458 | rds_ib_ring_empty(&ic->i_recv_ring))) { |
|---|
| 451 | 459 | queue_delayed_work(rds_wq, &conn->c_recv_w, 1); |
|---|
| 452 | 460 | } |
|---|
| 461 | + if (can_wait) |
|---|
| 462 | + cond_resched(); |
|---|
| 453 | 463 | } |
|---|
| 454 | 464 | |
|---|
| 455 | 465 | /* |
|---|
| .. | .. |
|---|
| 653 | 663 | seq = rds_ib_get_ack(ic); |
|---|
| 654 | 664 | |
|---|
| 655 | 665 | rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); |
|---|
| 666 | + |
|---|
| 667 | + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma, |
|---|
| 668 | + sizeof(*hdr), DMA_TO_DEVICE); |
|---|
| 656 | 669 | rds_message_populate_header(hdr, 0, 0, 0); |
|---|
| 657 | 670 | hdr->h_ack = cpu_to_be64(seq); |
|---|
| 658 | 671 | hdr->h_credit = adv_credits; |
|---|
| 659 | 672 | rds_message_make_checksum(hdr); |
|---|
| 673 | + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma, |
|---|
| 674 | + sizeof(*hdr), DMA_TO_DEVICE); |
|---|
| 675 | + |
|---|
| 660 | 676 | ic->i_ack_queued = jiffies; |
|---|
| 661 | 677 | |
|---|
| 662 | 678 | ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL); |
|---|
| .. | .. |
|---|
| 774 | 790 | unsigned long frag_off; |
|---|
| 775 | 791 | unsigned long to_copy; |
|---|
| 776 | 792 | unsigned long copied; |
|---|
| 777 | | - uint64_t uncongested = 0; |
|---|
| 793 | + __le64 uncongested = 0; |
|---|
| 778 | 794 | void *addr; |
|---|
| 779 | 795 | |
|---|
| 780 | 796 | /* catch completely corrupt packets */ |
|---|
| .. | .. |
|---|
| 791 | 807 | copied = 0; |
|---|
| 792 | 808 | |
|---|
| 793 | 809 | while (copied < RDS_CONG_MAP_BYTES) { |
|---|
| 794 | | - uint64_t *src, *dst; |
|---|
| 810 | + __le64 *src, *dst; |
|---|
| 795 | 811 | unsigned int k; |
|---|
| 796 | 812 | |
|---|
| 797 | 813 | to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); |
|---|
| .. | .. |
|---|
| 826 | 842 | } |
|---|
| 827 | 843 | |
|---|
| 828 | 844 | /* the congestion map is in little endian order */ |
|---|
| 829 | | - uncongested = le64_to_cpu(uncongested); |
|---|
| 830 | | - |
|---|
| 831 | | - rds_cong_map_updated(map, uncongested); |
|---|
| 845 | + rds_cong_map_updated(map, le64_to_cpu(uncongested)); |
|---|
| 832 | 846 | } |
|---|
| 833 | 847 | |
|---|
| 834 | 848 | static void rds_ib_process_recv(struct rds_connection *conn, |
|---|
| .. | .. |
|---|
| 838 | 852 | struct rds_ib_connection *ic = conn->c_transport_data; |
|---|
| 839 | 853 | struct rds_ib_incoming *ibinc = ic->i_ibinc; |
|---|
| 840 | 854 | struct rds_header *ihdr, *hdr; |
|---|
| 855 | + dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; |
|---|
| 841 | 856 | |
|---|
| 842 | 857 | /* XXX shut down the connection if port 0,0 are seen? */ |
|---|
| 843 | 858 | |
|---|
| .. | .. |
|---|
| 854 | 869 | } |
|---|
| 855 | 870 | data_len -= sizeof(struct rds_header); |
|---|
| 856 | 871 | |
|---|
| 857 | | - ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs]; |
|---|
| 872 | + ihdr = ic->i_recv_hdrs[recv - ic->i_recvs]; |
|---|
| 858 | 873 | |
|---|
| 874 | + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr, |
|---|
| 875 | + sizeof(*ihdr), DMA_FROM_DEVICE); |
|---|
| 859 | 876 | /* Validate the checksum. */ |
|---|
| 860 | 877 | if (!rds_message_verify_checksum(ihdr)) { |
|---|
| 861 | 878 | rds_ib_conn_error(conn, "incoming message " |
|---|
| .. | .. |
|---|
| 863 | 880 | "forcing a reconnect\n", |
|---|
| 864 | 881 | &conn->c_faddr); |
|---|
| 865 | 882 | rds_stats_inc(s_recv_drop_bad_checksum); |
|---|
| 866 | | - return; |
|---|
| 883 | + goto done; |
|---|
| 867 | 884 | } |
|---|
| 868 | 885 | |
|---|
| 869 | 886 | /* Process the ACK sequence which comes with every packet */ |
|---|
| .. | .. |
|---|
| 892 | 909 | */ |
|---|
| 893 | 910 | rds_ib_frag_free(ic, recv->r_frag); |
|---|
| 894 | 911 | recv->r_frag = NULL; |
|---|
| 895 | | - return; |
|---|
| 912 | + goto done; |
|---|
| 896 | 913 | } |
|---|
| 897 | 914 | |
|---|
| 898 | 915 | /* |
|---|
| .. | .. |
|---|
| 926 | 943 | hdr->h_dport != ihdr->h_dport) { |
|---|
| 927 | 944 | rds_ib_conn_error(conn, |
|---|
| 928 | 945 | "fragment header mismatch; forcing reconnect\n"); |
|---|
| 929 | | - return; |
|---|
| 946 | + goto done; |
|---|
| 930 | 947 | } |
|---|
| 931 | 948 | } |
|---|
| 932 | 949 | |
|---|
| .. | .. |
|---|
| 958 | 975 | |
|---|
| 959 | 976 | rds_inc_put(&ibinc->ii_inc); |
|---|
| 960 | 977 | } |
|---|
| 978 | +done: |
|---|
| 979 | + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr, |
|---|
| 980 | + sizeof(*ihdr), DMA_FROM_DEVICE); |
|---|
| 961 | 981 | } |
|---|
| 962 | 982 | |
|---|
| 963 | 983 | void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, |
|---|
| .. | .. |
|---|
| 986 | 1006 | } else { |
|---|
| 987 | 1007 | /* We expect errors as the qp is drained during shutdown */ |
|---|
| 988 | 1008 | if (rds_conn_up(conn) || rds_conn_connecting(conn)) |
|---|
| 989 | | - rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n", |
|---|
| 1009 | + rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n", |
|---|
| 990 | 1010 | &conn->c_laddr, &conn->c_faddr, |
|---|
| 991 | | - wc->status, |
|---|
| 992 | | - ib_wc_status_msg(wc->status)); |
|---|
| 1011 | + conn->c_tos, wc->status, |
|---|
| 1012 | + ib_wc_status_msg(wc->status), |
|---|
| 1013 | + wc->vendor_err); |
|---|
| 993 | 1014 | } |
|---|
| 994 | 1015 | |
|---|
| 995 | 1016 | /* rds_ib_process_recv() doesn't always consume the frag, and |
|---|
| .. | .. |
|---|
| 1012 | 1033 | rds_ib_stats_inc(s_ib_rx_ring_empty); |
|---|
| 1013 | 1034 | |
|---|
| 1014 | 1035 | if (rds_ib_ring_low(&ic->i_recv_ring)) { |
|---|
| 1015 | | - rds_ib_recv_refill(conn, 0, GFP_NOWAIT); |
|---|
| 1036 | + rds_ib_recv_refill(conn, 0, GFP_NOWAIT | __GFP_NOWARN); |
|---|
| 1016 | 1037 | rds_ib_stats_inc(s_ib_rx_refill_from_cq); |
|---|
| 1017 | 1038 | } |
|---|
| 1018 | 1039 | } |
|---|
| .. | .. |
|---|
| 1041 | 1062 | si_meminfo(&si); |
|---|
| 1042 | 1063 | rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE; |
|---|
| 1043 | 1064 | |
|---|
| 1044 | | - rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming", |
|---|
| 1045 | | - sizeof(struct rds_ib_incoming), |
|---|
| 1046 | | - 0, SLAB_HWCACHE_ALIGN, NULL); |
|---|
| 1065 | + rds_ib_incoming_slab = |
|---|
| 1066 | + kmem_cache_create_usercopy("rds_ib_incoming", |
|---|
| 1067 | + sizeof(struct rds_ib_incoming), |
|---|
| 1068 | + 0, SLAB_HWCACHE_ALIGN, |
|---|
| 1069 | + offsetof(struct rds_ib_incoming, |
|---|
| 1070 | + ii_inc.i_usercopy), |
|---|
| 1071 | + sizeof(struct rds_inc_usercopy), |
|---|
| 1072 | + NULL); |
|---|
| 1047 | 1073 | if (!rds_ib_incoming_slab) |
|---|
| 1048 | 1074 | goto out; |
|---|
| 1049 | 1075 | |
|---|
| .. | .. |
|---|
| 1061 | 1087 | |
|---|
| 1062 | 1088 | void rds_ib_recv_exit(void) |
|---|
| 1063 | 1089 | { |
|---|
| 1090 | + WARN_ON(atomic_read(&rds_ib_allocation)); |
|---|
| 1091 | + |
|---|
| 1064 | 1092 | kmem_cache_destroy(rds_ib_incoming_slab); |
|---|
| 1065 | 1093 | kmem_cache_destroy(rds_ib_frag_slab); |
|---|
| 1066 | 1094 | } |
|---|