.. | ..
1 | 1 | /*
2 | | - * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
| 2 | + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
3 | 3 | *
4 | 4 | * This software is available to you under a choice of one of two
5 | 5 | * licenses. You may choose to be licensed under the terms of the GNU
.. | ..
61 | 61 | recv->r_wr.num_sge = RDS_IB_RECV_SGE;
62 | 62 |
63 | 63 | sge = &recv->r_sge[0];
64 | | - sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
| 64 | + sge->addr = ic->i_recv_hdrs_dma[i];
65 | 65 | sge->length = sizeof(struct rds_header);
66 | 66 | sge->lkey = ic->i_pd->local_dma_lkey;
67 | 67 |
.. | ..
168 | 168 | list_del(&inc->ii_cache_entry);
169 | 169 | WARN_ON(!list_empty(&inc->ii_frags));
170 | 170 | kmem_cache_free(rds_ib_incoming_slab, inc);
| 171 | + atomic_dec(&rds_ib_allocation);
171 | 172 | }
172 | 173 |
173 | 174 | rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
.. | ..
309 | 310 | struct rds_ib_connection *ic = conn->c_transport_data;
310 | 311 | struct ib_sge *sge;
311 | 312 | int ret = -ENOMEM;
312 | | - gfp_t slab_mask = GFP_NOWAIT;
313 | | - gfp_t page_mask = GFP_NOWAIT;
| 313 | + gfp_t slab_mask = gfp;
| 314 | + gfp_t page_mask = gfp;
314 | 315 |
315 | 316 | if (gfp & __GFP_DIRECT_RECLAIM) {
316 | 317 | slab_mask = GFP_KERNEL;
.. | ..
342 | 343 | WARN_ON(ret != 1);
343 | 344 |
344 | 345 | sge = &recv->r_sge[0];
345 | | - sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
| 346 | + sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
346 | 347 | sge->length = sizeof(struct rds_header);
347 | 348 |
348 | 349 | sge = &recv->r_sge[1];
349 | | - sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
350 | | - sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);
| 350 | + sge->addr = sg_dma_address(&recv->r_frag->f_sg);
| 351 | + sge->length = sg_dma_len(&recv->r_frag->f_sg);
351 | 352 |
352 | 353 | ret = 0;
353 | 354 | out:
.. | ..
362 | 363 | static void release_refill(struct rds_connection *conn)
363 | 364 | {
364 | 365 | clear_bit(RDS_RECV_REFILL, &conn->c_flags);
| 366 | + smp_mb__after_atomic();
365 | 367 |
366 | 368 | /* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
367 | 369 | * hot path and finding waiters is very rare. We don't want to walk
.. | ..
384 | 386 | unsigned int posted = 0;
385 | 387 | int ret = 0;
386 | 388 | bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
| 389 | + bool must_wake = false;
387 | 390 | u32 pos;
388 | 391 |
389 | 392 | /* the goal here is to just make sure that someone, somewhere
.. | ..
404 | 407 | recv = &ic->i_recvs[pos];
405 | 408 | ret = rds_ib_recv_refill_one(conn, recv, gfp);
406 | 409 | if (ret) {
| 410 | + must_wake = true;
407 | 411 | break;
408 | 412 | }
409 | 413 |
410 | 414 | rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
411 | 415 | recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
412 | | - (long) ib_sg_dma_address(
413 | | - ic->i_cm_id->device,
414 | | - &recv->r_frag->f_sg));
| 416 | + (long)sg_dma_address(&recv->r_frag->f_sg));
415 | 417 |
416 | 418 | /* XXX when can this fail? */
417 | 419 | ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
.. | ..
424 | 426 | }
425 | 427 |
426 | 428 | posted++;
| 429 | +
| 430 | + if ((posted > 128 && need_resched()) || posted > 8192) {
| 431 | + must_wake = true;
| 432 | + break;
| 433 | + }
427 | 434 | }
428 | 435 |
429 | 436 | /* We're doing flow control - update the window. */
.. | ..
446 | 453 | * if we should requeue.
447 | 454 | */
448 | 455 | if (rds_conn_up(conn) &&
449 | | - ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
| 456 | + (must_wake ||
| 457 | + (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
450 | 458 | rds_ib_ring_empty(&ic->i_recv_ring))) {
451 | 459 | queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
452 | 460 | }
| 461 | + if (can_wait)
| 462 | + cond_resched();
453 | 463 | }
454 | 464 |
455 | 465 | /*
.. | ..
653 | 663 | seq = rds_ib_get_ack(ic);
654 | 664 |
655 | 665 | rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
| 666 | +
| 667 | + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma,
| 668 | + sizeof(*hdr), DMA_TO_DEVICE);
656 | 669 | rds_message_populate_header(hdr, 0, 0, 0);
657 | 670 | hdr->h_ack = cpu_to_be64(seq);
658 | 671 | hdr->h_credit = adv_credits;
659 | 672 | rds_message_make_checksum(hdr);
| 673 | + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma,
| 674 | + sizeof(*hdr), DMA_TO_DEVICE);
| 675 | +
660 | 676 | ic->i_ack_queued = jiffies;
661 | 677 |
662 | 678 | ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL);
.. | ..
774 | 790 | unsigned long frag_off;
775 | 791 | unsigned long to_copy;
776 | 792 | unsigned long copied;
777 | | - uint64_t uncongested = 0;
| 793 | + __le64 uncongested = 0;
778 | 794 | void *addr;
779 | 795 |
780 | 796 | /* catch completely corrupt packets */
.. | ..
791 | 807 | copied = 0;
792 | 808 |
793 | 809 | while (copied < RDS_CONG_MAP_BYTES) {
794 | | - uint64_t *src, *dst;
| 810 | + __le64 *src, *dst;
795 | 811 | unsigned int k;
796 | 812 |
797 | 813 | to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
.. | ..
826 | 842 | }
827 | 843 |
828 | 844 | /* the congestion map is in little endian order */
829 | | - uncongested = le64_to_cpu(uncongested);
830 | | -
831 | | - rds_cong_map_updated(map, uncongested);
| 845 | + rds_cong_map_updated(map, le64_to_cpu(uncongested));
832 | 846 | }
833 | 847 |
834 | 848 | static void rds_ib_process_recv(struct rds_connection *conn,
.. | ..
838 | 852 | struct rds_ib_connection *ic = conn->c_transport_data;
839 | 853 | struct rds_ib_incoming *ibinc = ic->i_ibinc;
840 | 854 | struct rds_header *ihdr, *hdr;
| 855 | + dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
841 | 856 |
842 | 857 | /* XXX shut down the connection if port 0,0 are seen? */
843 | 858 |
.. | ..
854 | 869 | }
855 | 870 | data_len -= sizeof(struct rds_header);
856 | 871 |
857 | | - ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
| 872 | + ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];
858 | 873 |
| 874 | + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr,
| 875 | + sizeof(*ihdr), DMA_FROM_DEVICE);
859 | 876 | /* Validate the checksum. */
860 | 877 | if (!rds_message_verify_checksum(ihdr)) {
861 | 878 | rds_ib_conn_error(conn, "incoming message "
.. | ..
863 | 880 | "forcing a reconnect\n",
864 | 881 | &conn->c_faddr);
865 | 882 | rds_stats_inc(s_recv_drop_bad_checksum);
866 | | - return;
| 883 | + goto done;
867 | 884 | }
868 | 885 |
869 | 886 | /* Process the ACK sequence which comes with every packet */
.. | ..
892 | 909 | */
893 | 910 | rds_ib_frag_free(ic, recv->r_frag);
894 | 911 | recv->r_frag = NULL;
895 | | - return;
| 912 | + goto done;
896 | 913 | }
897 | 914 |
898 | 915 | /*
.. | ..
926 | 943 | hdr->h_dport != ihdr->h_dport) {
927 | 944 | rds_ib_conn_error(conn,
928 | 945 | "fragment header mismatch; forcing reconnect\n");
929 | | - return;
| 946 | + goto done;
930 | 947 | }
931 | 948 | }
932 | 949 |
.. | ..
958 | 975 |
959 | 976 | rds_inc_put(&ibinc->ii_inc);
960 | 977 | }
| 978 | +done:
| 979 | + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr,
| 980 | + sizeof(*ihdr), DMA_FROM_DEVICE);
961 | 981 | }
962 | 982 |
963 | 983 | void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
.. | ..
986 | 1006 | } else {
987 | 1007 | /* We expect errors as the qp is drained during shutdown */
988 | 1008 | if (rds_conn_up(conn) || rds_conn_connecting(conn))
989 | | - rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
| 1009 | + rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
990 | 1010 | &conn->c_laddr, &conn->c_faddr,
991 | | - wc->status,
992 | | - ib_wc_status_msg(wc->status));
| 1011 | + conn->c_tos, wc->status,
| 1012 | + ib_wc_status_msg(wc->status),
| 1013 | + wc->vendor_err);
993 | 1014 | }
994 | 1015 |
995 | 1016 | /* rds_ib_process_recv() doesn't always consume the frag, and
.. | ..
1012 | 1033 | rds_ib_stats_inc(s_ib_rx_ring_empty);
1013 | 1034 |
1014 | 1035 | if (rds_ib_ring_low(&ic->i_recv_ring)) {
1015 | | - rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
| 1036 | + rds_ib_recv_refill(conn, 0, GFP_NOWAIT | __GFP_NOWARN);
1016 | 1037 | rds_ib_stats_inc(s_ib_rx_refill_from_cq);
1017 | 1038 | }
1018 | 1039 | }
.. | ..
1041 | 1062 | si_meminfo(&si);
1042 | 1063 | rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
1043 | 1064 |
1044 | | - rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
1045 | | - sizeof(struct rds_ib_incoming),
1046 | | - 0, SLAB_HWCACHE_ALIGN, NULL);
| 1065 | + rds_ib_incoming_slab =
| 1066 | + kmem_cache_create_usercopy("rds_ib_incoming",
| 1067 | + sizeof(struct rds_ib_incoming),
| 1068 | + 0, SLAB_HWCACHE_ALIGN,
| 1069 | + offsetof(struct rds_ib_incoming,
| 1070 | + ii_inc.i_usercopy),
| 1071 | + sizeof(struct rds_inc_usercopy),
| 1072 | + NULL);
1047 | 1073 | if (!rds_ib_incoming_slab)
1048 | 1074 | goto out;
1049 | 1075 |
.. | ..
1061 | 1087 |
1062 | 1088 | void rds_ib_recv_exit(void)
1063 | 1089 | {
| 1090 | + WARN_ON(atomic_read(&rds_ib_allocation));
| 1091 | +
1064 | 1092 | kmem_cache_destroy(rds_ib_incoming_slab);
1065 | 1093 | kmem_cache_destroy(rds_ib_frag_slab);
1066 | 1094 | }