2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/rds/ib_recv.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -61,7 +61,7 @@
 	recv->r_wr.num_sge = RDS_IB_RECV_SGE;

 	sge = &recv->r_sge[0];
-	sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
+	sge->addr = ic->i_recv_hdrs_dma[i];
 	sge->length = sizeof(struct rds_header);
 	sge->lkey = ic->i_pd->local_dma_lkey;

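Note on this hunk: the header SGE address is now read from a per-slot array instead of being computed as an offset into one contiguous DMA region. That fits a scheme where each header is allocated on its own, e.g. from a DMA pool. A minimal sketch of such an allocation scheme, assuming a dma_pool and hypothetical names (alloc_hdrs, hdr_pool); this is an illustration, not the patch's actual setup code:

	#include <linux/cache.h>
	#include <linux/dmapool.h>

	/* Allocate n headers individually from a DMA pool, recording the
	 * CPU pointer and bus address of each slot; this replaces one big
	 * coherent block indexed by offset arithmetic.
	 */
	static int alloc_hdrs(struct device *dev, struct dma_pool **pool,
			      void **hdrs, dma_addr_t *dma, int n, size_t hdr_sz)
	{
		int i;

		*pool = dma_pool_create("hdr_pool", dev, hdr_sz,
					L1_CACHE_BYTES, 0);
		if (!*pool)
			return -ENOMEM;

		for (i = 0; i < n; i++) {
			hdrs[i] = dma_pool_zalloc(*pool, GFP_KERNEL, &dma[i]);
			if (!hdrs[i])
				return -ENOMEM;	/* caller unwinds on failure */
		}
		return 0;
	}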
@@ -168,6 +168,7 @@
 		list_del(&inc->ii_cache_entry);
 		WARN_ON(!list_empty(&inc->ii_frags));
 		kmem_cache_free(rds_ib_incoming_slab, inc);
+		atomic_dec(&rds_ib_allocation);
 	}

 	rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
@@ -309,8 +310,8 @@
 	struct rds_ib_connection *ic = conn->c_transport_data;
 	struct ib_sge *sge;
 	int ret = -ENOMEM;
-	gfp_t slab_mask = GFP_NOWAIT;
-	gfp_t page_mask = GFP_NOWAIT;
+	gfp_t slab_mask = gfp;
+	gfp_t page_mask = gfp;

 	if (gfp & __GFP_DIRECT_RECLAIM) {
 		slab_mask = GFP_KERNEL;
@@ -342,12 +343,12 @@
 	WARN_ON(ret != 1);

 	sge = &recv->r_sge[0];
-	sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
+	sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
 	sge->length = sizeof(struct rds_header);

 	sge = &recv->r_sge[1];
-	sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
-	sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);
+	sge->addr = sg_dma_address(&recv->r_frag->f_sg);
+	sge->length = sg_dma_len(&recv->r_frag->f_sg);

 	ret = 0;
 out:
@@ -362,6 +363,7 @@
 static void release_refill(struct rds_connection *conn)
 {
 	clear_bit(RDS_RECV_REFILL, &conn->c_flags);
+	smp_mb__after_atomic();

 	/* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
 	 * hot path and finding waiters is very rare.  We don't want to walk
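Note on this hunk: the added smp_mb__after_atomic() completes the classic lock-free waker pattern that the surrounding comment describes: the bit must be visibly cleared before the lockless waiter check, pairing with the barrier on the sleeping side (the waitqueue_active() kerneldoc requires exactly this ordering). A minimal standalone sketch of the waker side, with hypothetical flag/waitqueue names:

	#include <linux/bitops.h>
	#include <linux/wait.h>

	/* Waker side: clear the flag, then check for sleepers without
	 * taking the waitqueue lock.
	 */
	static void release_flag(unsigned long *flag, wait_queue_head_t *waitq)
	{
		clear_bit(0, flag);
		/* Order the clear_bit() against the lockless
		 * waitqueue_active() check below; pairs with the barrier
		 * on the sleeping side.
		 */
		smp_mb__after_atomic();
		if (waitqueue_active(waitq))
			wake_up(waitq);
	}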
@@ -384,6 +386,7 @@
 	unsigned int posted = 0;
 	int ret = 0;
 	bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
+	bool must_wake = false;
 	u32 pos;

 	/* the goal here is to just make sure that someone, somewhere
@@ -404,14 +407,13 @@
 		recv = &ic->i_recvs[pos];
 		ret = rds_ib_recv_refill_one(conn, recv, gfp);
 		if (ret) {
+			must_wake = true;
 			break;
 		}

 		rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
 			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
-			 (long) ib_sg_dma_address(
-				ic->i_cm_id->device,
-				&recv->r_frag->f_sg));
+			 (long)sg_dma_address(&recv->r_frag->f_sg));

 		/* XXX when can this fail? */
 		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
@@ -424,6 +426,11 @@
 		}

 		posted++;
+
+		if ((posted > 128 && need_resched()) || posted > 8192) {
+			must_wake = true;
+			break;
+		}
 	}

 	/* We're doing flow control - update the window. */
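Note on this hunk: the new escape hatch bounds how long the refill loop can hog a CPU: bail out after a modest batch (>128) once the scheduler wants the CPU back, or unconditionally after a hard cap (>8192), and let must_wake requeue the remainder on the workqueue. A standalone sketch of the same pattern, with have_work()/do_one_unit() as hypothetical stand-ins for the per-item work:

	#include <linux/sched.h>
	#include <linux/workqueue.h>

	static bool have_work(void);	/* hypothetical: more items pending? */
	static void do_one_unit(void);	/* hypothetical: handle one item */

	static void bounded_refill(struct work_struct *work)
	{
		unsigned int done = 0;

		while (have_work()) {
			do_one_unit();
			done++;
			/* Yield after a modest batch if the scheduler is
			 * asking for the CPU, and unconditionally after a
			 * hard cap; finish the rest from the workqueue.
			 */
			if ((done > 128 && need_resched()) || done > 8192) {
				schedule_work(work);
				break;
			}
		}
	}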
@@ -446,10 +453,13 @@
 	 * if we should requeue.
 	 */
 	if (rds_conn_up(conn) &&
-	    ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
+	    (must_wake ||
+	     (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) ||
 	    rds_ib_ring_empty(&ic->i_recv_ring))) {
 		queue_delayed_work(rds_wq, &conn->c_recv_w, 1);
 	}
+	if (can_wait)
+		cond_resched();
 }

 /*
@@ -653,10 +663,16 @@
 	seq = rds_ib_get_ack(ic);

 	rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
+
+	ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma,
+				   sizeof(*hdr), DMA_TO_DEVICE);
 	rds_message_populate_header(hdr, 0, 0, 0);
 	hdr->h_ack = cpu_to_be64(seq);
 	hdr->h_credit = adv_credits;
 	rds_message_make_checksum(hdr);
+	ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma,
+				      sizeof(*hdr), DMA_TO_DEVICE);
+
 	ic->i_ack_queued = jiffies;

 	ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL);
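Note on this hunk: the sync pair follows the standard streaming-DMA ownership discipline: hand the buffer to the CPU before writing it, hand it back to the device before posting the work request. A minimal sketch of the same bracketing with the generic DMA API; dev/buf/handle and update_dma_buf are hypothetical:

	#include <linux/dma-mapping.h>
	#include <linux/string.h>

	/* CPU updates a streaming-DMA buffer that the device will read. */
	static void update_dma_buf(struct device *dev, void *buf,
				   dma_addr_t handle, size_t len)
	{
		/* Take ownership for the CPU before touching the buffer. */
		dma_sync_single_for_cpu(dev, handle, len, DMA_TO_DEVICE);

		memset(buf, 0, len);	/* ... fill in the real payload ... */

		/* Hand ownership back before the device may DMA from it. */
		dma_sync_single_for_device(dev, handle, len, DMA_TO_DEVICE);
	}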
@@ -774,7 +790,7 @@
 	unsigned long frag_off;
 	unsigned long to_copy;
 	unsigned long copied;
-	uint64_t uncongested = 0;
+	__le64 uncongested = 0;
 	void *addr;

 	/* catch completely corrupt packets */
@@ -791,7 +807,7 @@
 	copied = 0;

 	while (copied < RDS_CONG_MAP_BYTES) {
-		uint64_t *src, *dst;
+		__le64 *src, *dst;
 		unsigned int k;

 		to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
@@ -826,9 +842,7 @@
 	}

 	/* the congestion map is in little endian order */
-	uncongested = le64_to_cpu(uncongested);
-
-	rds_cong_map_updated(map, uncongested);
+	rds_cong_map_updated(map, le64_to_cpu(uncongested));
 }

 static void rds_ib_process_recv(struct rds_connection *conn,
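Note on the two hunks above: changing uncongested and src/dst from uint64_t to __le64 lets sparse verify that little-endian wire data only meets CPU-order values through an explicit conversion, done exactly once at the boundary. A small sketch of the idiom; collect_bits is a hypothetical name:

	#include <linux/types.h>
	#include <asm/byteorder.h>

	/* Accumulate little-endian words from the wire, convert once. */
	static u64 collect_bits(const __le64 *src, size_t n)
	{
		__le64 acc = 0;	/* stays in wire order while accumulating */
		size_t i;

		for (i = 0; i < n; i++)
			acc |= src[i];	/* bitwise OR is endian-agnostic */

		return le64_to_cpu(acc);	/* single boundary conversion */
	}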
@@ -838,6 +852,7 @@
 	struct rds_ib_connection *ic = conn->c_transport_data;
 	struct rds_ib_incoming *ibinc = ic->i_ibinc;
 	struct rds_header *ihdr, *hdr;
+	dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];

 	/* XXX shut down the connection if port 0,0 are seen? */
@@ -854,8 +869,10 @@
 	}
 	data_len -= sizeof(struct rds_header);

-	ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
+	ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];

+	ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr,
+				   sizeof(*ihdr), DMA_FROM_DEVICE);
 	/* Validate the checksum. */
 	if (!rds_message_verify_checksum(ihdr)) {
 		rds_ib_conn_error(conn, "incoming message "
@@ -863,7 +880,7 @@
 		       "forcing a reconnect\n",
 		       &conn->c_faddr);
 		rds_stats_inc(s_recv_drop_bad_checksum);
-		return;
+		goto done;
 	}

 	/* Process the ACK sequence which comes with every packet */
@@ -892,7 +909,7 @@
 		 */
 		rds_ib_frag_free(ic, recv->r_frag);
 		recv->r_frag = NULL;
-		return;
+		goto done;
 	}

 	/*
@@ -926,7 +943,7 @@
 		    hdr->h_dport != ihdr->h_dport) {
 			rds_ib_conn_error(conn,
 					  "fragment header mismatch; forcing reconnect\n");
-			return;
+			goto done;
 		}
 	}

@@ -958,6 +975,9 @@

 		rds_inc_put(&ibinc->ii_inc);
 	}
+done:
+	ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr,
+				      sizeof(*ihdr), DMA_FROM_DEVICE);
 }

 void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
@@ -986,10 +1006,11 @@
 	} else {
 		/* We expect errors as the qp is drained during shutdown */
 		if (rds_conn_up(conn) || rds_conn_connecting(conn))
-			rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
+			rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n",
					  &conn->c_laddr, &conn->c_faddr,
-					  wc->status,
-					  ib_wc_status_msg(wc->status));
+					  conn->c_tos, wc->status,
+					  ib_wc_status_msg(wc->status),
+					  wc->vendor_err);
 	}

 	/* rds_ib_process_recv() doesn't always consume the frag, and
@@ -1012,7 +1033,7 @@
 		rds_ib_stats_inc(s_ib_rx_ring_empty);

 	if (rds_ib_ring_low(&ic->i_recv_ring)) {
-		rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
+		rds_ib_recv_refill(conn, 0, GFP_NOWAIT | __GFP_NOWARN);
 		rds_ib_stats_inc(s_ib_rx_refill_from_cq);
 	}
 }
@@ -1041,9 +1062,14 @@
 	si_meminfo(&si);
 	rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;

-	rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
-					sizeof(struct rds_ib_incoming),
-					0, SLAB_HWCACHE_ALIGN, NULL);
+	rds_ib_incoming_slab =
+		kmem_cache_create_usercopy("rds_ib_incoming",
+					   sizeof(struct rds_ib_incoming),
+					   0, SLAB_HWCACHE_ALIGN,
+					   offsetof(struct rds_ib_incoming,
+						    ii_inc.i_usercopy),
+					   sizeof(struct rds_inc_usercopy),
+					   NULL);
 	if (!rds_ib_incoming_slab)
 		goto out;
@@ -1061,6 +1087,8 @@

 void rds_ib_recv_exit(void)
 {
+	WARN_ON(atomic_read(&rds_ib_allocation));
+
 	kmem_cache_destroy(rds_ib_incoming_slab);
 	kmem_cache_destroy(rds_ib_frag_slab);
 }