2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/cifs/smbdirect.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2017, Microsoft Corporation.
  *
  * Author(s): Long Li <longli@microsoft.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
 */
 #include <linux/module.h>
 #include <linux/highmem.h>
@@ -164,95 +155,6 @@
 #define log_rdma_mr(level, fmt, args...) \
 		log_rdma(level, LOG_RDMA_MR, fmt, ##args)
 
-/*
- * Destroy the transport and related RDMA and memory resources
- * Need to go through all the pending counters and make sure on one is using
- * the transport while it is destroyed
- */
-static void smbd_destroy_rdma_work(struct work_struct *work)
-{
-	struct smbd_response *response;
-	struct smbd_connection *info =
-		container_of(work, struct smbd_connection, destroy_work);
-	unsigned long flags;
-
-	log_rdma_event(INFO, "destroying qp\n");
-	ib_drain_qp(info->id->qp);
-	rdma_destroy_qp(info->id);
-
-	/* Unblock all I/O waiting on the send queue */
-	wake_up_interruptible_all(&info->wait_send_queue);
-
-	log_rdma_event(INFO, "cancelling idle timer\n");
-	cancel_delayed_work_sync(&info->idle_timer_work);
-	log_rdma_event(INFO, "cancelling send immediate work\n");
-	cancel_delayed_work_sync(&info->send_immediate_work);
-
-	log_rdma_event(INFO, "wait for all send to finish\n");
-	wait_event(info->wait_smbd_send_pending,
-		info->smbd_send_pending == 0);
-
-	log_rdma_event(INFO, "wait for all recv to finish\n");
-	wake_up_interruptible(&info->wait_reassembly_queue);
-	wait_event(info->wait_smbd_recv_pending,
-		info->smbd_recv_pending == 0);
-
-	log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
-	wait_event(info->wait_send_pending,
-		atomic_read(&info->send_pending) == 0);
-	wait_event(info->wait_send_payload_pending,
-		atomic_read(&info->send_payload_pending) == 0);
-
-	log_rdma_event(INFO, "freeing mr list\n");
-	wake_up_interruptible_all(&info->wait_mr);
-	wait_event(info->wait_for_mr_cleanup,
-		atomic_read(&info->mr_used_count) == 0);
-	destroy_mr_list(info);
-
-	/* It's not posssible for upper layer to get to reassembly */
-	log_rdma_event(INFO, "drain the reassembly queue\n");
-	do {
-		spin_lock_irqsave(&info->reassembly_queue_lock, flags);
-		response = _get_first_reassembly(info);
-		if (response) {
-			list_del(&response->list);
-			spin_unlock_irqrestore(
-				&info->reassembly_queue_lock, flags);
-			put_receive_buffer(info, response);
-		} else
-			spin_unlock_irqrestore(&info->reassembly_queue_lock, flags);
-	} while (response);
-
-	info->reassembly_data_length = 0;
-
-	log_rdma_event(INFO, "free receive buffers\n");
-	wait_event(info->wait_receive_queues,
-		info->count_receive_queue + info->count_empty_packet_queue
-			== info->receive_credit_max);
-	destroy_receive_buffers(info);
-
-	ib_free_cq(info->send_cq);
-	ib_free_cq(info->recv_cq);
-	ib_dealloc_pd(info->pd);
-	rdma_destroy_id(info->id);
-
-	/* free mempools */
-	mempool_destroy(info->request_mempool);
-	kmem_cache_destroy(info->request_cache);
-
-	mempool_destroy(info->response_mempool);
-	kmem_cache_destroy(info->response_cache);
-
-	info->transport_status = SMBD_DESTROYED;
-	wake_up_all(&info->wait_destroy);
-}
-
-static int smbd_process_disconnected(struct smbd_connection *info)
-{
-	schedule_work(&info->destroy_work);
-	return 0;
-}
-
 static void smbd_disconnect_rdma_work(struct work_struct *work)
 {
 	struct smbd_connection *info =
@@ -319,7 +221,9 @@
 		}
 
 		info->transport_status = SMBD_DISCONNECTED;
-		smbd_process_disconnected(info);
+		wake_up_interruptible(&info->disconn_wait);
+		wake_up_interruptible(&info->wait_reassembly_queue);
+		wake_up_interruptible_all(&info->wait_send_queue);
 		break;
 
 	default:
@@ -380,28 +284,22 @@
 			request->sge[i].length,
 			DMA_TO_DEVICE);
 
-	if (request->has_payload) {
-		if (atomic_dec_and_test(&request->info->send_payload_pending))
-			wake_up(&request->info->wait_send_payload_pending);
-	} else {
-		if (atomic_dec_and_test(&request->info->send_pending))
-			wake_up(&request->info->wait_send_pending);
-	}
+	if (atomic_dec_and_test(&request->info->send_pending))
+		wake_up(&request->info->wait_send_pending);
+
+	wake_up(&request->info->wait_post_send);
 
 	mempool_free(request, request->info->request_mempool);
 }
 
 static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp)
 {
-	log_rdma_event(INFO, "resp message min_version %u max_version %u "
-		"negotiated_version %u credits_requested %u "
-		"credits_granted %u status %u max_readwrite_size %u "
-		"preferred_send_size %u max_receive_size %u "
-		"max_fragmented_size %u\n",
-		resp->min_version, resp->max_version, resp->negotiated_version,
-		resp->credits_requested, resp->credits_granted, resp->status,
-		resp->max_readwrite_size, resp->preferred_send_size,
-		resp->max_receive_size, resp->max_fragmented_size);
+	log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n",
+		       resp->min_version, resp->max_version,
+		       resp->negotiated_version, resp->credits_requested,
+		       resp->credits_granted, resp->status,
+		       resp->max_readwrite_size, resp->preferred_send_size,
+		       resp->max_receive_size, resp->max_fragmented_size);
 }
 
 /*
@@ -479,27 +377,6 @@
 	return true;
 }
 
-/*
- * Check and schedule to send an immediate packet
- * This is used to extend credtis to remote peer to keep the transport busy
- */
-static void check_and_send_immediate(struct smbd_connection *info)
-{
-	if (info->transport_status != SMBD_CONNECTED)
-		return;
-
-	info->send_immediate = true;
-
-	/*
-	 * Promptly send a packet if our peer is running low on receive
-	 * credits
-	 */
-	if (atomic_read(&info->receive_credits) <
-		info->receive_credit_target - 1)
-		queue_delayed_work(
-			info->workqueue, &info->send_immediate_work, 0);
-}
-
 static void smbd_post_send_credits(struct work_struct *work)
 {
 	int ret = 0;
@@ -549,29 +426,16 @@
 	info->new_credits_offered += ret;
 	spin_unlock(&info->lock_new_credits_offered);
 
-	atomic_add(ret, &info->receive_credits);
-
-	/* Check if we can post new receive and grant credits to peer */
-	check_and_send_immediate(info);
-}
-
-static void smbd_recv_done_work(struct work_struct *work)
-{
-	struct smbd_connection *info =
-		container_of(work, struct smbd_connection, recv_done_work);
-
-	/*
-	 * We may have new send credits granted from remote peer
-	 * If any sender is blcoked on lack of credets, unblock it
-	 */
-	if (atomic_read(&info->send_credits))
-		wake_up_interruptible(&info->wait_send_queue);
-
-	/*
-	 * Check if we need to send something to remote peer to
-	 * grant more credits or respond to KEEP_ALIVE packet
-	 */
-	check_and_send_immediate(info);
+	/* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */
+	info->send_immediate = true;
+	if (atomic_read(&info->receive_credits) <
+	    info->receive_credit_target - 1) {
+		if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
+		    info->send_immediate) {
+			log_keep_alive(INFO, "send an empty message\n");
+			smbd_post_send_empty(info);
+		}
+	}
 }
 
 /* Called from softirq, when recv is done */
@@ -583,10 +447,9 @@
 	struct smbd_connection *info = response->info;
 	int data_length = 0;
 
-	log_rdma_recv(INFO, "response=%p type=%d wc status=%d wc opcode %d "
-		      "byte_len=%d pkey_index=%x\n",
-		      response, response->type, wc->status, wc->opcode,
-		      wc->byte_len, wc->pkey_index);
+	log_rdma_recv(INFO, "response=%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%x\n",
+		      response, response->type, wc->status, wc->opcode,
+		      wc->byte_len, wc->pkey_index);
 
 	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
 		log_rdma_recv(INFO, "wc->status=%d opcode=%d\n",
@@ -642,15 +505,21 @@
 		atomic_dec(&info->receive_credits);
 		info->receive_credit_target =
 			le16_to_cpu(data_transfer->credits_requested);
-		atomic_add(le16_to_cpu(data_transfer->credits_granted),
-			&info->send_credits);
+		if (le16_to_cpu(data_transfer->credits_granted)) {
+			atomic_add(le16_to_cpu(data_transfer->credits_granted),
+				&info->send_credits);
+			/*
+			 * We have new send credits granted from remote peer
+			 * If any sender is waiting for credits, unblock it
+			 */
+			wake_up_interruptible(&info->wait_send_queue);
+		}
 
-		log_incoming(INFO, "data flags %d data_offset %d "
-			"data_length %d remaining_data_length %d\n",
-			le16_to_cpu(data_transfer->flags),
-			le32_to_cpu(data_transfer->data_offset),
-			le32_to_cpu(data_transfer->data_length),
-			le32_to_cpu(data_transfer->remaining_data_length));
+		log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n",
+			     le16_to_cpu(data_transfer->flags),
+			     le32_to_cpu(data_transfer->data_offset),
+			     le32_to_cpu(data_transfer->data_length),
+			     le32_to_cpu(data_transfer->remaining_data_length));
 
 		/* Send a KEEP_ALIVE response right away if requested */
 		info->keep_alive_requested = KEEP_ALIVE_NONE;
@@ -659,7 +528,6 @@
 			info->keep_alive_requested = KEEP_ALIVE_PENDING;
 		}
 
-		queue_work(info->workqueue, &info->recv_done_work);
 		return;
 
 	default:
@@ -759,14 +627,10 @@
 	}
 
 	if (!frwr_is_supported(&info->id->device->attrs)) {
-		log_rdma_event(ERR,
-			"Fast Registration Work Requests "
-			"(FRWR) is not supported\n");
-		log_rdma_event(ERR,
-			"Device capability flags = %llx "
-			"max_fast_reg_page_list_len = %u\n",
-			info->id->device->attrs.device_cap_flags,
-			info->id->device->attrs.max_fast_reg_page_list_len);
+		log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n");
+		log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n",
+			       info->id->device->attrs.device_cap_flags,
+			       info->id->device->attrs.max_fast_reg_page_list_len);
 		rc = -EPROTONOSUPPORT;
 		goto out2;
 	}
@@ -852,7 +716,6 @@
 		request->sge[0].addr,
 		request->sge[0].length, request->sge[0].lkey);
 
-	request->has_payload = false;
 	atomic_inc(&info->send_pending);
 	rc = ib_post_send(info->id->qp, &send_wr, NULL);
 	if (!rc)
@@ -909,120 +772,9 @@
 	return 0;
 }
 
-/*
- * Build and prepare the SMBD packet header
- * This function waits for avaialbe send credits and build a SMBD packet
- * header. The caller then optional append payload to the packet after
- * the header
- * intput values
- * size: the size of the payload
- * remaining_data_length: remaining data to send if this is part of a
- * fragmented packet
- * output values
- * request_out: the request allocated from this function
- * return values: 0 on success, otherwise actual error code returned
- */
-static int smbd_create_header(struct smbd_connection *info,
-		int size, int remaining_data_length,
-		struct smbd_request **request_out)
-{
-	struct smbd_request *request;
-	struct smbd_data_transfer *packet;
-	int header_length;
-	int rc;
-
-	/* Wait for send credits. A SMBD packet needs one credit */
-	rc = wait_event_interruptible(info->wait_send_queue,
-		atomic_read(&info->send_credits) > 0 ||
-		info->transport_status != SMBD_CONNECTED);
-	if (rc)
-		return rc;
-
-	if (info->transport_status != SMBD_CONNECTED) {
-		log_outgoing(ERR, "disconnected not sending\n");
-		return -ENOENT;
-	}
-	atomic_dec(&info->send_credits);
-
-	request = mempool_alloc(info->request_mempool, GFP_KERNEL);
-	if (!request) {
-		rc = -ENOMEM;
-		goto err;
-	}
-
-	request->info = info;
-
-	/* Fill in the packet header */
-	packet = smbd_request_payload(request);
-	packet->credits_requested = cpu_to_le16(info->send_credit_target);
-	packet->credits_granted =
-		cpu_to_le16(manage_credits_prior_sending(info));
-	info->send_immediate = false;
-
-	packet->flags = 0;
-	if (manage_keep_alive_before_sending(info))
-		packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
-
-	packet->reserved = 0;
-	if (!size)
-		packet->data_offset = 0;
-	else
-		packet->data_offset = cpu_to_le32(24);
-	packet->data_length = cpu_to_le32(size);
-	packet->remaining_data_length = cpu_to_le32(remaining_data_length);
-	packet->padding = 0;
-
-	log_outgoing(INFO, "credits_requested=%d credits_granted=%d "
-		"data_offset=%d data_length=%d remaining_data_length=%d\n",
-		le16_to_cpu(packet->credits_requested),
-		le16_to_cpu(packet->credits_granted),
-		le32_to_cpu(packet->data_offset),
-		le32_to_cpu(packet->data_length),
-		le32_to_cpu(packet->remaining_data_length));
-
-	/* Map the packet to DMA */
-	header_length = sizeof(struct smbd_data_transfer);
-	/* If this is a packet without payload, don't send padding */
-	if (!size)
-		header_length = offsetof(struct smbd_data_transfer, padding);
-
-	request->num_sge = 1;
-	request->sge[0].addr = ib_dma_map_single(info->id->device,
-						 (void *)packet,
-						 header_length,
-						 DMA_BIDIRECTIONAL);
-	if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
-		mempool_free(request, info->request_mempool);
-		rc = -EIO;
-		goto err;
-	}
-
-	request->sge[0].length = header_length;
-	request->sge[0].lkey = info->pd->local_dma_lkey;
-
-	*request_out = request;
-	return 0;
-
-err:
-	atomic_inc(&info->send_credits);
-	return rc;
-}
-
-static void smbd_destroy_header(struct smbd_connection *info,
-		struct smbd_request *request)
-{
-
-	ib_dma_unmap_single(info->id->device,
-			    request->sge[0].addr,
-			    request->sge[0].length,
-			    DMA_TO_DEVICE);
-	mempool_free(request, info->request_mempool);
-	atomic_inc(&info->send_credits);
-}
-
 /* Post the send request */
 static int smbd_post_send(struct smbd_connection *info,
-		struct smbd_request *request, bool has_payload)
+		struct smbd_request *request)
 {
 	struct ib_send_wr send_wr;
 	int rc, i;
@@ -1047,25 +799,11 @@
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags = IB_SEND_SIGNALED;
 
-	if (has_payload) {
-		request->has_payload = true;
-		atomic_inc(&info->send_payload_pending);
-	} else {
-		request->has_payload = false;
-		atomic_inc(&info->send_pending);
-	}
-
 	rc = ib_post_send(info->id->qp, &send_wr, NULL);
 	if (rc) {
 		log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
-		if (has_payload) {
-			if (atomic_dec_and_test(&info->send_payload_pending))
-				wake_up(&info->wait_send_payload_pending);
-		} else {
-			if (atomic_dec_and_test(&info->send_pending))
-				wake_up(&info->wait_send_pending);
-		}
 		smbd_disconnect_rdma_connection(info);
+		rc = -EAGAIN;
 	} else
 		/* Reset timer for idle connection after packet is sent */
 		mod_delayed_work(info->workqueue, &info->idle_timer_work,
@@ -1079,42 +817,150 @@
 {
 	int num_sgs;
 	int i, rc;
+	int header_length;
 	struct smbd_request *request;
+	struct smbd_data_transfer *packet;
+	int new_credits;
 	struct scatterlist *sg;
 
-	rc = smbd_create_header(
-		info, data_length, remaining_data_length, &request);
+wait_credit:
+	/* Wait for send credits. A SMBD packet needs one credit */
+	rc = wait_event_interruptible(info->wait_send_queue,
+		atomic_read(&info->send_credits) > 0 ||
+		info->transport_status != SMBD_CONNECTED);
 	if (rc)
-		return rc;
+		goto err_wait_credit;
 
+	if (info->transport_status != SMBD_CONNECTED) {
+		log_outgoing(ERR, "disconnected not sending on wait_credit\n");
+		rc = -EAGAIN;
+		goto err_wait_credit;
+	}
+	if (unlikely(atomic_dec_return(&info->send_credits) < 0)) {
+		atomic_inc(&info->send_credits);
+		goto wait_credit;
+	}
+
+wait_send_queue:
+	wait_event(info->wait_post_send,
+		atomic_read(&info->send_pending) < info->send_credit_target ||
+		info->transport_status != SMBD_CONNECTED);
+
+	if (info->transport_status != SMBD_CONNECTED) {
+		log_outgoing(ERR, "disconnected not sending on wait_send_queue\n");
+		rc = -EAGAIN;
+		goto err_wait_send_queue;
+	}
+
+	if (unlikely(atomic_inc_return(&info->send_pending) >
+		info->send_credit_target)) {
+		atomic_dec(&info->send_pending);
+		goto wait_send_queue;
+	}
+
+	request = mempool_alloc(info->request_mempool, GFP_KERNEL);
+	if (!request) {
+		rc = -ENOMEM;
+		goto err_alloc;
+	}
+
+	request->info = info;
+
+	/* Fill in the packet header */
+	packet = smbd_request_payload(request);
+	packet->credits_requested = cpu_to_le16(info->send_credit_target);
+
+	new_credits = manage_credits_prior_sending(info);
+	atomic_add(new_credits, &info->receive_credits);
+	packet->credits_granted = cpu_to_le16(new_credits);
+
+	info->send_immediate = false;
+
+	packet->flags = 0;
+	if (manage_keep_alive_before_sending(info))
+		packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
+
+	packet->reserved = 0;
+	if (!data_length)
+		packet->data_offset = 0;
+	else
+		packet->data_offset = cpu_to_le32(24);
+	packet->data_length = cpu_to_le32(data_length);
+	packet->remaining_data_length = cpu_to_le32(remaining_data_length);
+	packet->padding = 0;
+
+	log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
+		     le16_to_cpu(packet->credits_requested),
+		     le16_to_cpu(packet->credits_granted),
+		     le32_to_cpu(packet->data_offset),
+		     le32_to_cpu(packet->data_length),
+		     le32_to_cpu(packet->remaining_data_length));
+
+	/* Map the packet to DMA */
+	header_length = sizeof(struct smbd_data_transfer);
+	/* If this is a packet without payload, don't send padding */
+	if (!data_length)
+		header_length = offsetof(struct smbd_data_transfer, padding);
+
+	request->num_sge = 1;
+	request->sge[0].addr = ib_dma_map_single(info->id->device,
+						 (void *)packet,
+						 header_length,
+						 DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
+		rc = -EIO;
+		request->sge[0].addr = 0;
+		goto err_dma;
+	}
+
+	request->sge[0].length = header_length;
+	request->sge[0].lkey = info->pd->local_dma_lkey;
+
+	/* Fill in the packet data payload */
 	num_sgs = sgl ? sg_nents(sgl) : 0;
 	for_each_sg(sgl, sg, num_sgs, i) {
 		request->sge[i+1].addr =
 			ib_dma_map_page(info->id->device, sg_page(sg),
-				sg->offset, sg->length, DMA_BIDIRECTIONAL);
+				sg->offset, sg->length, DMA_TO_DEVICE);
 		if (ib_dma_mapping_error(
 				info->id->device, request->sge[i+1].addr)) {
 			rc = -EIO;
 			request->sge[i+1].addr = 0;
-			goto dma_mapping_failure;
+			goto err_dma;
 		}
 		request->sge[i+1].length = sg->length;
 		request->sge[i+1].lkey = info->pd->local_dma_lkey;
 		request->num_sge++;
 	}
 
-	rc = smbd_post_send(info, request, data_length);
+	rc = smbd_post_send(info, request);
 	if (!rc)
 		return 0;
 
-dma_mapping_failure:
-	for (i = 1; i < request->num_sge; i++)
+err_dma:
+	for (i = 0; i < request->num_sge; i++)
 		if (request->sge[i].addr)
 			ib_dma_unmap_single(info->id->device,
 					    request->sge[i].addr,
 					    request->sge[i].length,
 					    DMA_TO_DEVICE);
-	smbd_destroy_header(info, request);
+	mempool_free(request, info->request_mempool);
+
+	/* roll back receive credits and credits to be offered */
+	spin_lock(&info->lock_new_credits_offered);
+	info->new_credits_offered += new_credits;
+	spin_unlock(&info->lock_new_credits_offered);
+	atomic_sub(new_credits, &info->receive_credits);
+
+err_alloc:
+	if (atomic_dec_and_test(&info->send_pending))
+		wake_up(&info->wait_send_pending);
+
+err_wait_send_queue:
+	/* roll back send credits and pending */
+	atomic_inc(&info->send_credits);
+
+err_wait_credit:
 	return rc;
 }
 
@@ -1222,11 +1068,9 @@
 
 	response->type = SMBD_NEGOTIATE_RESP;
 	rc = smbd_post_recv(info, response);
-	log_rdma_event(INFO,
-		"smbd_post_recv rc=%d iov.addr=%llx iov.length=%x "
-		"iov.lkey=%x\n",
-		rc, response->sge.addr,
-		response->sge.length, response->sge.lkey);
+	log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=%llx iov.length=%x iov.lkey=%x\n",
+		       rc, response->sge.addr,
+		       response->sge.length, response->sge.lkey);
 	if (rc)
 		return rc;
 
@@ -1436,25 +1280,6 @@
 	mempool_free(response, info->response_mempool);
 }
 
-/*
- * Check and send an immediate or keep alive packet
- * The condition to send those packets are defined in [MS-SMBD] 3.1.1.1
- * Connection.KeepaliveRequested and Connection.SendImmediate
- * The idea is to extend credits to server as soon as it becomes available
- */
-static void send_immediate_work(struct work_struct *work)
-{
-	struct smbd_connection *info = container_of(
-					work, struct smbd_connection,
-					send_immediate_work.work);
-
-	if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
-	    info->send_immediate) {
-		log_keep_alive(INFO, "send an empty message\n");
-		smbd_post_send_empty(info);
-	}
-}
-
 /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */
 static void idle_connection_timer(struct work_struct *work)
 {
@@ -1478,21 +1303,98 @@
 			info->keep_alive_interval*HZ);
 }
 
-/* Destroy this SMBD connection, called from upper layer */
-void smbd_destroy(struct smbd_connection *info)
+/*
+ * Destroy the transport and related RDMA and memory resources
+ * Need to go through all the pending counters and make sure on one is using
+ * the transport while it is destroyed
+ */
+void smbd_destroy(struct TCP_Server_Info *server)
 {
+	struct smbd_connection *info = server->smbd_conn;
+	struct smbd_response *response;
+	unsigned long flags;
+
+	if (!info) {
+		log_rdma_event(INFO, "rdma session already destroyed\n");
+		return;
+	}
+
 	log_rdma_event(INFO, "destroying rdma session\n");
+	if (info->transport_status != SMBD_DISCONNECTED) {
+		rdma_disconnect(server->smbd_conn->id);
+		log_rdma_event(INFO, "wait for transport being disconnected\n");
+		wait_event_interruptible(
+			info->disconn_wait,
+			info->transport_status == SMBD_DISCONNECTED);
+	}
 
-	/* Kick off the disconnection process */
-	smbd_disconnect_rdma_connection(info);
+	log_rdma_event(INFO, "destroying qp\n");
+	ib_drain_qp(info->id->qp);
+	rdma_destroy_qp(info->id);
 
-	log_rdma_event(INFO, "wait for transport being destroyed\n");
-	wait_event(info->wait_destroy,
-		info->transport_status == SMBD_DESTROYED);
+	log_rdma_event(INFO, "cancelling idle timer\n");
+	cancel_delayed_work_sync(&info->idle_timer_work);
+
+	log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
+	wait_event(info->wait_send_pending,
+		atomic_read(&info->send_pending) == 0);
+
+	/* It's not posssible for upper layer to get to reassembly */
+	log_rdma_event(INFO, "drain the reassembly queue\n");
+	do {
+		spin_lock_irqsave(&info->reassembly_queue_lock, flags);
+		response = _get_first_reassembly(info);
+		if (response) {
+			list_del(&response->list);
+			spin_unlock_irqrestore(
+				&info->reassembly_queue_lock, flags);
+			put_receive_buffer(info, response);
+		} else
+			spin_unlock_irqrestore(
+				&info->reassembly_queue_lock, flags);
+	} while (response);
+	info->reassembly_data_length = 0;
+
+	log_rdma_event(INFO, "free receive buffers\n");
+	wait_event(info->wait_receive_queues,
+		info->count_receive_queue + info->count_empty_packet_queue
+			== info->receive_credit_max);
+	destroy_receive_buffers(info);
+
+	/*
+	 * For performance reasons, memory registration and deregistration
+	 * are not locked by srv_mutex. It is possible some processes are
+	 * blocked on transport srv_mutex while holding memory registration.
+	 * Release the transport srv_mutex to allow them to hit the failure
+	 * path when sending data, and then release memory registartions.
+	 */
+	log_rdma_event(INFO, "freeing mr list\n");
+	wake_up_interruptible_all(&info->wait_mr);
+	while (atomic_read(&info->mr_used_count)) {
+		mutex_unlock(&server->srv_mutex);
+		msleep(1000);
+		mutex_lock(&server->srv_mutex);
+	}
+	destroy_mr_list(info);
+
+	ib_free_cq(info->send_cq);
+	ib_free_cq(info->recv_cq);
+	ib_dealloc_pd(info->pd);
+	rdma_destroy_id(info->id);
+
+	/* free mempools */
+	mempool_destroy(info->request_mempool);
+	kmem_cache_destroy(info->request_cache);
+
+	mempool_destroy(info->response_mempool);
+	kmem_cache_destroy(info->response_cache);
+
+	info->transport_status = SMBD_DESTROYED;
 
 	destroy_workqueue(info->workqueue);
 	log_rdma_event(INFO, "rdma session destroyed\n");
 	kfree(info);
+	server->smbd_conn = NULL;
 }
 
 /*
@@ -1514,16 +1416,8 @@
 	 */
 	if (server->smbd_conn->transport_status == SMBD_CONNECTED) {
 		log_rdma_event(INFO, "disconnecting transport\n");
-		smbd_disconnect_rdma_connection(server->smbd_conn);
+		smbd_destroy(server);
 	}
-
-	/* wait until the transport is destroyed */
-	if (!wait_event_timeout(server->smbd_conn->wait_destroy,
-		server->smbd_conn->transport_status == SMBD_DESTROYED, 5*HZ))
-		return -EAGAIN;
-
-	destroy_workqueue(server->smbd_conn->workqueue);
-	kfree(server->smbd_conn);
 
 create_conn:
 	log_rdma_event(INFO, "creating rdma session\n");
@@ -1552,7 +1446,7 @@
 	char name[MAX_NAME_LEN];
 	int rc;
 
-	snprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
+	scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
 	info->request_cache =
 		kmem_cache_create(
 			name,
@@ -1568,7 +1462,7 @@
 	if (!info->request_mempool)
 		goto out1;
 
-	snprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
+	scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
 	info->response_cache =
 		kmem_cache_create(
 			name,
@@ -1584,7 +1478,7 @@
 	if (!info->response_mempool)
 		goto out3;
 
-	snprintf(name, MAX_NAME_LEN, "smbd_%p", info);
+	scnprintf(name, MAX_NAME_LEN, "smbd_%p", info);
 	info->workqueue = create_workqueue(name);
 	if (!info->workqueue)
 		goto out4;
@@ -1635,25 +1529,19 @@
 
 	if (smbd_send_credit_target > info->id->device->attrs.max_cqe ||
 	    smbd_send_credit_target > info->id->device->attrs.max_qp_wr) {
-		log_rdma_event(ERR,
-			"consider lowering send_credit_target = %d. "
-			"Possible CQE overrun, device "
-			"reporting max_cpe %d max_qp_wr %d\n",
-			smbd_send_credit_target,
-			info->id->device->attrs.max_cqe,
-			info->id->device->attrs.max_qp_wr);
+		log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
+			       smbd_send_credit_target,
+			       info->id->device->attrs.max_cqe,
+			       info->id->device->attrs.max_qp_wr);
 		goto config_failed;
 	}
 
 	if (smbd_receive_credit_max > info->id->device->attrs.max_cqe ||
 	    smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) {
-		log_rdma_event(ERR,
-			"consider lowering receive_credit_max = %d. "
-			"Possible CQE overrun, device "
-			"reporting max_cpe %d max_qp_wr %d\n",
-			smbd_receive_credit_max,
-			info->id->device->attrs.max_cqe,
-			info->id->device->attrs.max_qp_wr);
+		log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
+			       smbd_receive_credit_max,
+			       info->id->device->attrs.max_cqe,
+			       info->id->device->attrs.max_qp_wr);
 		goto config_failed;
 	}
 
@@ -1679,15 +1567,17 @@
 
 	info->send_cq = NULL;
 	info->recv_cq = NULL;
-	info->send_cq = ib_alloc_cq(info->id->device, info,
-		info->send_credit_target, 0, IB_POLL_SOFTIRQ);
+	info->send_cq =
+		ib_alloc_cq_any(info->id->device, info,
+				info->send_credit_target, IB_POLL_SOFTIRQ);
 	if (IS_ERR(info->send_cq)) {
 		info->send_cq = NULL;
 		goto alloc_cq_failed;
 	}
 
-	info->recv_cq = ib_alloc_cq(info->id->device, info,
-		info->receive_credit_max, 0, IB_POLL_SOFTIRQ);
+	info->recv_cq =
+		ib_alloc_cq_any(info->id->device, info,
+				info->receive_credit_max, IB_POLL_SOFTIRQ);
 	if (IS_ERR(info->recv_cq)) {
 		info->recv_cq = NULL;
 		goto alloc_cq_failed;
@@ -1726,7 +1616,7 @@
 		info->responder_resources);
 
 	/* Need to send IRD/ORD in private data for iWARP */
-	info->id->device->get_port_immutable(
+	info->id->device->ops.get_port_immutable(
 		info->id->device, info->id->port_num, &port_immutable);
 	if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
 		ird_ord_hdr[0] = info->responder_resources;
@@ -1741,12 +1631,13 @@
 	conn_param.retry_count = SMBD_CM_RETRY;
 	conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY;
 	conn_param.flow_control = 0;
-	init_waitqueue_head(&info->wait_destroy);
 
 	log_rdma_event(INFO, "connecting to IP %pI4 port %d\n",
 		&addr_in->sin_addr, port);
 
 	init_waitqueue_head(&info->conn_wait);
+	init_waitqueue_head(&info->disconn_wait);
+	init_waitqueue_head(&info->wait_reassembly_queue);
 	rc = rdma_connect(info->id, &conn_param);
 	if (rc) {
 		log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc);
@@ -1770,28 +1661,16 @@
 	}
 
 	init_waitqueue_head(&info->wait_send_queue);
-	init_waitqueue_head(&info->wait_reassembly_queue);
-
 	INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer);
-	INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work);
 	queue_delayed_work(info->workqueue, &info->idle_timer_work,
 		info->keep_alive_interval*HZ);
-
-	init_waitqueue_head(&info->wait_smbd_send_pending);
-	info->smbd_send_pending = 0;
-
-	init_waitqueue_head(&info->wait_smbd_recv_pending);
-	info->smbd_recv_pending = 0;
 
 	init_waitqueue_head(&info->wait_send_pending);
 	atomic_set(&info->send_pending, 0);
 
-	init_waitqueue_head(&info->wait_send_payload_pending);
-	atomic_set(&info->send_payload_pending, 0);
+	init_waitqueue_head(&info->wait_post_send);
 
 	INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work);
-	INIT_WORK(&info->destroy_work, smbd_destroy_rdma_work);
-	INIT_WORK(&info->recv_done_work, smbd_recv_done_work);
 	INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits);
 	info->new_credits_offered = 0;
 	spin_lock_init(&info->lock_new_credits_offered);
@@ -1812,7 +1691,8 @@
 
 allocate_mr_failed:
 	/* At this point, need to a full transport shutdown */
-	smbd_destroy(info);
+	server->smbd_conn = info;
+	smbd_destroy(server);
 	return NULL;
 
 negotiation_failed:
@@ -1884,11 +1764,6 @@
 	int rc;
 
 again:
-	if (info->transport_status != SMBD_CONNECTED) {
-		log_read(ERR, "disconnected\n");
-		return -ENODEV;
-	}
-
 	/*
 	 * No need to hold the reassembly queue lock all the time as we are
 	 * the only one reading from the front of the queue. The transport
@@ -1974,11 +1849,9 @@
 		to_read -= to_copy;
 		data_read += to_copy;
 
-		log_read(INFO, "_get_first_reassembly memcpy %d bytes "
-			"data_transfer_length-offset=%d after that "
-			"to_read=%d data_read=%d offset=%d\n",
-			to_copy, data_length - offset,
-			to_read, data_read, offset);
+		log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n",
+			 to_copy, data_length - offset,
+			 to_read, data_read, offset);
 	}
 
 	spin_lock_irq(&info->reassembly_queue_lock);
@@ -1987,10 +1860,9 @@
 	spin_unlock_irq(&info->reassembly_queue_lock);
 
 	info->first_entry_offset = offset;
-	log_read(INFO, "returning to thread data_read=%d "
-		"reassembly_data_length=%d first_entry_offset=%d\n",
-		data_read, info->reassembly_data_length,
-		info->first_entry_offset);
+	log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
+		 data_read, info->reassembly_data_length,
+		 info->first_entry_offset);
 read_rfc1002_done:
 	return data_read;
 }
@@ -2002,7 +1874,12 @@
 			info->transport_status != SMBD_CONNECTED);
 	/* Don't return any data if interrupted */
 	if (rc)
-		return -ENODEV;
+		return rc;
+
+	if (info->transport_status != SMBD_CONNECTED) {
+		log_read(ERR, "disconnected\n");
+		return -ECONNABORTED;
+	}
 
 	goto again;
 }
@@ -2054,16 +1931,22 @@
 	unsigned int to_read, page_offset;
 	int rc;
 
-	info->smbd_recv_pending++;
+	if (iov_iter_rw(&msg->msg_iter) == WRITE) {
+		/* It's a bug in upper layer to get there */
+		cifs_dbg(VFS, "Invalid msg iter dir %u\n",
+			 iov_iter_rw(&msg->msg_iter));
+		rc = -EINVAL;
+		goto out;
+	}
 
-	switch (msg->msg_iter.type) {
-	case READ | ITER_KVEC:
+	switch (iov_iter_type(&msg->msg_iter)) {
+	case ITER_KVEC:
 		buf = msg->msg_iter.kvec->iov_base;
 		to_read = msg->msg_iter.kvec->iov_len;
 		rc = smbd_recv_buf(info, buf, to_read);
 		break;
 
-	case READ | ITER_BVEC:
+	case ITER_BVEC:
 		page = msg->msg_iter.bvec->bv_page;
 		page_offset = msg->msg_iter.bvec->bv_offset;
 		to_read = msg->msg_iter.bvec->bv_len;
@@ -2072,14 +1955,12 @@
 
 	default:
 		/* It's a bug in upper layer to get there */
-		cifs_dbg(VFS, "CIFS: invalid msg type %d\n",
-			msg->msg_iter.type);
+		cifs_dbg(VFS, "Invalid msg type %d\n",
+			 iov_iter_type(&msg->msg_iter));
 		rc = -EINVAL;
 	}
 
-	info->smbd_recv_pending--;
-	wake_up(&info->wait_smbd_recv_pending);
-
+out:
 	/* SMBDirect will read it all or nothing */
 	if (rc > 0)
 		msg->msg_iter.count = 0;
@@ -2108,9 +1989,8 @@
 	struct smb_rqst *rqst;
 	int rqst_idx;
 
-	info->smbd_send_pending++;
 	if (info->transport_status != SMBD_CONNECTED) {
-		rc = -ENODEV;
+		rc = -EAGAIN;
 		goto done;
 	}
 
@@ -2123,16 +2003,17 @@
 	for (i = 0; i < num_rqst; i++)
 		remaining_data_length += smb_rqst_len(server, &rqst_array[i]);
 
-	if (remaining_data_length + sizeof(struct smbd_data_transfer) >
-		info->max_fragmented_send_size) {
+	if (remaining_data_length > info->max_fragmented_send_size) {
 		log_write(ERR, "payload size %d > max size %d\n",
 			remaining_data_length, info->max_fragmented_send_size);
 		rc = -EINVAL;
 		goto done;
 	}
 
-	rqst_idx = 0;
+	log_write(INFO, "num_rqst=%d total length=%u\n",
+		  num_rqst, remaining_data_length);
 
+	rqst_idx = 0;
 next_rqst:
 	rqst = &rqst_array[rqst_idx];
 	iov = rqst->rq_iov;
@@ -2143,10 +2024,9 @@
 		dump_smb(iov[i].iov_base, iov[i].iov_len);
 
 
-	log_write(INFO, "rqst_idx=%d nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
-		"rq_tailsz=%d buflen=%lu\n",
-		rqst_idx, rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
-		rqst->rq_tailsz, smb_rqst_len(server, rqst));
+	log_write(INFO, "rqst_idx=%d nvec=%d rqst->rq_npages=%d rq_pagesz=%d rq_tailsz=%d buflen=%lu\n",
+		  rqst_idx, rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
+		  rqst->rq_tailsz, smb_rqst_len(server, rqst));
 
 	start = i = 0;
 	buflen = 0;
@@ -2156,11 +2036,9 @@
 			if (i > start) {
 				remaining_data_length -=
 					(buflen-iov[i].iov_len);
-				log_write(INFO, "sending iov[] from start=%d "
-					"i=%d nvecs=%d "
-					"remaining_data_length=%d\n",
-					start, i, i-start,
-					remaining_data_length);
+				log_write(INFO, "sending iov[] from start=%d i=%d nvecs=%d remaining_data_length=%d\n",
+					  start, i, i - start,
+					  remaining_data_length);
 				rc = smbd_post_send_data(
 					info, &iov[start], i-start,
 					remaining_data_length);
@@ -2169,10 +2047,9 @@
 			} else {
 				/* iov[start] is too big, break it */
 				nvecs = (buflen+max_iov_size-1)/max_iov_size;
-				log_write(INFO, "iov[%d] iov_base=%p buflen=%d"
-					" break to %d vectors\n",
-					start, iov[start].iov_base,
-					buflen, nvecs);
+				log_write(INFO, "iov[%d] iov_base=%p buflen=%d break to %d vectors\n",
+					  start, iov[start].iov_base,
+					  buflen, nvecs);
 				for (j = 0; j < nvecs; j++) {
 					vec.iov_base =
 						(char *)iov[start].iov_base +
@@ -2184,11 +2061,9 @@
 							max_iov_size*(nvecs-1);
 					remaining_data_length -= vec.iov_len;
 					log_write(INFO,
-						"sending vec j=%d iov_base=%p"
-						" iov_len=%zu "
-						"remaining_data_length=%d\n",
-						j, vec.iov_base, vec.iov_len,
-						remaining_data_length);
+						"sending vec j=%d iov_base=%p iov_len=%zu remaining_data_length=%d\n",
+						j, vec.iov_base, vec.iov_len,
+						remaining_data_length);
 					rc = smbd_post_send_data(
 						info, &vec, 1,
 						remaining_data_length);
@@ -2206,11 +2081,9 @@
 		if (i == rqst->rq_nvec) {
 			/* send out all remaining vecs */
 			remaining_data_length -= buflen;
-			log_write(INFO,
-				"sending iov[] from start=%d i=%d "
-				"nvecs=%d remaining_data_length=%d\n",
-				start, i, i-start,
-				remaining_data_length);
+			log_write(INFO, "sending iov[] from start=%d i=%d nvecs=%d remaining_data_length=%d\n",
+				  start, i, i - start,
+				  remaining_data_length);
 			rc = smbd_post_send_data(info, &iov[start],
 				i-start, remaining_data_length);
 			if (rc)
@@ -2234,10 +2107,9 @@
 			if (j == nvecs-1)
 				size = buflen - j*max_iov_size;
 			remaining_data_length -= size;
-			log_write(INFO, "sending pages i=%d offset=%d size=%d"
-				" remaining_data_length=%d\n",
-				i, j*max_iov_size+offset, size,
-				remaining_data_length);
+			log_write(INFO, "sending pages i=%d offset=%d size=%d remaining_data_length=%d\n",
+				  i, j * max_iov_size + offset, size,
+				  remaining_data_length);
 			rc = smbd_post_send_page(
 				info, rqst->rq_pages[i],
 				j*max_iov_size + offset,
@@ -2259,11 +2131,8 @@
 	 * that means all the I/Os have been out and we are good to return
 	 */
 
-	wait_event(info->wait_send_payload_pending,
-		atomic_read(&info->send_payload_pending) == 0);
-
-	info->smbd_send_pending--;
-	wake_up(&info->wait_smbd_send_pending);
+	wait_event(info->wait_send_pending,
+		atomic_read(&info->send_pending) == 0);
 
 	return rc;
 }
@@ -2298,8 +2167,7 @@
 	int rc;
 
 	list_for_each_entry(smbdirect_mr, &info->mr_list, list) {
-		if (smbdirect_mr->state == MR_INVALIDATED ||
-			smbdirect_mr->state == MR_ERROR) {
+		if (smbdirect_mr->state == MR_ERROR) {
 
 			/* recover this MR entry */
 			rc = ib_dereg_mr(smbdirect_mr->mr);
@@ -2315,33 +2183,27 @@
 				info->pd, info->mr_type,
 				info->max_frmr_depth);
 			if (IS_ERR(smbdirect_mr->mr)) {
-				log_rdma_mr(ERR,
-					"ib_alloc_mr failed mr_type=%x "
-					"max_frmr_depth=%x\n",
-					info->mr_type,
-					info->max_frmr_depth);
+				log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
+					    info->mr_type,
+					    info->max_frmr_depth);
 				smbd_disconnect_rdma_connection(info);
 				continue;
 			}
+		} else
+			/* This MR is being used, don't recover it */
+			continue;
 
-			if (smbdirect_mr->state == MR_INVALIDATED)
-				ib_dma_unmap_sg(
-					info->id->device, smbdirect_mr->sgl,
-					smbdirect_mr->sgl_count,
-					smbdirect_mr->dir);
+		smbdirect_mr->state = MR_READY;
 
-			smbdirect_mr->state = MR_READY;
-
-			/* smbdirect_mr->state is updated by this function
-			 * and is read and updated by I/O issuing CPUs trying
-			 * to get a MR, the call to atomic_inc_return
-			 * implicates a memory barrier and guarantees this
-			 * value is updated before waking up any calls to
-			 * get_mr() from the I/O issuing CPUs
-			 */
-			if (atomic_inc_return(&info->mr_ready_count) == 1)
-				wake_up_interruptible(&info->wait_mr);
-		}
+		/* smbdirect_mr->state is updated by this function
+		 * and is read and updated by I/O issuing CPUs trying
+		 * to get a MR, the call to atomic_inc_return
+		 * implicates a memory barrier and guarantees this
+		 * value is updated before waking up any calls to
+		 * get_mr() from the I/O issuing CPUs
+		 */
+		if (atomic_inc_return(&info->mr_ready_count) == 1)
+			wake_up_interruptible(&info->wait_mr);
 	}
 }
 
@@ -2378,6 +2240,7 @@
 	atomic_set(&info->mr_ready_count, 0);
 	atomic_set(&info->mr_used_count, 0);
 	init_waitqueue_head(&info->wait_for_mr_cleanup);
+	INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
 	/* Allocate more MRs (2x) than hardware responder_resources */
 	for (i = 0; i < info->responder_resources * 2; i++) {
 		smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
@@ -2386,9 +2249,8 @@
 		smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type,
 			info->max_frmr_depth);
 		if (IS_ERR(smbdirect_mr->mr)) {
-			log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x "
-				"max_frmr_depth=%x\n",
-				info->mr_type, info->max_frmr_depth);
+			log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
+				    info->mr_type, info->max_frmr_depth);
 			goto out;
 		}
 		smbdirect_mr->sgl = kcalloc(
@@ -2406,13 +2268,13 @@
 		list_add_tail(&smbdirect_mr->list, &info->mr_list);
 		atomic_inc(&info->mr_ready_count);
 	}
-	INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
 	return 0;
 
 out:
 	kfree(smbdirect_mr);
 
 	list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) {
+		list_del(&smbdirect_mr->list);
 		ib_dereg_mr(smbdirect_mr->mr);
 		kfree(smbdirect_mr->sgl);
 		kfree(smbdirect_mr);
@@ -2631,11 +2493,20 @@
 	 */
 	smbdirect_mr->state = MR_INVALIDATED;
 
-	/*
-	 * Schedule the work to do MR recovery for future I/Os
-	 * MR recovery is slow and we don't want it to block the current I/O
-	 */
-	queue_work(info->workqueue, &info->mr_recovery_work);
+	if (smbdirect_mr->state == MR_INVALIDATED) {
+		ib_dma_unmap_sg(
+			info->id->device, smbdirect_mr->sgl,
+			smbdirect_mr->sgl_count,
+			smbdirect_mr->dir);
+		smbdirect_mr->state = MR_READY;
+		if (atomic_inc_return(&info->mr_ready_count) == 1)
+			wake_up_interruptible(&info->wait_mr);
+	} else
+		/*
+		 * Schedule the work to do MR recovery for future I/Os MR
+		 * recovery is slow and don't want it to block current I/O
+		 */
+		queue_work(info->workqueue, &info->mr_recovery_work);
 
 done:
 	if (atomic_dec_and_test(&info->mr_used_count))