forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-13 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e
kernel/drivers/infiniband/hw/hns/hns_roce_mr.c
....@@ -47,643 +47,152 @@
4747 {
4848 return (key << 24) | (key >> 8);
4949 }
50
-EXPORT_SYMBOL_GPL(key_to_hw_index);
5150
52
-static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
53
- struct hns_roce_cmd_mailbox *mailbox,
54
- unsigned long mpt_index)
51
+static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
52
+ struct hns_roce_cmd_mailbox *mailbox,
53
+ unsigned long mpt_index)
5554 {
5655 return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
57
- HNS_ROCE_CMD_SW2HW_MPT,
56
+ HNS_ROCE_CMD_CREATE_MPT,
5857 HNS_ROCE_CMD_TIMEOUT_MSECS);
5958 }
6059
61
-int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
62
- struct hns_roce_cmd_mailbox *mailbox,
63
- unsigned long mpt_index)
60
+int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
61
+ struct hns_roce_cmd_mailbox *mailbox,
62
+ unsigned long mpt_index)
6463 {
6564 return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
66
- mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
65
+ mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT,
6766 HNS_ROCE_CMD_TIMEOUT_MSECS);
6867 }
69
-EXPORT_SYMBOL_GPL(hns_roce_hw2sw_mpt);
7068
71
-static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
72
- unsigned long *seg)
69
+static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
70
+ u32 pd, u64 iova, u64 size, u32 access)
7371 {
74
- int o;
75
- u32 m;
76
-
77
- spin_lock(&buddy->lock);
78
-
79
- for (o = order; o <= buddy->max_order; ++o) {
80
- if (buddy->num_free[o]) {
81
- m = 1 << (buddy->max_order - o);
82
- *seg = find_first_bit(buddy->bits[o], m);
83
- if (*seg < m)
84
- goto found;
85
- }
86
- }
87
- spin_unlock(&buddy->lock);
88
- return -1;
89
-
90
- found:
91
- clear_bit(*seg, buddy->bits[o]);
92
- --buddy->num_free[o];
93
-
94
- while (o > order) {
95
- --o;
96
- *seg <<= 1;
97
- set_bit(*seg ^ 1, buddy->bits[o]);
98
- ++buddy->num_free[o];
99
- }
100
-
101
- spin_unlock(&buddy->lock);
102
-
103
- *seg <<= order;
104
- return 0;
105
-}
106
-
107
-static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg,
108
- int order)
109
-{
110
- seg >>= order;
111
-
112
- spin_lock(&buddy->lock);
113
-
114
- while (test_bit(seg ^ 1, buddy->bits[order])) {
115
- clear_bit(seg ^ 1, buddy->bits[order]);
116
- --buddy->num_free[order];
117
- seg >>= 1;
118
- ++order;
119
- }
120
-
121
- set_bit(seg, buddy->bits[order]);
122
- ++buddy->num_free[order];
123
-
124
- spin_unlock(&buddy->lock);
125
-}
126
-
127
-static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
128
-{
129
- int i, s;
130
-
131
- buddy->max_order = max_order;
132
- spin_lock_init(&buddy->lock);
133
- buddy->bits = kcalloc(buddy->max_order + 1,
134
- sizeof(*buddy->bits),
135
- GFP_KERNEL);
136
- buddy->num_free = kcalloc(buddy->max_order + 1,
137
- sizeof(*buddy->num_free),
138
- GFP_KERNEL);
139
- if (!buddy->bits || !buddy->num_free)
140
- goto err_out;
141
-
142
- for (i = 0; i <= buddy->max_order; ++i) {
143
- s = BITS_TO_LONGS(1 << (buddy->max_order - i));
144
- buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
145
- __GFP_NOWARN);
146
- if (!buddy->bits[i]) {
147
- buddy->bits[i] = vzalloc(array_size(s, sizeof(long)));
148
- if (!buddy->bits[i])
149
- goto err_out_free;
150
- }
151
- }
152
-
153
- set_bit(0, buddy->bits[buddy->max_order]);
154
- buddy->num_free[buddy->max_order] = 1;
155
-
156
- return 0;
157
-
158
-err_out_free:
159
- for (i = 0; i <= buddy->max_order; ++i)
160
- kvfree(buddy->bits[i]);
161
-
162
-err_out:
163
- kfree(buddy->bits);
164
- kfree(buddy->num_free);
165
- return -ENOMEM;
166
-}
167
-
168
-static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
169
-{
170
- int i;
171
-
172
- for (i = 0; i <= buddy->max_order; ++i)
173
- kvfree(buddy->bits[i]);
174
-
175
- kfree(buddy->bits);
176
- kfree(buddy->num_free);
177
-}
178
-
179
-static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
180
- unsigned long *seg, u32 mtt_type)
181
-{
182
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
183
- struct hns_roce_hem_table *table;
184
- struct hns_roce_buddy *buddy;
185
- int ret;
186
-
187
- if (mtt_type == MTT_TYPE_WQE) {
188
- buddy = &mr_table->mtt_buddy;
189
- table = &mr_table->mtt_table;
190
- } else {
191
- buddy = &mr_table->mtt_cqe_buddy;
192
- table = &mr_table->mtt_cqe_table;
193
- }
194
-
195
- ret = hns_roce_buddy_alloc(buddy, order, seg);
196
- if (ret == -1)
197
- return -1;
198
-
199
- if (hns_roce_table_get_range(hr_dev, table, *seg,
200
- *seg + (1 << order) - 1)) {
201
- hns_roce_buddy_free(buddy, *seg, order);
202
- return -1;
203
- }
204
-
205
- return 0;
206
-}
207
-
208
-int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
209
- struct hns_roce_mtt *mtt)
210
-{
211
- int ret;
212
- int i;
213
-
214
- /* Page num is zero, correspond to DMA memory register */
215
- if (!npages) {
216
- mtt->order = -1;
217
- mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT;
218
- return 0;
219
- }
220
-
221
- /* Note: if page_shift is zero, FAST memory register */
222
- mtt->page_shift = page_shift;
223
-
224
- /* Compute MTT entry necessary */
225
- for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
226
- i <<= 1)
227
- ++mtt->order;
228
-
229
- /* Allocate MTT entry */
230
- ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
231
- mtt->mtt_type);
232
- if (ret == -1)
233
- return -ENOMEM;
234
-
235
- return 0;
236
-}
237
-
238
-void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
239
-{
240
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
241
-
242
- if (mtt->order < 0)
243
- return;
244
-
245
- if (mtt->mtt_type == MTT_TYPE_WQE) {
246
- hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
247
- mtt->order);
248
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
249
- mtt->first_seg,
250
- mtt->first_seg + (1 << mtt->order) - 1);
251
- } else {
252
- hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
253
- mtt->order);
254
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
255
- mtt->first_seg,
256
- mtt->first_seg + (1 << mtt->order) - 1);
257
- }
258
-}
259
-EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
260
-
261
-static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
262
- struct hns_roce_mr *mr, int err_loop_index,
263
- int loop_i, int loop_j)
264
-{
265
- struct device *dev = hr_dev->dev;
266
- u32 mhop_num;
267
- u32 pbl_bt_sz;
268
- u64 bt_idx;
269
- int i, j;
270
-
271
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
272
- mhop_num = hr_dev->caps.pbl_hop_num;
273
-
274
- i = loop_i;
275
- if (mhop_num == 3 && err_loop_index == 2) {
276
- for (; i >= 0; i--) {
277
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
278
- mr->pbl_l1_dma_addr[i]);
279
-
280
- for (j = 0; j < pbl_bt_sz / 8; j++) {
281
- if (i == loop_i && j >= loop_j)
282
- break;
283
-
284
- bt_idx = i * pbl_bt_sz / 8 + j;
285
- dma_free_coherent(dev, pbl_bt_sz,
286
- mr->pbl_bt_l2[bt_idx],
287
- mr->pbl_l2_dma_addr[bt_idx]);
288
- }
289
- }
290
- } else if (mhop_num == 3 && err_loop_index == 1) {
291
- for (i -= 1; i >= 0; i--) {
292
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
293
- mr->pbl_l1_dma_addr[i]);
294
-
295
- for (j = 0; j < pbl_bt_sz / 8; j++) {
296
- bt_idx = i * pbl_bt_sz / 8 + j;
297
- dma_free_coherent(dev, pbl_bt_sz,
298
- mr->pbl_bt_l2[bt_idx],
299
- mr->pbl_l2_dma_addr[bt_idx]);
300
- }
301
- }
302
- } else if (mhop_num == 2 && err_loop_index == 1) {
303
- for (i -= 1; i >= 0; i--)
304
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
305
- mr->pbl_l1_dma_addr[i]);
306
- } else {
307
- dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.",
308
- mhop_num, err_loop_index);
309
- return;
310
- }
311
-
312
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
313
- mr->pbl_bt_l0 = NULL;
314
- mr->pbl_l0_dma_addr = 0;
315
-}
316
-
317
-/* PBL multi hop addressing */
318
-static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
319
- struct hns_roce_mr *mr)
320
-{
321
- struct device *dev = hr_dev->dev;
322
- int mr_alloc_done = 0;
323
- int npages_allocated;
324
- int i = 0, j = 0;
325
- u32 pbl_bt_sz;
326
- u32 mhop_num;
327
- u64 pbl_last_bt_num;
328
- u64 pbl_bt_cnt = 0;
329
- u64 bt_idx;
330
- u64 size;
331
-
332
- mhop_num = hr_dev->caps.pbl_hop_num;
333
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
334
- pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
335
-
336
- if (mhop_num == HNS_ROCE_HOP_NUM_0)
337
- return 0;
338
-
339
- /* hop_num = 1 */
340
- if (mhop_num == 1) {
341
- if (npages > pbl_bt_sz / 8) {
342
- dev_err(dev, "npages %d is larger than buf_pg_sz!",
343
- npages);
344
- return -EINVAL;
345
- }
346
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
347
- &(mr->pbl_dma_addr),
348
- GFP_KERNEL);
349
- if (!mr->pbl_buf)
350
- return -ENOMEM;
351
-
352
- mr->pbl_size = npages;
353
- mr->pbl_ba = mr->pbl_dma_addr;
354
- mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
355
- mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
356
- mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
357
- return 0;
358
- }
359
-
360
- mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
361
- sizeof(*mr->pbl_l1_dma_addr),
362
- GFP_KERNEL);
363
- if (!mr->pbl_l1_dma_addr)
364
- return -ENOMEM;
365
-
366
- mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
367
- GFP_KERNEL);
368
- if (!mr->pbl_bt_l1)
369
- goto err_kcalloc_bt_l1;
370
-
371
- if (mhop_num == 3) {
372
- mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
373
- sizeof(*mr->pbl_l2_dma_addr),
374
- GFP_KERNEL);
375
- if (!mr->pbl_l2_dma_addr)
376
- goto err_kcalloc_l2_dma;
377
-
378
- mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
379
- sizeof(*mr->pbl_bt_l2),
380
- GFP_KERNEL);
381
- if (!mr->pbl_bt_l2)
382
- goto err_kcalloc_bt_l2;
383
- }
384
-
385
- /* alloc L0 BT */
386
- mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
387
- &(mr->pbl_l0_dma_addr),
388
- GFP_KERNEL);
389
- if (!mr->pbl_bt_l0)
390
- goto err_dma_alloc_l0;
391
-
392
- if (mhop_num == 2) {
393
- /* alloc L1 BT */
394
- for (i = 0; i < pbl_bt_sz / 8; i++) {
395
- if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
396
- size = pbl_bt_sz;
397
- } else {
398
- npages_allocated = i * (pbl_bt_sz / 8);
399
- size = (npages - npages_allocated) * 8;
400
- }
401
- mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
402
- &(mr->pbl_l1_dma_addr[i]),
403
- GFP_KERNEL);
404
- if (!mr->pbl_bt_l1[i]) {
405
- hns_roce_loop_free(hr_dev, mr, 1, i, 0);
406
- goto err_dma_alloc_l0;
407
- }
408
-
409
- *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
410
-
411
- pbl_bt_cnt++;
412
- if (pbl_bt_cnt >= pbl_last_bt_num)
413
- break;
414
- }
415
- } else if (mhop_num == 3) {
416
- /* alloc L1, L2 BT */
417
- for (i = 0; i < pbl_bt_sz / 8; i++) {
418
- mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
419
- &(mr->pbl_l1_dma_addr[i]),
420
- GFP_KERNEL);
421
- if (!mr->pbl_bt_l1[i]) {
422
- hns_roce_loop_free(hr_dev, mr, 1, i, 0);
423
- goto err_dma_alloc_l0;
424
- }
425
-
426
- *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
427
-
428
- for (j = 0; j < pbl_bt_sz / 8; j++) {
429
- bt_idx = i * pbl_bt_sz / 8 + j;
430
-
431
- if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
432
- size = pbl_bt_sz;
433
- } else {
434
- npages_allocated = bt_idx *
435
- (pbl_bt_sz / 8);
436
- size = (npages - npages_allocated) * 8;
437
- }
438
- mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
439
- dev, size,
440
- &(mr->pbl_l2_dma_addr[bt_idx]),
441
- GFP_KERNEL);
442
- if (!mr->pbl_bt_l2[bt_idx]) {
443
- hns_roce_loop_free(hr_dev, mr, 2, i, j);
444
- goto err_dma_alloc_l0;
445
- }
446
-
447
- *(mr->pbl_bt_l1[i] + j) =
448
- mr->pbl_l2_dma_addr[bt_idx];
449
-
450
- pbl_bt_cnt++;
451
- if (pbl_bt_cnt >= pbl_last_bt_num) {
452
- mr_alloc_done = 1;
453
- break;
454
- }
455
- }
456
-
457
- if (mr_alloc_done)
458
- break;
459
- }
460
- }
461
-
462
- mr->l0_chunk_last_num = i + 1;
463
- if (mhop_num == 3)
464
- mr->l1_chunk_last_num = j + 1;
465
-
466
- mr->pbl_size = npages;
467
- mr->pbl_ba = mr->pbl_l0_dma_addr;
468
- mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
469
- mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
470
- mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
471
-
472
- return 0;
473
-
474
-err_dma_alloc_l0:
475
- kfree(mr->pbl_bt_l2);
476
- mr->pbl_bt_l2 = NULL;
477
-
478
-err_kcalloc_bt_l2:
479
- kfree(mr->pbl_l2_dma_addr);
480
- mr->pbl_l2_dma_addr = NULL;
481
-
482
-err_kcalloc_l2_dma:
483
- kfree(mr->pbl_bt_l1);
484
- mr->pbl_bt_l1 = NULL;
485
-
486
-err_kcalloc_bt_l1:
487
- kfree(mr->pbl_l1_dma_addr);
488
- mr->pbl_l1_dma_addr = NULL;
489
-
490
- return -ENOMEM;
491
-}
492
-
493
-static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
494
- u64 size, u32 access, int npages,
495
- struct hns_roce_mr *mr)
496
-{
497
- struct device *dev = hr_dev->dev;
498
- unsigned long index = 0;
499
- int ret = 0;
72
+ struct ib_device *ibdev = &hr_dev->ib_dev;
73
+ unsigned long obj = 0;
74
+ int err;
50075
50176 /* Allocate a key for mr from mr_table */
502
- ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
503
- if (ret == -1)
77
+ err = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &obj);
78
+ if (err) {
79
+ ibdev_err(ibdev,
80
+ "failed to alloc bitmap for MR key, ret = %d.\n",
81
+ err);
50482 return -ENOMEM;
83
+ }
50584
50685 mr->iova = iova; /* MR va starting addr */
50786 mr->size = size; /* MR addr range */
50887 mr->pd = pd; /* MR num */
50988 mr->access = access; /* MR access permit */
51089 mr->enabled = 0; /* MR active status */
511
- mr->key = hw_index_to_key(index); /* MR key */
90
+ mr->key = hw_index_to_key(obj); /* MR key */
51291
513
- if (size == ~0ull) {
514
- mr->type = MR_TYPE_DMA;
515
- mr->pbl_buf = NULL;
516
- mr->pbl_dma_addr = 0;
517
- /* PBL multi-hop addressing parameters */
518
- mr->pbl_bt_l2 = NULL;
519
- mr->pbl_bt_l1 = NULL;
520
- mr->pbl_bt_l0 = NULL;
521
- mr->pbl_l2_dma_addr = NULL;
522
- mr->pbl_l1_dma_addr = NULL;
523
- mr->pbl_l0_dma_addr = 0;
524
- } else {
525
- mr->type = MR_TYPE_MR;
526
- if (!hr_dev->caps.pbl_hop_num) {
527
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
528
- &(mr->pbl_dma_addr),
529
- GFP_KERNEL);
530
- if (!mr->pbl_buf)
531
- return -ENOMEM;
532
- } else {
533
- ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
534
- }
92
+ err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
93
+ if (err) {
94
+ ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
95
+ goto err_free_bitmap;
53596 }
53697
537
- return ret;
98
+ return 0;
99
+err_free_bitmap:
100
+ hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR);
101
+ return err;
538102 }
539103
540
-static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
541
- struct hns_roce_mr *mr)
104
+static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
542105 {
543
- struct device *dev = hr_dev->dev;
544
- int npages_allocated;
545
- int npages;
546
- int i, j;
547
- u32 pbl_bt_sz;
548
- u32 mhop_num;
549
- u64 bt_idx;
106
+ unsigned long obj = key_to_hw_index(mr->key);
550107
551
- npages = ib_umem_page_count(mr->umem);
552
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
553
- mhop_num = hr_dev->caps.pbl_hop_num;
108
+ hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
109
+ hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR);
110
+}
554111
555
- if (mhop_num == HNS_ROCE_HOP_NUM_0)
556
- return;
112
+static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
113
+ size_t length, struct ib_udata *udata, u64 start,
114
+ int access)
115
+{
116
+ struct ib_device *ibdev = &hr_dev->ib_dev;
117
+ bool is_fast = mr->type == MR_TYPE_FRMR;
118
+ struct hns_roce_buf_attr buf_attr = {};
119
+ int err;
557120
558
- /* hop_num = 1 */
559
- if (mhop_num == 1) {
560
- dma_free_coherent(dev, (unsigned int)(npages * 8),
561
- mr->pbl_buf, mr->pbl_dma_addr);
562
- return;
563
- }
121
+ mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
122
+ buf_attr.page_shift = is_fast ? PAGE_SHIFT :
123
+ hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
124
+ buf_attr.region[0].size = length;
125
+ buf_attr.region[0].hopnum = mr->pbl_hop_num;
126
+ buf_attr.region_count = 1;
127
+ buf_attr.fixed_page = true;
128
+ buf_attr.user_access = access;
129
+ /* fast MR's buffer is alloced before mapping, not at creation */
130
+ buf_attr.mtt_only = is_fast;
564131
565
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
566
- mr->pbl_l0_dma_addr);
132
+ err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
133
+ hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT,
134
+ udata, start);
135
+ if (err)
136
+ ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
137
+ else
138
+ mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
567139
568
- if (mhop_num == 2) {
569
- for (i = 0; i < mr->l0_chunk_last_num; i++) {
570
- if (i == mr->l0_chunk_last_num - 1) {
571
- npages_allocated = i * (pbl_bt_sz / 8);
140
+ return err;
141
+}
572142
573
- dma_free_coherent(dev,
574
- (npages - npages_allocated) * 8,
575
- mr->pbl_bt_l1[i],
576
- mr->pbl_l1_dma_addr[i]);
577
-
578
- break;
579
- }
580
-
581
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
582
- mr->pbl_l1_dma_addr[i]);
583
- }
584
- } else if (mhop_num == 3) {
585
- for (i = 0; i < mr->l0_chunk_last_num; i++) {
586
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
587
- mr->pbl_l1_dma_addr[i]);
588
-
589
- for (j = 0; j < pbl_bt_sz / 8; j++) {
590
- bt_idx = i * (pbl_bt_sz / 8) + j;
591
-
592
- if ((i == mr->l0_chunk_last_num - 1)
593
- && j == mr->l1_chunk_last_num - 1) {
594
- npages_allocated = bt_idx *
595
- (pbl_bt_sz / 8);
596
-
597
- dma_free_coherent(dev,
598
- (npages - npages_allocated) * 8,
599
- mr->pbl_bt_l2[bt_idx],
600
- mr->pbl_l2_dma_addr[bt_idx]);
601
-
602
- break;
603
- }
604
-
605
- dma_free_coherent(dev, pbl_bt_sz,
606
- mr->pbl_bt_l2[bt_idx],
607
- mr->pbl_l2_dma_addr[bt_idx]);
608
- }
609
- }
610
- }
611
-
612
- kfree(mr->pbl_bt_l1);
613
- kfree(mr->pbl_l1_dma_addr);
614
- mr->pbl_bt_l1 = NULL;
615
- mr->pbl_l1_dma_addr = NULL;
616
- if (mhop_num == 3) {
617
- kfree(mr->pbl_bt_l2);
618
- kfree(mr->pbl_l2_dma_addr);
619
- mr->pbl_bt_l2 = NULL;
620
- mr->pbl_l2_dma_addr = NULL;
621
- }
143
+static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
144
+{
145
+ hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
622146 }
623147
624148 static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
625149 struct hns_roce_mr *mr)
626150 {
627
- struct device *dev = hr_dev->dev;
628
- int npages = 0;
151
+ struct ib_device *ibdev = &hr_dev->ib_dev;
629152 int ret;
630153
631154 if (mr->enabled) {
632
- ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
633
- & (hr_dev->caps.num_mtpts - 1));
155
+ ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
156
+ key_to_hw_index(mr->key) &
157
+ (hr_dev->caps.num_mtpts - 1));
634158 if (ret)
635
- dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
159
+ ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
160
+ ret);
636161 }
637162
638
- if (mr->size != ~0ULL) {
639
- npages = ib_umem_page_count(mr->umem);
640
-
641
- if (!hr_dev->caps.pbl_hop_num)
642
- dma_free_coherent(dev, (unsigned int)(npages * 8),
643
- mr->pbl_buf, mr->pbl_dma_addr);
644
- else
645
- hns_roce_mhop_free(hr_dev, mr);
646
- }
647
-
648
- if (mr->enabled)
649
- hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
650
- key_to_hw_index(mr->key));
651
-
652
- hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
653
- key_to_hw_index(mr->key), BITMAP_NO_RR);
163
+ free_mr_pbl(hr_dev, mr);
164
+ free_mr_key(hr_dev, mr);
654165 }
655166
656167 static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
657168 struct hns_roce_mr *mr)
658169 {
659
- int ret;
660170 unsigned long mtpt_idx = key_to_hw_index(mr->key);
661
- struct device *dev = hr_dev->dev;
662171 struct hns_roce_cmd_mailbox *mailbox;
663
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
664
-
665
- /* Prepare HEM entry memory */
666
- ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
667
- if (ret)
668
- return ret;
172
+ struct device *dev = hr_dev->dev;
173
+ int ret;
669174
670175 /* Allocate mailbox memory */
671176 mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
672177 if (IS_ERR(mailbox)) {
673178 ret = PTR_ERR(mailbox);
674
- goto err_table;
179
+ return ret;
675180 }
676181
677
- ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
182
+ if (mr->type != MR_TYPE_FRMR)
183
+ ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr,
184
+ mtpt_idx);
185
+ else
186
+ ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
678187 if (ret) {
679
- dev_err(dev, "Write mtpt fail!\n");
188
+ dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
680189 goto err_page;
681190 }
682191
683
- ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
684
- mtpt_idx & (hr_dev->caps.num_mtpts - 1));
192
+ ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
193
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
685194 if (ret) {
686
- dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
195
+ dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
687196 goto err_page;
688197 }
689198
....@@ -694,111 +203,6 @@
694203
695204 err_page:
696205 hns_roce_free_cmd_mailbox(hr_dev, mailbox);
697
-
698
-err_table:
699
- hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
700
- return ret;
701
-}
702
-
703
-static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
704
- struct hns_roce_mtt *mtt, u32 start_index,
705
- u32 npages, u64 *page_list)
706
-{
707
- struct hns_roce_hem_table *table;
708
- dma_addr_t dma_handle;
709
- __le64 *mtts;
710
- u32 bt_page_size;
711
- u32 i;
712
-
713
- if (mtt->mtt_type == MTT_TYPE_WQE)
714
- bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
715
- else
716
- bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
717
-
718
- /* All MTTs must fit in the same page */
719
- if (start_index / (bt_page_size / sizeof(u64)) !=
720
- (start_index + npages - 1) / (bt_page_size / sizeof(u64)))
721
- return -EINVAL;
722
-
723
- if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
724
- return -EINVAL;
725
-
726
- if (mtt->mtt_type == MTT_TYPE_WQE)
727
- table = &hr_dev->mr_table.mtt_table;
728
- else
729
- table = &hr_dev->mr_table.mtt_cqe_table;
730
-
731
- mtts = hns_roce_table_find(hr_dev, table,
732
- mtt->first_seg +
733
- start_index / HNS_ROCE_MTT_ENTRY_PER_SEG,
734
- &dma_handle);
735
- if (!mtts)
736
- return -ENOMEM;
737
-
738
- /* Save page addr, low 12 bits : 0 */
739
- for (i = 0; i < npages; ++i) {
740
- if (!hr_dev->caps.mtt_hop_num)
741
- mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT);
742
- else
743
- mtts[i] = cpu_to_le64(page_list[i]);
744
- }
745
-
746
- return 0;
747
-}
748
-
749
-static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
750
- struct hns_roce_mtt *mtt, u32 start_index,
751
- u32 npages, u64 *page_list)
752
-{
753
- int chunk;
754
- int ret;
755
- u32 bt_page_size;
756
-
757
- if (mtt->order < 0)
758
- return -EINVAL;
759
-
760
- if (mtt->mtt_type == MTT_TYPE_WQE)
761
- bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
762
- else
763
- bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
764
-
765
- while (npages > 0) {
766
- chunk = min_t(int, bt_page_size / sizeof(u64), npages);
767
-
768
- ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
769
- page_list);
770
- if (ret)
771
- return ret;
772
-
773
- npages -= chunk;
774
- start_index += chunk;
775
- page_list += chunk;
776
- }
777
-
778
- return 0;
779
-}
780
-
781
-int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
782
- struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
783
-{
784
- u64 *page_list;
785
- int ret;
786
- u32 i;
787
-
788
- page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
789
- if (!page_list)
790
- return -ENOMEM;
791
-
792
- for (i = 0; i < buf->npages; ++i) {
793
- if (buf->nbufs == 1)
794
- page_list[i] = buf->direct.map + (i << buf->page_shift);
795
- else
796
- page_list[i] = buf->page_list[i].map;
797
-
798
- }
799
- ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
800
-
801
- kfree(page_list);
802206
803207 return ret;
804208 }
....@@ -812,27 +216,6 @@
812216 hr_dev->caps.num_mtpts,
813217 hr_dev->caps.num_mtpts - 1,
814218 hr_dev->caps.reserved_mrws, 0);
815
- if (ret)
816
- return ret;
817
-
818
- ret = hns_roce_buddy_init(&mr_table->mtt_buddy,
819
- ilog2(hr_dev->caps.num_mtt_segs));
820
- if (ret)
821
- goto err_buddy;
822
-
823
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
824
- ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy,
825
- ilog2(hr_dev->caps.num_cqe_segs));
826
- if (ret)
827
- goto err_buddy_cqe;
828
- }
829
- return 0;
830
-
831
-err_buddy_cqe:
832
- hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
833
-
834
-err_buddy:
835
- hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
836219 return ret;
837220 }
838221
....@@ -840,24 +223,24 @@
840223 {
841224 struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
842225
843
- hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
844
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
845
- hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
846226 hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
847227 }
848228
849229 struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
850230 {
231
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
851232 struct hns_roce_mr *mr;
852233 int ret;
853234
854
- mr = kmalloc(sizeof(*mr), GFP_KERNEL);
235
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
855236 if (mr == NULL)
856237 return ERR_PTR(-ENOMEM);
857238
239
+ mr->type = MR_TYPE_DMA;
240
+
858241 /* Allocate memory region key */
859
- ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
860
- ~0ULL, acc, 0, mr);
242
+ hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
243
+ ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc);
861244 if (ret)
862245 goto err_free;
863246
....@@ -866,121 +249,14 @@
866249 goto err_mr;
867250
868251 mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
869
- mr->umem = NULL;
870252
871253 return &mr->ibmr;
872
-
873254 err_mr:
874
- hns_roce_mr_free(to_hr_dev(pd->device), mr);
255
+ free_mr_key(hr_dev, mr);
875256
876257 err_free:
877258 kfree(mr);
878259 return ERR_PTR(ret);
879
-}
880
-
881
-int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
882
- struct hns_roce_mtt *mtt, struct ib_umem *umem)
883
-{
884
- struct device *dev = hr_dev->dev;
885
- struct scatterlist *sg;
886
- unsigned int order;
887
- int i, k, entry;
888
- int npage = 0;
889
- int ret = 0;
890
- int len;
891
- u64 page_addr;
892
- u64 *pages;
893
- u32 bt_page_size;
894
- u32 n;
895
-
896
- order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz :
897
- hr_dev->caps.cqe_ba_pg_sz;
898
- bt_page_size = 1 << (order + PAGE_SHIFT);
899
-
900
- pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
901
- if (!pages)
902
- return -ENOMEM;
903
-
904
- i = n = 0;
905
-
906
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
907
- len = sg_dma_len(sg) >> PAGE_SHIFT;
908
- for (k = 0; k < len; ++k) {
909
- page_addr =
910
- sg_dma_address(sg) + (k << umem->page_shift);
911
- if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
912
- if (page_addr & ((1 << mtt->page_shift) - 1)) {
913
- dev_err(dev, "page_addr 0x%llx is not page_shift %d alignment!\n",
914
- page_addr, mtt->page_shift);
915
- ret = -EINVAL;
916
- goto out;
917
- }
918
- pages[i++] = page_addr;
919
- }
920
- npage++;
921
- if (i == bt_page_size / sizeof(u64)) {
922
- ret = hns_roce_write_mtt(hr_dev, mtt, n, i,
923
- pages);
924
- if (ret)
925
- goto out;
926
- n += i;
927
- i = 0;
928
- }
929
- }
930
- }
931
-
932
- if (i)
933
- ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
934
-
935
-out:
936
- free_pages((unsigned long) pages, order);
937
- return ret;
938
-}
939
-
940
-static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
941
- struct hns_roce_mr *mr,
942
- struct ib_umem *umem)
943
-{
944
- struct scatterlist *sg;
945
- int i = 0, j = 0, k;
946
- int entry;
947
- int len;
948
- u64 page_addr;
949
- u32 pbl_bt_sz;
950
-
951
- if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
952
- return 0;
953
-
954
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
955
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
956
- len = sg_dma_len(sg) >> PAGE_SHIFT;
957
- for (k = 0; k < len; ++k) {
958
- page_addr = sg_dma_address(sg) +
959
- (k << umem->page_shift);
960
-
961
- if (!hr_dev->caps.pbl_hop_num) {
962
- mr->pbl_buf[i++] = page_addr >> 12;
963
- } else if (hr_dev->caps.pbl_hop_num == 1) {
964
- mr->pbl_buf[i++] = page_addr;
965
- } else {
966
- if (hr_dev->caps.pbl_hop_num == 2)
967
- mr->pbl_bt_l1[i][j] = page_addr;
968
- else if (hr_dev->caps.pbl_hop_num == 3)
969
- mr->pbl_bt_l2[i][j] = page_addr;
970
-
971
- j++;
972
- if (j >= (pbl_bt_sz / 8)) {
973
- i++;
974
- j = 0;
975
- }
976
- }
977
- }
978
- }
979
-
980
- /* Memory barrier */
981
- mb();
982
-
983
- return 0;
984260 }
985261
986262 struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
....@@ -988,75 +264,67 @@
988264 struct ib_udata *udata)
989265 {
990266 struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
991
- struct device *dev = hr_dev->dev;
992267 struct hns_roce_mr *mr;
993
- int bt_size;
994268 int ret;
995
- int n;
996
- int i;
997269
998
- mr = kmalloc(sizeof(*mr), GFP_KERNEL);
270
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
999271 if (!mr)
1000272 return ERR_PTR(-ENOMEM);
1001273
1002
- mr->umem = ib_umem_get(pd->uobject->context, start, length,
1003
- access_flags, 0);
1004
- if (IS_ERR(mr->umem)) {
1005
- ret = PTR_ERR(mr->umem);
1006
- goto err_free;
1007
- }
1008
-
1009
- n = ib_umem_page_count(mr->umem);
1010
-
1011
- if (!hr_dev->caps.pbl_hop_num) {
1012
- if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
1013
- dev_err(dev,
1014
- " MR len %lld err. MR is limited to 4G at most!\n",
1015
- length);
1016
- ret = -EINVAL;
1017
- goto err_umem;
1018
- }
1019
- } else {
1020
- u64 pbl_size = 1;
1021
-
1022
- bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
1023
- for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
1024
- pbl_size *= bt_size;
1025
- if (n > pbl_size) {
1026
- dev_err(dev,
1027
- " MR len %lld err. MR page num is limited to %lld!\n",
1028
- length, pbl_size);
1029
- ret = -EINVAL;
1030
- goto err_umem;
1031
- }
1032
- }
1033
-
1034
- ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
1035
- access_flags, n, mr);
274
+ mr->type = MR_TYPE_MR;
275
+ ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length,
276
+ access_flags);
1036277 if (ret)
1037
- goto err_umem;
278
+ goto err_alloc_mr;
1038279
1039
- ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
280
+ ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags);
1040281 if (ret)
1041
- goto err_mr;
282
+ goto err_alloc_key;
1042283
1043284 ret = hns_roce_mr_enable(hr_dev, mr);
1044285 if (ret)
1045
- goto err_mr;
286
+ goto err_alloc_pbl;
1046287
1047288 mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1048289
1049290 return &mr->ibmr;
1050291
1051
-err_mr:
1052
- hns_roce_mr_free(hr_dev, mr);
1053
-
1054
-err_umem:
1055
- ib_umem_release(mr->umem);
1056
-
1057
-err_free:
292
+err_alloc_pbl:
293
+ free_mr_pbl(hr_dev, mr);
294
+err_alloc_key:
295
+ free_mr_key(hr_dev, mr);
296
+err_alloc_mr:
1058297 kfree(mr);
1059298 return ERR_PTR(ret);
299
+}
300
+
301
+static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
302
+ u64 start, u64 length,
303
+ u64 virt_addr, int mr_access_flags,
304
+ struct hns_roce_cmd_mailbox *mailbox,
305
+ u32 pdn, struct ib_udata *udata)
306
+{
307
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
308
+ struct ib_device *ibdev = &hr_dev->ib_dev;
309
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
310
+ int ret;
311
+
312
+ free_mr_pbl(hr_dev, mr);
313
+ ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags);
314
+ if (ret) {
315
+ ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret);
316
+ return ret;
317
+ }
318
+
319
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
320
+ mr_access_flags, virt_addr,
321
+ length, mailbox->buf);
322
+ if (ret) {
323
+ ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret);
324
+ free_mr_pbl(hr_dev, mr);
325
+ }
326
+
327
+ return ret;
1060328 }
1061329
1062330 int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
....@@ -1064,12 +332,11 @@
1064332 struct ib_udata *udata)
1065333 {
1066334 struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
335
+ struct ib_device *ib_dev = &hr_dev->ib_dev;
1067336 struct hns_roce_mr *mr = to_hr_mr(ibmr);
1068337 struct hns_roce_cmd_mailbox *mailbox;
1069
- struct device *dev = hr_dev->dev;
1070338 unsigned long mtpt_idx;
1071339 u32 pdn = 0;
1072
- int npages;
1073340 int ret;
1074341
1075342 if (!mr->enabled)
....@@ -1086,9 +353,9 @@
1086353 if (ret)
1087354 goto free_cmd_mbox;
1088355
1089
- ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx);
356
+ ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
1090357 if (ret)
1091
- dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
358
+ ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
1092359
1093360 mr->enabled = 0;
1094361
....@@ -1096,73 +363,24 @@
1096363 pdn = to_hr_pd(pd)->pdn;
1097364
1098365 if (flags & IB_MR_REREG_TRANS) {
1099
- if (mr->size != ~0ULL) {
1100
- npages = ib_umem_page_count(mr->umem);
1101
-
1102
- if (hr_dev->caps.pbl_hop_num)
1103
- hns_roce_mhop_free(hr_dev, mr);
1104
- else
1105
- dma_free_coherent(dev, npages * 8, mr->pbl_buf,
1106
- mr->pbl_dma_addr);
1107
- }
1108
- ib_umem_release(mr->umem);
1109
-
1110
- mr->umem = ib_umem_get(ibmr->uobject->context, start, length,
1111
- mr_access_flags, 0);
1112
- if (IS_ERR(mr->umem)) {
1113
- ret = PTR_ERR(mr->umem);
1114
- mr->umem = NULL;
366
+ ret = rereg_mr_trans(ibmr, flags,
367
+ start, length,
368
+ virt_addr, mr_access_flags,
369
+ mailbox, pdn, udata);
370
+ if (ret)
1115371 goto free_cmd_mbox;
1116
- }
1117
- npages = ib_umem_page_count(mr->umem);
1118
-
1119
- if (hr_dev->caps.pbl_hop_num) {
1120
- ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
1121
- if (ret)
1122
- goto release_umem;
1123
- } else {
1124
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
1125
- &(mr->pbl_dma_addr),
1126
- GFP_KERNEL);
1127
- if (!mr->pbl_buf) {
1128
- ret = -ENOMEM;
1129
- goto release_umem;
1130
- }
1131
- }
1132
- }
1133
-
1134
- ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
1135
- mr_access_flags, virt_addr,
1136
- length, mailbox->buf);
1137
- if (ret) {
1138
- if (flags & IB_MR_REREG_TRANS)
1139
- goto release_umem;
1140
- else
372
+ } else {
373
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
374
+ mr_access_flags, virt_addr,
375
+ length, mailbox->buf);
376
+ if (ret)
1141377 goto free_cmd_mbox;
1142378 }
1143379
1144
- if (flags & IB_MR_REREG_TRANS) {
1145
- ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
1146
- if (ret) {
1147
- if (mr->size != ~0ULL) {
1148
- npages = ib_umem_page_count(mr->umem);
1149
-
1150
- if (hr_dev->caps.pbl_hop_num)
1151
- hns_roce_mhop_free(hr_dev, mr);
1152
- else
1153
- dma_free_coherent(dev, npages * 8,
1154
- mr->pbl_buf,
1155
- mr->pbl_dma_addr);
1156
- }
1157
-
1158
- goto release_umem;
1159
- }
1160
- }
1161
-
1162
- ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx);
380
+ ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
1163381 if (ret) {
1164
- dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
1165
- goto release_umem;
382
+ ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
383
+ goto free_cmd_mbox;
1166384 }
1167385
1168386 mr->enabled = 1;
....@@ -1173,31 +391,725 @@
1173391
1174392 return 0;
1175393
1176
-release_umem:
1177
- ib_umem_release(mr->umem);
1178
-
1179394 free_cmd_mbox:
1180395 hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1181396
1182397 return ret;
1183398 }
1184399
1185
-int hns_roce_dereg_mr(struct ib_mr *ibmr)
400
+int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1186401 {
1187402 struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
1188403 struct hns_roce_mr *mr = to_hr_mr(ibmr);
1189404 int ret = 0;
1190405
1191406 if (hr_dev->hw->dereg_mr) {
1192
- ret = hr_dev->hw->dereg_mr(hr_dev, mr);
407
+ ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
1193408 } else {
1194409 hns_roce_mr_free(hr_dev, mr);
1195
-
1196
- if (mr->umem)
1197
- ib_umem_release(mr->umem);
1198
-
1199410 kfree(mr);
1200411 }
1201412
1202413 return ret;
1203414 }
415
+
416
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
417
+ u32 max_num_sg)
418
+{
419
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
420
+ struct device *dev = hr_dev->dev;
421
+ struct hns_roce_mr *mr;
422
+ u64 length;
423
+ int ret;
424
+
425
+ if (mr_type != IB_MR_TYPE_MEM_REG)
426
+ return ERR_PTR(-EINVAL);
427
+
428
+ if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
429
+ dev_err(dev, "max_num_sg larger than %d\n",
430
+ HNS_ROCE_FRMR_MAX_PA);
431
+ return ERR_PTR(-EINVAL);
432
+ }
433
+
434
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
435
+ if (!mr)
436
+ return ERR_PTR(-ENOMEM);
437
+
438
+ mr->type = MR_TYPE_FRMR;
439
+
440
+ /* Allocate memory region key */
441
+ length = max_num_sg * (1 << PAGE_SHIFT);
442
+ ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0);
443
+ if (ret)
444
+ goto err_free;
445
+
446
+ ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0);
447
+ if (ret)
448
+ goto err_key;
449
+
450
+ ret = hns_roce_mr_enable(hr_dev, mr);
451
+ if (ret)
452
+ goto err_pbl;
453
+
454
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
455
+ mr->ibmr.length = length;
456
+
457
+ return &mr->ibmr;
458
+
459
+err_pbl:
460
+ free_mr_pbl(hr_dev, mr);
461
+err_key:
462
+ free_mr_key(hr_dev, mr);
463
+err_free:
464
+ kfree(mr);
465
+ return ERR_PTR(ret);
466
+}
467
+
468
+static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
469
+{
470
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
471
+
472
+ if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
473
+ mr->page_list[mr->npages++] = addr;
474
+ return 0;
475
+ }
476
+
477
+ return -ENOBUFS;
478
+}
479
+
480
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
481
+ unsigned int *sg_offset)
482
+{
483
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
484
+ struct ib_device *ibdev = &hr_dev->ib_dev;
485
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
486
+ struct hns_roce_mtr *mtr = &mr->pbl_mtr;
487
+ int ret = 0;
488
+
489
+ mr->npages = 0;
490
+ mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
491
+ sizeof(dma_addr_t), GFP_KERNEL);
492
+ if (!mr->page_list)
493
+ return ret;
494
+
495
+ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
496
+ if (ret < 1) {
497
+ ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
498
+ mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
499
+ goto err_page_list;
500
+ }
501
+
502
+ mtr->hem_cfg.region[0].offset = 0;
503
+ mtr->hem_cfg.region[0].count = mr->npages;
504
+ mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num;
505
+ mtr->hem_cfg.region_count = 1;
506
+ ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
507
+ if (ret) {
508
+ ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
509
+ ret = 0;
510
+ } else {
511
+ mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size);
512
+ ret = mr->npages;
513
+ }
514
+
515
+err_page_list:
516
+ kvfree(mr->page_list);
517
+ mr->page_list = NULL;
518
+
519
+ return ret;
520
+}
521
+
522
+static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
523
+ struct hns_roce_mw *mw)
524
+{
525
+ struct device *dev = hr_dev->dev;
526
+ int ret;
527
+
528
+ if (mw->enabled) {
529
+ ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
530
+ key_to_hw_index(mw->rkey) &
531
+ (hr_dev->caps.num_mtpts - 1));
532
+ if (ret)
533
+ dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);
534
+
535
+ hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
536
+ key_to_hw_index(mw->rkey));
537
+ }
538
+
539
+ hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
540
+ key_to_hw_index(mw->rkey), BITMAP_NO_RR);
541
+}
542
+
543
+static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
544
+ struct hns_roce_mw *mw)
545
+{
546
+ struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
547
+ struct hns_roce_cmd_mailbox *mailbox;
548
+ struct device *dev = hr_dev->dev;
549
+ unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
550
+ int ret;
551
+
552
+ /* prepare HEM entry memory */
553
+ ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
554
+ if (ret)
555
+ return ret;
556
+
557
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
558
+ if (IS_ERR(mailbox)) {
559
+ ret = PTR_ERR(mailbox);
560
+ goto err_table;
561
+ }
562
+
563
+ ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
564
+ if (ret) {
565
+ dev_err(dev, "MW write mtpt fail!\n");
566
+ goto err_page;
567
+ }
568
+
569
+ ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
570
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
571
+ if (ret) {
572
+ dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
573
+ goto err_page;
574
+ }
575
+
576
+ mw->enabled = 1;
577
+
578
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
579
+
580
+ return 0;
581
+
582
+err_page:
583
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
584
+
585
+err_table:
586
+ hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
587
+
588
+ return ret;
589
+}
590
+
591
+int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
592
+{
593
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
594
+ struct hns_roce_mw *mw = to_hr_mw(ibmw);
595
+ unsigned long index = 0;
596
+ int ret;
597
+
598
+ /* Allocate a key for mw from bitmap */
599
+ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
600
+ if (ret)
601
+ return ret;
602
+
603
+ mw->rkey = hw_index_to_key(index);
604
+
605
+ ibmw->rkey = mw->rkey;
606
+ mw->pdn = to_hr_pd(ibmw->pd)->pdn;
607
+ mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
608
+ mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
609
+ mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
610
+
611
+ ret = hns_roce_mw_enable(hr_dev, mw);
612
+ if (ret)
613
+ goto err_mw;
614
+
615
+ return 0;
616
+
617
+err_mw:
618
+ hns_roce_mw_free(hr_dev, mw);
619
+ return ret;
620
+}
621
+
622
+int hns_roce_dealloc_mw(struct ib_mw *ibmw)
623
+{
624
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
625
+ struct hns_roce_mw *mw = to_hr_mw(ibmw);
626
+
627
+ hns_roce_mw_free(hr_dev, mw);
628
+ return 0;
629
+}
630
+
631
+static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
632
+ dma_addr_t *pages, struct hns_roce_buf_region *region)
633
+{
634
+ __le64 *mtts;
635
+ int offset;
636
+ int count;
637
+ int npage;
638
+ u64 addr;
639
+ int end;
640
+ int i;
641
+
642
+ /* if hopnum is 0, the buffer cannot store BAs, so skip writing the mtt */
643
+ if (!region->hopnum)
644
+ return 0;
645
+
646
+ offset = region->offset;
647
+ end = offset + region->count;
648
+ npage = 0;
649
+ while (offset < end) {
650
+ mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
651
+ offset, &count, NULL);
652
+ if (!mtts)
653
+ return -ENOBUFS;
654
+
655
+ for (i = 0; i < count; i++) {
656
+ if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
657
+ addr = to_hr_hw_page_addr(pages[npage]);
658
+ else
659
+ addr = pages[npage];
660
+
661
+ mtts[i] = cpu_to_le64(addr);
662
+ npage++;
663
+ }
664
+ offset += count;
665
+ }
666
+
667
+ return 0;
668
+}
669
+
670
+static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
671
+{
672
+ int i;
673
+
674
+ for (i = 0; i < attr->region_count; i++)
675
+ if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
676
+ attr->region[i].hopnum > 0)
677
+ return true;
678
+
679
+ /* Because the mtr has only one root base address, a hopnum of 0 means
680
+ * the root base address equals the first buffer address, so all allocated
681
+ * memory must be in a contiguous space accessed in direct mode.
682
+ */
683
+ return false;
684
+}
685
+
686
+static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
687
+{
688
+ size_t size = 0;
689
+ int i;
690
+
691
+ for (i = 0; i < attr->region_count; i++)
692
+ size += attr->region[i].size;
693
+
694
+ return size;
695
+}
696
+
697
+static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size,
698
+ unsigned int page_shift)
699
+{
700
+ if (is_direct)
701
+ return ALIGN(alloc_size, 1 << page_shift);
702
+ else
703
+ return HNS_HW_DIRECT_PAGE_COUNT << page_shift;
704
+}
705
+
706
+/*
707
+ * Check that the given pages are in a contiguous address space.
708
+ * Returns 0 on success, or the index of the first non-contiguous page.
709
+ */
710
+static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
711
+ unsigned int page_shift)
712
+{
713
+ size_t page_size = 1 << page_shift;
714
+ int i;
715
+
716
+ for (i = 1; i < page_count; i++)
717
+ if (pages[i] - pages[i - 1] != page_size)
718
+ return i;
719
+
720
+ return 0;
721
+}
722
+
723
+static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
724
+{
725
+ /* release user buffers */
726
+ if (mtr->umem) {
727
+ ib_umem_release(mtr->umem);
728
+ mtr->umem = NULL;
729
+ }
730
+
731
+ /* release kernel buffers */
732
+ if (mtr->kmem) {
733
+ hns_roce_buf_free(hr_dev, mtr->kmem);
734
+ kfree(mtr->kmem);
735
+ mtr->kmem = NULL;
736
+ }
737
+}
738
+
739
+static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
740
+ struct hns_roce_buf_attr *buf_attr, bool is_direct,
741
+ struct ib_udata *udata, unsigned long user_addr)
742
+{
743
+ struct ib_device *ibdev = &hr_dev->ib_dev;
744
+ unsigned int best_pg_shift;
745
+ int all_pg_count = 0;
746
+ size_t direct_size;
747
+ size_t total_size;
748
+ int ret;
749
+
750
+ total_size = mtr_bufs_size(buf_attr);
751
+ if (total_size < 1) {
752
+ ibdev_err(ibdev, "Failed to check mtr size\n");
753
+ return -EINVAL;
754
+ }
755
+
756
+ if (udata) {
757
+ unsigned long pgsz_bitmap;
758
+ unsigned long page_size;
759
+
760
+ mtr->kmem = NULL;
761
+ mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
762
+ buf_attr->user_access);
763
+ if (IS_ERR_OR_NULL(mtr->umem)) {
764
+ ibdev_err(ibdev, "Failed to get umem, ret %ld\n",
765
+ PTR_ERR(mtr->umem));
766
+ return -ENOMEM;
767
+ }
768
+ if (buf_attr->fixed_page)
769
+ pgsz_bitmap = 1 << buf_attr->page_shift;
770
+ else
771
+ pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT);
772
+
773
+ page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap,
774
+ user_addr);
775
+ if (!page_size)
776
+ return -EINVAL;
777
+ best_pg_shift = order_base_2(page_size);
778
+ all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size);
779
+ ret = 0;
780
+ } else {
781
+ mtr->umem = NULL;
782
+ mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL);
783
+ if (!mtr->kmem) {
784
+ ibdev_err(ibdev, "Failed to alloc kmem\n");
785
+ return -ENOMEM;
786
+ }
787
+ direct_size = mtr_kmem_direct_size(is_direct, total_size,
788
+ buf_attr->page_shift);
789
+ ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size,
790
+ mtr->kmem, buf_attr->page_shift);
791
+ if (ret) {
792
+ ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret);
793
+ goto err_alloc_mem;
794
+ }
795
+ best_pg_shift = buf_attr->page_shift;
796
+ all_pg_count = mtr->kmem->npages;
797
+ }
798
+
799
+ /* must not be smaller than the minimum hardware page shift */
800
+ if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) {
801
+ ret = -EINVAL;
802
+ ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n",
803
+ best_pg_shift, all_pg_count);
804
+ goto err_alloc_mem;
805
+ }
806
+
807
+ mtr->hem_cfg.buf_pg_shift = best_pg_shift;
808
+ mtr->hem_cfg.buf_pg_count = all_pg_count;
809
+
810
+ return 0;
811
+err_alloc_mem:
812
+ mtr_free_bufs(hr_dev, mtr);
813
+ return ret;
814
+}
815
+
816
+static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
817
+ dma_addr_t *pages, int count, unsigned int page_shift)
818
+{
819
+ struct ib_device *ibdev = &hr_dev->ib_dev;
820
+ int npage;
821
+ int err;
822
+
823
+ if (mtr->umem)
824
+ npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0,
825
+ mtr->umem, page_shift);
826
+ else
827
+ npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0,
828
+ mtr->kmem);
829
+
830
+ if (mtr->hem_cfg.is_direct && npage > 1) {
831
+ err = mtr_check_direct_pages(pages, npage, page_shift);
832
+ if (err) {
833
+ ibdev_err(ibdev, "Failed to check %s direct page-%d\n",
834
+ mtr->umem ? "user" : "kernel", err);
835
+ npage = err;
836
+ }
837
+ }
838
+
839
+ return npage;
840
+}
841
+
842
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
843
+ dma_addr_t *pages, int page_cnt)
844
+{
845
+ struct ib_device *ibdev = &hr_dev->ib_dev;
846
+ struct hns_roce_buf_region *r;
847
+ int err;
848
+ int i;
849
+
850
+ /*
851
+ * Only use the first page address as root ba when hopnum is 0, this
852
+ * is because the addresses of all pages are consecutive in this case.
853
+ */
854
+ if (mtr->hem_cfg.is_direct) {
855
+ mtr->hem_cfg.root_ba = pages[0];
856
+ return 0;
857
+ }
858
+
859
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
860
+ r = &mtr->hem_cfg.region[i];
861
+ if (r->offset + r->count > page_cnt) {
862
+ err = -EINVAL;
863
+ ibdev_err(ibdev,
864
+ "failed to check mtr%u end %u + %u, max %u.\n",
865
+ i, r->offset, r->count, page_cnt);
866
+ return err;
867
+ }
868
+
869
+ err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r);
870
+ if (err) {
871
+ ibdev_err(ibdev,
872
+ "failed to map mtr%u offset %u, ret = %d.\n",
873
+ i, r->offset, err);
874
+ return err;
875
+ }
876
+ }
877
+
878
+ return 0;
879
+}
880
+
881
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
882
+ int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
883
+{
884
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
885
+ int start_index;
886
+ int mtt_count;
887
+ int total = 0;
888
+ __le64 *mtts;
889
+ int npage;
890
+ u64 addr;
891
+ int left;
892
+
893
+ if (!mtt_buf || mtt_max < 1)
894
+ goto done;
895
+
896
+ /* no mtt memory in direct mode, so just return the buffer address */
897
+ if (cfg->is_direct) {
898
+ start_index = offset >> HNS_HW_PAGE_SHIFT;
899
+ for (mtt_count = 0; mtt_count < cfg->region_count &&
900
+ total < mtt_max; mtt_count++) {
901
+ npage = cfg->region[mtt_count].offset;
902
+ if (npage < start_index)
903
+ continue;
904
+
905
+ addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
906
+ if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
907
+ mtt_buf[total] = to_hr_hw_page_addr(addr);
908
+ else
909
+ mtt_buf[total] = addr;
910
+
911
+ total++;
912
+ }
913
+
914
+ goto done;
915
+ }
916
+
917
+ start_index = offset >> cfg->buf_pg_shift;
918
+ left = mtt_max;
919
+ while (left > 0) {
920
+ mtt_count = 0;
921
+ mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
922
+ start_index + total,
923
+ &mtt_count, NULL);
924
+ if (!mtts || !mtt_count)
925
+ goto done;
926
+
927
+ npage = min(mtt_count, left);
928
+ left -= npage;
929
+ for (mtt_count = 0; mtt_count < npage; mtt_count++)
930
+ mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
931
+ }
932
+
933
+done:
934
+ if (base_addr)
935
+ *base_addr = cfg->root_ba;
936
+
937
+ return total;
938
+}
939
+
940
+static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
941
+ struct hns_roce_buf_attr *attr,
942
+ struct hns_roce_hem_cfg *cfg,
943
+ unsigned int *buf_page_shift)
944
+{
945
+ struct hns_roce_buf_region *r;
946
+ unsigned int page_shift;
947
+ int page_cnt = 0;
948
+ size_t buf_size;
949
+ int region_cnt;
950
+
951
+ if (cfg->is_direct) {
952
+ buf_size = cfg->buf_pg_count << cfg->buf_pg_shift;
953
+ page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE);
954
+ /*
955
+ * When the HEM buffer uses level-0 addressing, the page size equals
955
+ * the buffer size, and the page size = 4K * 2^N.
957
+ */
958
+ cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt);
959
+ if (attr->region_count > 1) {
960
+ cfg->buf_pg_count = page_cnt;
961
+ page_shift = HNS_HW_PAGE_SHIFT;
962
+ } else {
963
+ cfg->buf_pg_count = 1;
964
+ page_shift = cfg->buf_pg_shift;
965
+ if (buf_size != 1 << page_shift) {
966
+ ibdev_err(&hr_dev->ib_dev,
967
+ "failed to check direct size %zu shift %d.\n",
968
+ buf_size, page_shift);
969
+ return -EINVAL;
970
+ }
971
+ }
972
+ } else {
973
+ page_shift = cfg->buf_pg_shift;
974
+ }
975
+
976
+ /* convert buffer size to page index and page count */
977
+ for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count &&
978
+ region_cnt < attr->region_count &&
979
+ region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
980
+ r = &cfg->region[region_cnt];
981
+ r->offset = page_cnt;
982
+ buf_size = hr_hw_page_align(attr->region[region_cnt].size);
983
+ r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
984
+ page_cnt += r->count;
985
+ r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
986
+ r->count);
987
+ }
988
+
989
+ if (region_cnt < 1) {
990
+ ibdev_err(&hr_dev->ib_dev,
991
+ "failed to check mtr region count, pages = %d.\n",
992
+ cfg->buf_pg_count);
993
+ return -ENOBUFS;
994
+ }
995
+
996
+ cfg->region_count = region_cnt;
997
+ *buf_page_shift = page_shift;
998
+
999
+ return page_cnt;
1000
+}
1001
+
1002
+/**
1003
+ * hns_roce_mtr_create - Create hns memory translate region.
1004
+ *
1005
+ * @mtr: memory translate region
1006
+ * @buf_attr: buffer attribute for creating mtr
1007
+ * @ba_page_shift: page shift for multi-hop base address table
1008
+ * @udata: user space context, if it's NULL, means kernel space
1009
+ * @user_addr: userspace virtual address to start at
1010
+ */
1011
+int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
1012
+ struct hns_roce_buf_attr *buf_attr,
1013
+ unsigned int ba_page_shift, struct ib_udata *udata,
1014
+ unsigned long user_addr)
1015
+{
1016
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
1017
+ struct ib_device *ibdev = &hr_dev->ib_dev;
1018
+ unsigned int buf_page_shift = 0;
1019
+ dma_addr_t *pages = NULL;
1020
+ int all_pg_cnt;
1021
+ int get_pg_cnt;
1022
+ int ret = 0;
1023
+
1024
+ /* if mtt is disabled, all pages must be in a contiguous address range */
1025
+ cfg->is_direct = !mtr_has_mtt(buf_attr);
1026
+
1027
+ /* if the buffer only needs mtt, just init the hem cfg */
1028
+ if (buf_attr->mtt_only) {
1029
+ cfg->buf_pg_shift = buf_attr->page_shift;
1030
+ cfg->buf_pg_count = mtr_bufs_size(buf_attr) >>
1031
+ buf_attr->page_shift;
1032
+ mtr->umem = NULL;
1033
+ mtr->kmem = NULL;
1034
+ } else {
1035
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct,
1036
+ udata, user_addr);
1037
+ if (ret) {
1038
+ ibdev_err(ibdev,
1039
+ "failed to alloc mtr bufs, ret = %d.\n", ret);
1040
+ return ret;
1041
+ }
1042
+ }
1043
+
1044
+ all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift);
1045
+ if (all_pg_cnt < 1) {
1046
+ ret = -ENOBUFS;
1047
+ ibdev_err(ibdev, "failed to init mtr buf cfg.\n");
1048
+ goto err_alloc_bufs;
1049
+ }
1050
+
1051
+ hns_roce_hem_list_init(&mtr->hem_list);
1052
+ if (!cfg->is_direct) {
1053
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
1054
+ cfg->region, cfg->region_count,
1055
+ ba_page_shift);
1056
+ if (ret) {
1057
+ ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n",
1058
+ ret);
1059
+ goto err_alloc_bufs;
1060
+ }
1061
+ cfg->root_ba = mtr->hem_list.root_ba;
1062
+ cfg->ba_pg_shift = ba_page_shift;
1063
+ } else {
1064
+ cfg->ba_pg_shift = cfg->buf_pg_shift;
1065
+ }
1066
+
1067
+ /* no buffer to map */
1068
+ if (buf_attr->mtt_only)
1069
+ return 0;
1070
+
1071
+ /* alloc a tmp array to store buffer's dma address */
1072
+ pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL);
1073
+ if (!pages) {
1074
+ ret = -ENOMEM;
1075
+ ibdev_err(ibdev, "failed to alloc mtr page list %d.\n",
1076
+ all_pg_cnt);
1077
+ goto err_alloc_hem_list;
1078
+ }
1079
+
1080
+ get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt,
1081
+ buf_page_shift);
1082
+ if (get_pg_cnt != all_pg_cnt) {
1083
+ ibdev_err(ibdev, "failed to get mtr page %d != %d.\n",
1084
+ get_pg_cnt, all_pg_cnt);
1085
+ ret = -ENOBUFS;
1086
+ goto err_alloc_page_list;
1087
+ }
1088
+
1089
+ /* write buffer's dma address to BA table */
1090
+ ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
1091
+ if (ret) {
1092
+ ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
1093
+ goto err_alloc_page_list;
1094
+ }
1095
+
1096
+ /* drop tmp array */
1097
+ kvfree(pages);
1098
+ return 0;
1099
+err_alloc_page_list:
1100
+ kvfree(pages);
1101
+err_alloc_hem_list:
1102
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
1103
+err_alloc_bufs:
1104
+ mtr_free_bufs(hr_dev, mtr);
1105
+ return ret;
1106
+}
1107
+
1108
+void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
1109
+{
1110
+ /* release multi-hop addressing resource */
1111
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
1112
+
1113
+ /* free buffers */
1114
+ mtr_free_bufs(hr_dev, mtr);
1115
+}