2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/rds/ib_frmr.c
@@ -32,6 +32,24 @@
 
 #include "ib_mr.h"
 
+static inline void
+rds_transition_frwr_state(struct rds_ib_mr *ibmr,
+			  enum rds_ib_fr_state old_state,
+			  enum rds_ib_fr_state new_state)
+{
+	if (cmpxchg(&ibmr->u.frmr.fr_state,
+		    old_state, new_state) == old_state &&
+	    old_state == FRMR_IS_INUSE) {
+		/* enforce order of ibmr->u.frmr.fr_state update
+		 * before decrementing i_fastreg_inuse_count
+		 */
+		smp_mb__before_atomic();
+		atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
+		if (waitqueue_active(&rds_ib_ring_empty_wait))
+			wake_up(&rds_ib_ring_empty_wait);
+	}
+}
+
 static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
 					   int npages)
 {
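
A note on the helper added above: cmpxchg() returns the value actually found in memory, so comparing it against old_state tells the caller whether its transition won the race. Gating the decrement on that outcome guarantees i_fastreg_inuse_count is dropped exactly once per exit from FRMR_IS_INUSE, no matter how many contexts race on the same MR. A minimal userspace analogue of the release side, using C11 atomics (illustrative names only, not kernel APIs):

    #include <stdatomic.h>
    #include <stdbool.h>

    #define FR_FREE   0
    #define FR_INUSE  1
    #define FR_STALE  2

    /* Returns true only for the single caller whose transition won. */
    static bool transition(_Atomic int *state, int old, int new,
                           atomic_int *inuse_count)
    {
        int expected = old;

        /* atomic_compare_exchange_strong() stands in for cmpxchg();
         * its default seq_cst ordering stands in for the kernel barriers.
         */
        if (!atomic_compare_exchange_strong(state, &expected, new))
            return false;                   /* another caller got there first */
        if (old == FR_INUSE)
            atomic_fetch_sub(inuse_count, 1);   /* exactly one decrement */
        return true;
    }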
@@ -58,7 +76,7 @@
 
 	frmr = &ibmr->u.frmr;
 	frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
-			       pool->fmr_attr.max_pages);
+			       pool->max_pages);
 	if (IS_ERR(frmr->mr)) {
 		pr_warn("RDS/IB: %s failed to allocate MR", __func__);
 		err = PTR_ERR(frmr->mr);
@@ -75,6 +93,8 @@
 	pool->max_items_soft = pool->max_items;
 
 	frmr->fr_state = FRMR_IS_FREE;
+	init_waitqueue_head(&frmr->fr_inv_done);
+	init_waitqueue_head(&frmr->fr_reg_done);
 	return ibmr;
 
 out_no_cigar:
@@ -116,13 +136,19 @@
 	if (unlikely(ret != ibmr->sg_dma_len))
 		return ret < 0 ? ret : -EINVAL;
 
+	if (cmpxchg(&frmr->fr_state,
+		    FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
+		return -EBUSY;
+
+	atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
+
 	/* Perform a WR for the fast_reg_mr. Each individual page
 	 * in the sg list is added to the fast reg page list and placed
 	 * inside the fast_reg_mr WR. The key used is a rolling 8bit
 	 * counter, which should guarantee uniqueness.
 	 */
 	ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
-	frmr->fr_state = FRMR_IS_INUSE;
+	frmr->fr_reg = true;
 
 	memset(&reg_wr, 0, sizeof(reg_wr));
 	reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
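
This hunk is the claim side of the protocol whose release side is rds_transition_frwr_state(): a compare-and-swap from FRMR_IS_FREE to FRMR_IS_INUSE elects exactly one owner, and only the winner takes the i_fastreg_inuse_count reference, so the decrement in the helper balances it precisely once. A hedged C11 sketch of that claim step (names are illustrative):

    #include <stdatomic.h>
    #include <errno.h>

    #define FR_FREE   0
    #define FR_INUSE  1

    /* Only the CAS winner takes a reference; everyone else backs off,
     * mirroring the -EBUSY return in rds_ib_post_reg_frmr().
     */
    static int claim(_Atomic int *state, atomic_int *inuse_count)
    {
        int expected = FR_FREE;

        if (!atomic_compare_exchange_strong(state, &expected, FR_INUSE))
            return -EBUSY;
        atomic_fetch_add(inuse_count, 1);
        return 0;
    }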
@@ -138,12 +164,23 @@
 	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
 	if (unlikely(ret)) {
 		/* Failure here can be because of -ENOMEM as well */
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
+
 		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		if (printk_ratelimit())
 			pr_warn("RDS/IB: %s returned error(%d)\n",
 				__func__, ret);
+		goto out;
 	}
+
+	/* Wait for the registration to complete in order to prevent an
+	 * invalid access error resulting from the memory region being
+	 * accessed while its registration is still pending.
+	 */
+	wait_event(frmr->fr_reg_done, !frmr->fr_reg);
+
+out:
+
 	return ret;
 }
 
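
The wait_event()/wake_up() pairing introduced here (fr_reg is set before posting the work request and cleared by the completion handler further down, which then wakes fr_reg_done) is the classic flag-plus-waitqueue completion idiom. A rough userspace analogue with POSIX threads, assuming a reg_pending flag guarded by a mutex and condition variable (illustrative, not the kernel mechanism):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  reg_done = PTHREAD_COND_INITIALIZER;
    static bool reg_pending;                 /* plays the role of frmr->fr_reg */

    static void wait_for_registration(void)  /* ~ wait_event(fr_reg_done, !fr_reg) */
    {
        pthread_mutex_lock(&lock);
        while (reg_pending)
            pthread_cond_wait(&reg_done, &lock);
        pthread_mutex_unlock(&lock);
    }

    static void registration_completed(void) /* ~ the CQ completion handler */
    {
        pthread_mutex_lock(&lock);
        reg_pending = false;
        pthread_cond_broadcast(&reg_done);    /* ~ wake_up(&frmr->fr_reg_done) */
        pthread_mutex_unlock(&lock);
    }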
@@ -181,8 +218,8 @@
 
 	ret = -EINVAL;
 	for (i = 0; i < ibmr->sg_dma_len; ++i) {
-		unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
-		u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);
+		unsigned int dma_len = sg_dma_len(&ibmr->sg[i]);
+		u64 dma_addr = sg_dma_address(&ibmr->sg[i]);
 
 		frmr->sg_byte_len += dma_len;
 		if (dma_addr & ~PAGE_MASK) {
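
Context for this hunk: by the time they were removed from the RDMA core, ib_sg_dma_len() and ib_sg_dma_address() had become thin wrappers that ignored their device argument, so calling the scatterlist accessors directly is a behavior-neutral cleanup. The removed helpers looked approximately like this (reconstructed from memory; check the ib_verbs.h of that era for the exact definitions):

    /* approximate shape of the removed ib_verbs.h helpers */
    static inline u64 ib_sg_dma_address(struct ib_device *dev,
                                        struct scatterlist *sg)
    {
        return sg_dma_address(sg);
    }

    static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
                                             struct scatterlist *sg)
    {
        return sg_dma_len(sg);
    }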
@@ -203,7 +240,7 @@
 		}
 		frmr->dma_npages += len >> PAGE_SHIFT;
 
-		if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) {
+		if (frmr->dma_npages > ibmr->pool->max_pages) {
 			ret = -EMSGSIZE;
 			goto out_unmap;
 		}
@@ -239,8 +276,8 @@
 	if (frmr->fr_state != FRMR_IS_INUSE)
 		goto out;
 
-	while (atomic_dec_return(&ibmr->ic->i_fastunreg_wrs) <= 0) {
-		atomic_inc(&ibmr->ic->i_fastunreg_wrs);
+	while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
+		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		cpu_relax();
 	}
 
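
The loop this hunk retargets (from the now-merged i_fastunreg_wrs counter to i_fastreg_wrs) implements a simple credit counter: atomic_dec_return() tries to take one work-request credit, a non-positive result means the queue is exhausted, so the credit is given back and the caller spins. A userspace sketch of the same discipline in C11 (illustrative; the kernel version also calls cpu_relax() in the retry path):

    #include <stdatomic.h>

    /* Spin until a send-queue credit is successfully taken. */
    static void take_credit(atomic_int *credits)
    {
        /* atomic_fetch_sub() returns the old value; subtracting one more
         * gives the kernel's atomic_dec_return() (new-value) semantics.
         */
        while (atomic_fetch_sub(credits, 1) - 1 <= 0) {
            atomic_fetch_add(credits, 1);  /* exhausted: give it back */
            /* cpu_relax() would go here in kernel code */
        }
    }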
@@ -255,12 +292,29 @@
 
 	ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
 	if (unlikely(ret)) {
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
 		frmr->fr_inv = false;
-		atomic_inc(&ibmr->ic->i_fastunreg_wrs);
+		/* enforce order of frmr->fr_inv update
+		 * before incrementing i_fastreg_wrs
+		 */
+		smp_mb__before_atomic();
+		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
 		goto out;
 	}
+
+	/* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
+	 * 1) avoid a silly bouncing between "clean_list" and "drop_list"
+	 *    triggered by function "rds_ib_reg_frmr" as it releases frmr
+	 *    regions whose state is not "FRMR_IS_FREE" right away.
+	 * 2) prevent an invalid access error in a race
+	 *    from a pending "IB_WR_LOCAL_INV" operation
+	 *    with a teardown ("dma_unmap_sg", "put_page")
+	 *    and de-registration ("ib_dereg_mr") of the corresponding
+	 *    memory region.
+	 */
+	wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
+
 out:
 	return ret;
 }
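
On the smp_mb__before_atomic() calls this patch sprinkles in: they make the preceding plain stores (fr_state, fr_inv, fr_reg) visible to other CPUs before the atomic counter increment that lets waiters proceed, since the kernel's atomic_inc() by itself carries no ordering. The closest C11 analogue is a release fence ahead of a relaxed increment; a sketch (an illustrative mapping under that assumption, not an exact semantic equivalence, since smp_mb() is a full barrier):

    #include <stdatomic.h>
    #include <stdbool.h>

    static bool fr_inv;             /* plain flag, like frmr->fr_inv */
    static atomic_int fastreg_wrs;  /* credit counter, like ic->i_fastreg_wrs */

    static void publish_then_release_credit(void)
    {
        fr_inv = false;                             /* plain store */
        atomic_thread_fence(memory_order_release);  /* ~ smp_mb__before_atomic() */
        atomic_fetch_add_explicit(&fastreg_wrs, 1,
                                  memory_order_relaxed);
    }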
@@ -271,7 +325,7 @@
 	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
 
 	if (wc->status != IB_WC_SUCCESS) {
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
 		if (rds_conn_up(ic->conn))
 			rds_ib_conn_error(ic->conn,
 					  "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
@@ -283,12 +337,21 @@
 	}
 
 	if (frmr->fr_inv) {
-		frmr->fr_state = FRMR_IS_FREE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
 		frmr->fr_inv = false;
-		atomic_inc(&ic->i_fastreg_wrs);
-	} else {
-		atomic_inc(&ic->i_fastunreg_wrs);
+		wake_up(&frmr->fr_inv_done);
 	}
+
+	if (frmr->fr_reg) {
+		frmr->fr_reg = false;
+		wake_up(&frmr->fr_reg_done);
+	}
+
+	/* enforce order of frmr->{fr_reg,fr_inv} update
+	 * before incrementing i_fastreg_wrs
+	 */
+	smp_mb__before_atomic();
+	atomic_inc(&ic->i_fastreg_wrs);
 }
 
 void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
@@ -296,14 +359,18 @@
 {
 	struct rds_ib_mr *ibmr, *next;
 	struct rds_ib_frmr *frmr;
-	int ret = 0;
+	int ret = 0, ret2;
 	unsigned int freed = *nfreed;
 
 	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
 	list_for_each_entry(ibmr, list, unmap_list) {
-		if (ibmr->sg_dma_len)
-			ret |= rds_ib_post_inv(ibmr);
+		if (ibmr->sg_dma_len) {
+			ret2 = rds_ib_post_inv(ibmr);
+			if (ret2 && !ret)
+				ret = ret2;
+		}
 	}
+
 	if (ret)
 		pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
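
The change from ret |= ... to the ret2 dance in this last hunk is more than style: error codes are negative integers, and bitwise OR of two different negatives produces a third, unrelated value rather than either errno. A tiny standalone demonstration (hypothetical values):

    #include <stdio.h>

    int main(void)
    {
        int ret = 0;

        ret |= -16;  /* -EBUSY  */
        ret |= -22;  /* -EINVAL */
        /* prints -6 (-ENXIO): the OR of two errnos is a third,
         * unrelated errno -- hence keeping only the first error above
         */
        printf("%d\n", ret);
        return 0;
    }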