@@ -32,6 +32,24 @@
 
 #include "ib_mr.h"
 
+static inline void
+rds_transition_frwr_state(struct rds_ib_mr *ibmr,
+			  enum rds_ib_fr_state old_state,
+			  enum rds_ib_fr_state new_state)
+{
+	if (cmpxchg(&ibmr->u.frmr.fr_state,
+		    old_state, new_state) == old_state &&
+	    old_state == FRMR_IS_INUSE) {
+		/* enforce order of ibmr->u.frmr.fr_state update
+		 * before decrementing i_fastreg_inuse_count
+		 */
+		smp_mb__before_atomic();
+		atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
+		if (waitqueue_active(&rds_ib_ring_empty_wait))
+			wake_up(&rds_ib_ring_empty_wait);
+	}
+}
+
 static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
 					   int npages)
 {
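The helper above makes the state transition and its side effect atomic: only the caller whose cmpxchg() succeeds decrements i_fastreg_inuse_count, and the barrier orders the state update before the decrement. Below is a minimal user-space sketch of the same pattern, using C11 atomics in place of the kernel's cmpxchg() and smp_mb__before_atomic(); the struct and names are illustrative, not the kernel's.

/* sketch.c: illustrative only; FRMR names reused for readability.
 * Build: cc -std=c11 sketch.c
 */
#include <stdatomic.h>
#include <stdio.h>

enum fr_state { FRMR_IS_FREE, FRMR_IS_INUSE, FRMR_IS_STALE };

struct mr_demo {
	_Atomic enum fr_state fr_state;
	atomic_int inuse_count;	/* stands in for i_fastreg_inuse_count */
};

/* Returns 1 only for the caller whose CAS succeeds, so the decrement
 * (and any wakeup that would follow it) runs exactly once, just as in
 * rds_transition_frwr_state() above. The seq_cst CAS already orders
 * the state update before the decrement, which is the job
 * smp_mb__before_atomic() does in the kernel version.
 */
static int transition(struct mr_demo *m, enum fr_state old_state,
		      enum fr_state new_state)
{
	enum fr_state expected = old_state;

	if (!atomic_compare_exchange_strong(&m->fr_state, &expected, new_state))
		return 0;
	if (old_state == FRMR_IS_INUSE)
		atomic_fetch_sub(&m->inuse_count, 1);
	return 1;
}

int main(void)
{
	struct mr_demo m = { FRMR_IS_INUSE, 1 };

	/* Two racing INUSE -> FREE transitions: only one can win. */
	printf("first  caller wins: %d\n",
	       transition(&m, FRMR_IS_INUSE, FRMR_IS_FREE));
	printf("second caller wins: %d\n",
	       transition(&m, FRMR_IS_INUSE, FRMR_IS_FREE));
	printf("inuse_count: %d\n", atomic_load(&m.inuse_count));
	return 0;
}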
@@ -58,7 +76,7 @@
 
 	frmr = &ibmr->u.frmr;
 	frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
-			       pool->fmr_attr.max_pages);
+			       pool->max_pages);
 	if (IS_ERR(frmr->mr)) {
 		pr_warn("RDS/IB: %s failed to allocate MR", __func__);
 		err = PTR_ERR(frmr->mr);
@@ -75,6 +93,8 @@
 	pool->max_items_soft = pool->max_items;
 
 	frmr->fr_state = FRMR_IS_FREE;
+	init_waitqueue_head(&frmr->fr_inv_done);
+	init_waitqueue_head(&frmr->fr_reg_done);
 	return ibmr;
 
 out_no_cigar:
@@ -116,13 +136,19 @@
 	if (unlikely(ret != ibmr->sg_dma_len))
 		return ret < 0 ? ret : -EINVAL;
 
+	if (cmpxchg(&frmr->fr_state,
+		    FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
+		return -EBUSY;
+
+	atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
+
 	/* Perform a WR for the fast_reg_mr. Each individual page
 	 * in the sg list is added to the fast reg page list and placed
 	 * inside the fast_reg_mr WR. The key used is a rolling 8bit
 	 * counter, which should guarantee uniqueness.
 	 */
 	ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
-	frmr->fr_state = FRMR_IS_INUSE;
+	frmr->fr_reg = true;
 
 	memset(&reg_wr, 0, sizeof(reg_wr));
 	reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
@@ -138,12 +164,23 @@
 	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
 	if (unlikely(ret)) {
 		/* Failure here can be because of -ENOMEM as well */
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
+
 		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		if (printk_ratelimit())
 			pr_warn("RDS/IB: %s returned error(%d)\n",
 				__func__, ret);
+		goto out;
 	}
+
+	/* Wait for the registration to complete in order to prevent an
+	 * invalid access error, which could result from the memory
+	 * region being accessed while its registration is still pending.
+	 */
+	wait_event(frmr->fr_reg_done, !frmr->fr_reg);
+
+out:
+
 	return ret;
 }
 
@@ -181,8 +218,8 @@
 
 	ret = -EINVAL;
 	for (i = 0; i < ibmr->sg_dma_len; ++i) {
-		unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
-		u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);
+		unsigned int dma_len = sg_dma_len(&ibmr->sg[i]);
+		u64 dma_addr = sg_dma_address(&ibmr->sg[i]);
 
 		frmr->sg_byte_len += dma_len;
 		if (dma_addr & ~PAGE_MASK) {
@@ -203,7 +240,7 @@
 	}
 	frmr->dma_npages += len >> PAGE_SHIFT;
 
-	if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) {
+	if (frmr->dma_npages > ibmr->pool->max_pages) {
 		ret = -EMSGSIZE;
 		goto out_unmap;
 	}
@@ -239,8 +276,8 @@
 	if (frmr->fr_state != FRMR_IS_INUSE)
 		goto out;
 
-	while (atomic_dec_return(&ibmr->ic->i_fastunreg_wrs) <= 0) {
-		atomic_inc(&ibmr->ic->i_fastunreg_wrs);
+	while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
+		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		cpu_relax();
 	}
 
@@ -255,12 +292,29 @@
 
 	ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
 	if (unlikely(ret)) {
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
 		frmr->fr_inv = false;
-		atomic_inc(&ibmr->ic->i_fastunreg_wrs);
+		/* enforce order of frmr->fr_inv update
+		 * before incrementing i_fastreg_wrs
+		 */
+		smp_mb__before_atomic();
+		atomic_inc(&ibmr->ic->i_fastreg_wrs);
 		pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
 		goto out;
 	}
+
+	/* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
+	 * 1) avoid a silly bouncing between "clean_list" and "drop_list"
+	 *    triggered by function "rds_ib_reg_frmr", as it releases frmr
+	 *    regions whose state is not "FRMR_IS_FREE" right away, and
+	 * 2) prevent an invalid access error in a race
+	 *    between a pending "IB_WR_LOCAL_INV" operation
+	 *    and a teardown ("dma_unmap_sg", "put_page")
+	 *    or de-registration ("ib_dereg_mr") of the corresponding
+	 *    memory region.
+	 */
+	wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
+
 out:
 	return ret;
 }
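This wait_event(), like the fr_reg_done wait in the registration path, pairs with a wake_up() in the completion handler further down. Below is a rough user-space analogue of that sleep/wake handshake, with a pthread condition variable standing in for the kernel wait queue; names mirror the patch, but the program is only a sketch.

/* handshake.c: illustrative only. Build: cc -pthread handshake.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t fr_done = PTHREAD_COND_INITIALIZER;
static bool fr_inv;	/* invalidate WR outstanding, like frmr->fr_inv */

/* Plays the role of the CQ completion handler: clear the flag, then
 * wake the poster, mirroring "frmr->fr_inv = false;" followed by
 * "wake_up(&frmr->fr_inv_done);".
 */
static void *completion(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	fr_inv = false;
	pthread_cond_signal(&fr_done);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t cq;

	fr_inv = true;			/* set before "posting" the WR */
	pthread_create(&cq, NULL, completion, NULL);

	/* The poster side of wait_event(): sleep until the condition
	 * holds, re-checking it after every wakeup.
	 */
	pthread_mutex_lock(&lock);
	while (fr_inv)
		pthread_cond_wait(&fr_done, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(cq, NULL);
	puts("invalidate completed; safe to unmap and free the region");
	return 0;
}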
@@ -271,7 +325,7 @@
 	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
 
 	if (wc->status != IB_WC_SUCCESS) {
-		frmr->fr_state = FRMR_IS_STALE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
 		if (rds_conn_up(ic->conn))
 			rds_ib_conn_error(ic->conn,
 					  "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
@@ -283,12 +337,21 @@
 	}
 
 	if (frmr->fr_inv) {
-		frmr->fr_state = FRMR_IS_FREE;
+		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
 		frmr->fr_inv = false;
-		atomic_inc(&ic->i_fastreg_wrs);
-	} else {
-		atomic_inc(&ic->i_fastunreg_wrs);
+		wake_up(&frmr->fr_inv_done);
 	}
+
+	if (frmr->fr_reg) {
+		frmr->fr_reg = false;
+		wake_up(&frmr->fr_reg_done);
+	}
+
+	/* enforce order of frmr->{fr_reg,fr_inv} update
+	 * before incrementing i_fastreg_wrs
+	 */
+	smp_mb__before_atomic();
+	atomic_inc(&ic->i_fastreg_wrs);
 }
 
 void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
@@ -296,14 +359,18 @@
 {
 	struct rds_ib_mr *ibmr, *next;
 	struct rds_ib_frmr *frmr;
-	int ret = 0;
+	int ret = 0, ret2;
 	unsigned int freed = *nfreed;
 
 	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
 	list_for_each_entry(ibmr, list, unmap_list) {
-		if (ibmr->sg_dma_len)
-			ret |= rds_ib_post_inv(ibmr);
+		if (ibmr->sg_dma_len) {
+			ret2 = rds_ib_post_inv(ibmr);
+			if (ret2 && !ret)
+				ret = ret2;
+		}
 	}
+
 	if (ret)
 		pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
 
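This last hunk also stops aggregating errors with "ret |=" and instead keeps the first failure returned by rds_ib_post_inv(). A quick user-space demonstration of why OR-ing errnos is unsafe (assumes Linux errno values; illustrative only):

/* errno_or.c: illustrative only. Build: cc errno_or.c */
#include <errno.h>
#include <stdio.h>

int main(void)
{
	int mixed = (-EINVAL) | (-ENOMEM);	/* bitwise mix of -22 and -12 */
	int first = -EINVAL;			/* what the code now keeps */

	/* On Linux, (-22 | -12) == -2, i.e. -ENOENT: a "No such file or
	 * directory" error that never actually happened.
	 */
	printf("OR-ed:  %d (%s)\n", mixed, mixed == -ENOENT ? "-ENOENT" : "?");
	printf("first:  %d (-EINVAL)\n", first);
	return 0;
}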
---|