| .. | .. |
|---|
| 32 | 32 | |
|---|
| 33 | 33 | #include "ib_mr.h" |
|---|
| 34 | 34 | |
|---|
| 35 | +static inline void |
|---|
| 36 | +rds_transition_frwr_state(struct rds_ib_mr *ibmr, |
|---|
| 37 | + enum rds_ib_fr_state old_state, |
|---|
| 38 | + enum rds_ib_fr_state new_state) |
|---|
| 39 | +{ |
|---|
| 40 | + if (cmpxchg(&ibmr->u.frmr.fr_state, |
|---|
| 41 | + old_state, new_state) == old_state && |
|---|
| 42 | + old_state == FRMR_IS_INUSE) { |
|---|
| 43 | + /* enforce order of ibmr->u.frmr.fr_state update |
|---|
| 44 | + * before decrementing i_fastreg_inuse_count |
|---|
| 45 | + */ |
|---|
| 46 | + smp_mb__before_atomic(); |
|---|
| 47 | + atomic_dec(&ibmr->ic->i_fastreg_inuse_count); |
|---|
| 48 | + if (waitqueue_active(&rds_ib_ring_empty_wait)) |
|---|
| 49 | + wake_up(&rds_ib_ring_empty_wait); |
|---|
| 50 | + } |
|---|
| 51 | +} |
|---|
| 52 | + |
|---|
| 35 | 53 | static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, |
|---|
| 36 | 54 | int npages) |
|---|
| 37 | 55 | { |
|---|
| .. | .. |
|---|
| 58 | 76 | |
|---|
| 59 | 77 | frmr = &ibmr->u.frmr; |
|---|
| 60 | 78 | frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG, |
|---|
| 61 | | - pool->fmr_attr.max_pages); |
|---|
| 79 | + pool->max_pages); |
|---|
| 62 | 80 | if (IS_ERR(frmr->mr)) { |
|---|
| 63 | 81 | pr_warn("RDS/IB: %s failed to allocate MR", __func__); |
|---|
| 64 | 82 | err = PTR_ERR(frmr->mr); |
|---|
| .. | .. |
|---|
| 75 | 93 | pool->max_items_soft = pool->max_items; |
|---|
| 76 | 94 | |
|---|
| 77 | 95 | frmr->fr_state = FRMR_IS_FREE; |
|---|
| 96 | + init_waitqueue_head(&frmr->fr_inv_done); |
|---|
| 97 | + init_waitqueue_head(&frmr->fr_reg_done); |
|---|
| 78 | 98 | return ibmr; |
|---|
| 79 | 99 | |
|---|
| 80 | 100 | out_no_cigar: |
|---|
| .. | .. |
|---|
| 116 | 136 | if (unlikely(ret != ibmr->sg_dma_len)) |
|---|
| 117 | 137 | return ret < 0 ? ret : -EINVAL; |
|---|
| 118 | 138 | |
|---|
| 139 | + if (cmpxchg(&frmr->fr_state, |
|---|
| 140 | + FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE) |
|---|
| 141 | + return -EBUSY; |
|---|
| 142 | + |
|---|
| 143 | + atomic_inc(&ibmr->ic->i_fastreg_inuse_count); |
|---|
| 144 | + |
|---|
| 119 | 145 | /* Perform a WR for the fast_reg_mr. Each individual page |
|---|
| 120 | 146 | * in the sg list is added to the fast reg page list and placed |
|---|
| 121 | 147 | * inside the fast_reg_mr WR. The key used is a rolling 8bit |
|---|
| 122 | 148 | * counter, which should guarantee uniqueness. |
|---|
| 123 | 149 | */ |
|---|
| 124 | 150 | ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++); |
|---|
| 125 | | - frmr->fr_state = FRMR_IS_INUSE; |
|---|
| 151 | + frmr->fr_reg = true; |
|---|
| 126 | 152 | |
|---|
| 127 | 153 | memset(®_wr, 0, sizeof(reg_wr)); |
|---|
| 128 | 154 | reg_wr.wr.wr_id = (unsigned long)(void *)ibmr; |
|---|
| .. | .. |
|---|
| 138 | 164 | ret = ib_post_send(ibmr->ic->i_cm_id->qp, ®_wr.wr, NULL); |
|---|
| 139 | 165 | if (unlikely(ret)) { |
|---|
| 140 | 166 | /* Failure here can be because of -ENOMEM as well */ |
|---|
| 141 | | - frmr->fr_state = FRMR_IS_STALE; |
|---|
| 167 | + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); |
|---|
| 168 | + |
|---|
| 142 | 169 | atomic_inc(&ibmr->ic->i_fastreg_wrs); |
|---|
| 143 | 170 | if (printk_ratelimit()) |
|---|
| 144 | 171 | pr_warn("RDS/IB: %s returned error(%d)\n", |
|---|
| 145 | 172 | __func__, ret); |
|---|
| 173 | + goto out; |
|---|
| 146 | 174 | } |
|---|
| 175 | + |
|---|
| 176 | + /* Wait for the registration to complete in order to prevent an invalid |
|---|
| 177 | + * access error resulting from a race between the memory region already |
|---|
| 178 | + * being accessed while registration is still pending. |
|---|
| 179 | + */ |
|---|
| 180 | + wait_event(frmr->fr_reg_done, !frmr->fr_reg); |
|---|
| 181 | + |
|---|
| 182 | +out: |
|---|
| 183 | + |
|---|
| 147 | 184 | return ret; |
|---|
| 148 | 185 | } |
|---|
| 149 | 186 | |
|---|
| .. | .. |
|---|
| 181 | 218 | |
|---|
| 182 | 219 | ret = -EINVAL; |
|---|
| 183 | 220 | for (i = 0; i < ibmr->sg_dma_len; ++i) { |
|---|
| 184 | | - unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]); |
|---|
| 185 | | - u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]); |
|---|
| 221 | + unsigned int dma_len = sg_dma_len(&ibmr->sg[i]); |
|---|
| 222 | + u64 dma_addr = sg_dma_address(&ibmr->sg[i]); |
|---|
| 186 | 223 | |
|---|
| 187 | 224 | frmr->sg_byte_len += dma_len; |
|---|
| 188 | 225 | if (dma_addr & ~PAGE_MASK) { |
|---|
| .. | .. |
|---|
| 203 | 240 | } |
|---|
| 204 | 241 | frmr->dma_npages += len >> PAGE_SHIFT; |
|---|
| 205 | 242 | |
|---|
| 206 | | - if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) { |
|---|
| 243 | + if (frmr->dma_npages > ibmr->pool->max_pages) { |
|---|
| 207 | 244 | ret = -EMSGSIZE; |
|---|
| 208 | 245 | goto out_unmap; |
|---|
| 209 | 246 | } |
|---|
| .. | .. |
|---|
| 239 | 276 | if (frmr->fr_state != FRMR_IS_INUSE) |
|---|
| 240 | 277 | goto out; |
|---|
| 241 | 278 | |
|---|
| 242 | | - while (atomic_dec_return(&ibmr->ic->i_fastunreg_wrs) <= 0) { |
|---|
| 243 | | - atomic_inc(&ibmr->ic->i_fastunreg_wrs); |
|---|
| 279 | + while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) { |
|---|
| 280 | + atomic_inc(&ibmr->ic->i_fastreg_wrs); |
|---|
| 244 | 281 | cpu_relax(); |
|---|
| 245 | 282 | } |
|---|
| 246 | 283 | |
|---|
| .. | .. |
|---|
| 255 | 292 | |
|---|
| 256 | 293 | ret = ib_post_send(i_cm_id->qp, s_wr, NULL); |
|---|
| 257 | 294 | if (unlikely(ret)) { |
|---|
| 258 | | - frmr->fr_state = FRMR_IS_STALE; |
|---|
| 295 | + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); |
|---|
| 259 | 296 | frmr->fr_inv = false; |
|---|
| 260 | | - atomic_inc(&ibmr->ic->i_fastunreg_wrs); |
|---|
| 297 | + /* enforce order of frmr->fr_inv update |
|---|
| 298 | + * before incrementing i_fastreg_wrs |
|---|
| 299 | + */ |
|---|
| 300 | + smp_mb__before_atomic(); |
|---|
| 301 | + atomic_inc(&ibmr->ic->i_fastreg_wrs); |
|---|
| 261 | 302 | pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret); |
|---|
| 262 | 303 | goto out; |
|---|
| 263 | 304 | } |
|---|
| 305 | + |
|---|
| 306 | + /* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to |
|---|
| 307 | + * 1) avoid a silly bouncing between "clean_list" and "drop_list" |
|---|
| 308 | + * triggered by function "rds_ib_reg_frmr" as it releases frmr |
|---|
| 309 | + * regions whose state is not "FRMR_IS_FREE" right away. |
|---|
| 310 | + * 2) prevent an invalid access error in a race |
|---|
| 311 | + * from a pending "IB_WR_LOCAL_INV" operation |
|---|
| 312 | + * with a teardown ("dma_unmap_sg", "put_page") |
|---|
| 313 | + * and de-registration ("ib_dereg_mr") of the corresponding |
|---|
| 314 | + * memory region. |
|---|
| 315 | + */ |
|---|
| 316 | + wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE); |
|---|
| 317 | + |
|---|
| 264 | 318 | out: |
|---|
| 265 | 319 | return ret; |
|---|
| 266 | 320 | } |
|---|
| .. | .. |
|---|
| 271 | 325 | struct rds_ib_frmr *frmr = &ibmr->u.frmr; |
|---|
| 272 | 326 | |
|---|
| 273 | 327 | if (wc->status != IB_WC_SUCCESS) { |
|---|
| 274 | | - frmr->fr_state = FRMR_IS_STALE; |
|---|
| 328 | + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); |
|---|
| 275 | 329 | if (rds_conn_up(ic->conn)) |
|---|
| 276 | 330 | rds_ib_conn_error(ic->conn, |
|---|
| 277 | 331 | "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n", |
|---|
| .. | .. |
|---|
| 283 | 337 | } |
|---|
| 284 | 338 | |
|---|
| 285 | 339 | if (frmr->fr_inv) { |
|---|
| 286 | | - frmr->fr_state = FRMR_IS_FREE; |
|---|
| 340 | + rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE); |
|---|
| 287 | 341 | frmr->fr_inv = false; |
|---|
| 288 | | - atomic_inc(&ic->i_fastreg_wrs); |
|---|
| 289 | | - } else { |
|---|
| 290 | | - atomic_inc(&ic->i_fastunreg_wrs); |
|---|
| 342 | + wake_up(&frmr->fr_inv_done); |
|---|
| 291 | 343 | } |
|---|
| 344 | + |
|---|
| 345 | + if (frmr->fr_reg) { |
|---|
| 346 | + frmr->fr_reg = false; |
|---|
| 347 | + wake_up(&frmr->fr_reg_done); |
|---|
| 348 | + } |
|---|
| 349 | + |
|---|
| 350 | + /* enforce order of frmr->{fr_reg,fr_inv} update |
|---|
| 351 | + * before incrementing i_fastreg_wrs |
|---|
| 352 | + */ |
|---|
| 353 | + smp_mb__before_atomic(); |
|---|
| 354 | + atomic_inc(&ic->i_fastreg_wrs); |
|---|
| 292 | 355 | } |
|---|
| 293 | 356 | |
|---|
| 294 | 357 | void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed, |
|---|
| .. | .. |
|---|
| 296 | 359 | { |
|---|
| 297 | 360 | struct rds_ib_mr *ibmr, *next; |
|---|
| 298 | 361 | struct rds_ib_frmr *frmr; |
|---|
| 299 | | - int ret = 0; |
|---|
| 362 | + int ret = 0, ret2; |
|---|
| 300 | 363 | unsigned int freed = *nfreed; |
|---|
| 301 | 364 | |
|---|
| 302 | 365 | /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ |
|---|
| 303 | 366 | list_for_each_entry(ibmr, list, unmap_list) { |
|---|
| 304 | | - if (ibmr->sg_dma_len) |
|---|
| 305 | | - ret |= rds_ib_post_inv(ibmr); |
|---|
| 367 | + if (ibmr->sg_dma_len) { |
|---|
| 368 | + ret2 = rds_ib_post_inv(ibmr); |
|---|
| 369 | + if (ret2 && !ret) |
|---|
| 370 | + ret = ret2; |
|---|
| 371 | + } |
|---|
| 306 | 372 | } |
|---|
| 373 | + |
|---|
| 307 | 374 | if (ret) |
|---|
| 308 | 375 | pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret); |
|---|
| 309 | 376 | |
|---|