.. | .. |
---|
1 | 1 | /* |
---|
2 | | - * Copyright(c) 2016, 2017 Intel Corporation. |
---|
| 2 | + * Copyright(c) 2016 - 2020 Intel Corporation. |
---|
3 | 3 | * |
---|
4 | 4 | * This file is provided under a dual BSD/GPLv2 license. When using or |
---|
5 | 5 | * redistributing this file, you may do so under either license. |
---|
.. | .. |
---|
53 | 53 | #include <rdma/ib_verbs.h> |
---|
54 | 54 | #include <rdma/ib_hdrs.h> |
---|
55 | 55 | #include <rdma/opa_addr.h> |
---|
| 56 | +#include <rdma/uverbs_ioctl.h> |
---|
56 | 57 | #include "qp.h" |
---|
57 | 58 | #include "vt.h" |
---|
58 | 59 | #include "trace.h" |
---|
| 60 | + |
---|
| 61 | +#define RVT_RWQ_COUNT_THRESHOLD 16 |
---|
59 | 62 | |
---|
60 | 63 | static void rvt_rc_timeout(struct timer_list *t); |
---|
61 | 64 | static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, |
---|
.. | .. |
---|
119 | 122 | RVT_POST_SEND_OK | RVT_FLUSH_SEND, |
---|
120 | 123 | }; |
---|
121 | 124 | EXPORT_SYMBOL(ib_rvt_state_ops); |
---|
| 125 | + |
---|
| 126 | +/* platform specific: return the last level cache (llc) size, in KiB */ |
---|
| 127 | +static int rvt_wss_llc_size(void) |
---|
| 128 | +{ |
---|
| 129 | + /* assume that the boot CPU value is universal for all CPUs */ |
---|
| 130 | + return boot_cpu_data.x86_cache_size; |
---|
| 131 | +} |
---|
| 132 | + |
---|
| 133 | +/* platform specific: cacheless copy */ |
---|
| 134 | +static void cacheless_memcpy(void *dst, void *src, size_t n) |
---|
| 135 | +{ |
---|
| 136 | + /* |
---|
| 137 | + * Use the only available X64 cacheless copy. Add a __user cast |
---|
| 138 | + * to quiet sparse. The src argument is already in the kernel so |
---|
| 139 | + * there are no security issues. The extra fault recovery machinery |
---|
| 140 | + * is not invoked. |
---|
| 141 | + */ |
---|
| 142 | + __copy_user_nocache(dst, (void __user *)src, n, 0); |
---|
| 143 | +} |
---|
| 144 | + |
---|
| 145 | +void rvt_wss_exit(struct rvt_dev_info *rdi) |
---|
| 146 | +{ |
---|
| 147 | + struct rvt_wss *wss = rdi->wss; |
---|
| 148 | + |
---|
| 149 | + if (!wss) |
---|
| 150 | + return; |
---|
| 151 | + |
---|
| 152 | + /* coded to handle partially initialized and repeat callers */ |
---|
| 153 | + kfree(wss->entries); |
---|
| 154 | + wss->entries = NULL; |
---|
| 155 | + kfree(rdi->wss); |
---|
| 156 | + rdi->wss = NULL; |
---|
| 157 | +} |
---|
| 158 | + |
---|
| 159 | +/** |
---|
| 160 | + * rvt_wss_init - Init wss data structures |
---|
| 161 | + * @rdi: rvt device info structure |
---|
| 162 | + * Return: 0 on success |
---|
| 163 | + */ |
---|
| 164 | +int rvt_wss_init(struct rvt_dev_info *rdi) |
---|
| 165 | +{ |
---|
| 166 | + unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode; |
---|
| 167 | + unsigned int wss_threshold = rdi->dparms.wss_threshold; |
---|
| 168 | + unsigned int wss_clean_period = rdi->dparms.wss_clean_period; |
---|
| 169 | + long llc_size; |
---|
| 170 | + long llc_bits; |
---|
| 171 | + long table_size; |
---|
| 172 | + long table_bits; |
---|
| 173 | + struct rvt_wss *wss; |
---|
| 174 | + int node = rdi->dparms.node; |
---|
| 175 | + |
---|
| 176 | + if (sge_copy_mode != RVT_SGE_COPY_ADAPTIVE) { |
---|
| 177 | + rdi->wss = NULL; |
---|
| 178 | + return 0; |
---|
| 179 | + } |
---|
| 180 | + |
---|
| 181 | + rdi->wss = kzalloc_node(sizeof(*rdi->wss), GFP_KERNEL, node); |
---|
| 182 | + if (!rdi->wss) |
---|
| 183 | + return -ENOMEM; |
---|
| 184 | + wss = rdi->wss; |
---|
| 185 | + |
---|
| 186 | + /* check for a valid percent range - default to 80 if none or invalid */ |
---|
| 187 | + if (wss_threshold < 1 || wss_threshold > 100) |
---|
| 188 | + wss_threshold = 80; |
---|
| 189 | + |
---|
| 190 | + /* reject a wildly large period */ |
---|
| 191 | + if (wss_clean_period > 1000000) |
---|
| 192 | + wss_clean_period = 256; |
---|
| 193 | + |
---|
| 194 | + /* reject a zero period */ |
---|
| 195 | + if (wss_clean_period == 0) |
---|
| 196 | + wss_clean_period = 1; |
---|
| 197 | + |
---|
| 198 | + /* |
---|
| 199 | + * Calculate the table size - the next power of 2 larger than the |
---|
| 200 | + * LLC size. LLC size is in KiB. |
---|
| 201 | + */ |
---|
| 202 | + llc_size = rvt_wss_llc_size() * 1024; |
---|
| 203 | + table_size = roundup_pow_of_two(llc_size); |
---|
| 204 | + |
---|
| 205 | + /* one bit per page in rounded up table */ |
---|
| 206 | + llc_bits = llc_size / PAGE_SIZE; |
---|
| 207 | + table_bits = table_size / PAGE_SIZE; |
---|
| 208 | + wss->pages_mask = table_bits - 1; |
---|
| 209 | + wss->num_entries = table_bits / BITS_PER_LONG; |
---|
| 210 | + |
---|
| 211 | + wss->threshold = (llc_bits * wss_threshold) / 100; |
---|
| 212 | + if (wss->threshold == 0) |
---|
| 213 | + wss->threshold = 1; |
---|
| 214 | + |
---|
| 215 | + wss->clean_period = wss_clean_period; |
---|
| 216 | + atomic_set(&wss->clean_counter, wss_clean_period); |
---|
| 217 | + |
---|
| 218 | + wss->entries = kcalloc_node(wss->num_entries, sizeof(*wss->entries), |
---|
| 219 | + GFP_KERNEL, node); |
---|
| 220 | + if (!wss->entries) { |
---|
| 221 | + rvt_wss_exit(rdi); |
---|
| 222 | + return -ENOMEM; |
---|
| 223 | + } |
---|
| 224 | + |
---|
| 225 | + return 0; |
---|
| 226 | +} |
---|
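For reference, a worked pass through the sizing arithmetic above, using illustrative values (a 32 MiB last level cache, 4 KiB pages, 64-bit longs, the default 80% threshold) that are assumptions for the example rather than anything supplied by the patch:

        llc_size    = 32768 KiB * 1024             = 33554432 bytes
        table_size  = roundup_pow_of_two(llc_size) = 33554432 (already a power of two)
        llc_bits    = 33554432 / 4096              = 8192 pages in the LLC
        table_bits  = 33554432 / 4096              = 8192 bits in the table
        pages_mask  = 8192 - 1                     = 8191
        num_entries = 8192 / 64                    = 128 unsigned longs
        threshold   = 8192 * 80 / 100              = 6553 hot pages before copies go cacheless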
| 227 | + |
---|
| 228 | +/* |
---|
| 229 | + * Advance the clean counter. When the clean period has expired, |
---|
| 230 | + * clean an entry. |
---|
| 231 | + * |
---|
| 232 | + * This is implemented in atomics to avoid locking. Because multiple |
---|
| 233 | + * variables are involved, it can be racy which can lead to slightly |
---|
| 234 | + * inaccurate information. Since this is only a heuristic, this is |
---|
| 235 | + * OK. Any inaccuracies will clean themselves out as the counter |
---|
| 236 | + * advances. That said, it is unlikely the entry clean operation will |
---|
| 237 | + * race - the next possible racer will not start until the next clean |
---|
| 238 | + * period. |
---|
| 239 | + * |
---|
| 240 | + * The clean counter is implemented as a decrement to zero. When zero |
---|
| 241 | + * is reached an entry is cleaned. |
---|
| 242 | + */ |
---|
| 243 | +static void wss_advance_clean_counter(struct rvt_wss *wss) |
---|
| 244 | +{ |
---|
| 245 | + int entry; |
---|
| 246 | + int weight; |
---|
| 247 | + unsigned long bits; |
---|
| 248 | + |
---|
| 249 | + /* become the cleaner if we decrement the counter to zero */ |
---|
| 250 | + if (atomic_dec_and_test(&wss->clean_counter)) { |
---|
| 251 | + /* |
---|
| 252 | + * Set, not add, the clean period. This avoids an issue |
---|
| 253 | + * where the counter could decrement below the clean period. |
---|
| 254 | + * Doing a set can result in lost decrements, slowing the |
---|
| 255 | + * clean advance. Since this a heuristic, this possible |
---|
| 256 | + * slowdown is OK. |
---|
| 257 | + * |
---|
| 258 | + * An alternative is to loop, advancing the counter by a |
---|
| 259 | + * clean period until the result is > 0. However, this could |
---|
| 260 | + * lead to several threads keeping another in the clean loop. |
---|
| 261 | + * This could be mitigated by limiting the number of times |
---|
| 262 | + * we stay in the loop. |
---|
| 263 | + */ |
---|
| 264 | + atomic_set(&wss->clean_counter, wss->clean_period); |
---|
| 265 | + |
---|
| 266 | + /* |
---|
| 267 | + * Uniquely grab the entry to clean and move to next. |
---|
| 268 | + * The current entry is always the lower bits of |
---|
| 269 | + * wss.clean_entry. The table size, wss.num_entries, |
---|
| 270 | + * is always a power-of-2. |
---|
| 271 | + */ |
---|
| 272 | + entry = (atomic_inc_return(&wss->clean_entry) - 1) |
---|
| 273 | + & (wss->num_entries - 1); |
---|
| 274 | + |
---|
| 275 | + /* clear the entry and count the bits */ |
---|
| 276 | + bits = xchg(&wss->entries[entry], 0); |
---|
| 277 | + weight = hweight64((u64)bits); |
---|
| 278 | + /* only adjust the contended total count if needed */ |
---|
| 279 | + if (weight) |
---|
| 280 | + atomic_sub(weight, &wss->total_count); |
---|
| 281 | + } |
---|
| 282 | +} |
---|
| 283 | + |
---|
| 284 | +/* |
---|
| 285 | + * Insert the given address into the working set array. |
---|
| 286 | + */ |
---|
| 287 | +static void wss_insert(struct rvt_wss *wss, void *address) |
---|
| 288 | +{ |
---|
| 289 | + u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss->pages_mask; |
---|
| 290 | + u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */ |
---|
| 291 | + u32 nr = page & (BITS_PER_LONG - 1); |
---|
| 292 | + |
---|
| 293 | + if (!test_and_set_bit(nr, &wss->entries[entry])) |
---|
| 294 | + atomic_inc(&wss->total_count); |
---|
| 295 | + |
---|
| 296 | + wss_advance_clean_counter(wss); |
---|
| 297 | +} |
---|
| 298 | + |
---|
| 299 | +/* |
---|
| 300 | + * Is the working set larger than the threshold? |
---|
| 301 | + */ |
---|
| 302 | +static inline bool wss_exceeds_threshold(struct rvt_wss *wss) |
---|
| 303 | +{ |
---|
| 304 | + return atomic_read(&wss->total_count) >= wss->threshold; |
---|
| 305 | +} |
---|
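These helpers only come into play when a driver asks for them; a minimal sketch of how a driver could opt in before registering with rdmavt, using the dparms fields read by rvt_wss_init() above (the function name is hypothetical and the numeric values simply mirror the defaults that rvt_wss_init() falls back to):

/* sketch: enable adaptive SGE copies for a device about to be registered */
static void example_enable_adaptive_copy(struct rvt_dev_info *rdi)
{
        rdi->dparms.sge_copy_mode = RVT_SGE_COPY_ADAPTIVE;
        rdi->dparms.wss_threshold = 80;         /* %% of LLC pages considered "hot" */
        rdi->dparms.wss_clean_period = 256;     /* inserts between bitmap cleanings */
}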
122 | 306 | |
---|
123 | 307 | static void get_map_page(struct rvt_qpn_table *qpt, |
---|
124 | 308 | struct rvt_qpn_map *map) |
---|
.. | .. |
---|
321 | 505 | if (qps_inuse) |
---|
322 | 506 | rvt_pr_err(rdi, "QP memory leak! %u still in use\n", |
---|
323 | 507 | qps_inuse); |
---|
324 | | - if (!rdi->qp_dev) |
---|
325 | | - return; |
---|
326 | 508 | |
---|
327 | 509 | kfree(rdi->qp_dev->qp_table); |
---|
328 | 510 | free_qpn_table(&rdi->qp_dev->qpn_table); |
---|
.. | .. |
---|
341 | 523 | * @rdi: rvt device info structure |
---|
342 | 524 | * @qpt: queue pair number table pointer |
---|
343 | 525 | * @port_num: IB port number, 1 based, comes from core |
---|
| 526 | + * @exclude_prefix: prefix of special queue pair number being allocated |
---|
344 | 527 | * |
---|
345 | 528 | * Return: The queue pair number |
---|
346 | 529 | */ |
---|
347 | 530 | static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, |
---|
348 | | - enum ib_qp_type type, u8 port_num) |
---|
| 531 | + enum ib_qp_type type, u8 port_num, u8 exclude_prefix) |
---|
349 | 532 | { |
---|
350 | 533 | u32 i, offset, max_scan, qpn; |
---|
351 | 534 | struct rvt_qpn_map *map; |
---|
352 | 535 | u32 ret; |
---|
| 536 | + u32 max_qpn = exclude_prefix == RVT_AIP_QP_PREFIX ? |
---|
| 537 | + RVT_AIP_QPN_MAX : RVT_QPN_MAX; |
---|
353 | 538 | |
---|
354 | 539 | if (rdi->driver_f.alloc_qpn) |
---|
355 | 540 | return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num); |
---|
.. | .. |
---|
369 | 554 | } |
---|
370 | 555 | |
---|
371 | 556 | qpn = qpt->last + qpt->incr; |
---|
372 | | - if (qpn >= RVT_QPN_MAX) |
---|
| 557 | + if (qpn >= max_qpn) |
---|
373 | 558 | qpn = qpt->incr | ((qpt->last & 1) ^ 1); |
---|
374 | 559 | /* offset carries bit 0 */ |
---|
375 | 560 | offset = qpn & RVT_BITS_PER_PAGE_MASK; |
---|
.. | .. |
---|
445 | 630 | while (qp->s_last != qp->s_head) { |
---|
446 | 631 | struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); |
---|
447 | 632 | |
---|
448 | | - rvt_put_swqe(wqe); |
---|
449 | | - |
---|
450 | | - if (qp->ibqp.qp_type == IB_QPT_UD || |
---|
451 | | - qp->ibqp.qp_type == IB_QPT_SMI || |
---|
452 | | - qp->ibqp.qp_type == IB_QPT_GSI) |
---|
453 | | - atomic_dec(&ibah_to_rvtah( |
---|
454 | | - wqe->ud_wr.ah)->refcount); |
---|
| 633 | + rvt_put_qp_swqe(qp, wqe); |
---|
455 | 634 | if (++qp->s_last >= qp->s_size) |
---|
456 | 635 | qp->s_last = 0; |
---|
457 | 636 | smp_wmb(); /* see qp_set_savail */ |
---|
.. | .. |
---|
630 | 809 | } |
---|
631 | 810 | |
---|
632 | 811 | /** |
---|
| 812 | + * rvt_alloc_rq - allocate memory for user or kernel buffer |
---|
| 813 | + * @rq: receive queue data structure |
---|
| 814 | + * @size: total size of the receive queue buffer, in bytes |
---|
| 815 | + * @node: The NUMA node |
---|
| 816 | + * @udata: true if user data is available, false if not |
---|
| 817 | + * |
---|
| 818 | + * Return: 0 on success, or -ENOMEM if memory allocation fails |
---|
| 819 | + * This function is used by both shared receive |
---|
| 820 | + * queues and non-shared receive queues to allocate |
---|
| 821 | + * memory. |
---|
| 822 | + */ |
---|
| 823 | +int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node, |
---|
| 824 | + struct ib_udata *udata) |
---|
| 825 | +{ |
---|
| 826 | + if (udata) { |
---|
| 827 | + rq->wq = vmalloc_user(sizeof(struct rvt_rwq) + size); |
---|
| 828 | + if (!rq->wq) |
---|
| 829 | + goto bail; |
---|
| 830 | + /* need kwq with no buffers */ |
---|
| 831 | + rq->kwq = kzalloc_node(sizeof(*rq->kwq), GFP_KERNEL, node); |
---|
| 832 | + if (!rq->kwq) |
---|
| 833 | + goto bail; |
---|
| 834 | + rq->kwq->curr_wq = rq->wq->wq; |
---|
| 835 | + } else { |
---|
| 836 | + /* need kwq with buffers */ |
---|
| 837 | + rq->kwq = |
---|
| 838 | + vzalloc_node(sizeof(struct rvt_krwq) + size, node); |
---|
| 839 | + if (!rq->kwq) |
---|
| 840 | + goto bail; |
---|
| 841 | + rq->kwq->curr_wq = rq->kwq->wq; |
---|
| 842 | + } |
---|
| 843 | + |
---|
| 844 | + spin_lock_init(&rq->kwq->p_lock); |
---|
| 845 | + spin_lock_init(&rq->kwq->c_lock); |
---|
| 846 | + return 0; |
---|
| 847 | +bail: |
---|
| 848 | + rvt_free_rq(rq); |
---|
| 849 | + return -ENOMEM; |
---|
| 850 | +} |
---|
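Note that @size is a byte count covering every slot, not an entry count; the call site in rvt_create_qp() further down in this patch computes it as shown in this excerpt:

        qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
        qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
        sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
                sizeof(struct rvt_rwqe);
        err = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz,
                           rdi->dparms.node, udata);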
| 851 | + |
---|
| 852 | +/** |
---|
633 | 853 | * rvt_init_qp - initialize the QP state to the reset state |
---|
634 | 854 | * @qp: the QP to init or reinit |
---|
635 | 855 | * @type: the QP type |
---|
.. | .. |
---|
677 | 897 | qp->s_mig_state = IB_MIG_MIGRATED; |
---|
678 | 898 | qp->r_head_ack_queue = 0; |
---|
679 | 899 | qp->s_tail_ack_queue = 0; |
---|
| 900 | + qp->s_acked_ack_queue = 0; |
---|
680 | 901 | qp->s_num_rd_atomic = 0; |
---|
681 | | - if (qp->r_rq.wq) { |
---|
682 | | - qp->r_rq.wq->head = 0; |
---|
683 | | - qp->r_rq.wq->tail = 0; |
---|
684 | | - } |
---|
685 | 902 | qp->r_sge.num_sge = 0; |
---|
686 | 903 | atomic_set(&qp->s_reserved_used, 0); |
---|
687 | 904 | } |
---|
.. | .. |
---|
769 | 986 | { |
---|
770 | 987 | struct rvt_qpn_map *map; |
---|
771 | 988 | |
---|
| 989 | + if ((qpn & RVT_AIP_QP_PREFIX_MASK) == RVT_AIP_QP_BASE) |
---|
| 990 | + qpn &= RVT_AIP_QP_SUFFIX; |
---|
| 991 | + |
---|
772 | 992 | map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE; |
---|
773 | 993 | if (map->page) |
---|
774 | 994 | clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); |
---|
| 995 | +} |
---|
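The masking above is the inverse of what rvt_create_qp() does for accelerated-IP (AIP) QPs later in this patch: alloc_qpn() hands back a small number below RVT_AIP_QPN_MAX and the prefix is OR-ed in afterwards. A sketch of the assumed encoding; the shift and constant values come from rdmavt_qp.h rather than from this patch and should be treated as assumptions:

/* assumed layout: qp_num = RVT_AIP_QP_BASE | small_qpn
 *   RVT_AIP_QP_BASE   = RVT_AIP_QP_PREFIX << 16   (prefix in bits 23..16)
 *   RVT_AIP_QP_SUFFIX = 0xffff                    (low bits hold the bitmap index)
 *   small_qpn         < RVT_AIP_QPN_MAX
 *
 * On free, (qpn & RVT_AIP_QP_PREFIX_MASK) == RVT_AIP_QP_BASE detects an AIP QP
 * and qpn &= RVT_AIP_QP_SUFFIX recovers the index that alloc_qpn() handed out.
 */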
| 996 | + |
---|
| 997 | +/** |
---|
| 998 | + * get_allowed_ops - Given a QP type return the appropriate allowed OP |
---|
| 999 | + * @type: valid, supported, QP type |
---|
| 1000 | + */ |
---|
| 1001 | +static u8 get_allowed_ops(enum ib_qp_type type) |
---|
| 1002 | +{ |
---|
| 1003 | + return type == IB_QPT_RC ? IB_OPCODE_RC : type == IB_QPT_UC ? |
---|
| 1004 | + IB_OPCODE_UC : IB_OPCODE_UD; |
---|
| 1005 | +} |
---|
| 1006 | + |
---|
| 1007 | +/** |
---|
| 1008 | + * free_ud_wq_attr - Clean up AH attribute cache for UD QPs |
---|
| 1009 | + * @qp: Valid QP with allowed_ops set |
---|
| 1010 | + * |
---|
| 1011 | + * The rvt_swqe data structure being used is a union, so this is |
---|
| 1012 | + * only valid for UD QPs. |
---|
| 1013 | + */ |
---|
| 1014 | +static void free_ud_wq_attr(struct rvt_qp *qp) |
---|
| 1015 | +{ |
---|
| 1016 | + struct rvt_swqe *wqe; |
---|
| 1017 | + int i; |
---|
| 1018 | + |
---|
| 1019 | + for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) { |
---|
| 1020 | + wqe = rvt_get_swqe_ptr(qp, i); |
---|
| 1021 | + kfree(wqe->ud_wr.attr); |
---|
| 1022 | + wqe->ud_wr.attr = NULL; |
---|
| 1023 | + } |
---|
| 1024 | +} |
---|
| 1025 | + |
---|
| 1026 | +/** |
---|
| 1027 | + * alloc_ud_wq_attr - Allocate the AH attribute cache for UD QPs |
---|
| 1028 | + * @qp: Valid QP with allowed_ops set |
---|
| 1029 | + * @node: NUMA node for allocation |
---|
| 1030 | + * |
---|
| 1031 | + * The rvt_swqe data structure being used is a union, so this is |
---|
| 1032 | + * only valid for UD QPs. |
---|
| 1033 | + */ |
---|
| 1034 | +static int alloc_ud_wq_attr(struct rvt_qp *qp, int node) |
---|
| 1035 | +{ |
---|
| 1036 | + struct rvt_swqe *wqe; |
---|
| 1037 | + int i; |
---|
| 1038 | + |
---|
| 1039 | + for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) { |
---|
| 1040 | + wqe = rvt_get_swqe_ptr(qp, i); |
---|
| 1041 | + wqe->ud_wr.attr = kzalloc_node(sizeof(*wqe->ud_wr.attr), |
---|
| 1042 | + GFP_KERNEL, node); |
---|
| 1043 | + if (!wqe->ud_wr.attr) { |
---|
| 1044 | + free_ud_wq_attr(qp); |
---|
| 1045 | + return -ENOMEM; |
---|
| 1046 | + } |
---|
| 1047 | + } |
---|
| 1048 | + |
---|
| 1049 | + return 0; |
---|
775 | 1050 | } |
---|
776 | 1051 | |
---|
777 | 1052 | /** |
---|
.. | .. |
---|
801 | 1076 | struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); |
---|
802 | 1077 | void *priv = NULL; |
---|
803 | 1078 | size_t sqsize; |
---|
| 1079 | + u8 exclude_prefix = 0; |
---|
804 | 1080 | |
---|
805 | 1081 | if (!rdi) |
---|
806 | 1082 | return ERR_PTR(-EINVAL); |
---|
807 | 1083 | |
---|
808 | 1084 | if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || |
---|
809 | 1085 | init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || |
---|
810 | | - init_attr->create_flags) |
---|
| 1086 | + (init_attr->create_flags && |
---|
| 1087 | + init_attr->create_flags != IB_QP_CREATE_NETDEV_USE)) |
---|
811 | 1088 | return ERR_PTR(-EINVAL); |
---|
812 | 1089 | |
---|
813 | 1090 | /* Check receive queue parameters if no SRQ is specified. */ |
---|
.. | .. |
---|
832 | 1109 | if (init_attr->port_num == 0 || |
---|
833 | 1110 | init_attr->port_num > ibpd->device->phys_port_cnt) |
---|
834 | 1111 | return ERR_PTR(-EINVAL); |
---|
835 | | - /* fall through */ |
---|
| 1112 | + fallthrough; |
---|
836 | 1113 | case IB_QPT_UC: |
---|
837 | 1114 | case IB_QPT_RC: |
---|
838 | 1115 | case IB_QPT_UD: |
---|
839 | | - sz = sizeof(struct rvt_sge) * |
---|
840 | | - init_attr->cap.max_send_sge + |
---|
841 | | - sizeof(struct rvt_swqe); |
---|
| 1116 | + sz = struct_size(swq, sg_list, init_attr->cap.max_send_sge); |
---|
842 | 1117 | swq = vzalloc_node(array_size(sz, sqsize), rdi->dparms.node); |
---|
843 | 1118 | if (!swq) |
---|
844 | 1119 | return ERR_PTR(-ENOMEM); |
---|
.. | .. |
---|
858 | 1133 | rdi->dparms.node); |
---|
859 | 1134 | if (!qp) |
---|
860 | 1135 | goto bail_swq; |
---|
| 1136 | + qp->allowed_ops = get_allowed_ops(init_attr->qp_type); |
---|
861 | 1137 | |
---|
862 | 1138 | RCU_INIT_POINTER(qp->next, NULL); |
---|
863 | 1139 | if (init_attr->qp_type == IB_QPT_RC) { |
---|
.. | .. |
---|
895 | 1171 | qp->r_rq.max_sge = init_attr->cap.max_recv_sge; |
---|
896 | 1172 | sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + |
---|
897 | 1173 | sizeof(struct rvt_rwqe); |
---|
898 | | - if (udata) |
---|
899 | | - qp->r_rq.wq = vmalloc_user( |
---|
900 | | - sizeof(struct rvt_rwq) + |
---|
901 | | - qp->r_rq.size * sz); |
---|
902 | | - else |
---|
903 | | - qp->r_rq.wq = vzalloc_node( |
---|
904 | | - sizeof(struct rvt_rwq) + |
---|
905 | | - qp->r_rq.size * sz, |
---|
906 | | - rdi->dparms.node); |
---|
907 | | - if (!qp->r_rq.wq) |
---|
| 1174 | + err = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz, |
---|
| 1175 | + rdi->dparms.node, udata); |
---|
| 1176 | + if (err) { |
---|
| 1177 | + ret = ERR_PTR(err); |
---|
908 | 1178 | goto bail_driver_priv; |
---|
| 1179 | + } |
---|
909 | 1180 | } |
---|
910 | 1181 | |
---|
911 | 1182 | /* |
---|
.. | .. |
---|
915 | 1186 | spin_lock_init(&qp->r_lock); |
---|
916 | 1187 | spin_lock_init(&qp->s_hlock); |
---|
917 | 1188 | spin_lock_init(&qp->s_lock); |
---|
918 | | - spin_lock_init(&qp->r_rq.lock); |
---|
919 | 1189 | atomic_set(&qp->refcount, 0); |
---|
920 | 1190 | atomic_set(&qp->local_ops_pending, 0); |
---|
921 | 1191 | init_waitqueue_head(&qp->wait); |
---|
.. | .. |
---|
927 | 1197 | qp->s_max_sge = init_attr->cap.max_send_sge; |
---|
928 | 1198 | if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) |
---|
929 | 1199 | qp->s_flags = RVT_S_SIGNAL_REQ_WR; |
---|
| 1200 | + err = alloc_ud_wq_attr(qp, rdi->dparms.node); |
---|
| 1201 | + if (err) { |
---|
| 1202 | + ret = (ERR_PTR(err)); |
---|
| 1203 | + goto bail_rq_rvt; |
---|
| 1204 | + } |
---|
| 1205 | + |
---|
| 1206 | + if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE) |
---|
| 1207 | + exclude_prefix = RVT_AIP_QP_PREFIX; |
---|
930 | 1208 | |
---|
931 | 1209 | err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, |
---|
932 | 1210 | init_attr->qp_type, |
---|
933 | | - init_attr->port_num); |
---|
| 1211 | + init_attr->port_num, |
---|
| 1212 | + exclude_prefix); |
---|
934 | 1213 | if (err < 0) { |
---|
935 | 1214 | ret = ERR_PTR(err); |
---|
936 | 1215 | goto bail_rq_wq; |
---|
937 | 1216 | } |
---|
938 | 1217 | qp->ibqp.qp_num = err; |
---|
| 1218 | + if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE) |
---|
| 1219 | + qp->ibqp.qp_num |= RVT_AIP_QP_BASE; |
---|
939 | 1220 | qp->port_num = init_attr->port_num; |
---|
940 | 1221 | rvt_init_qp(rdi, qp, init_attr->qp_type); |
---|
| 1222 | + if (rdi->driver_f.qp_priv_init) { |
---|
| 1223 | + err = rdi->driver_f.qp_priv_init(rdi, qp, init_attr); |
---|
| 1224 | + if (err) { |
---|
| 1225 | + ret = ERR_PTR(err); |
---|
| 1226 | + goto bail_rq_wq; |
---|
| 1227 | + } |
---|
| 1228 | + } |
---|
941 | 1229 | break; |
---|
942 | 1230 | |
---|
943 | 1231 | default: |
---|
944 | 1232 | /* Don't support raw QPs */ |
---|
945 | | - return ERR_PTR(-EINVAL); |
---|
| 1233 | + return ERR_PTR(-EOPNOTSUPP); |
---|
946 | 1234 | } |
---|
947 | 1235 | |
---|
948 | 1236 | init_attr->cap.max_inline_data = 0; |
---|
.. | .. |
---|
964 | 1252 | } else { |
---|
965 | 1253 | u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; |
---|
966 | 1254 | |
---|
967 | | - qp->ip = rvt_create_mmap_info(rdi, s, |
---|
968 | | - ibpd->uobject->context, |
---|
| 1255 | + qp->ip = rvt_create_mmap_info(rdi, s, udata, |
---|
969 | 1256 | qp->r_rq.wq); |
---|
970 | | - if (!qp->ip) { |
---|
971 | | - ret = ERR_PTR(-ENOMEM); |
---|
| 1257 | + if (IS_ERR(qp->ip)) { |
---|
| 1258 | + ret = ERR_CAST(qp->ip); |
---|
972 | 1259 | goto bail_qpn; |
---|
973 | 1260 | } |
---|
974 | 1261 | |
---|
.. | .. |
---|
1013 | 1300 | |
---|
1014 | 1301 | ret = &qp->ibqp; |
---|
1015 | 1302 | |
---|
1016 | | - /* |
---|
1017 | | - * We have our QP and its good, now keep track of what types of opcodes |
---|
1018 | | - * can be processed on this QP. We do this by keeping track of what the |
---|
1019 | | - * 3 high order bits of the opcode are. |
---|
1020 | | - */ |
---|
1021 | | - switch (init_attr->qp_type) { |
---|
1022 | | - case IB_QPT_SMI: |
---|
1023 | | - case IB_QPT_GSI: |
---|
1024 | | - case IB_QPT_UD: |
---|
1025 | | - qp->allowed_ops = IB_OPCODE_UD; |
---|
1026 | | - break; |
---|
1027 | | - case IB_QPT_RC: |
---|
1028 | | - qp->allowed_ops = IB_OPCODE_RC; |
---|
1029 | | - break; |
---|
1030 | | - case IB_QPT_UC: |
---|
1031 | | - qp->allowed_ops = IB_OPCODE_UC; |
---|
1032 | | - break; |
---|
1033 | | - default: |
---|
1034 | | - ret = ERR_PTR(-EINVAL); |
---|
1035 | | - goto bail_ip; |
---|
1036 | | - } |
---|
1037 | | - |
---|
1038 | 1303 | return ret; |
---|
1039 | 1304 | |
---|
1040 | 1305 | bail_ip: |
---|
.. | .. |
---|
1045 | 1310 | rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); |
---|
1046 | 1311 | |
---|
1047 | 1312 | bail_rq_wq: |
---|
1048 | | - if (!qp->ip) |
---|
1049 | | - vfree(qp->r_rq.wq); |
---|
| 1313 | + free_ud_wq_attr(qp); |
---|
| 1314 | + |
---|
| 1315 | +bail_rq_rvt: |
---|
| 1316 | + rvt_free_rq(&qp->r_rq); |
---|
1050 | 1317 | |
---|
1051 | 1318 | bail_driver_priv: |
---|
1052 | 1319 | rdi->driver_f.qp_priv_free(rdi, qp); |
---|
.. | .. |
---|
1112 | 1379 | } |
---|
1113 | 1380 | wc.status = IB_WC_WR_FLUSH_ERR; |
---|
1114 | 1381 | |
---|
1115 | | - if (qp->r_rq.wq) { |
---|
1116 | | - struct rvt_rwq *wq; |
---|
| 1382 | + if (qp->r_rq.kwq) { |
---|
1117 | 1383 | u32 head; |
---|
1118 | 1384 | u32 tail; |
---|
| 1385 | + struct rvt_rwq *wq = NULL; |
---|
| 1386 | + struct rvt_krwq *kwq = NULL; |
---|
1119 | 1387 | |
---|
1120 | | - spin_lock(&qp->r_rq.lock); |
---|
1121 | | - |
---|
| 1388 | + spin_lock(&qp->r_rq.kwq->c_lock); |
---|
| 1389 | + /* qp->ip used to validate if there is a user buffer mmaped */ |
---|
| 1390 | + if (qp->ip) { |
---|
| 1391 | + wq = qp->r_rq.wq; |
---|
| 1392 | + head = RDMA_READ_UAPI_ATOMIC(wq->head); |
---|
| 1393 | + tail = RDMA_READ_UAPI_ATOMIC(wq->tail); |
---|
| 1394 | + } else { |
---|
| 1395 | + kwq = qp->r_rq.kwq; |
---|
| 1396 | + head = kwq->head; |
---|
| 1397 | + tail = kwq->tail; |
---|
| 1398 | + } |
---|
1122 | 1399 | /* sanity check pointers before trusting them */ |
---|
1123 | | - wq = qp->r_rq.wq; |
---|
1124 | | - head = wq->head; |
---|
1125 | 1400 | if (head >= qp->r_rq.size) |
---|
1126 | 1401 | head = 0; |
---|
1127 | | - tail = wq->tail; |
---|
1128 | 1402 | if (tail >= qp->r_rq.size) |
---|
1129 | 1403 | tail = 0; |
---|
1130 | 1404 | while (tail != head) { |
---|
.. | .. |
---|
1133 | 1407 | tail = 0; |
---|
1134 | 1408 | rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); |
---|
1135 | 1409 | } |
---|
1136 | | - wq->tail = tail; |
---|
1137 | | - |
---|
1138 | | - spin_unlock(&qp->r_rq.lock); |
---|
| 1410 | + if (qp->ip) |
---|
| 1411 | + RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail); |
---|
| 1412 | + else |
---|
| 1413 | + kwq->tail = tail; |
---|
| 1414 | + spin_unlock(&qp->r_rq.kwq->c_lock); |
---|
1139 | 1415 | } else if (qp->ibqp.event_handler) { |
---|
1140 | 1416 | ret = 1; |
---|
1141 | 1417 | } |
---|
.. | .. |
---|
1189 | 1465 | int lastwqe = 0; |
---|
1190 | 1466 | int mig = 0; |
---|
1191 | 1467 | int pmtu = 0; /* for gcc warning only */ |
---|
1192 | | - enum rdma_link_layer link; |
---|
1193 | 1468 | int opa_ah; |
---|
1194 | | - |
---|
1195 | | - link = rdma_port_get_link_layer(ibqp->device, qp->port_num); |
---|
1196 | 1469 | |
---|
1197 | 1470 | spin_lock_irq(&qp->r_lock); |
---|
1198 | 1471 | spin_lock(&qp->s_hlock); |
---|
.. | .. |
---|
1204 | 1477 | opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num); |
---|
1205 | 1478 | |
---|
1206 | 1479 | if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, |
---|
1207 | | - attr_mask, link)) |
---|
| 1480 | + attr_mask)) |
---|
1208 | 1481 | goto inval; |
---|
1209 | 1482 | |
---|
1210 | 1483 | if (rdi->driver_f.check_modify_qp && |
---|
.. | .. |
---|
1453 | 1726 | * |
---|
1454 | 1727 | * Return: 0 on success. |
---|
1455 | 1728 | */ |
---|
1456 | | -int rvt_destroy_qp(struct ib_qp *ibqp) |
---|
| 1729 | +int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) |
---|
1457 | 1730 | { |
---|
1458 | 1731 | struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); |
---|
1459 | 1732 | struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); |
---|
.. | .. |
---|
1474 | 1747 | |
---|
1475 | 1748 | if (qp->ip) |
---|
1476 | 1749 | kref_put(&qp->ip->ref, rvt_release_mmap_info); |
---|
1477 | | - else |
---|
1478 | | - vfree(qp->r_rq.wq); |
---|
1479 | | - vfree(qp->s_wq); |
---|
| 1750 | + kvfree(qp->r_rq.kwq); |
---|
1480 | 1751 | rdi->driver_f.qp_priv_free(rdi, qp); |
---|
1481 | 1752 | kfree(qp->s_ack_queue); |
---|
1482 | 1753 | rdma_destroy_ah_attr(&qp->remote_ah_attr); |
---|
1483 | 1754 | rdma_destroy_ah_attr(&qp->alt_ah_attr); |
---|
| 1755 | + free_ud_wq_attr(qp); |
---|
| 1756 | + vfree(qp->s_wq); |
---|
1484 | 1757 | kfree(qp); |
---|
1485 | 1758 | return 0; |
---|
1486 | 1759 | } |
---|
.. | .. |
---|
1561 | 1834 | const struct ib_recv_wr **bad_wr) |
---|
1562 | 1835 | { |
---|
1563 | 1836 | struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); |
---|
1564 | | - struct rvt_rwq *wq = qp->r_rq.wq; |
---|
| 1837 | + struct rvt_krwq *wq = qp->r_rq.kwq; |
---|
1565 | 1838 | unsigned long flags; |
---|
1566 | 1839 | int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) && |
---|
1567 | 1840 | !qp->ibqp.srq; |
---|
.. | .. |
---|
1582 | 1855 | return -EINVAL; |
---|
1583 | 1856 | } |
---|
1584 | 1857 | |
---|
1585 | | - spin_lock_irqsave(&qp->r_rq.lock, flags); |
---|
| 1858 | + spin_lock_irqsave(&qp->r_rq.kwq->p_lock, flags); |
---|
1586 | 1859 | next = wq->head + 1; |
---|
1587 | 1860 | if (next >= qp->r_rq.size) |
---|
1588 | 1861 | next = 0; |
---|
1589 | | - if (next == wq->tail) { |
---|
1590 | | - spin_unlock_irqrestore(&qp->r_rq.lock, flags); |
---|
| 1862 | + if (next == READ_ONCE(wq->tail)) { |
---|
| 1863 | + spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags); |
---|
1591 | 1864 | *bad_wr = wr; |
---|
1592 | 1865 | return -ENOMEM; |
---|
1593 | 1866 | } |
---|
.. | .. |
---|
1604 | 1877 | wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); |
---|
1605 | 1878 | wqe->wr_id = wr->wr_id; |
---|
1606 | 1879 | wqe->num_sge = wr->num_sge; |
---|
1607 | | - for (i = 0; i < wr->num_sge; i++) |
---|
1608 | | - wqe->sg_list[i] = wr->sg_list[i]; |
---|
| 1880 | + for (i = 0; i < wr->num_sge; i++) { |
---|
| 1881 | + wqe->sg_list[i].addr = wr->sg_list[i].addr; |
---|
| 1882 | + wqe->sg_list[i].length = wr->sg_list[i].length; |
---|
| 1883 | + wqe->sg_list[i].lkey = wr->sg_list[i].lkey; |
---|
| 1884 | + } |
---|
1609 | 1885 | /* |
---|
1610 | 1886 | * Make sure queue entry is written |
---|
1611 | 1887 | * before the head index. |
---|
1612 | 1888 | */ |
---|
1613 | | - smp_wmb(); |
---|
1614 | | - wq->head = next; |
---|
| 1889 | + smp_store_release(&wq->head, next); |
---|
1615 | 1890 | } |
---|
1616 | | - spin_unlock_irqrestore(&qp->r_rq.lock, flags); |
---|
| 1891 | + spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags); |
---|
1617 | 1892 | } |
---|
1618 | 1893 | return 0; |
---|
1619 | 1894 | } |
---|
.. | .. |
---|
1694 | 1969 | |
---|
1695 | 1970 | /* see rvt_qp_wqe_unreserve() */ |
---|
1696 | 1971 | smp_mb__before_atomic(); |
---|
1697 | | - reserved_used = atomic_read(&qp->s_reserved_used); |
---|
1698 | 1972 | if (unlikely(reserved_op)) { |
---|
1699 | 1973 | /* see rvt_qp_wqe_unreserve() */ |
---|
1700 | | - smp_mb__before_atomic(); |
---|
| 1974 | + reserved_used = atomic_read(&qp->s_reserved_used); |
---|
1701 | 1975 | if (reserved_used >= rdi->dparms.reserved_operations) |
---|
1702 | 1976 | return -ENOMEM; |
---|
1703 | 1977 | return 0; |
---|
.. | .. |
---|
1705 | 1979 | /* non-reserved operations */ |
---|
1706 | 1980 | if (likely(qp->s_avail)) |
---|
1707 | 1981 | return 0; |
---|
1708 | | - slast = READ_ONCE(qp->s_last); |
---|
| 1982 | + /* See rvt_qp_complete_swqe() */ |
---|
| 1983 | + slast = smp_load_acquire(&qp->s_last); |
---|
1709 | 1984 | if (qp->s_head >= slast) |
---|
1710 | 1985 | avail = qp->s_size - (qp->s_head - slast); |
---|
1711 | 1986 | else |
---|
1712 | 1987 | avail = slast - qp->s_head; |
---|
1713 | 1988 | |
---|
1714 | | - /* see rvt_qp_wqe_unreserve() */ |
---|
1715 | | - smp_mb__before_atomic(); |
---|
1716 | 1989 | reserved_used = atomic_read(&qp->s_reserved_used); |
---|
1717 | 1990 | avail = avail - 1 - |
---|
1718 | 1991 | (rdi->dparms.reserved_operations - reserved_used); |
---|
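The smp_load_acquire() of s_last above pairs with a release store on the completion side (the "See rvt_qp_complete_swqe()" comment); a generic sketch of the pairing, not the rdmavt helper verbatim:

/* completion side: finish reading the SWQE slot, then publish the new s_last */
smp_store_release(&qp->s_last, last);

/* posting side (rvt_qp_is_avail): observe s_last before reusing freed slots */
slast = smp_load_acquire(&qp->s_last);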
.. | .. |
---|
1737 | 2010 | */ |
---|
1738 | 2011 | static int rvt_post_one_wr(struct rvt_qp *qp, |
---|
1739 | 2012 | const struct ib_send_wr *wr, |
---|
1740 | | - int *call_send) |
---|
| 2013 | + bool *call_send) |
---|
1741 | 2014 | { |
---|
1742 | 2015 | struct rvt_swqe *wqe; |
---|
1743 | 2016 | u32 next; |
---|
.. | .. |
---|
1842 | 2115 | wqe->wr.num_sge = j; |
---|
1843 | 2116 | } |
---|
1844 | 2117 | |
---|
1845 | | - /* general part of wqe valid - allow for driver checks */ |
---|
1846 | | - if (rdi->driver_f.check_send_wqe) { |
---|
1847 | | - ret = rdi->driver_f.check_send_wqe(qp, wqe); |
---|
1848 | | - if (ret < 0) |
---|
1849 | | - goto bail_inval_free; |
---|
1850 | | - if (ret) |
---|
1851 | | - *call_send = ret; |
---|
1852 | | - } |
---|
1853 | | - |
---|
| 2118 | + /* |
---|
| 2119 | + * Calculate and set SWQE PSN values prior to handing it off |
---|
| 2120 | + * to the driver's check routine. This gives the driver the |
---|
| 2121 | + * opportunity to adjust PSN values based on internal checks. |
---|
| 2122 | + */ |
---|
1854 | 2123 | log_pmtu = qp->log_pmtu; |
---|
1855 | | - if (qp->ibqp.qp_type != IB_QPT_UC && |
---|
1856 | | - qp->ibqp.qp_type != IB_QPT_RC) { |
---|
1857 | | - struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); |
---|
| 2124 | + if (qp->allowed_ops == IB_OPCODE_UD) { |
---|
| 2125 | + struct rvt_ah *ah = rvt_get_swqe_ah(wqe); |
---|
1858 | 2126 | |
---|
1859 | 2127 | log_pmtu = ah->log_pmtu; |
---|
1860 | | - atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); |
---|
| 2128 | + rdma_copy_ah_attr(wqe->ud_wr.attr, &ah->attr); |
---|
1861 | 2129 | } |
---|
1862 | 2130 | |
---|
1863 | 2131 | if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) { |
---|
.. | .. |
---|
1875 | 2143 | (wqe->length ? |
---|
1876 | 2144 | ((wqe->length - 1) >> log_pmtu) : |
---|
1877 | 2145 | 0); |
---|
1878 | | - qp->s_next_psn = wqe->lpsn + 1; |
---|
1879 | 2146 | } |
---|
| 2147 | + |
---|
| 2148 | + /* general part of wqe valid - allow for driver checks */ |
---|
| 2149 | + if (rdi->driver_f.setup_wqe) { |
---|
| 2150 | + ret = rdi->driver_f.setup_wqe(qp, wqe, call_send); |
---|
| 2151 | + if (ret < 0) |
---|
| 2152 | + goto bail_inval_free_ref; |
---|
| 2153 | + } |
---|
| 2154 | + |
---|
| 2155 | + if (!(rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) |
---|
| 2156 | + qp->s_next_psn = wqe->lpsn + 1; |
---|
| 2157 | + |
---|
1880 | 2158 | if (unlikely(reserved_op)) { |
---|
1881 | 2159 | wqe->wr.send_flags |= RVT_SEND_RESERVE_USED; |
---|
1882 | 2160 | rvt_qp_wqe_reserve(qp, wqe); |
---|
.. | .. |
---|
1890 | 2168 | |
---|
1891 | 2169 | return 0; |
---|
1892 | 2170 | |
---|
| 2171 | +bail_inval_free_ref: |
---|
| 2172 | + if (qp->allowed_ops == IB_OPCODE_UD) |
---|
| 2173 | + rdma_destroy_ah_attr(wqe->ud_wr.attr); |
---|
1893 | 2174 | bail_inval_free: |
---|
1894 | 2175 | /* release mr holds */ |
---|
1895 | 2176 | while (j) { |
---|
.. | .. |
---|
1916 | 2197 | struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); |
---|
1917 | 2198 | struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); |
---|
1918 | 2199 | unsigned long flags = 0; |
---|
1919 | | - int call_send; |
---|
| 2200 | + bool call_send; |
---|
1920 | 2201 | unsigned nreq = 0; |
---|
1921 | 2202 | int err = 0; |
---|
1922 | 2203 | |
---|
.. | .. |
---|
1949 | 2230 | bail: |
---|
1950 | 2231 | spin_unlock_irqrestore(&qp->s_hlock, flags); |
---|
1951 | 2232 | if (nreq) { |
---|
1952 | | - if (call_send) |
---|
| 2233 | + /* |
---|
| 2234 | + * Only call do_send if there is exactly one packet, and the |
---|
| 2235 | + * driver said it was ok. |
---|
| 2236 | + */ |
---|
| 2237 | + if (nreq == 1 && call_send) |
---|
1953 | 2238 | rdi->driver_f.do_send(qp); |
---|
1954 | 2239 | else |
---|
1955 | 2240 | rdi->driver_f.schedule_send_no_lock(qp); |
---|
.. | .. |
---|
1971 | 2256 | const struct ib_recv_wr **bad_wr) |
---|
1972 | 2257 | { |
---|
1973 | 2258 | struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); |
---|
1974 | | - struct rvt_rwq *wq; |
---|
| 2259 | + struct rvt_krwq *wq; |
---|
1975 | 2260 | unsigned long flags; |
---|
1976 | 2261 | |
---|
1977 | 2262 | for (; wr; wr = wr->next) { |
---|
.. | .. |
---|
1984 | 2269 | return -EINVAL; |
---|
1985 | 2270 | } |
---|
1986 | 2271 | |
---|
1987 | | - spin_lock_irqsave(&srq->rq.lock, flags); |
---|
1988 | | - wq = srq->rq.wq; |
---|
| 2272 | + spin_lock_irqsave(&srq->rq.kwq->p_lock, flags); |
---|
| 2273 | + wq = srq->rq.kwq; |
---|
1989 | 2274 | next = wq->head + 1; |
---|
1990 | 2275 | if (next >= srq->rq.size) |
---|
1991 | 2276 | next = 0; |
---|
1992 | | - if (next == wq->tail) { |
---|
1993 | | - spin_unlock_irqrestore(&srq->rq.lock, flags); |
---|
| 2277 | + if (next == READ_ONCE(wq->tail)) { |
---|
| 2278 | + spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags); |
---|
1994 | 2279 | *bad_wr = wr; |
---|
1995 | 2280 | return -ENOMEM; |
---|
1996 | 2281 | } |
---|
.. | .. |
---|
1998 | 2283 | wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); |
---|
1999 | 2284 | wqe->wr_id = wr->wr_id; |
---|
2000 | 2285 | wqe->num_sge = wr->num_sge; |
---|
2001 | | - for (i = 0; i < wr->num_sge; i++) |
---|
2002 | | - wqe->sg_list[i] = wr->sg_list[i]; |
---|
| 2286 | + for (i = 0; i < wr->num_sge; i++) { |
---|
| 2287 | + wqe->sg_list[i].addr = wr->sg_list[i].addr; |
---|
| 2288 | + wqe->sg_list[i].length = wr->sg_list[i].length; |
---|
| 2289 | + wqe->sg_list[i].lkey = wr->sg_list[i].lkey; |
---|
| 2290 | + } |
---|
2003 | 2291 | /* Make sure queue entry is written before the head index. */ |
---|
2004 | | - smp_wmb(); |
---|
2005 | | - wq->head = next; |
---|
2006 | | - spin_unlock_irqrestore(&srq->rq.lock, flags); |
---|
| 2292 | + smp_store_release(&wq->head, next); |
---|
| 2293 | + spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags); |
---|
2007 | 2294 | } |
---|
2008 | 2295 | return 0; |
---|
| 2296 | +} |
---|
| 2297 | + |
---|
| 2298 | +/* |
---|
| 2299 | + * rvt used the internal kernel struct as part of its ABI, for now make sure |
---|
| 2300 | + * the kernel struct does not change layout. FIXME: rvt should never cast the |
---|
| 2301 | + * user struct to a kernel struct. |
---|
| 2302 | + */ |
---|
| 2303 | +static struct ib_sge *rvt_cast_sge(struct rvt_wqe_sge *sge) |
---|
| 2304 | +{ |
---|
| 2305 | + BUILD_BUG_ON(offsetof(struct ib_sge, addr) != |
---|
| 2306 | + offsetof(struct rvt_wqe_sge, addr)); |
---|
| 2307 | + BUILD_BUG_ON(offsetof(struct ib_sge, length) != |
---|
| 2308 | + offsetof(struct rvt_wqe_sge, length)); |
---|
| 2309 | + BUILD_BUG_ON(offsetof(struct ib_sge, lkey) != |
---|
| 2310 | + offsetof(struct rvt_wqe_sge, lkey)); |
---|
| 2311 | + return (struct ib_sge *)sge; |
---|
2009 | 2312 | } |
---|
2010 | 2313 | |
---|
2011 | 2314 | /* |
---|
.. | .. |
---|
2031 | 2334 | continue; |
---|
2032 | 2335 | /* Check LKEY */ |
---|
2033 | 2336 | ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, |
---|
2034 | | - NULL, &wqe->sg_list[i], |
---|
| 2337 | + NULL, rvt_cast_sge(&wqe->sg_list[i]), |
---|
2035 | 2338 | IB_ACCESS_LOCAL_WRITE); |
---|
2036 | 2339 | if (unlikely(ret <= 0)) |
---|
2037 | 2340 | goto bad_lkey; |
---|
.. | .. |
---|
2060 | 2363 | } |
---|
2061 | 2364 | |
---|
2062 | 2365 | /** |
---|
| 2366 | + * get_rvt_head - get the head index of the circular buffer |
---|
| 2367 | + * @rq: data structure for request queue entry |
---|
| 2368 | + * @ip: mmap info pointer; non-NULL when the queue is user-mapped |
---|
| 2369 | + * |
---|
| 2370 | + * Return: head index value |
---|
| 2371 | + */ |
---|
| 2372 | +static inline u32 get_rvt_head(struct rvt_rq *rq, void *ip) |
---|
| 2373 | +{ |
---|
| 2374 | + u32 head; |
---|
| 2375 | + |
---|
| 2376 | + if (ip) |
---|
| 2377 | + head = RDMA_READ_UAPI_ATOMIC(rq->wq->head); |
---|
| 2378 | + else |
---|
| 2379 | + head = rq->kwq->head; |
---|
| 2380 | + |
---|
| 2381 | + return head; |
---|
| 2382 | +} |
---|
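rvt_get_rq_count(), used below to refresh kwq->count, lives in the rdmavt headers rather than in this file; a sketch of the wrap-around arithmetic it is expected to perform on the head/tail pair returned by get_rvt_head() (illustrative helper name, not the real declaration):

/* illustrative only; the real helper is declared in rdmavt_qp.h */
static inline u32 example_rq_count(struct rvt_rq *rq, u32 head, u32 tail)
{
        u32 count = head - tail;        /* entries posted but not yet consumed */

        if ((s32)count < 0)             /* head has wrapped past the end */
                count += rq->size;
        return count;
}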
| 2383 | + |
---|
| 2384 | +/** |
---|
2063 | 2385 | * rvt_get_rwqe - copy the next RWQE into the QP's RWQE |
---|
2064 | 2386 | * @qp: the QP |
---|
2065 | 2387 | * @wr_id_only: update qp->r_wr_id only, not qp->r_sge |
---|
.. | .. |
---|
2073 | 2395 | { |
---|
2074 | 2396 | unsigned long flags; |
---|
2075 | 2397 | struct rvt_rq *rq; |
---|
| 2398 | + struct rvt_krwq *kwq = NULL; |
---|
2076 | 2399 | struct rvt_rwq *wq; |
---|
2077 | 2400 | struct rvt_srq *srq; |
---|
2078 | 2401 | struct rvt_rwqe *wqe; |
---|
2079 | 2402 | void (*handler)(struct ib_event *, void *); |
---|
2080 | 2403 | u32 tail; |
---|
| 2404 | + u32 head; |
---|
2081 | 2405 | int ret; |
---|
| 2406 | + void *ip = NULL; |
---|
2082 | 2407 | |
---|
2083 | 2408 | if (qp->ibqp.srq) { |
---|
2084 | 2409 | srq = ibsrq_to_rvtsrq(qp->ibqp.srq); |
---|
2085 | 2410 | handler = srq->ibsrq.event_handler; |
---|
2086 | 2411 | rq = &srq->rq; |
---|
| 2412 | + ip = srq->ip; |
---|
2087 | 2413 | } else { |
---|
2088 | 2414 | srq = NULL; |
---|
2089 | 2415 | handler = NULL; |
---|
2090 | 2416 | rq = &qp->r_rq; |
---|
| 2417 | + ip = qp->ip; |
---|
2091 | 2418 | } |
---|
2092 | 2419 | |
---|
2093 | | - spin_lock_irqsave(&rq->lock, flags); |
---|
| 2420 | + spin_lock_irqsave(&rq->kwq->c_lock, flags); |
---|
2094 | 2421 | if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { |
---|
2095 | 2422 | ret = 0; |
---|
2096 | 2423 | goto unlock; |
---|
2097 | 2424 | } |
---|
| 2425 | + kwq = rq->kwq; |
---|
| 2426 | + if (ip) { |
---|
| 2427 | + wq = rq->wq; |
---|
| 2428 | + tail = RDMA_READ_UAPI_ATOMIC(wq->tail); |
---|
| 2429 | + } else { |
---|
| 2430 | + tail = kwq->tail; |
---|
| 2431 | + } |
---|
2098 | 2432 | |
---|
2099 | | - wq = rq->wq; |
---|
2100 | | - tail = wq->tail; |
---|
2101 | 2433 | /* Validate tail before using it since it is user writable. */ |
---|
2102 | 2434 | if (tail >= rq->size) |
---|
2103 | 2435 | tail = 0; |
---|
2104 | | - if (unlikely(tail == wq->head)) { |
---|
| 2436 | + |
---|
| 2437 | + if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) { |
---|
| 2438 | + head = get_rvt_head(rq, ip); |
---|
| 2439 | + kwq->count = rvt_get_rq_count(rq, head, tail); |
---|
| 2440 | + } |
---|
| 2441 | + if (unlikely(kwq->count == 0)) { |
---|
2105 | 2442 | ret = 0; |
---|
2106 | 2443 | goto unlock; |
---|
2107 | 2444 | } |
---|
2108 | | - /* Make sure entry is read after head index is read. */ |
---|
| 2445 | + /* Make sure entry is read after the count is read. */ |
---|
2109 | 2446 | smp_rmb(); |
---|
2110 | 2447 | wqe = rvt_get_rwqe_ptr(rq, tail); |
---|
2111 | 2448 | /* |
---|
.. | .. |
---|
2115 | 2452 | */ |
---|
2116 | 2453 | if (++tail >= rq->size) |
---|
2117 | 2454 | tail = 0; |
---|
2118 | | - wq->tail = tail; |
---|
| 2455 | + if (ip) |
---|
| 2456 | + RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail); |
---|
| 2457 | + else |
---|
| 2458 | + kwq->tail = tail; |
---|
2119 | 2459 | if (!wr_id_only && !init_sge(qp, wqe)) { |
---|
2120 | 2460 | ret = -1; |
---|
2121 | 2461 | goto unlock; |
---|
2122 | 2462 | } |
---|
2123 | 2463 | qp->r_wr_id = wqe->wr_id; |
---|
2124 | 2464 | |
---|
| 2465 | + kwq->count--; |
---|
2125 | 2466 | ret = 1; |
---|
2126 | 2467 | set_bit(RVT_R_WRID_VALID, &qp->r_aflags); |
---|
2127 | 2468 | if (handler) { |
---|
2128 | | - u32 n; |
---|
2129 | | - |
---|
2130 | 2469 | /* |
---|
2131 | 2470 | * Validate head pointer value and compute |
---|
2132 | 2471 | * the number of remaining WQEs. |
---|
2133 | 2472 | */ |
---|
2134 | | - n = wq->head; |
---|
2135 | | - if (n >= rq->size) |
---|
2136 | | - n = 0; |
---|
2137 | | - if (n < tail) |
---|
2138 | | - n += rq->size - tail; |
---|
2139 | | - else |
---|
2140 | | - n -= tail; |
---|
2141 | | - if (n < srq->limit) { |
---|
2142 | | - struct ib_event ev; |
---|
| 2473 | + if (kwq->count < srq->limit) { |
---|
| 2474 | + kwq->count = |
---|
| 2475 | + rvt_get_rq_count(rq, |
---|
| 2476 | + get_rvt_head(rq, ip), tail); |
---|
| 2477 | + if (kwq->count < srq->limit) { |
---|
| 2478 | + struct ib_event ev; |
---|
2143 | 2479 | |
---|
2144 | | - srq->limit = 0; |
---|
2145 | | - spin_unlock_irqrestore(&rq->lock, flags); |
---|
2146 | | - ev.device = qp->ibqp.device; |
---|
2147 | | - ev.element.srq = qp->ibqp.srq; |
---|
2148 | | - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; |
---|
2149 | | - handler(&ev, srq->ibsrq.srq_context); |
---|
2150 | | - goto bail; |
---|
| 2480 | + srq->limit = 0; |
---|
| 2481 | + spin_unlock_irqrestore(&rq->kwq->c_lock, flags); |
---|
| 2482 | + ev.device = qp->ibqp.device; |
---|
| 2483 | + ev.element.srq = qp->ibqp.srq; |
---|
| 2484 | + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; |
---|
| 2485 | + handler(&ev, srq->ibsrq.srq_context); |
---|
| 2486 | + goto bail; |
---|
| 2487 | + } |
---|
2151 | 2488 | } |
---|
2152 | 2489 | } |
---|
2153 | 2490 | unlock: |
---|
2154 | | - spin_unlock_irqrestore(&rq->lock, flags); |
---|
| 2491 | + spin_unlock_irqrestore(&rq->kwq->c_lock, flags); |
---|
2155 | 2492 | bail: |
---|
2156 | 2493 | return ret; |
---|
2157 | 2494 | } |
---|
.. | .. |
---|
2213 | 2550 | } |
---|
2214 | 2551 | |
---|
2215 | 2552 | /* |
---|
2216 | | - * rvt_add_retry_timer - add/start a retry timer |
---|
| 2553 | + * rvt_add_retry_timer_ext - add/start a retry timer |
---|
2217 | 2554 | * @qp - the QP |
---|
| 2555 | + * @shift - timeout shift to wait for multiple packets |
---|
2218 | 2556 | * add a retry timer on the QP |
---|
2219 | 2557 | */ |
---|
2220 | | -void rvt_add_retry_timer(struct rvt_qp *qp) |
---|
| 2558 | +void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift) |
---|
2221 | 2559 | { |
---|
2222 | 2560 | struct ib_qp *ibqp = &qp->ibqp; |
---|
2223 | 2561 | struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); |
---|
.. | .. |
---|
2225 | 2563 | lockdep_assert_held(&qp->s_lock); |
---|
2226 | 2564 | qp->s_flags |= RVT_S_TIMER; |
---|
2227 | 2565 | /* 4.096 usec. * (1 << qp->timeout) */ |
---|
2228 | | - qp->s_timer.expires = jiffies + qp->timeout_jiffies + |
---|
2229 | | - rdi->busy_jiffies; |
---|
| 2566 | + qp->s_timer.expires = jiffies + rdi->busy_jiffies + |
---|
| 2567 | + (qp->timeout_jiffies << shift); |
---|
2230 | 2568 | add_timer(&qp->s_timer); |
---|
2231 | 2569 | } |
---|
2232 | | -EXPORT_SYMBOL(rvt_add_retry_timer); |
---|
| 2570 | +EXPORT_SYMBOL(rvt_add_retry_timer_ext); |
---|
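With shift = 0 the new expression reduces to the old jiffies + timeout_jiffies + busy_jiffies, so existing callers can keep the original entry point as a thin wrapper; a sketch of what the header presumably provides:

/* sketch: preserve the old single-packet retry timeout behaviour */
static inline void rvt_add_retry_timer(struct rvt_qp *qp)
{
        rvt_add_retry_timer_ext(qp, 0);
}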
2233 | 2571 | |
---|
2234 | 2572 | /** |
---|
2235 | | - * rvt_add_rnr_timer - add/start an rnr timer |
---|
2236 | | - * @qp - the QP |
---|
2237 | | - * @aeth - aeth of RNR timeout, simulated aeth for loopback |
---|
2238 | | - * add an rnr timer on the QP |
---|
| 2573 | + * rvt_add_rnr_timer - add/start an rnr timer on the QP |
---|
| 2574 | + * @qp: the QP |
---|
| 2575 | + * @aeth: aeth of RNR timeout, simulated aeth for loopback |
---|
2239 | 2576 | */ |
---|
2240 | 2577 | void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth) |
---|
2241 | 2578 | { |
---|
.. | .. |
---|
2252 | 2589 | |
---|
2253 | 2590 | /** |
---|
2254 | 2591 | * rvt_stop_rc_timers - stop all timers |
---|
2255 | | - * @qp - the QP |
---|
| 2592 | + * @qp: the QP |
---|
2256 | 2593 | * stop any pending timers |
---|
2257 | 2594 | */ |
---|
2258 | 2595 | void rvt_stop_rc_timers(struct rvt_qp *qp) |
---|
.. | .. |
---|
2286 | 2623 | |
---|
2287 | 2624 | /** |
---|
2288 | 2625 | * rvt_del_timers_sync - wait for any timeout routines to exit |
---|
2289 | | - * @qp - the QP |
---|
| 2626 | + * @qp: the QP |
---|
2290 | 2627 | */ |
---|
2291 | 2628 | void rvt_del_timers_sync(struct rvt_qp *qp) |
---|
2292 | 2629 | { |
---|
.. | .. |
---|
2295 | 2632 | } |
---|
2296 | 2633 | EXPORT_SYMBOL(rvt_del_timers_sync); |
---|
2297 | 2634 | |
---|
2298 | | -/** |
---|
| 2635 | +/* |
---|
2299 | 2636 | * This is called from s_timer for missing responses. |
---|
2300 | 2637 | */ |
---|
2301 | 2638 | static void rvt_rc_timeout(struct timer_list *t) |
---|
.. | .. |
---|
2345 | 2682 | * rvt_qp_iter_init - initial for QP iteration |
---|
2346 | 2683 | * @rdi: rvt devinfo |
---|
2347 | 2684 | * @v: u64 value |
---|
| 2685 | + * @cb: user-defined callback |
---|
2348 | 2686 | * |
---|
2349 | 2687 | * This returns an iterator suitable for iterating QPs |
---|
2350 | 2688 | * in the system. |
---|
2351 | 2689 | * |
---|
2352 | | - * The @cb is a user defined callback and @v is a 64 |
---|
2353 | | - * bit value passed to and relevant for processing in the |
---|
| 2690 | + * The @cb is a user-defined callback and @v is a 64-bit |
---|
| 2691 | + * value passed to and relevant for processing in the |
---|
2354 | 2692 | * @cb. An example use case would be to alter QP processing |
---|
2355 | 2693 | * based on criteria not part of the rvt_qp. |
---|
2356 | 2694 | * |
---|
.. | .. |
---|
2381 | 2719 | |
---|
2382 | 2720 | /** |
---|
2383 | 2721 | * rvt_qp_iter_next - return the next QP in iter |
---|
2384 | | - * @iter - the iterator |
---|
| 2722 | + * @iter: the iterator |
---|
2385 | 2723 | * |
---|
2386 | 2724 | * Fine grained QP iterator suitable for use |
---|
2387 | 2725 | * with debugfs seq_file mechanisms. |
---|
.. | .. |
---|
2444 | 2782 | |
---|
2445 | 2783 | /** |
---|
2446 | 2784 | * rvt_qp_iter - iterate all QPs |
---|
2447 | | - * @rdi - rvt devinfo |
---|
2448 | | - * @v - a 64 bit value |
---|
2449 | | - * @cb - a callback |
---|
| 2785 | + * @rdi: rvt devinfo |
---|
| 2786 | + * @v: a 64-bit value |
---|
| 2787 | + * @cb: a callback |
---|
2450 | 2788 | * |
---|
2451 | 2789 | * This provides a way for iterating all QPs. |
---|
2452 | 2790 | * |
---|
2453 | | - * The @cb is a user defined callback and @v is a 64 |
---|
2454 | | - * bit value passed to and relevant for processing in the |
---|
| 2791 | + * The @cb is a user-defined callback and @v is a 64-bit |
---|
| 2792 | + * value passed to and relevant for processing in the |
---|
2455 | 2793 | * cb. An example use case would be to alter QP processing |
---|
2456 | 2794 | * based on criteria not part of the rvt_qp. |
---|
2457 | 2795 | * |
---|
.. | .. |
---|
2484 | 2822 | rcu_read_unlock(); |
---|
2485 | 2823 | } |
---|
2486 | 2824 | EXPORT_SYMBOL(rvt_qp_iter); |
---|
| 2825 | + |
---|
| 2826 | +/* |
---|
| 2827 | + * This should be called with s_lock and r_lock held. |
---|
| 2828 | + */ |
---|
| 2829 | +void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, |
---|
| 2830 | + enum ib_wc_status status) |
---|
| 2831 | +{ |
---|
| 2832 | + u32 old_last, last; |
---|
| 2833 | + struct rvt_dev_info *rdi; |
---|
| 2834 | + |
---|
| 2835 | + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) |
---|
| 2836 | + return; |
---|
| 2837 | + rdi = ib_to_rvt(qp->ibqp.device); |
---|
| 2838 | + |
---|
| 2839 | + old_last = qp->s_last; |
---|
| 2840 | + trace_rvt_qp_send_completion(qp, wqe, old_last); |
---|
| 2841 | + last = rvt_qp_complete_swqe(qp, wqe, rdi->wc_opcode[wqe->wr.opcode], |
---|
| 2842 | + status); |
---|
| 2843 | + if (qp->s_acked == old_last) |
---|
| 2844 | + qp->s_acked = last; |
---|
| 2845 | + if (qp->s_cur == old_last) |
---|
| 2846 | + qp->s_cur = last; |
---|
| 2847 | + if (qp->s_tail == old_last) |
---|
| 2848 | + qp->s_tail = last; |
---|
| 2849 | + if (qp->state == IB_QPS_SQD && last == qp->s_cur) |
---|
| 2850 | + qp->s_draining = 0; |
---|
| 2851 | +} |
---|
| 2852 | +EXPORT_SYMBOL(rvt_send_complete); |
---|
| 2853 | + |
---|
| 2854 | +/** |
---|
| 2855 | + * rvt_copy_sge - copy data to SGE memory |
---|
| 2856 | + * @qp: associated QP |
---|
| 2857 | + * @ss: the SGE state |
---|
| 2858 | + * @data: the data to copy |
---|
| 2859 | + * @length: the length of the data |
---|
| 2860 | + * @release: boolean to release MR |
---|
| 2861 | + * @copy_last: do a separate copy of the last 8 bytes |
---|
| 2862 | + */ |
---|
| 2863 | +void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss, |
---|
| 2864 | + void *data, u32 length, |
---|
| 2865 | + bool release, bool copy_last) |
---|
| 2866 | +{ |
---|
| 2867 | + struct rvt_sge *sge = &ss->sge; |
---|
| 2868 | + int i; |
---|
| 2869 | + bool in_last = false; |
---|
| 2870 | + bool cacheless_copy = false; |
---|
| 2871 | + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); |
---|
| 2872 | + struct rvt_wss *wss = rdi->wss; |
---|
| 2873 | + unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode; |
---|
| 2874 | + |
---|
| 2875 | + if (sge_copy_mode == RVT_SGE_COPY_CACHELESS) { |
---|
| 2876 | + cacheless_copy = length >= PAGE_SIZE; |
---|
| 2877 | + } else if (sge_copy_mode == RVT_SGE_COPY_ADAPTIVE) { |
---|
| 2878 | + if (length >= PAGE_SIZE) { |
---|
| 2879 | + /* |
---|
| 2880 | + * NOTE: this *assumes*: |
---|
| 2881 | + * o The first vaddr is the dest. |
---|
| 2882 | + * o If multiple pages, then vaddr is sequential. |
---|
| 2883 | + */ |
---|
| 2884 | + wss_insert(wss, sge->vaddr); |
---|
| 2885 | + if (length >= (2 * PAGE_SIZE)) |
---|
| 2886 | + wss_insert(wss, (sge->vaddr + PAGE_SIZE)); |
---|
| 2887 | + |
---|
| 2888 | + cacheless_copy = wss_exceeds_threshold(wss); |
---|
| 2889 | + } else { |
---|
| 2890 | + wss_advance_clean_counter(wss); |
---|
| 2891 | + } |
---|
| 2892 | + } |
---|
| 2893 | + |
---|
| 2894 | + if (copy_last) { |
---|
| 2895 | + if (length > 8) { |
---|
| 2896 | + length -= 8; |
---|
| 2897 | + } else { |
---|
| 2898 | + copy_last = false; |
---|
| 2899 | + in_last = true; |
---|
| 2900 | + } |
---|
| 2901 | + } |
---|
| 2902 | + |
---|
| 2903 | +again: |
---|
| 2904 | + while (length) { |
---|
| 2905 | + u32 len = rvt_get_sge_length(sge, length); |
---|
| 2906 | + |
---|
| 2907 | + WARN_ON_ONCE(len == 0); |
---|
| 2908 | + if (unlikely(in_last)) { |
---|
| 2909 | + /* enforce byte transfer ordering */ |
---|
| 2910 | + for (i = 0; i < len; i++) |
---|
| 2911 | + ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i]; |
---|
| 2912 | + } else if (cacheless_copy) { |
---|
| 2913 | + cacheless_memcpy(sge->vaddr, data, len); |
---|
| 2914 | + } else { |
---|
| 2915 | + memcpy(sge->vaddr, data, len); |
---|
| 2916 | + } |
---|
| 2917 | + rvt_update_sge(ss, len, release); |
---|
| 2918 | + data += len; |
---|
| 2919 | + length -= len; |
---|
| 2920 | + } |
---|
| 2921 | + |
---|
| 2922 | + if (copy_last) { |
---|
| 2923 | + copy_last = false; |
---|
| 2924 | + in_last = true; |
---|
| 2925 | + length = 8; |
---|
| 2926 | + goto again; |
---|
| 2927 | + } |
---|
| 2928 | +} |
---|
| 2929 | +EXPORT_SYMBOL(rvt_copy_sge); |
---|
| 2930 | + |
---|
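A sketch of how a driver's receive path might call the new helper; the argument choices are illustrative and simply mirror the loopback code below (release the MRs as data lands, and only user QPs ask for the separate trailing-8-byte copy):

/* middle packet of a multi-packet message: copy one MTU and keep going */
rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);

/* last packet: defer the final 8 bytes for user QPs to enforce byte ordering */
rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, rvt_is_user_qp(qp));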
| 2931 | +static enum ib_wc_status loopback_qp_drop(struct rvt_ibport *rvp, |
---|
| 2932 | + struct rvt_qp *sqp) |
---|
| 2933 | +{ |
---|
| 2934 | + rvp->n_pkt_drops++; |
---|
| 2935 | + /* |
---|
| 2936 | + * For RC, the requester would timeout and retry so |
---|
| 2937 | + * shortcut the timeouts and just signal too many retries. |
---|
| 2938 | + */ |
---|
| 2939 | + return sqp->ibqp.qp_type == IB_QPT_RC ? |
---|
| 2940 | + IB_WC_RETRY_EXC_ERR : IB_WC_SUCCESS; |
---|
| 2941 | +} |
---|
| 2942 | + |
---|
| 2943 | +/** |
---|
| 2944 | + * rvt_ruc_loopback - handle UC and RC loopback requests |
---|
| 2945 | + * @sqp: the sending QP |
---|
| 2946 | + * |
---|
| 2947 | + * This is called from rvt_do_send() to forward a WQE addressed to the same HFI |
---|
| 2948 | + * Note that although we are single threaded due to the send engine, we still |
---|
| 2949 | + * have to protect against post_send(). We don't have to worry about |
---|
| 2950 | + * receive interrupts since this is a connected protocol and all packets |
---|
| 2951 | + * will pass through here. |
---|
| 2952 | + */ |
---|
| 2953 | +void rvt_ruc_loopback(struct rvt_qp *sqp) |
---|
| 2954 | +{ |
---|
| 2955 | + struct rvt_ibport *rvp = NULL; |
---|
| 2956 | + struct rvt_dev_info *rdi = ib_to_rvt(sqp->ibqp.device); |
---|
| 2957 | + struct rvt_qp *qp; |
---|
| 2958 | + struct rvt_swqe *wqe; |
---|
| 2959 | + struct rvt_sge *sge; |
---|
| 2960 | + unsigned long flags; |
---|
| 2961 | + struct ib_wc wc; |
---|
| 2962 | + u64 sdata; |
---|
| 2963 | + atomic64_t *maddr; |
---|
| 2964 | + enum ib_wc_status send_status; |
---|
| 2965 | + bool release; |
---|
| 2966 | + int ret; |
---|
| 2967 | + bool copy_last = false; |
---|
| 2968 | + int local_ops = 0; |
---|
| 2969 | + |
---|
| 2970 | + rcu_read_lock(); |
---|
| 2971 | + rvp = rdi->ports[sqp->port_num - 1]; |
---|
| 2972 | + |
---|
| 2973 | + /* |
---|
| 2974 | + * Note that we check the responder QP state after |
---|
| 2975 | + * checking the requester's state. |
---|
| 2976 | + */ |
---|
| 2977 | + |
---|
| 2978 | + qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), rvp, |
---|
| 2979 | + sqp->remote_qpn); |
---|
| 2980 | + |
---|
| 2981 | + spin_lock_irqsave(&sqp->s_lock, flags); |
---|
| 2982 | + |
---|
| 2983 | + /* Return if we are already busy processing a work request. */ |
---|
| 2984 | + if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || |
---|
| 2985 | + !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) |
---|
| 2986 | + goto unlock; |
---|
| 2987 | + |
---|
| 2988 | + sqp->s_flags |= RVT_S_BUSY; |
---|
| 2989 | + |
---|
| 2990 | +again: |
---|
| 2991 | + if (sqp->s_last == READ_ONCE(sqp->s_head)) |
---|
| 2992 | + goto clr_busy; |
---|
| 2993 | + wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); |
---|
| 2994 | + |
---|
| 2995 | + /* Return if it is not OK to start a new work request. */ |
---|
| 2996 | + if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) { |
---|
| 2997 | + if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND)) |
---|
| 2998 | + goto clr_busy; |
---|
| 2999 | + /* We are in the error state, flush the work request. */ |
---|
| 3000 | + send_status = IB_WC_WR_FLUSH_ERR; |
---|
| 3001 | + goto flush_send; |
---|
| 3002 | + } |
---|
| 3003 | + |
---|
| 3004 | + /* |
---|
| 3005 | + * We can rely on the entry not changing without the s_lock |
---|
| 3006 | + * being held until we update s_last. |
---|
| 3007 | + * We increment s_cur to indicate s_last is in progress. |
---|
| 3008 | + */ |
---|
| 3009 | + if (sqp->s_last == sqp->s_cur) { |
---|
| 3010 | + if (++sqp->s_cur >= sqp->s_size) |
---|
| 3011 | + sqp->s_cur = 0; |
---|
| 3012 | + } |
---|
| 3013 | + spin_unlock_irqrestore(&sqp->s_lock, flags); |
---|
| 3014 | + |
---|
| 3015 | + if (!qp) { |
---|
| 3016 | + send_status = loopback_qp_drop(rvp, sqp); |
---|
| 3017 | + goto serr_no_r_lock; |
---|
| 3018 | + } |
---|
| 3019 | + spin_lock_irqsave(&qp->r_lock, flags); |
---|
| 3020 | + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) || |
---|
| 3021 | + qp->ibqp.qp_type != sqp->ibqp.qp_type) { |
---|
| 3022 | + send_status = loopback_qp_drop(rvp, sqp); |
---|
| 3023 | + goto serr; |
---|
| 3024 | + } |
---|
| 3025 | + |
---|
| 3026 | + memset(&wc, 0, sizeof(wc)); |
---|
| 3027 | + send_status = IB_WC_SUCCESS; |
---|
| 3028 | + |
---|
| 3029 | + release = true; |
---|
| 3030 | + sqp->s_sge.sge = wqe->sg_list[0]; |
---|
| 3031 | + sqp->s_sge.sg_list = wqe->sg_list + 1; |
---|
| 3032 | + sqp->s_sge.num_sge = wqe->wr.num_sge; |
---|
| 3033 | + sqp->s_len = wqe->length; |
---|
| 3034 | + switch (wqe->wr.opcode) { |
---|
| 3035 | + case IB_WR_REG_MR: |
---|
| 3036 | + goto send_comp; |
---|
| 3037 | + |
---|
| 3038 | + case IB_WR_LOCAL_INV: |
---|
| 3039 | + if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) { |
---|
| 3040 | + if (rvt_invalidate_rkey(sqp, |
---|
| 3041 | + wqe->wr.ex.invalidate_rkey)) |
---|
| 3042 | + send_status = IB_WC_LOC_PROT_ERR; |
---|
| 3043 | + local_ops = 1; |
---|
| 3044 | + } |
---|
| 3045 | + goto send_comp; |
---|
| 3046 | + |
---|
| 3047 | + case IB_WR_SEND_WITH_INV: |
---|
| 3048 | + case IB_WR_SEND_WITH_IMM: |
---|
| 3049 | + case IB_WR_SEND: |
---|
| 3050 | + ret = rvt_get_rwqe(qp, false); |
---|
| 3051 | + if (ret < 0) |
---|
| 3052 | + goto op_err; |
---|
| 3053 | + if (!ret) |
---|
| 3054 | + goto rnr_nak; |
---|
| 3055 | + if (wqe->length > qp->r_len) |
---|
| 3056 | + goto inv_err; |
---|
| 3057 | + switch (wqe->wr.opcode) { |
---|
| 3058 | + case IB_WR_SEND_WITH_INV: |
---|
| 3059 | + if (!rvt_invalidate_rkey(qp, |
---|
| 3060 | + wqe->wr.ex.invalidate_rkey)) { |
---|
| 3061 | + wc.wc_flags = IB_WC_WITH_INVALIDATE; |
---|
| 3062 | + wc.ex.invalidate_rkey = |
---|
| 3063 | + wqe->wr.ex.invalidate_rkey; |
---|
| 3064 | + } |
---|
| 3065 | + break; |
---|
| 3066 | + case IB_WR_SEND_WITH_IMM: |
---|
| 3067 | + wc.wc_flags = IB_WC_WITH_IMM; |
---|
| 3068 | + wc.ex.imm_data = wqe->wr.ex.imm_data; |
---|
| 3069 | + break; |
---|
| 3070 | + default: |
---|
| 3071 | + break; |
---|
| 3072 | + } |
---|
| 3073 | + break; |
---|
| 3074 | + |
---|
| 3075 | + case IB_WR_RDMA_WRITE_WITH_IMM: |
---|
| 3076 | + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) |
---|
| 3077 | + goto inv_err; |
---|
| 3078 | + wc.wc_flags = IB_WC_WITH_IMM; |
---|
| 3079 | + wc.ex.imm_data = wqe->wr.ex.imm_data; |
---|
| 3080 | + ret = rvt_get_rwqe(qp, true); |
---|
| 3081 | + if (ret < 0) |
---|
| 3082 | + goto op_err; |
---|
| 3083 | + if (!ret) |
---|
| 3084 | + goto rnr_nak; |
---|
| 3085 | + /* skip setting copy_last and the qp_access_flags recheck below */ |
---|
| 3086 | + goto do_write; |
---|
| 3087 | + case IB_WR_RDMA_WRITE: |
---|
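| | + /* |
---|
| | + * For user QPs the payload copy defers the tail of the data so an |
---|
| | + * application polling the last bytes of the buffer cannot observe |
---|
| | + * them before the rest of the write has landed. |
---|
| | + */ |
---|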
| 3088 | + copy_last = rvt_is_user_qp(qp); |
---|
| 3089 | + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) |
---|
| 3090 | + goto inv_err; |
---|
| 3091 | +do_write: |
---|
| 3092 | + if (wqe->length == 0) |
---|
| 3093 | + break; |
---|
| 3094 | + if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length, |
---|
| 3095 | + wqe->rdma_wr.remote_addr, |
---|
| 3096 | + wqe->rdma_wr.rkey, |
---|
| 3097 | + IB_ACCESS_REMOTE_WRITE))) |
---|
| 3098 | + goto acc_err; |
---|
| 3099 | + qp->r_sge.sg_list = NULL; |
---|
| 3100 | + qp->r_sge.num_sge = 1; |
---|
| 3101 | + qp->r_sge.total_len = wqe->length; |
---|
| 3102 | + break; |
---|
| 3103 | + |
---|
| 3104 | + case IB_WR_RDMA_READ: |
---|
| 3105 | + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) |
---|
| 3106 | + goto inv_err; |
---|
| 3107 | + if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, |
---|
| 3108 | + wqe->rdma_wr.remote_addr, |
---|
| 3109 | + wqe->rdma_wr.rkey, |
---|
| 3110 | + IB_ACCESS_REMOTE_READ))) |
---|
| 3111 | + goto acc_err; |
---|
| 3112 | + release = false; |
---|
| 3113 | + sqp->s_sge.sg_list = NULL; |
---|
| 3114 | + sqp->s_sge.num_sge = 1; |
---|
| 3115 | + qp->r_sge.sge = wqe->sg_list[0]; |
---|
| 3116 | + qp->r_sge.sg_list = wqe->sg_list + 1; |
---|
| 3117 | + qp->r_sge.num_sge = wqe->wr.num_sge; |
---|
| 3118 | + qp->r_sge.total_len = wqe->length; |
---|
| 3119 | + break; |
---|
| 3120 | + |
---|
| 3121 | + case IB_WR_ATOMIC_CMP_AND_SWP: |
---|
| 3122 | + case IB_WR_ATOMIC_FETCH_AND_ADD: |
---|
| 3123 | + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) |
---|
| 3124 | + goto inv_err; |
---|
| 3125 | + if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1))) |
---|
| 3126 | + goto inv_err; |
---|
| 3127 | + if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), |
---|
| 3128 | + wqe->atomic_wr.remote_addr, |
---|
| 3129 | + wqe->atomic_wr.rkey, |
---|
| 3130 | + IB_ACCESS_REMOTE_ATOMIC))) |
---|
| 3131 | + goto acc_err; |
---|
| 3132 | + /* Perform atomic OP and save result. */ |
---|
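| | + /* |
---|
| | + * Both atomics return the original value at the target address: |
---|
| | + * fetch-and-add recovers it by subtracting the addend from the |
---|
| | + * post-add result, and cmpxchg() returns the prior contents whether |
---|
| | + * or not the swap occurred. The value is stored through the |
---|
| | + * requester's first SGE. |
---|
| | + */ |
---|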
| 3133 | + maddr = (atomic64_t *)qp->r_sge.sge.vaddr; |
---|
| 3134 | + sdata = wqe->atomic_wr.compare_add; |
---|
| 3135 | + *(u64 *)sqp->s_sge.sge.vaddr = |
---|
| 3136 | + (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? |
---|
| 3137 | + (u64)atomic64_add_return(sdata, maddr) - sdata : |
---|
| 3138 | + (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, |
---|
| 3139 | + sdata, wqe->atomic_wr.swap); |
---|
| 3140 | + rvt_put_mr(qp->r_sge.sge.mr); |
---|
| 3141 | + qp->r_sge.num_sge = 0; |
---|
| 3142 | + goto send_comp; |
---|
| 3143 | + |
---|
| 3144 | + default: |
---|
| 3145 | + send_status = IB_WC_LOC_QP_OP_ERR; |
---|
| 3146 | + goto serr; |
---|
| 3147 | + } |
---|
| 3148 | + |
---|
| 3149 | + sge = &sqp->s_sge.sge; |
---|
| 3150 | + while (sqp->s_len) { |
---|
| 3151 | + u32 len = rvt_get_sge_length(sge, sqp->s_len); |
---|
| 3152 | + |
---|
| 3153 | + WARN_ON_ONCE(len == 0); |
---|
| 3154 | + rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, |
---|
| 3155 | + len, release, copy_last); |
---|
| 3156 | + rvt_update_sge(&sqp->s_sge, len, !release); |
---|
| 3157 | + sqp->s_len -= len; |
---|
| 3158 | + } |
---|
| 3159 | + if (release) |
---|
| 3160 | + rvt_put_ss(&qp->r_sge); |
---|
| 3161 | + |
---|
| 3162 | + if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) |
---|
| 3163 | + goto send_comp; |
---|
| 3164 | + |
---|
| 3165 | + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) |
---|
| 3166 | + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; |
---|
| 3167 | + else |
---|
| 3168 | + wc.opcode = IB_WC_RECV; |
---|
| 3169 | + wc.wr_id = qp->r_wr_id; |
---|
| 3170 | + wc.status = IB_WC_SUCCESS; |
---|
| 3171 | + wc.byte_len = wqe->length; |
---|
| 3172 | + wc.qp = &qp->ibqp; |
---|
| 3173 | + wc.src_qp = qp->remote_qpn; |
---|
| 3174 | + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; |
---|
| 3175 | + wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); |
---|
| 3176 | + wc.port_num = 1; |
---|
| 3177 | + /* Signal completion event if the solicited bit is set. */ |
---|
| 3178 | + rvt_recv_cq(qp, &wc, wqe->wr.send_flags & IB_SEND_SOLICITED); |
---|
| 3179 | + |
---|
| 3180 | +send_comp: |
---|
| 3181 | + spin_unlock_irqrestore(&qp->r_lock, flags); |
---|
| 3182 | + spin_lock_irqsave(&sqp->s_lock, flags); |
---|
| 3183 | + rvp->n_loop_pkts++; |
---|
| 3184 | +flush_send: |
---|
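| | + /* each completed WQE rearms the RNR retry budget */ |
---|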
| 3185 | + sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; |
---|
| 3186 | + spin_lock(&sqp->r_lock); |
---|
| 3187 | + rvt_send_complete(sqp, wqe, send_status); |
---|
| 3188 | + spin_unlock(&sqp->r_lock); |
---|
| 3189 | + if (local_ops) { |
---|
| 3190 | + atomic_dec(&sqp->local_ops_pending); |
---|
| 3191 | + local_ops = 0; |
---|
| 3192 | + } |
---|
| 3193 | + goto again; |
---|
| 3194 | + |
---|
| 3195 | +rnr_nak: |
---|
| 3196 | + /* Handle RNR NAK */ |
---|
| 3197 | + if (qp->ibqp.qp_type == IB_QPT_UC) |
---|
| 3198 | + goto send_comp; |
---|
| 3199 | + rvp->n_rnr_naks++; |
---|
| 3200 | + /* |
---|
| 3201 | + * Note: we don't need the s_lock held since the BUSY flag |
---|
| 3202 | + * makes this single threaded. |
---|
| 3203 | + */ |
---|
| 3204 | + if (sqp->s_rnr_retry == 0) { |
---|
| 3205 | + send_status = IB_WC_RNR_RETRY_EXC_ERR; |
---|
| 3206 | + goto serr; |
---|
| 3207 | + } |
---|
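| | + /* a retry count of 7 means "retry forever"; only finite counts are decremented */ |
---|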
| 3208 | + if (sqp->s_rnr_retry_cnt < 7) |
---|
| 3209 | + sqp->s_rnr_retry--; |
---|
| 3210 | + spin_unlock_irqrestore(&qp->r_lock, flags); |
---|
| 3211 | + spin_lock_irqsave(&sqp->s_lock, flags); |
---|
| 3212 | + if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK)) |
---|
| 3213 | + goto clr_busy; |
---|
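| | + /* |
---|
| | + * The 5-bit minimum RNR timer code is shifted into the AETH credit |
---|
| | + * field position, which is the encoding rvt_add_rnr_timer() expects. |
---|
| | + */ |
---|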
| 3214 | + rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer << |
---|
| 3215 | + IB_AETH_CREDIT_SHIFT); |
---|
| 3216 | + goto clr_busy; |
---|
| 3217 | + |
---|
| 3218 | +op_err: |
---|
| 3219 | + send_status = IB_WC_REM_OP_ERR; |
---|
| 3220 | + wc.status = IB_WC_LOC_QP_OP_ERR; |
---|
| 3221 | + goto err; |
---|
| 3222 | + |
---|
| 3223 | +inv_err: |
---|
| 3224 | + send_status = |
---|
| 3225 | + sqp->ibqp.qp_type == IB_QPT_RC ? |
---|
| 3226 | + IB_WC_REM_INV_REQ_ERR : |
---|
| 3227 | + IB_WC_SUCCESS; |
---|
| 3228 | + wc.status = IB_WC_LOC_QP_OP_ERR; |
---|
| 3229 | + goto err; |
---|
| 3230 | + |
---|
| 3231 | +acc_err: |
---|
| 3232 | + send_status = IB_WC_REM_ACCESS_ERR; |
---|
| 3233 | + wc.status = IB_WC_LOC_PROT_ERR; |
---|
| 3234 | +err: |
---|
| 3235 | + /* responder goes to error state */ |
---|
| 3236 | + rvt_rc_error(qp, wc.status); |
---|
| 3237 | + |
---|
| 3238 | +serr: |
---|
| 3239 | + spin_unlock_irqrestore(&qp->r_lock, flags); |
---|
| 3240 | +serr_no_r_lock: |
---|
| 3241 | + spin_lock_irqsave(&sqp->s_lock, flags); |
---|
| 3242 | + spin_lock(&sqp->r_lock); |
---|
| 3243 | + rvt_send_complete(sqp, wqe, send_status); |
---|
| 3244 | + spin_unlock(&sqp->r_lock); |
---|
| 3245 | + if (sqp->ibqp.qp_type == IB_QPT_RC) { |
---|
| 3246 | + int lastwqe; |
---|
| 3247 | + |
---|
| 3248 | + spin_lock(&sqp->r_lock); |
---|
| 3249 | + lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); |
---|
| 3250 | + spin_unlock(&sqp->r_lock); |
---|
| 3251 | + |
---|
| 3252 | + sqp->s_flags &= ~RVT_S_BUSY; |
---|
| 3253 | + spin_unlock_irqrestore(&sqp->s_lock, flags); |
---|
| 3254 | + if (lastwqe) { |
---|
| 3255 | + struct ib_event ev; |
---|
| 3256 | + |
---|
| 3257 | + ev.device = sqp->ibqp.device; |
---|
| 3258 | + ev.element.qp = &sqp->ibqp; |
---|
| 3259 | + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; |
---|
| 3260 | + sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); |
---|
| 3261 | + } |
---|
| 3262 | + goto done; |
---|
| 3263 | + } |
---|
| 3264 | +clr_busy: |
---|
| 3265 | + sqp->s_flags &= ~RVT_S_BUSY; |
---|
| 3266 | +unlock: |
---|
| 3267 | + spin_unlock_irqrestore(&sqp->s_lock, flags); |
---|
| 3268 | +done: |
---|
| 3269 | + rcu_read_unlock(); |
---|
| 3270 | +} |
---|
| 3271 | +EXPORT_SYMBOL(rvt_ruc_loopback); |
---|
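
For context, rvt_ruc_loopback() is exported so that an rdmavt-based driver's send engine can divert RC/UC traffic addressed to its own port into this software path instead of the hardware transmit path. The following is only an illustrative sketch of such a caller; drv_do_send(), drv_dlid_is_local() and drv_hw_send() are hypothetical driver hooks, not part of this patch.

```c
/* Hypothetical driver send engine that short-circuits same-port
 * RC/UC traffic into the rdmavt software loopback path.
 */
static void drv_do_send(struct rvt_qp *qp)
{
	if ((qp->ibqp.qp_type == IB_QPT_RC ||
	     qp->ibqp.qp_type == IB_QPT_UC) &&
	    drv_dlid_is_local(qp)) {
		/* WQEs are consumed and completed inline */
		rvt_ruc_loopback(qp);
		return;
	}

	drv_hw_send(qp);	/* normal hardware transmit path */
}
```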