| .. | .. |
|---|
| 1 | 1 | /* |
|---|
| 2 | | - * Copyright(c) 2015 - 2018 Intel Corporation. |
|---|
| 2 | + * Copyright(c) 2015 - 2020 Intel Corporation. |
|---|
| 3 | 3 | * |
|---|
| 4 | 4 | * This file is provided under a dual BSD/GPLv2 license. When using or |
|---|
| 5 | 5 | * redistributing this file, you may do so under either license. |
|---|
| .. | .. |
|---|
| 66 | 66 | #include "vnic.h" |
|---|
| 67 | 67 | #include "fault.h" |
|---|
| 68 | 68 | #include "affinity.h" |
|---|
| 69 | +#include "ipoib.h" |
|---|
| 69 | 70 | |
|---|
| 70 | 71 | static unsigned int hfi1_lkey_table_size = 16; |
|---|
| 71 | 72 | module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, |
|---|
| .. | .. |
|---|
| 130 | 131 | module_param(piothreshold, ushort, S_IRUGO); |
|---|
| 131 | 132 | MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio"); |
|---|
| 132 | 133 | |
|---|
| 133 | | -#define COPY_CACHELESS 1 |
|---|
| 134 | | -#define COPY_ADAPTIVE 2 |
|---|
| 135 | 134 | static unsigned int sge_copy_mode; |
|---|
| 136 | 135 | module_param(sge_copy_mode, uint, S_IRUGO); |
|---|
| 137 | 136 | MODULE_PARM_DESC(sge_copy_mode, |
|---|
| .. | .. |
|---|
| 149 | 148 | /* Length of buffer to create verbs txreq cache name */ |
|---|
| 150 | 149 | #define TXREQ_NAME_LEN 24 |
|---|
| 151 | 150 | |
|---|
| 152 | | -static uint wss_threshold; |
|---|
| 151 | +static uint wss_threshold = 80; |
|---|
| 153 | 152 | module_param(wss_threshold, uint, S_IRUGO); |
|---|
| 154 | 153 | MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); |
|---|
| 155 | 154 | static uint wss_clean_period = 256; |
|---|
| 156 | 155 | module_param(wss_clean_period, uint, S_IRUGO); |
|---|
| 157 | 156 | MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned"); |
|---|
| 158 | 157 | |
|---|
| 159 | | -/* memory working set size */ |
|---|
| 160 | | -struct hfi1_wss { |
|---|
| 161 | | - unsigned long *entries; |
|---|
| 162 | | - atomic_t total_count; |
|---|
| 163 | | - atomic_t clean_counter; |
|---|
| 164 | | - atomic_t clean_entry; |
|---|
| 165 | | - |
|---|
| 166 | | - int threshold; |
|---|
| 167 | | - int num_entries; |
|---|
| 168 | | - long pages_mask; |
|---|
| 169 | | -}; |
|---|
| 170 | | - |
|---|
| 171 | | -static struct hfi1_wss wss; |
|---|
| 172 | | - |
|---|
| 173 | | -int hfi1_wss_init(void) |
|---|
| 174 | | -{ |
|---|
| 175 | | - long llc_size; |
|---|
| 176 | | - long llc_bits; |
|---|
| 177 | | - long table_size; |
|---|
| 178 | | - long table_bits; |
|---|
| 179 | | - |
|---|
| 180 | | - /* check for a valid percent range - default to 80 if none or invalid */ |
|---|
| 181 | | - if (wss_threshold < 1 || wss_threshold > 100) |
|---|
| 182 | | - wss_threshold = 80; |
|---|
| 183 | | - /* reject a wildly large period */ |
|---|
| 184 | | - if (wss_clean_period > 1000000) |
|---|
| 185 | | - wss_clean_period = 256; |
|---|
| 186 | | - /* reject a zero period */ |
|---|
| 187 | | - if (wss_clean_period == 0) |
|---|
| 188 | | - wss_clean_period = 1; |
|---|
| 189 | | - |
|---|
| 190 | | - /* |
|---|
| 191 | | - * Calculate the table size - the next power of 2 larger than the |
|---|
| 192 | | - * LLC size. LLC size is in KiB. |
|---|
| 193 | | - */ |
|---|
| 194 | | - llc_size = wss_llc_size() * 1024; |
|---|
| 195 | | - table_size = roundup_pow_of_two(llc_size); |
|---|
| 196 | | - |
|---|
| 197 | | - /* one bit per page in rounded up table */ |
|---|
| 198 | | - llc_bits = llc_size / PAGE_SIZE; |
|---|
| 199 | | - table_bits = table_size / PAGE_SIZE; |
|---|
| 200 | | - wss.pages_mask = table_bits - 1; |
|---|
| 201 | | - wss.num_entries = table_bits / BITS_PER_LONG; |
|---|
| 202 | | - |
|---|
| 203 | | - wss.threshold = (llc_bits * wss_threshold) / 100; |
|---|
| 204 | | - if (wss.threshold == 0) |
|---|
| 205 | | - wss.threshold = 1; |
|---|
| 206 | | - |
|---|
| 207 | | - atomic_set(&wss.clean_counter, wss_clean_period); |
|---|
| 208 | | - |
|---|
| 209 | | - wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries), |
|---|
| 210 | | - GFP_KERNEL); |
|---|
| 211 | | - if (!wss.entries) { |
|---|
| 212 | | - hfi1_wss_exit(); |
|---|
| 213 | | - return -ENOMEM; |
|---|
| 214 | | - } |
|---|
| 215 | | - |
|---|
| 216 | | - return 0; |
|---|
| 217 | | -} |
|---|
| 218 | | - |
|---|
| 219 | | -void hfi1_wss_exit(void) |
|---|
| 220 | | -{ |
|---|
| 221 | | - /* coded to handle partially initialized and repeat callers */ |
|---|
| 222 | | - kfree(wss.entries); |
|---|
| 223 | | - wss.entries = NULL; |
|---|
| 224 | | -} |
|---|
| 225 | | - |
|---|
| 226 | | -/* |
|---|
| 227 | | - * Advance the clean counter. When the clean period has expired, |
|---|
| 228 | | - * clean an entry. |
|---|
| 229 | | - * |
|---|
| 230 | | - * This is implemented in atomics to avoid locking. Because multiple |
|---|
| 231 | | - * variables are involved, it can be racy which can lead to slightly |
|---|
| 232 | | - * inaccurate information. Since this is only a heuristic, this is |
|---|
| 233 | | - * OK. Any innaccuracies will clean themselves out as the counter |
|---|
| 234 | | - * advances. That said, it is unlikely the entry clean operation will |
|---|
| 235 | | - * race - the next possible racer will not start until the next clean |
|---|
| 236 | | - * period. |
|---|
| 237 | | - * |
|---|
| 238 | | - * The clean counter is implemented as a decrement to zero. When zero |
|---|
| 239 | | - * is reached an entry is cleaned. |
|---|
| 240 | | - */ |
|---|
| 241 | | -static void wss_advance_clean_counter(void) |
|---|
| 242 | | -{ |
|---|
| 243 | | - int entry; |
|---|
| 244 | | - int weight; |
|---|
| 245 | | - unsigned long bits; |
|---|
| 246 | | - |
|---|
| 247 | | - /* become the cleaner if we decrement the counter to zero */ |
|---|
| 248 | | - if (atomic_dec_and_test(&wss.clean_counter)) { |
|---|
| 249 | | - /* |
|---|
| 250 | | - * Set, not add, the clean period. This avoids an issue |
|---|
| 251 | | - * where the counter could decrement below the clean period. |
|---|
| 252 | | - * Doing a set can result in lost decrements, slowing the |
|---|
| 253 | | - * clean advance. Since this a heuristic, this possible |
|---|
| 254 | | - * slowdown is OK. |
|---|
| 255 | | - * |
|---|
| 256 | | - * An alternative is to loop, advancing the counter by a |
|---|
| 257 | | - * clean period until the result is > 0. However, this could |
|---|
| 258 | | - * lead to several threads keeping another in the clean loop. |
|---|
| 259 | | - * This could be mitigated by limiting the number of times |
|---|
| 260 | | - * we stay in the loop. |
|---|
| 261 | | - */ |
|---|
| 262 | | - atomic_set(&wss.clean_counter, wss_clean_period); |
|---|
| 263 | | - |
|---|
| 264 | | - /* |
|---|
| 265 | | - * Uniquely grab the entry to clean and move to next. |
|---|
| 266 | | - * The current entry is always the lower bits of |
|---|
| 267 | | - * wss.clean_entry. The table size, wss.num_entries, |
|---|
| 268 | | - * is always a power-of-2. |
|---|
| 269 | | - */ |
|---|
| 270 | | - entry = (atomic_inc_return(&wss.clean_entry) - 1) |
|---|
| 271 | | - & (wss.num_entries - 1); |
|---|
| 272 | | - |
|---|
| 273 | | - /* clear the entry and count the bits */ |
|---|
| 274 | | - bits = xchg(&wss.entries[entry], 0); |
|---|
| 275 | | - weight = hweight64((u64)bits); |
|---|
| 276 | | - /* only adjust the contended total count if needed */ |
|---|
| 277 | | - if (weight) |
|---|
| 278 | | - atomic_sub(weight, &wss.total_count); |
|---|
| 279 | | - } |
|---|
| 280 | | -} |
|---|
| 281 | | - |
|---|
| 282 | | -/* |
|---|
| 283 | | - * Insert the given address into the working set array. |
|---|
| 284 | | - */ |
|---|
| 285 | | -static void wss_insert(void *address) |
|---|
| 286 | | -{ |
|---|
| 287 | | - u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask; |
|---|
| 288 | | - u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */ |
|---|
| 289 | | - u32 nr = page & (BITS_PER_LONG - 1); |
|---|
| 290 | | - |
|---|
| 291 | | - if (!test_and_set_bit(nr, &wss.entries[entry])) |
|---|
| 292 | | - atomic_inc(&wss.total_count); |
|---|
| 293 | | - |
|---|
| 294 | | - wss_advance_clean_counter(); |
|---|
| 295 | | -} |
|---|
| 296 | | - |
|---|
| 297 | | -/* |
|---|
| 298 | | - * Is the working set larger than the threshold? |
|---|
| 299 | | - */ |
|---|
| 300 | | -static inline bool wss_exceeds_threshold(void) |
|---|
| 301 | | -{ |
|---|
| 302 | | - return atomic_read(&wss.total_count) >= wss.threshold; |
|---|
| 303 | | -} |
|---|
| 304 | | - |
|---|
| 305 | 158 | /* |
|---|
| 306 | 159 | * Translate ib_wr_opcode into ib_wc_opcode. |
|---|
| 307 | 160 | */ |
|---|
| 308 | 161 | const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { |
|---|
| 309 | 162 | [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, |
|---|
| 163 | + [IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE, |
|---|
| 310 | 164 | [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, |
|---|
| 311 | 165 | [IB_WR_SEND] = IB_WC_SEND, |
|---|
| 312 | 166 | [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, |
|---|
| 313 | 167 | [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, |
|---|
| 168 | + [IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ, |
|---|
| 314 | 169 | [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, |
|---|
| 315 | 170 | [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, |
|---|
| 316 | 171 | [IB_WR_SEND_WITH_INV] = IB_WC_SEND, |
|---|
| .. | .. |
|---|
| 346 | 201 | [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, |
|---|
| 347 | 202 | [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, |
|---|
| 348 | 203 | [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4, |
|---|
| 204 | + [IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36, |
|---|
| 205 | + [IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36, |
|---|
| 206 | + [IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36, |
|---|
| 207 | + [IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36, |
|---|
| 208 | + [IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36, |
|---|
| 209 | + [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36, |
|---|
| 210 | + [IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36, |
|---|
| 211 | + [IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36, |
|---|
| 349 | 212 | /* UC */ |
|---|
| 350 | 213 | [IB_OPCODE_UC_SEND_FIRST] = 12 + 8, |
|---|
| 351 | 214 | [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8, |
|---|
| .. | .. |
|---|
| 389 | 252 | [IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv, |
|---|
| 390 | 253 | [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv, |
|---|
| 391 | 254 | [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv, |
|---|
| 255 | + |
|---|
| 256 | + /* TID RDMA has separate handlers for different opcodes. */ |
|---|
| 257 | + [IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req, |
|---|
| 258 | + [IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp, |
|---|
| 259 | + [IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data, |
|---|
| 260 | + [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data, |
|---|
| 261 | + [IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req, |
|---|
| 262 | + [IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp, |
|---|
| 263 | + [IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync, |
|---|
| 264 | + [IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack, |
|---|
| 265 | + |
|---|
| 392 | 266 | /* UC */ |
|---|
| 393 | 267 | [IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv, |
|---|
| 394 | 268 | [IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv, |
|---|
| .. | .. |
|---|
| 436 | 310 | */ |
|---|
| 437 | 311 | __be64 ib_hfi1_sys_image_guid; |
|---|
| 438 | 312 | |
|---|
| 439 | | -/** |
|---|
| 440 | | - * hfi1_copy_sge - copy data to SGE memory |
|---|
| 441 | | - * @ss: the SGE state |
|---|
| 442 | | - * @data: the data to copy |
|---|
| 443 | | - * @length: the length of the data |
|---|
| 444 | | - * @release: boolean to release MR |
|---|
| 445 | | - * @copy_last: do a separate copy of the last 8 bytes |
|---|
| 446 | | - */ |
|---|
| 447 | | -void hfi1_copy_sge( |
|---|
| 448 | | - struct rvt_sge_state *ss, |
|---|
| 449 | | - void *data, u32 length, |
|---|
| 450 | | - bool release, |
|---|
| 451 | | - bool copy_last) |
|---|
| 452 | | -{ |
|---|
| 453 | | - struct rvt_sge *sge = &ss->sge; |
|---|
| 454 | | - int i; |
|---|
| 455 | | - bool in_last = false; |
|---|
| 456 | | - bool cacheless_copy = false; |
|---|
| 457 | | - |
|---|
| 458 | | - if (sge_copy_mode == COPY_CACHELESS) { |
|---|
| 459 | | - cacheless_copy = length >= PAGE_SIZE; |
|---|
| 460 | | - } else if (sge_copy_mode == COPY_ADAPTIVE) { |
|---|
| 461 | | - if (length >= PAGE_SIZE) { |
|---|
| 462 | | - /* |
|---|
| 463 | | - * NOTE: this *assumes*: |
|---|
| 464 | | - * o The first vaddr is the dest. |
|---|
| 465 | | - * o If multiple pages, then vaddr is sequential. |
|---|
| 466 | | - */ |
|---|
| 467 | | - wss_insert(sge->vaddr); |
|---|
| 468 | | - if (length >= (2 * PAGE_SIZE)) |
|---|
| 469 | | - wss_insert(sge->vaddr + PAGE_SIZE); |
|---|
| 470 | | - |
|---|
| 471 | | - cacheless_copy = wss_exceeds_threshold(); |
|---|
| 472 | | - } else { |
|---|
| 473 | | - wss_advance_clean_counter(); |
|---|
| 474 | | - } |
|---|
| 475 | | - } |
|---|
| 476 | | - if (copy_last) { |
|---|
| 477 | | - if (length > 8) { |
|---|
| 478 | | - length -= 8; |
|---|
| 479 | | - } else { |
|---|
| 480 | | - copy_last = false; |
|---|
| 481 | | - in_last = true; |
|---|
| 482 | | - } |
|---|
| 483 | | - } |
|---|
| 484 | | - |
|---|
| 485 | | -again: |
|---|
| 486 | | - while (length) { |
|---|
| 487 | | - u32 len = rvt_get_sge_length(sge, length); |
|---|
| 488 | | - |
|---|
| 489 | | - WARN_ON_ONCE(len == 0); |
|---|
| 490 | | - if (unlikely(in_last)) { |
|---|
| 491 | | - /* enforce byte transfer ordering */ |
|---|
| 492 | | - for (i = 0; i < len; i++) |
|---|
| 493 | | - ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i]; |
|---|
| 494 | | - } else if (cacheless_copy) { |
|---|
| 495 | | - cacheless_memcpy(sge->vaddr, data, len); |
|---|
| 496 | | - } else { |
|---|
| 497 | | - memcpy(sge->vaddr, data, len); |
|---|
| 498 | | - } |
|---|
| 499 | | - rvt_update_sge(ss, len, release); |
|---|
| 500 | | - data += len; |
|---|
| 501 | | - length -= len; |
|---|
| 502 | | - } |
|---|
| 503 | | - |
|---|
| 504 | | - if (copy_last) { |
|---|
| 505 | | - copy_last = false; |
|---|
| 506 | | - in_last = true; |
|---|
| 507 | | - length = 8; |
|---|
| 508 | | - goto again; |
|---|
| 509 | | - } |
|---|
| 510 | | -} |
|---|
| 511 | | - |
|---|
| 512 | 313 | /* |
|---|
| 513 | 314 | * Make sure the QP is ready and able to accept the given opcode. |
|---|
| 514 | 315 | */ |
|---|
| .. | .. |
|---|
| 527 | 328 | static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) |
|---|
| 528 | 329 | { |
|---|
| 529 | 330 | #ifdef CONFIG_FAULT_INJECTION |
|---|
| 530 | | - if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) |
|---|
| 331 | + if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) { |
|---|
| 531 | 332 | /* |
|---|
| 532 | 333 | * In order to drop non-IB traffic we |
|---|
| 533 | 334 | * set PbcInsertHrc to NONE (0x2). |
|---|
| .. | .. |
|---|
| 538 | 339 | * packet will not be delivered to the |
|---|
| 539 | 340 | * correct context. |
|---|
| 540 | 341 | */ |
|---|
| 342 | + pbc &= ~PBC_INSERT_HCRC_SMASK; |
|---|
| 541 | 343 | pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT; |
|---|
| 542 | | - else |
|---|
| 344 | + } else { |
|---|
| 543 | 345 | /* |
|---|
| 544 | 346 | * In order to drop regular verbs |
|---|
| 545 | 347 | * traffic we set the PbcTestEbp |
|---|
| .. | .. |
|---|
| 549 | 351 | * triggered and will be dropped. |
|---|
| 550 | 352 | */ |
|---|
| 551 | 353 | pbc |= PBC_TEST_EBP; |
|---|
| 354 | + } |
|---|
| 552 | 355 | #endif |
|---|
| 553 | 356 | return pbc; |
|---|
| 357 | +} |
|---|
| 358 | + |
|---|
| 359 | +static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet) |
|---|
| 360 | +{ |
|---|
| 361 | + if (packet->qp->ibqp.qp_type != IB_QPT_RC || |
|---|
| 362 | + !(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) |
|---|
| 363 | + return NULL; |
|---|
| 364 | + if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA) |
|---|
| 365 | + return opcode_handler_tbl[opcode]; |
|---|
| 366 | + return NULL; |
|---|
| 367 | +} |
|---|
| 368 | + |
|---|
| 369 | +void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet) |
|---|
| 370 | +{ |
|---|
| 371 | + struct hfi1_ctxtdata *rcd = packet->rcd; |
|---|
| 372 | + struct ib_header *hdr = packet->hdr; |
|---|
| 373 | + u32 tlen = packet->tlen; |
|---|
| 374 | + struct hfi1_pportdata *ppd = rcd->ppd; |
|---|
| 375 | + struct hfi1_ibport *ibp = &ppd->ibport_data; |
|---|
| 376 | + struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; |
|---|
| 377 | + opcode_handler opcode_handler; |
|---|
| 378 | + unsigned long flags; |
|---|
| 379 | + u32 qp_num; |
|---|
| 380 | + int lnh; |
|---|
| 381 | + u8 opcode; |
|---|
| 382 | + |
|---|
| 383 | + /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */ |
|---|
| 384 | + if (unlikely(tlen < 15 * sizeof(u32))) |
|---|
| 385 | + goto drop; |
|---|
| 386 | + |
|---|
| 387 | + lnh = be16_to_cpu(hdr->lrh[0]) & 3; |
|---|
| 388 | + if (lnh != HFI1_LRH_BTH) |
|---|
| 389 | + goto drop; |
|---|
| 390 | + |
|---|
| 391 | + packet->ohdr = &hdr->u.oth; |
|---|
| 392 | + trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); |
|---|
| 393 | + |
|---|
| 394 | + opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); |
|---|
| 395 | + inc_opstats(tlen, &rcd->opstats->stats[opcode]); |
|---|
| 396 | + |
|---|
| 397 | + /* verbs_qp can be picked up from any tid_rdma header struct */ |
|---|
| 398 | + qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_req.verbs_qp) & |
|---|
| 399 | + RVT_QPN_MASK; |
|---|
| 400 | + |
|---|
| 401 | + rcu_read_lock(); |
|---|
| 402 | + packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); |
|---|
| 403 | + if (!packet->qp) |
|---|
| 404 | + goto drop_rcu; |
|---|
| 405 | + spin_lock_irqsave(&packet->qp->r_lock, flags); |
|---|
| 406 | + opcode_handler = tid_qp_ok(opcode, packet); |
|---|
| 407 | + if (likely(opcode_handler)) |
|---|
| 408 | + opcode_handler(packet); |
|---|
| 409 | + else |
|---|
| 410 | + goto drop_unlock; |
|---|
| 411 | + spin_unlock_irqrestore(&packet->qp->r_lock, flags); |
|---|
| 412 | + rcu_read_unlock(); |
|---|
| 413 | + |
|---|
| 414 | + return; |
|---|
| 415 | +drop_unlock: |
|---|
| 416 | + spin_unlock_irqrestore(&packet->qp->r_lock, flags); |
|---|
| 417 | +drop_rcu: |
|---|
| 418 | + rcu_read_unlock(); |
|---|
| 419 | +drop: |
|---|
| 420 | + ibp->rvp.n_pkt_drops++; |
|---|
| 421 | +} |
|---|
| 422 | + |
|---|
| 423 | +void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet) |
|---|
| 424 | +{ |
|---|
| 425 | + struct hfi1_ctxtdata *rcd = packet->rcd; |
|---|
| 426 | + struct ib_header *hdr = packet->hdr; |
|---|
| 427 | + u32 tlen = packet->tlen; |
|---|
| 428 | + struct hfi1_pportdata *ppd = rcd->ppd; |
|---|
| 429 | + struct hfi1_ibport *ibp = &ppd->ibport_data; |
|---|
| 430 | + struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; |
|---|
| 431 | + opcode_handler opcode_handler; |
|---|
| 432 | + unsigned long flags; |
|---|
| 433 | + u32 qp_num; |
|---|
| 434 | + int lnh; |
|---|
| 435 | + u8 opcode; |
|---|
| 436 | + |
|---|
| 437 | + /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */ |
|---|
| 438 | + if (unlikely(tlen < 15 * sizeof(u32))) |
|---|
| 439 | + goto drop; |
|---|
| 440 | + |
|---|
| 441 | + lnh = be16_to_cpu(hdr->lrh[0]) & 3; |
|---|
| 442 | + if (lnh != HFI1_LRH_BTH) |
|---|
| 443 | + goto drop; |
|---|
| 444 | + |
|---|
| 445 | + packet->ohdr = &hdr->u.oth; |
|---|
| 446 | + trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); |
|---|
| 447 | + |
|---|
| 448 | + opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); |
|---|
| 449 | + inc_opstats(tlen, &rcd->opstats->stats[opcode]); |
|---|
| 450 | + |
|---|
| 451 | + /* verbs_qp can be picked up from any tid_rdma header struct */ |
|---|
| 452 | + qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_rsp.verbs_qp) & |
|---|
| 453 | + RVT_QPN_MASK; |
|---|
| 454 | + |
|---|
| 455 | + rcu_read_lock(); |
|---|
| 456 | + packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); |
|---|
| 457 | + if (!packet->qp) |
|---|
| 458 | + goto drop_rcu; |
|---|
| 459 | + spin_lock_irqsave(&packet->qp->r_lock, flags); |
|---|
| 460 | + opcode_handler = tid_qp_ok(opcode, packet); |
|---|
| 461 | + if (likely(opcode_handler)) |
|---|
| 462 | + opcode_handler(packet); |
|---|
| 463 | + else |
|---|
| 464 | + goto drop_unlock; |
|---|
| 465 | + spin_unlock_irqrestore(&packet->qp->r_lock, flags); |
|---|
| 466 | + rcu_read_unlock(); |
|---|
| 467 | + |
|---|
| 468 | + return; |
|---|
| 469 | +drop_unlock: |
|---|
| 470 | + spin_unlock_irqrestore(&packet->qp->r_lock, flags); |
|---|
| 471 | +drop_rcu: |
|---|
| 472 | + rcu_read_unlock(); |
|---|
| 473 | +drop: |
|---|
| 474 | + ibp->rvp.n_pkt_drops++; |
|---|
| 554 | 475 | } |
|---|
| 555 | 476 | |
|---|
| 556 | 477 | static int hfi1_do_pkey_check(struct hfi1_packet *packet) |
|---|
| .. | .. |
|---|
| 713 | 634 | |
|---|
| 714 | 635 | spin_lock(&qp->s_lock); |
|---|
| 715 | 636 | if (tx->wqe) { |
|---|
| 716 | | - hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); |
|---|
| 637 | + rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS); |
|---|
| 717 | 638 | } else if (qp->ibqp.qp_type == IB_QPT_RC) { |
|---|
| 718 | 639 | struct hfi1_opa_header *hdr; |
|---|
| 719 | 640 | |
|---|
| 720 | 641 | hdr = &tx->phdr.hdr; |
|---|
| 642 | + if (unlikely(status == SDMA_TXREQ_S_ABORTED)) |
|---|
| 643 | + hfi1_rc_verbs_aborted(qp, hdr); |
|---|
| 721 | 644 | hfi1_rc_send_complete(qp, hdr); |
|---|
| 722 | 645 | } |
|---|
| 723 | 646 | spin_unlock(&qp->s_lock); |
|---|
| .. | .. |
|---|
| 725 | 648 | hfi1_put_txreq(tx); |
|---|
| 726 | 649 | } |
|---|
| 727 | 650 | |
|---|
| 651 | +void hfi1_wait_kmem(struct rvt_qp *qp) |
|---|
| 652 | +{ |
|---|
| 653 | + struct hfi1_qp_priv *priv = qp->priv; |
|---|
| 654 | + struct ib_qp *ibqp = &qp->ibqp; |
|---|
| 655 | + struct ib_device *ibdev = ibqp->device; |
|---|
| 656 | + struct hfi1_ibdev *dev = to_idev(ibdev); |
|---|
| 657 | + |
|---|
| 658 | + if (list_empty(&priv->s_iowait.list)) { |
|---|
| 659 | + if (list_empty(&dev->memwait)) |
|---|
| 660 | + mod_timer(&dev->mem_timer, jiffies + 1); |
|---|
| 661 | + qp->s_flags |= RVT_S_WAIT_KMEM; |
|---|
| 662 | + list_add_tail(&priv->s_iowait.list, &dev->memwait); |
|---|
| 663 | + priv->s_iowait.lock = &dev->iowait_lock; |
|---|
| 664 | + trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); |
|---|
| 665 | + rvt_get_qp(qp); |
|---|
| 666 | + } |
|---|
| 667 | +} |
|---|
| 668 | + |
|---|
| 728 | 669 | static int wait_kmem(struct hfi1_ibdev *dev, |
|---|
| 729 | 670 | struct rvt_qp *qp, |
|---|
| 730 | 671 | struct hfi1_pkt_state *ps) |
|---|
| 731 | 672 | { |
|---|
| 732 | | - struct hfi1_qp_priv *priv = qp->priv; |
|---|
| 733 | 673 | unsigned long flags; |
|---|
| 734 | 674 | int ret = 0; |
|---|
| 735 | 675 | |
|---|
| .. | .. |
|---|
| 737 | 677 | if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { |
|---|
| 738 | 678 | write_seqlock(&dev->iowait_lock); |
|---|
| 739 | 679 | list_add_tail(&ps->s_txreq->txreq.list, |
|---|
| 740 | | - &priv->s_iowait.tx_head); |
|---|
| 741 | | - if (list_empty(&priv->s_iowait.list)) { |
|---|
| 742 | | - if (list_empty(&dev->memwait)) |
|---|
| 743 | | - mod_timer(&dev->mem_timer, jiffies + 1); |
|---|
| 744 | | - qp->s_flags |= RVT_S_WAIT_KMEM; |
|---|
| 745 | | - list_add_tail(&priv->s_iowait.list, &dev->memwait); |
|---|
| 746 | | - priv->s_iowait.lock = &dev->iowait_lock; |
|---|
| 747 | | - trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); |
|---|
| 748 | | - rvt_get_qp(qp); |
|---|
| 749 | | - } |
|---|
| 680 | + &ps->wait->tx_head); |
|---|
| 681 | + hfi1_wait_kmem(qp); |
|---|
| 750 | 682 | write_sequnlock(&dev->iowait_lock); |
|---|
| 751 | | - qp->s_flags &= ~RVT_S_BUSY; |
|---|
| 683 | + hfi1_qp_unbusy(qp, ps->wait); |
|---|
| 752 | 684 | ret = -EBUSY; |
|---|
| 753 | 685 | } |
|---|
| 754 | 686 | spin_unlock_irqrestore(&qp->s_lock, flags); |
|---|
| .. | .. |
|---|
| 774 | 706 | int ret = 0; |
|---|
| 775 | 707 | |
|---|
| 776 | 708 | while (length) { |
|---|
| 777 | | - len = ss->sge.length; |
|---|
| 778 | | - if (len > length) |
|---|
| 779 | | - len = length; |
|---|
| 780 | | - if (len > ss->sge.sge_length) |
|---|
| 781 | | - len = ss->sge.sge_length; |
|---|
| 709 | + len = rvt_get_sge_length(&ss->sge, length); |
|---|
| 782 | 710 | WARN_ON_ONCE(len == 0); |
|---|
| 783 | 711 | ret = sdma_txadd_kvaddr( |
|---|
| 784 | 712 | sde->dd, |
|---|
| .. | .. |
|---|
| 892 | 820 | |
|---|
| 893 | 821 | /* add icrc, lt byte, and padding to flit */ |
|---|
| 894 | 822 | if (extra_bytes) |
|---|
| 895 | | - ret = sdma_txadd_daddr(sde->dd, &tx->txreq, |
|---|
| 896 | | - sde->dd->sdma_pad_phys, extra_bytes); |
|---|
| 823 | + ret = sdma_txadd_daddr(sde->dd, &tx->txreq, sde->dd->sdma_pad_phys, |
|---|
| 824 | + extra_bytes); |
|---|
| 897 | 825 | |
|---|
| 898 | 826 | bail_txadd: |
|---|
| 899 | 827 | return ret; |
|---|
| 828 | +} |
|---|
| 829 | + |
|---|
| 830 | +static u64 update_hcrc(u8 opcode, u64 pbc) |
|---|
| 831 | +{ |
|---|
| 832 | + if ((opcode & IB_OPCODE_TID_RDMA) == IB_OPCODE_TID_RDMA) { |
|---|
| 833 | + pbc &= ~PBC_INSERT_HCRC_SMASK; |
|---|
| 834 | + pbc |= (u64)PBC_IHCRC_LKDETH << PBC_INSERT_HCRC_SHIFT; |
|---|
| 835 | + } |
|---|
| 836 | + return pbc; |
|---|
| 900 | 837 | } |
|---|
| 901 | 838 | |
|---|
| 902 | 839 | int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, |
|---|
| .. | .. |
|---|
| 937 | 874 | else |
|---|
| 938 | 875 | pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); |
|---|
| 939 | 876 | |
|---|
| 940 | | - if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) |
|---|
| 941 | | - pbc = hfi1_fault_tx(qp, ps->opcode, pbc); |
|---|
| 942 | 877 | pbc = create_pbc(ppd, |
|---|
| 943 | 878 | pbc, |
|---|
| 944 | 879 | qp->srate_mbps, |
|---|
| 945 | 880 | vl, |
|---|
| 946 | 881 | plen); |
|---|
| 882 | + |
|---|
| 883 | + if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) |
|---|
| 884 | + pbc = hfi1_fault_tx(qp, ps->opcode, pbc); |
|---|
| 885 | + else |
|---|
| 886 | + /* Update HCRC based on packet opcode */ |
|---|
| 887 | + pbc = update_hcrc(ps->opcode, pbc); |
|---|
| 947 | 888 | } |
|---|
| 948 | 889 | tx->wqe = qp->s_wqe; |
|---|
| 949 | 890 | ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc); |
|---|
| 950 | 891 | if (unlikely(ret)) |
|---|
| 951 | 892 | goto bail_build; |
|---|
| 952 | 893 | } |
|---|
| 953 | | - ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq, |
|---|
| 954 | | - ps->pkts_sent); |
|---|
| 894 | + ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent); |
|---|
| 955 | 895 | if (unlikely(ret < 0)) { |
|---|
| 956 | 896 | if (ret == -ECOMM) |
|---|
| 957 | 897 | goto bail_ecomm; |
|---|
| .. | .. |
|---|
| 987 | 927 | { |
|---|
| 988 | 928 | struct hfi1_qp_priv *priv = qp->priv; |
|---|
| 989 | 929 | struct hfi1_devdata *dd = sc->dd; |
|---|
| 990 | | - struct hfi1_ibdev *dev = &dd->verbs_dev; |
|---|
| 991 | 930 | unsigned long flags; |
|---|
| 992 | 931 | int ret = 0; |
|---|
| 993 | 932 | |
|---|
| .. | .. |
|---|
| 999 | 938 | */ |
|---|
| 1000 | 939 | spin_lock_irqsave(&qp->s_lock, flags); |
|---|
| 1001 | 940 | if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { |
|---|
| 1002 | | - write_seqlock(&dev->iowait_lock); |
|---|
| 941 | + write_seqlock(&sc->waitlock); |
|---|
| 1003 | 942 | list_add_tail(&ps->s_txreq->txreq.list, |
|---|
| 1004 | | - &priv->s_iowait.tx_head); |
|---|
| 943 | + &ps->wait->tx_head); |
|---|
| 1005 | 944 | if (list_empty(&priv->s_iowait.list)) { |
|---|
| 1006 | 945 | struct hfi1_ibdev *dev = &dd->verbs_dev; |
|---|
| 1007 | 946 | int was_empty; |
|---|
| .. | .. |
|---|
| 1010 | 949 | dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN); |
|---|
| 1011 | 950 | qp->s_flags |= flag; |
|---|
| 1012 | 951 | was_empty = list_empty(&sc->piowait); |
|---|
| 952 | + iowait_get_priority(&priv->s_iowait); |
|---|
| 1013 | 953 | iowait_queue(ps->pkts_sent, &priv->s_iowait, |
|---|
| 1014 | 954 | &sc->piowait); |
|---|
| 1015 | | - priv->s_iowait.lock = &dev->iowait_lock; |
|---|
| 955 | + priv->s_iowait.lock = &sc->waitlock; |
|---|
| 1016 | 956 | trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); |
|---|
| 1017 | 957 | rvt_get_qp(qp); |
|---|
| 1018 | 958 | /* counting: only call wantpiobuf_intr if first user */ |
|---|
| 1019 | 959 | if (was_empty) |
|---|
| 1020 | 960 | hfi1_sc_wantpiobuf_intr(sc, 1); |
|---|
| 1021 | 961 | } |
|---|
| 1022 | | - write_sequnlock(&dev->iowait_lock); |
|---|
| 1023 | | - qp->s_flags &= ~RVT_S_BUSY; |
|---|
| 962 | + write_sequnlock(&sc->waitlock); |
|---|
| 963 | + hfi1_qp_unbusy(qp, ps->wait); |
|---|
| 1024 | 964 | ret = -EBUSY; |
|---|
| 1025 | 965 | } |
|---|
| 1026 | 966 | spin_unlock_irqrestore(&qp->s_lock, flags); |
|---|
| .. | .. |
|---|
| 1091 | 1031 | else |
|---|
| 1092 | 1032 | pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); |
|---|
| 1093 | 1033 | |
|---|
| 1034 | + pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); |
|---|
| 1094 | 1035 | if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) |
|---|
| 1095 | 1036 | pbc = hfi1_fault_tx(qp, ps->opcode, pbc); |
|---|
| 1096 | | - pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); |
|---|
| 1037 | + else |
|---|
| 1038 | + /* Update HCRC based on packet opcode */ |
|---|
| 1039 | + pbc = update_hcrc(ps->opcode, pbc); |
|---|
| 1097 | 1040 | } |
|---|
| 1098 | 1041 | if (cb) |
|---|
| 1099 | 1042 | iowait_pio_inc(&priv->s_iowait); |
|---|
| 1100 | 1043 | pbuf = sc_buffer_alloc(sc, plen, cb, qp); |
|---|
| 1101 | | - if (unlikely(IS_ERR_OR_NULL(pbuf))) { |
|---|
| 1044 | + if (IS_ERR_OR_NULL(pbuf)) { |
|---|
| 1102 | 1045 | if (cb) |
|---|
| 1103 | 1046 | verbs_pio_complete(qp, 0); |
|---|
| 1104 | 1047 | if (IS_ERR(pbuf)) { |
|---|
| .. | .. |
|---|
| 1137 | 1080 | if (ss) { |
|---|
| 1138 | 1081 | while (len) { |
|---|
| 1139 | 1082 | void *addr = ss->sge.vaddr; |
|---|
| 1140 | | - u32 slen = ss->sge.length; |
|---|
| 1083 | + u32 slen = rvt_get_sge_length(&ss->sge, len); |
|---|
| 1141 | 1084 | |
|---|
| 1142 | | - if (slen > len) |
|---|
| 1143 | | - slen = len; |
|---|
| 1144 | | - if (slen > ss->sge.sge_length) |
|---|
| 1145 | | - slen = ss->sge.sge_length; |
|---|
| 1146 | 1085 | rvt_update_sge(ss, slen, false); |
|---|
| 1147 | 1086 | seg_pio_copy_mid(pbuf, addr, slen); |
|---|
| 1148 | 1087 | len -= slen; |
|---|
| .. | .. |
|---|
| 1161 | 1100 | &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); |
|---|
| 1162 | 1101 | |
|---|
| 1163 | 1102 | pio_bail: |
|---|
| 1103 | + spin_lock_irqsave(&qp->s_lock, flags); |
|---|
| 1164 | 1104 | if (qp->s_wqe) { |
|---|
| 1165 | | - spin_lock_irqsave(&qp->s_lock, flags); |
|---|
| 1166 | | - hfi1_send_complete(qp, qp->s_wqe, wc_status); |
|---|
| 1167 | | - spin_unlock_irqrestore(&qp->s_lock, flags); |
|---|
| 1105 | + rvt_send_complete(qp, qp->s_wqe, wc_status); |
|---|
| 1168 | 1106 | } else if (qp->ibqp.qp_type == IB_QPT_RC) { |
|---|
| 1169 | | - spin_lock_irqsave(&qp->s_lock, flags); |
|---|
| 1107 | + if (unlikely(wc_status == IB_WC_GENERAL_ERR)) |
|---|
| 1108 | + hfi1_rc_verbs_aborted(qp, &ps->s_txreq->phdr.hdr); |
|---|
| 1170 | 1109 | hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr); |
|---|
| 1171 | | - spin_unlock_irqrestore(&qp->s_lock, flags); |
|---|
| 1172 | 1110 | } |
|---|
| 1111 | + spin_unlock_irqrestore(&qp->s_lock, flags); |
|---|
| 1173 | 1112 | |
|---|
| 1174 | 1113 | ret = 0; |
|---|
| 1175 | 1114 | |
|---|
| .. | .. |
|---|
| 1289 | 1228 | case IB_QPT_UD: |
|---|
| 1290 | 1229 | break; |
|---|
| 1291 | 1230 | case IB_QPT_UC: |
|---|
| 1292 | | - case IB_QPT_RC: { |
|---|
| 1231 | + case IB_QPT_RC: |
|---|
| 1232 | + priv->s_running_pkt_size = |
|---|
| 1233 | + (tx->s_cur_size + priv->s_running_pkt_size) / 2; |
|---|
| 1293 | 1234 | if (piothreshold && |
|---|
| 1294 | | - tx->s_cur_size <= min(piothreshold, qp->pmtu) && |
|---|
| 1235 | + priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) && |
|---|
| 1295 | 1236 | (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) && |
|---|
| 1296 | 1237 | iowait_sdma_pending(&priv->s_iowait) == 0 && |
|---|
| 1297 | 1238 | !sdma_txreq_built(&tx->txreq)) |
|---|
| 1298 | 1239 | return dd->process_pio_send; |
|---|
| 1299 | 1240 | break; |
|---|
| 1300 | | - } |
|---|
| 1301 | 1241 | default: |
|---|
| 1302 | 1242 | break; |
|---|
| 1303 | 1243 | } |
|---|
| .. | .. |
|---|
| 1370 | 1310 | hfi1_cdbg(PIO, "%s() Failed. Completing with err", |
|---|
| 1371 | 1311 | __func__); |
|---|
| 1372 | 1312 | spin_lock_irqsave(&qp->s_lock, flags); |
|---|
| 1373 | | - hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); |
|---|
| 1313 | + rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); |
|---|
| 1374 | 1314 | spin_unlock_irqrestore(&qp->s_lock, flags); |
|---|
| 1375 | 1315 | } |
|---|
| 1376 | 1316 | return -EINVAL; |
|---|
| .. | .. |
|---|
| 1403 | 1343 | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | |
|---|
| 1404 | 1344 | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | |
|---|
| 1405 | 1345 | IB_DEVICE_MEM_MGT_EXTENSIONS | |
|---|
| 1406 | | - IB_DEVICE_RDMA_NETDEV_OPA_VNIC; |
|---|
| 1346 | + IB_DEVICE_RDMA_NETDEV_OPA; |
|---|
| 1407 | 1347 | rdi->dparms.props.page_size_cap = PAGE_SIZE; |
|---|
| 1408 | 1348 | rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; |
|---|
| 1409 | 1349 | rdi->dparms.props.vendor_part_id = dd->pcidev->device; |
|---|
| .. | .. |
|---|
| 1412 | 1352 | rdi->dparms.props.max_mr_size = U64_MAX; |
|---|
| 1413 | 1353 | rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; |
|---|
| 1414 | 1354 | rdi->dparms.props.max_qp = hfi1_max_qps; |
|---|
| 1415 | | - rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; |
|---|
| 1355 | + rdi->dparms.props.max_qp_wr = |
|---|
| 1356 | + (hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ? |
|---|
| 1357 | + HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs); |
|---|
| 1416 | 1358 | rdi->dparms.props.max_send_sge = hfi1_max_sges; |
|---|
| 1417 | 1359 | rdi->dparms.props.max_recv_sge = hfi1_max_sges; |
|---|
| 1418 | 1360 | rdi->dparms.props.max_sge_rd = hfi1_max_sges; |
|---|
| 1419 | 1361 | rdi->dparms.props.max_cq = hfi1_max_cqs; |
|---|
| 1420 | 1362 | rdi->dparms.props.max_ah = hfi1_max_ahs; |
|---|
| 1421 | 1363 | rdi->dparms.props.max_cqe = hfi1_max_cqes; |
|---|
| 1422 | | - rdi->dparms.props.max_map_per_fmr = 32767; |
|---|
| 1423 | 1364 | rdi->dparms.props.max_pd = hfi1_max_pds; |
|---|
| 1424 | 1365 | rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; |
|---|
| 1425 | 1366 | rdi->dparms.props.max_qp_init_rd_atom = 255; |
|---|
| .. | .. |
|---|
| 1483 | 1424 | props->gid_tbl_len = HFI1_GUIDS_PER_PORT; |
|---|
| 1484 | 1425 | props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); |
|---|
| 1485 | 1426 | /* see rate_show() in ib core/sysfs.c */ |
|---|
| 1486 | | - props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active); |
|---|
| 1427 | + props->active_speed = opa_speed_to_ib(ppd->link_speed_active); |
|---|
| 1487 | 1428 | props->max_vl_num = ppd->vls_supported; |
|---|
| 1488 | 1429 | |
|---|
| 1489 | 1430 | /* Once we are a "first class" citizen and have added the OPA MTUs to |
|---|
| .. | .. |
|---|
| 1498 | 1439 | 4096 : hfi1_max_mtu), IB_MTU_4096); |
|---|
| 1499 | 1440 | props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : |
|---|
| 1500 | 1441 | mtu_to_enum(ppd->ibmtu, IB_MTU_4096); |
|---|
| 1442 | + props->phys_mtu = hfi1_max_mtu; |
|---|
| 1501 | 1443 | |
|---|
| 1502 | 1444 | return 0; |
|---|
| 1503 | 1445 | } |
|---|
| .. | .. |
|---|
| 1802 | 1744 | |
|---|
| 1803 | 1745 | static u64 hfi1_sps_ints(void) |
|---|
| 1804 | 1746 | { |
|---|
| 1805 | | - unsigned long flags; |
|---|
| 1747 | + unsigned long index, flags; |
|---|
| 1806 | 1748 | struct hfi1_devdata *dd; |
|---|
| 1807 | 1749 | u64 sps_ints = 0; |
|---|
| 1808 | 1750 | |
|---|
| 1809 | | - spin_lock_irqsave(&hfi1_devs_lock, flags); |
|---|
| 1810 | | - list_for_each_entry(dd, &hfi1_dev_list, list) { |
|---|
| 1751 | + xa_lock_irqsave(&hfi1_dev_table, flags); |
|---|
| 1752 | + xa_for_each(&hfi1_dev_table, index, dd) { |
|---|
| 1811 | 1753 | sps_ints += get_all_cpu_total(dd->int_counter); |
|---|
| 1812 | 1754 | } |
|---|
| 1813 | | - spin_unlock_irqrestore(&hfi1_devs_lock, flags); |
|---|
| 1755 | + xa_unlock_irqrestore(&hfi1_dev_table, flags); |
|---|
| 1814 | 1756 | return sps_ints; |
|---|
| 1815 | 1757 | } |
|---|
| 1816 | 1758 | |
|---|
| .. | .. |
|---|
| 1839 | 1781 | memcpy(stats->value, values, count * sizeof(u64)); |
|---|
| 1840 | 1782 | return count; |
|---|
| 1841 | 1783 | } |
|---|
| 1784 | + |
|---|
| 1785 | +static const struct ib_device_ops hfi1_dev_ops = { |
|---|
| 1786 | + .owner = THIS_MODULE, |
|---|
| 1787 | + .driver_id = RDMA_DRIVER_HFI1, |
|---|
| 1788 | + |
|---|
| 1789 | + .alloc_hw_stats = alloc_hw_stats, |
|---|
| 1790 | + .alloc_rdma_netdev = hfi1_vnic_alloc_rn, |
|---|
| 1791 | + .get_dev_fw_str = hfi1_get_dev_fw_str, |
|---|
| 1792 | + .get_hw_stats = get_hw_stats, |
|---|
| 1793 | + .init_port = hfi1_create_port_files, |
|---|
| 1794 | + .modify_device = modify_device, |
|---|
| 1795 | + /* keep process mad in the driver */ |
|---|
| 1796 | + .process_mad = hfi1_process_mad, |
|---|
| 1797 | + .rdma_netdev_get_params = hfi1_ipoib_rn_get_params, |
|---|
| 1798 | +}; |
|---|
| 1842 | 1799 | |
|---|
| 1843 | 1800 | /** |
|---|
| 1844 | 1801 | * hfi1_register_ib_device - register our device with the infiniband core |
|---|
| .. | .. |
|---|
| 1880 | 1837 | */ |
|---|
| 1881 | 1838 | if (!ib_hfi1_sys_image_guid) |
|---|
| 1882 | 1839 | ib_hfi1_sys_image_guid = ibdev->node_guid; |
|---|
| 1883 | | - ibdev->owner = THIS_MODULE; |
|---|
| 1884 | 1840 | ibdev->phys_port_cnt = dd->num_pports; |
|---|
| 1885 | 1841 | ibdev->dev.parent = &dd->pcidev->dev; |
|---|
| 1886 | | - ibdev->modify_device = modify_device; |
|---|
| 1887 | | - ibdev->alloc_hw_stats = alloc_hw_stats; |
|---|
| 1888 | | - ibdev->get_hw_stats = get_hw_stats; |
|---|
| 1889 | | - ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn; |
|---|
| 1890 | 1842 | |
|---|
| 1891 | | - /* keep process mad in the driver */ |
|---|
| 1892 | | - ibdev->process_mad = hfi1_process_mad; |
|---|
| 1893 | | - ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; |
|---|
| 1843 | + ib_set_device_ops(ibdev, &hfi1_dev_ops); |
|---|
| 1894 | 1844 | |
|---|
| 1895 | 1845 | strlcpy(ibdev->node_desc, init_utsname()->nodename, |
|---|
| 1896 | 1846 | sizeof(ibdev->node_desc)); |
|---|
| .. | .. |
|---|
| 1898 | 1848 | /* |
|---|
| 1899 | 1849 | * Fill in rvt info object. |
|---|
| 1900 | 1850 | */ |
|---|
| 1901 | | - dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files; |
|---|
| 1902 | 1851 | dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; |
|---|
| 1903 | 1852 | dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; |
|---|
| 1904 | 1853 | dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; |
|---|
| .. | .. |
|---|
| 1916 | 1865 | dd->verbs_dev.rdi.dparms.qpn_start = 0; |
|---|
| 1917 | 1866 | dd->verbs_dev.rdi.dparms.qpn_inc = 1; |
|---|
| 1918 | 1867 | dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; |
|---|
| 1919 | | - dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; |
|---|
| 1920 | | - dd->verbs_dev.rdi.dparms.qpn_res_end = |
|---|
| 1921 | | - dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; |
|---|
| 1868 | + dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE; |
|---|
| 1869 | + dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX; |
|---|
| 1922 | 1870 | dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC; |
|---|
| 1923 | 1871 | dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; |
|---|
| 1924 | 1872 | dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; |
|---|
| .. | .. |
|---|
| 1928 | 1876 | dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; |
|---|
| 1929 | 1877 | |
|---|
| 1930 | 1878 | dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; |
|---|
| 1879 | + dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init; |
|---|
| 1931 | 1880 | dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; |
|---|
| 1932 | 1881 | dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; |
|---|
| 1933 | 1882 | dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; |
|---|
| .. | .. |
|---|
| 1945 | 1894 | dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; |
|---|
| 1946 | 1895 | dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; |
|---|
| 1947 | 1896 | dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; |
|---|
| 1948 | | - dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; |
|---|
| 1897 | + dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe; |
|---|
| 1949 | 1898 | dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup = |
|---|
| 1950 | 1899 | hfi1_comp_vect_mappings_lookup; |
|---|
| 1951 | 1900 | |
|---|
| .. | .. |
|---|
| 1958 | 1907 | dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; |
|---|
| 1959 | 1908 | dd->verbs_dev.rdi.dparms.nports = dd->num_pports; |
|---|
| 1960 | 1909 | dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); |
|---|
| 1910 | + dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode; |
|---|
| 1911 | + dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold; |
|---|
| 1912 | + dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period; |
|---|
| 1913 | + dd->verbs_dev.rdi.dparms.reserved_operations = 1; |
|---|
| 1914 | + dd->verbs_dev.rdi.dparms.extra_rdma_atomic = HFI1_TID_RDMA_WRITE_CNT; |
|---|
| 1961 | 1915 | |
|---|
| 1962 | 1916 | /* post send table */ |
|---|
| 1963 | 1917 | dd->verbs_dev.rdi.post_parms = hfi1_post_parms; |
|---|
| 1918 | + |
|---|
| 1919 | + /* opcode translation table */ |
|---|
| 1920 | + dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode; |
|---|
| 1964 | 1921 | |
|---|
| 1965 | 1922 | ppd = dd->pport; |
|---|
| 1966 | 1923 | for (i = 0; i < dd->num_pports; i++, ppd++) |
|---|
| .. | .. |
|---|
| 1969 | 1926 | i, |
|---|
| 1970 | 1927 | ppd->pkeys); |
|---|
| 1971 | 1928 | |
|---|
| 1972 | | - ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1); |
|---|
| 1929 | + rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, |
|---|
| 1930 | + &ib_hfi1_attr_group); |
|---|
| 1931 | + |
|---|
| 1932 | + ret = rvt_register_device(&dd->verbs_dev.rdi); |
|---|
| 1973 | 1933 | if (ret) |
|---|
| 1974 | 1934 | goto err_verbs_txreq; |
|---|
| 1975 | 1935 | |
|---|