2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/include/rdma/ib_verbs.h
....@@ -1,3 +1,4 @@
1
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
12 /*
23 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
34 * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
....@@ -6,49 +7,18 @@
67 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
78 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
89 * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
9
- *
10
- * This software is available to you under a choice of one of two
11
- * licenses. You may choose to be licensed under the terms of the GNU
12
- * General Public License (GPL) Version 2, available from the file
13
- * COPYING in the main directory of this source tree, or the
14
- * OpenIB.org BSD license below:
15
- *
16
- * Redistribution and use in source and binary forms, with or
17
- * without modification, are permitted provided that the following
18
- * conditions are met:
19
- *
20
- * - Redistributions of source code must retain the above
21
- * copyright notice, this list of conditions and the following
22
- * disclaimer.
23
- *
24
- * - Redistributions in binary form must reproduce the above
25
- * copyright notice, this list of conditions and the following
26
- * disclaimer in the documentation and/or other materials
27
- * provided with the distribution.
28
- *
29
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
33
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
34
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
35
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36
- * SOFTWARE.
3710 */
3811
39
-#if !defined(IB_VERBS_H)
12
+#ifndef IB_VERBS_H
4013 #define IB_VERBS_H
4114
4215 #include <linux/types.h>
4316 #include <linux/device.h>
44
-#include <linux/mm.h>
4517 #include <linux/dma-mapping.h>
4618 #include <linux/kref.h>
4719 #include <linux/list.h>
4820 #include <linux/rwsem.h>
49
-#include <linux/scatterlist.h>
5021 #include <linux/workqueue.h>
51
-#include <linux/socket.h>
5222 #include <linux/irq_poll.h>
5323 #include <uapi/linux/if_ether.h>
5424 #include <net/ipv6.h>
....@@ -56,22 +26,106 @@
5626 #include <linux/string.h>
5727 #include <linux/slab.h>
5828 #include <linux/netdevice.h>
59
-
29
+#include <linux/refcount.h>
6030 #include <linux/if_link.h>
6131 #include <linux/atomic.h>
6232 #include <linux/mmu_notifier.h>
6333 #include <linux/uaccess.h>
6434 #include <linux/cgroup_rdma.h>
35
+#include <linux/irqflags.h>
36
+#include <linux/preempt.h>
37
+#include <linux/dim.h>
6538 #include <uapi/rdma/ib_user_verbs.h>
39
+#include <rdma/rdma_counter.h>
6640 #include <rdma/restrack.h>
41
+#include <rdma/signature.h>
6742 #include <uapi/rdma/rdma_user_ioctl.h>
6843 #include <uapi/rdma/ib_user_ioctl_verbs.h>
6944
7045 #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
7146
47
+struct ib_umem_odp;
48
+struct ib_uqp_object;
49
+struct ib_usrq_object;
50
+struct ib_uwq_object;
51
+struct rdma_cm_id;
52
+
7253 extern struct workqueue_struct *ib_wq;
7354 extern struct workqueue_struct *ib_comp_wq;
7455 extern struct workqueue_struct *ib_comp_unbound_wq;
56
+
57
+struct ib_ucq_object;
58
+
59
+__printf(3, 4) __cold
60
+void ibdev_printk(const char *level, const struct ib_device *ibdev,
61
+ const char *format, ...);
62
+__printf(2, 3) __cold
63
+void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...);
64
+__printf(2, 3) __cold
65
+void ibdev_alert(const struct ib_device *ibdev, const char *format, ...);
66
+__printf(2, 3) __cold
67
+void ibdev_crit(const struct ib_device *ibdev, const char *format, ...);
68
+__printf(2, 3) __cold
69
+void ibdev_err(const struct ib_device *ibdev, const char *format, ...);
70
+__printf(2, 3) __cold
71
+void ibdev_warn(const struct ib_device *ibdev, const char *format, ...);
72
+__printf(2, 3) __cold
73
+void ibdev_notice(const struct ib_device *ibdev, const char *format, ...);
74
+__printf(2, 3) __cold
75
+void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
76
+
77
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
78
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
79
+#define ibdev_dbg(__dev, format, args...) \
80
+ dynamic_ibdev_dbg(__dev, format, ##args)
81
+#else
82
+__printf(2, 3) __cold
83
+static inline
84
+void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
85
+#endif
86
+
87
+#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...) \
88
+do { \
89
+ static DEFINE_RATELIMIT_STATE(_rs, \
90
+ DEFAULT_RATELIMIT_INTERVAL, \
91
+ DEFAULT_RATELIMIT_BURST); \
92
+ if (__ratelimit(&_rs)) \
93
+ ibdev_level(ibdev, fmt, ##__VA_ARGS__); \
94
+} while (0)
95
+
96
+#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
97
+ ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
98
+#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
99
+ ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
100
+#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
101
+ ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
102
+#define ibdev_err_ratelimited(ibdev, fmt, ...) \
103
+ ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
104
+#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
105
+ ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
106
+#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
107
+ ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
108
+#define ibdev_info_ratelimited(ibdev, fmt, ...) \
109
+ ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
110
+
111
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
112
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
113
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
114
+#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \
115
+do { \
116
+ static DEFINE_RATELIMIT_STATE(_rs, \
117
+ DEFAULT_RATELIMIT_INTERVAL, \
118
+ DEFAULT_RATELIMIT_BURST); \
119
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
120
+ if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) \
121
+ __dynamic_ibdev_dbg(&descriptor, ibdev, fmt, \
122
+ ##__VA_ARGS__); \
123
+} while (0)
124
+#else
125
+__printf(2, 3) __cold
126
+static inline
127
+void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
128
+#endif
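Usage sketch, not part of the patch (the wrapper function and message strings are illustrative assumptions): a driver or ULP holding a struct ib_device pointer would call the helpers declared above roughly like this, in its own source file.

static void example_log_status(struct ib_device *ibdev, int err)
{
	if (err)
		/* rate-limited so repeated failures do not flood the log */
		ibdev_warn_ratelimited(ibdev, "operation failed: %d\n", err);
	else
		/* emitted only when dynamic debug enables this call site,
		 * and compiled to an empty inline otherwise */
		ibdev_dbg(ibdev, "operation completed\n");
}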
75129
76130 union ib_gid {
77131 u8 raw[16];
....@@ -84,31 +138,20 @@
84138 extern union ib_gid zgid;
85139
86140 enum ib_gid_type {
87
- /* If link layer is Ethernet, this is RoCE V1 */
88
- IB_GID_TYPE_IB = 0,
89
- IB_GID_TYPE_ROCE = 0,
90
- IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
141
+ IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB,
142
+ IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1,
143
+ IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2,
91144 IB_GID_TYPE_SIZE
92145 };
93146
94147 #define ROCE_V2_UDP_DPORT 4791
95148 struct ib_gid_attr {
96
- struct net_device *ndev;
149
+ struct net_device __rcu *ndev;
97150 struct ib_device *device;
98151 union ib_gid gid;
99152 enum ib_gid_type gid_type;
100153 u16 index;
101154 u8 port_num;
102
-};
103
-
104
-enum rdma_node_type {
105
- /* IB values map to NodeInfo:NodeType. */
106
- RDMA_NODE_IB_CA = 1,
107
- RDMA_NODE_IB_SWITCH,
108
- RDMA_NODE_IB_ROUTER,
109
- RDMA_NODE_RNIC,
110
- RDMA_NODE_USNIC,
111
- RDMA_NODE_USNIC_UDP,
112155 };
113156
114157 enum {
....@@ -120,7 +163,8 @@
120163 RDMA_TRANSPORT_IB,
121164 RDMA_TRANSPORT_IWARP,
122165 RDMA_TRANSPORT_USNIC,
123
- RDMA_TRANSPORT_USNIC_UDP
166
+ RDMA_TRANSPORT_USNIC_UDP,
167
+ RDMA_TRANSPORT_UNSPECIFIED,
124168 };
125169
126170 enum rdma_protocol_type {
....@@ -131,11 +175,11 @@
131175 };
132176
133177 __attribute_const__ enum rdma_transport_type
134
-rdma_node_get_transport(enum rdma_node_type node_type);
178
+rdma_node_get_transport(unsigned int node_type);
135179
136180 enum rdma_network_type {
137181 RDMA_NETWORK_IB,
138
- RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
182
+ RDMA_NETWORK_ROCE_V1,
139183 RDMA_NETWORK_IPV4,
140184 RDMA_NETWORK_IPV6
141185 };
....@@ -145,9 +189,10 @@
145189 if (network_type == RDMA_NETWORK_IPV4 ||
146190 network_type == RDMA_NETWORK_IPV6)
147191 return IB_GID_TYPE_ROCE_UDP_ENCAP;
148
-
149
- /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
150
- return IB_GID_TYPE_IB;
192
+ else if (network_type == RDMA_NETWORK_ROCE_V1)
193
+ return IB_GID_TYPE_ROCE;
194
+ else
195
+ return IB_GID_TYPE_IB;
151196 }
152197
153198 static inline enum rdma_network_type
....@@ -155,6 +200,9 @@
155200 {
156201 if (attr->gid_type == IB_GID_TYPE_IB)
157202 return RDMA_NETWORK_IB;
203
+
204
+ if (attr->gid_type == IB_GID_TYPE_ROCE)
205
+ return RDMA_NETWORK_ROCE_V1;
158206
159207 if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
160208 return RDMA_NETWORK_IPV4;
....@@ -230,26 +278,16 @@
230278 */
231279 IB_DEVICE_CROSS_CHANNEL = (1 << 27),
232280 IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
233
- IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
281
+ IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30),
234282 IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
235283 IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
236284 IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
237285 /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
238286 IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
239
- IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35),
287
+ IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35),
240288 /* The device supports padding incoming writes to cacheline. */
241289 IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36),
242
-};
243
-
244
-enum ib_signature_prot_cap {
245
- IB_PROT_T10DIF_TYPE_1 = 1,
246
- IB_PROT_T10DIF_TYPE_2 = 1 << 1,
247
- IB_PROT_T10DIF_TYPE_3 = 1 << 2,
248
-};
249
-
250
-enum ib_signature_guard_cap {
251
- IB_GUARD_T10DIF_CRC = 1,
252
- IB_GUARD_T10DIF_CSUM = 1 << 1,
290
+ IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37),
253291 };
254292
255293 enum ib_atomic_cap {
....@@ -269,6 +307,7 @@
269307 IB_ODP_SUPPORT_WRITE = 1 << 2,
270308 IB_ODP_SUPPORT_READ = 1 << 3,
271309 IB_ODP_SUPPORT_ATOMIC = 1 << 4,
310
+ IB_ODP_SUPPORT_SRQ_RECV = 1 << 5,
272311 };
273312
274313 struct ib_odp_caps {
....@@ -277,6 +316,7 @@
277316 uint32_t rc_odp_caps;
278317 uint32_t uc_odp_caps;
279318 uint32_t ud_odp_caps;
319
+ uint32_t xrc_odp_caps;
280320 } per_transport_caps;
281321 };
282322
....@@ -369,12 +409,11 @@
369409 int max_mcast_qp_attach;
370410 int max_total_mcast_qp_attach;
371411 int max_ah;
372
- int max_fmr;
373
- int max_map_per_fmr;
374412 int max_srq;
375413 int max_srq_wr;
376414 int max_srq_sge;
377415 unsigned int max_fast_reg_page_list_len;
416
+ unsigned int max_pi_fast_reg_page_list_len;
378417 u16 max_pkeys;
379418 u8 local_ca_ack_delay;
380419 int sig_prot_cap;
....@@ -388,6 +427,8 @@
388427 struct ib_tm_caps tm_caps;
389428 struct ib_cq_caps cq_caps;
390429 u64 max_dm_size;
430
+ /* Max entries for sgl for optimized performance per READ */
431
+ u32 max_sgl_rd;
391432 };
392433
393434 enum ib_mtu {
....@@ -396,6 +437,11 @@
396437 IB_MTU_1024 = 3,
397438 IB_MTU_2048 = 4,
398439 IB_MTU_4096 = 5
440
+};
441
+
442
+enum opa_mtu {
443
+ OPA_MTU_8192 = 6,
444
+ OPA_MTU_10240 = 7
399445 };
400446
401447 static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
....@@ -424,6 +470,28 @@
424470 return IB_MTU_256;
425471 }
426472
473
+static inline int opa_mtu_enum_to_int(enum opa_mtu mtu)
474
+{
475
+ switch (mtu) {
476
+ case OPA_MTU_8192:
477
+ return 8192;
478
+ case OPA_MTU_10240:
479
+ return 10240;
480
+ default:
481
+ return(ib_mtu_enum_to_int((enum ib_mtu)mtu));
482
+ }
483
+}
484
+
485
+static inline enum opa_mtu opa_mtu_int_to_enum(int mtu)
486
+{
487
+ if (mtu >= 10240)
488
+ return OPA_MTU_10240;
489
+ else if (mtu >= 8192)
490
+ return OPA_MTU_8192;
491
+ else
492
+ return ((enum opa_mtu)ib_mtu_int_to_enum(mtu));
493
+}
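Illustrative round-trip, not part of the patch (the wrapper function is an assumption): sizes of 8192 and above map to the OPA enum values added here, while anything in the classic range falls back to the ib_mtu conversions.

static int example_opa_mtu(void)
{
	/* 10240 maps to OPA_MTU_10240 and converts back to 10240 */
	int big = opa_mtu_enum_to_int(opa_mtu_int_to_enum(10240));

	/* values in the classic range still resolve through the ib_mtu helpers */
	return big + opa_mtu_enum_to_int((enum opa_mtu)IB_MTU_4096);	/* 10240 + 4096 */
}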
494
+
427495 enum ib_port_state {
428496 IB_PORT_NOP = 0,
429497 IB_PORT_DOWN = 1,
....@@ -433,8 +501,19 @@
433501 IB_PORT_ACTIVE_DEFER = 5
434502 };
435503
504
+enum ib_port_phys_state {
505
+ IB_PORT_PHYS_STATE_SLEEP = 1,
506
+ IB_PORT_PHYS_STATE_POLLING = 2,
507
+ IB_PORT_PHYS_STATE_DISABLED = 3,
508
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
509
+ IB_PORT_PHYS_STATE_LINK_UP = 5,
510
+ IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
511
+ IB_PORT_PHYS_STATE_PHY_TEST = 7,
512
+};
513
+
436514 enum ib_port_width {
437515 IB_WIDTH_1X = 1,
516
+ IB_WIDTH_2X = 16,
438517 IB_WIDTH_4X = 2,
439518 IB_WIDTH_8X = 4,
440519 IB_WIDTH_12X = 8
....@@ -444,6 +523,7 @@
444523 {
445524 switch (width) {
446525 case IB_WIDTH_1X: return 1;
526
+ case IB_WIDTH_2X: return 2;
447527 case IB_WIDTH_4X: return 4;
448528 case IB_WIDTH_8X: return 8;
449529 case IB_WIDTH_12X: return 12;
....@@ -458,7 +538,8 @@
458538 IB_SPEED_FDR10 = 8,
459539 IB_SPEED_FDR = 16,
460540 IB_SPEED_EDR = 32,
461
- IB_SPEED_HDR = 64
541
+ IB_SPEED_HDR = 64,
542
+ IB_SPEED_NDR = 128,
462543 };
463544
464545 /**
....@@ -575,6 +656,7 @@
575656 enum ib_port_state state;
576657 enum ib_mtu max_mtu;
577658 enum ib_mtu active_mtu;
659
+ u32 phys_mtu;
578660 int gid_tbl_len;
579661 unsigned int ip_gids:1;
580662 /* This is the value from PortInfo CapabilityMask, defined by IBA */
....@@ -591,8 +673,9 @@
591673 u8 subnet_timeout;
592674 u8 init_type_reply;
593675 u8 active_width;
594
- u8 active_speed;
676
+ u16 active_speed;
595677 u8 phys_state;
678
+ u16 port_cap_flags2;
596679 };
597680
598681 enum ib_device_modify_flags {
....@@ -730,7 +813,11 @@
730813 IB_RATE_25_GBPS = 15,
731814 IB_RATE_100_GBPS = 16,
732815 IB_RATE_200_GBPS = 17,
733
- IB_RATE_300_GBPS = 18
816
+ IB_RATE_300_GBPS = 18,
817
+ IB_RATE_28_GBPS = 19,
818
+ IB_RATE_50_GBPS = 20,
819
+ IB_RATE_400_GBPS = 21,
820
+ IB_RATE_600_GBPS = 22,
734821 };
735822
736823 /**
....@@ -753,118 +840,26 @@
753840 * enum ib_mr_type - memory region type
754841 * @IB_MR_TYPE_MEM_REG: memory region that is used for
755842 * normal registration
756
- * @IB_MR_TYPE_SIGNATURE: memory region that is used for
757
- * signature operations (data-integrity
758
- * capable regions)
759843 * @IB_MR_TYPE_SG_GAPS: memory region that is capable to
760844 * register any arbitrary sg lists (without
761845 * the normal mr constraints - see
762846 * ib_map_mr_sg)
847
+ * @IB_MR_TYPE_DM: memory region that is used for device
848
+ * memory registration
849
+ * @IB_MR_TYPE_USER: memory region that is used for the user-space
850
+ * application
851
+ * @IB_MR_TYPE_DMA: memory region that is used for DMA operations
852
+ * without address translations (VA=PA)
853
+ * @IB_MR_TYPE_INTEGRITY: memory region that is used for
854
+ * data integrity operations
763855 */
764856 enum ib_mr_type {
765857 IB_MR_TYPE_MEM_REG,
766
- IB_MR_TYPE_SIGNATURE,
767858 IB_MR_TYPE_SG_GAPS,
768
-};
769
-
770
-/**
771
- * Signature types
772
- * IB_SIG_TYPE_NONE: Unprotected.
773
- * IB_SIG_TYPE_T10_DIF: Type T10-DIF
774
- */
775
-enum ib_signature_type {
776
- IB_SIG_TYPE_NONE,
777
- IB_SIG_TYPE_T10_DIF,
778
-};
779
-
780
-/**
781
- * Signature T10-DIF block-guard types
782
- * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
783
- * IB_T10DIF_CSUM: Corresponds to IP checksum rules.
784
- */
785
-enum ib_t10_dif_bg_type {
786
- IB_T10DIF_CRC,
787
- IB_T10DIF_CSUM
788
-};
789
-
790
-/**
791
- * struct ib_t10_dif_domain - Parameters specific for T10-DIF
792
- * domain.
793
- * @bg_type: T10-DIF block guard type (CRC|CSUM)
794
- * @pi_interval: protection information interval.
795
- * @bg: seed of guard computation.
796
- * @app_tag: application tag of guard block
797
- * @ref_tag: initial guard block reference tag.
798
- * @ref_remap: Indicate wethear the reftag increments each block
799
- * @app_escape: Indicate to skip block check if apptag=0xffff
800
- * @ref_escape: Indicate to skip block check if reftag=0xffffffff
801
- * @apptag_check_mask: check bitmask of application tag.
802
- */
803
-struct ib_t10_dif_domain {
804
- enum ib_t10_dif_bg_type bg_type;
805
- u16 pi_interval;
806
- u16 bg;
807
- u16 app_tag;
808
- u32 ref_tag;
809
- bool ref_remap;
810
- bool app_escape;
811
- bool ref_escape;
812
- u16 apptag_check_mask;
813
-};
814
-
815
-/**
816
- * struct ib_sig_domain - Parameters for signature domain
817
- * @sig_type: specific signauture type
818
- * @sig: union of all signature domain attributes that may
819
- * be used to set domain layout.
820
- */
821
-struct ib_sig_domain {
822
- enum ib_signature_type sig_type;
823
- union {
824
- struct ib_t10_dif_domain dif;
825
- } sig;
826
-};
827
-
828
-/**
829
- * struct ib_sig_attrs - Parameters for signature handover operation
830
- * @check_mask: bitmask for signature byte check (8 bytes)
831
- * @mem: memory domain layout desciptor.
832
- * @wire: wire domain layout desciptor.
833
- */
834
-struct ib_sig_attrs {
835
- u8 check_mask;
836
- struct ib_sig_domain mem;
837
- struct ib_sig_domain wire;
838
-};
839
-
840
-enum ib_sig_err_type {
841
- IB_SIG_BAD_GUARD,
842
- IB_SIG_BAD_REFTAG,
843
- IB_SIG_BAD_APPTAG,
844
-};
845
-
846
-/**
847
- * Signature check masks (8 bytes in total) according to the T10-PI standard:
848
- * -------- -------- ------------
849
- * | GUARD | APPTAG | REFTAG |
850
- * | 2B | 2B | 4B |
851
- * -------- -------- ------------
852
- */
853
-enum {
854
- IB_SIG_CHECK_GUARD = 0xc0,
855
- IB_SIG_CHECK_APPTAG = 0x30,
856
- IB_SIG_CHECK_REFTAG = 0x0f,
857
-};
858
-
859
-/**
860
- * struct ib_sig_err - signature error descriptor
861
- */
862
-struct ib_sig_err {
863
- enum ib_sig_err_type err_type;
864
- u32 expected;
865
- u32 actual;
866
- u64 sig_err_offset;
867
- u32 key;
859
+ IB_MR_TYPE_DM,
860
+ IB_MR_TYPE_USER,
861
+ IB_MR_TYPE_DMA,
862
+ IB_MR_TYPE_INTEGRITY,
868863 };
869864
870865 enum ib_mr_status_check {
....@@ -890,6 +885,12 @@
890885 * @mult: multiple to convert.
891886 */
892887 __attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
888
+
889
+struct rdma_ah_init_attr {
890
+ struct rdma_ah_attr *ah_attr;
891
+ u32 flags;
892
+ struct net_device *xmit_slave;
893
+};
893894
894895 enum rdma_ah_attr_type {
895896 RDMA_AH_ATTR_TYPE_UNDEFINED,
....@@ -955,13 +956,14 @@
955956 const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
956957
957958 enum ib_wc_opcode {
958
- IB_WC_SEND,
959
- IB_WC_RDMA_WRITE,
960
- IB_WC_RDMA_READ,
961
- IB_WC_COMP_SWAP,
962
- IB_WC_FETCH_ADD,
963
- IB_WC_LSO,
964
- IB_WC_LOCAL_INV,
959
+ IB_WC_SEND = IB_UVERBS_WC_SEND,
960
+ IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
961
+ IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
962
+ IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
963
+ IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
964
+ IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
965
+ IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
966
+ IB_WC_LSO = IB_UVERBS_WC_TSO,
965967 IB_WC_REG_MR,
966968 IB_WC_MASKED_COMP_SWAP,
967969 IB_WC_MASKED_FETCH_ADD,
....@@ -1017,9 +1019,9 @@
10171019 };
10181020
10191021 enum ib_srq_type {
1020
- IB_SRQT_BASIC,
1021
- IB_SRQT_XRC,
1022
- IB_SRQT_TM,
1022
+ IB_SRQT_BASIC = IB_UVERBS_SRQT_BASIC,
1023
+ IB_SRQT_XRC = IB_UVERBS_SRQT_XRC,
1024
+ IB_SRQT_TM = IB_UVERBS_SRQT_TM,
10231025 };
10241026
10251027 static inline bool ib_srq_has_cq(enum ib_srq_type srq_type)
....@@ -1088,16 +1090,16 @@
10881090 IB_QPT_SMI,
10891091 IB_QPT_GSI,
10901092
1091
- IB_QPT_RC,
1092
- IB_QPT_UC,
1093
- IB_QPT_UD,
1093
+ IB_QPT_RC = IB_UVERBS_QPT_RC,
1094
+ IB_QPT_UC = IB_UVERBS_QPT_UC,
1095
+ IB_QPT_UD = IB_UVERBS_QPT_UD,
10941096 IB_QPT_RAW_IPV6,
10951097 IB_QPT_RAW_ETHERTYPE,
1096
- IB_QPT_RAW_PACKET = 8,
1097
- IB_QPT_XRC_INI = 9,
1098
- IB_QPT_XRC_TGT,
1098
+ IB_QPT_RAW_PACKET = IB_UVERBS_QPT_RAW_PACKET,
1099
+ IB_QPT_XRC_INI = IB_UVERBS_QPT_XRC_INI,
1100
+ IB_QPT_XRC_TGT = IB_UVERBS_QPT_XRC_TGT,
10991101 IB_QPT_MAX,
1100
- IB_QPT_DRIVER = 0xFF,
1102
+ IB_QPT_DRIVER = IB_UVERBS_QPT_DRIVER,
11011103 /* Reserve a range for qp types internal to the low level driver.
11021104 * These qp types will not be visible at the IB core layer, so the
11031105 * IB_QPT_MAX usages should not be affected in the core layer
....@@ -1116,17 +1118,21 @@
11161118
11171119 enum ib_qp_create_flags {
11181120 IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
1119
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
1121
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK =
1122
+ IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
11201123 IB_QP_CREATE_CROSS_CHANNEL = 1 << 2,
11211124 IB_QP_CREATE_MANAGED_SEND = 1 << 3,
11221125 IB_QP_CREATE_MANAGED_RECV = 1 << 4,
11231126 IB_QP_CREATE_NETIF_QP = 1 << 5,
1124
- IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
1125
- /* FREE = 1 << 7, */
1126
- IB_QP_CREATE_SCATTER_FCS = 1 << 8,
1127
- IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9,
1127
+ IB_QP_CREATE_INTEGRITY_EN = 1 << 6,
1128
+ IB_QP_CREATE_NETDEV_USE = 1 << 7,
1129
+ IB_QP_CREATE_SCATTER_FCS =
1130
+ IB_UVERBS_QP_CREATE_SCATTER_FCS,
1131
+ IB_QP_CREATE_CVLAN_STRIPPING =
1132
+ IB_UVERBS_QP_CREATE_CVLAN_STRIPPING,
11281133 IB_QP_CREATE_SOURCE_QPN = 1 << 10,
1129
- IB_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11,
1134
+ IB_QP_CREATE_PCI_WRITE_END_PADDING =
1135
+ IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING,
11301136 /* reserve bits 26-31 for low level drivers' internal use */
11311137 IB_QP_CREATE_RESERVED_START = 1 << 26,
11321138 IB_QP_CREATE_RESERVED_END = 1 << 31,
....@@ -1138,7 +1144,9 @@
11381144 */
11391145
11401146 struct ib_qp_init_attr {
1147
+ /* Consumer's event_handler callback must not block */
11411148 void (*event_handler)(struct ib_event *, void *);
1149
+
11421150 void *qp_context;
11431151 struct ib_cq *send_cq;
11441152 struct ib_cq *recv_cq;
....@@ -1276,6 +1284,7 @@
12761284 u8 alt_port_num;
12771285 u8 alt_timeout;
12781286 u32 rate_limit;
1287
+ struct net_device *xmit_slave;
12791288 };
12801289
12811290 enum ib_wr_opcode {
....@@ -1287,6 +1296,7 @@
12871296 IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
12881297 IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
12891298 IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
1299
+ IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
12901300 IB_WR_LSO = IB_UVERBS_WR_TSO,
12911301 IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
12921302 IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
....@@ -1298,7 +1308,7 @@
12981308
12991309 /* These are kernel only and can not be issued by userspace */
13001310 IB_WR_REG_MR = 0x20,
1301
- IB_WR_REG_SIG_MR,
1311
+ IB_WR_REG_MR_INTEGRITY,
13021312
13031313 /* reserve values for low level drivers' internal use.
13041314 * These values will not be used at all in the ib core layer.
....@@ -1408,20 +1418,6 @@
14081418 return container_of(wr, struct ib_reg_wr, wr);
14091419 }
14101420
1411
-struct ib_sig_handover_wr {
1412
- struct ib_send_wr wr;
1413
- struct ib_sig_attrs *sig_attrs;
1414
- struct ib_mr *sig_mr;
1415
- int access_flags;
1416
- struct ib_sge *prot;
1417
-};
1418
-
1419
-static inline const struct ib_sig_handover_wr *
1420
-sig_handover_wr(const struct ib_send_wr *wr)
1421
-{
1422
- return container_of(wr, struct ib_sig_handover_wr, wr);
1423
-}
1424
-
14251421 struct ib_recv_wr {
14261422 struct ib_recv_wr *next;
14271423 union {
....@@ -1441,8 +1437,11 @@
14411437 IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
14421438 IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
14431439 IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
1440
+ IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
14441441
1445
- IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
1442
+ IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
1443
+ IB_ACCESS_SUPPORTED =
1444
+ ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
14461445 };
14471446
14481447 /*
....@@ -1454,12 +1453,6 @@
14541453 IB_MR_REREG_PD = (1<<1),
14551454 IB_MR_REREG_ACCESS = (1<<2),
14561455 IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1)
1457
-};
1458
-
1459
-struct ib_fmr_attr {
1460
- int max_pages;
1461
- int max_maps;
1462
- u8 page_shift;
14631456 };
14641457
14651458 struct ib_umem;
....@@ -1487,34 +1480,15 @@
14871480 struct ib_ucontext {
14881481 struct ib_device *device;
14891482 struct ib_uverbs_file *ufile;
1490
- /*
1491
- * 'closing' can be read by the driver only during a destroy callback,
1492
- * it is set when we are closing the file descriptor and indicates
1493
- * that mm_sem may be locked.
1494
- */
1495
- int closing;
14961483
14971484 bool cleanup_retryable;
14981485
1499
- struct pid *tgid;
1500
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1501
- struct rb_root_cached umem_tree;
1502
- /*
1503
- * Protects .umem_rbroot and tree, as well as odp_mrs_count and
1504
- * mmu notifiers registration.
1505
- */
1506
- struct rw_semaphore umem_rwsem;
1507
- void (*invalidate_range)(struct ib_umem *umem,
1508
- unsigned long start, unsigned long end);
1509
-
1510
- struct mmu_notifier mn;
1511
- atomic_t notifier_count;
1512
- /* A list of umems that don't have private mmu notifier counters yet. */
1513
- struct list_head no_private_counters;
1514
- int odp_mrs_count;
1515
-#endif
1516
-
15171486 struct ib_rdmacg_object cg_obj;
1487
+ /*
1488
+ * Implementation details of the RDMA core, don't use in drivers:
1489
+ */
1490
+ struct rdma_restrack_entry res;
1491
+ struct xarray mmap_xa;
15181492 };
15191493
15201494 struct ib_uobject {
....@@ -1561,9 +1535,8 @@
15611535 struct ib_device *device;
15621536 atomic_t usecnt; /* count all exposed resources */
15631537 struct inode *inode;
1564
-
1565
- struct mutex tgt_qp_mutex;
1566
- struct list_head tgt_qp_list;
1538
+ struct rw_semaphore tgt_qps_rwsem;
1539
+ struct xarray tgt_qps;
15671540 };
15681541
15691542 struct ib_ah {
....@@ -1577,27 +1550,39 @@
15771550 typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
15781551
15791552 enum ib_poll_context {
1580
- IB_POLL_DIRECT, /* caller context, no hw completions */
15811553 IB_POLL_SOFTIRQ, /* poll from softirq context */
15821554 IB_POLL_WORKQUEUE, /* poll from workqueue */
15831555 IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
1556
+ IB_POLL_LAST_POOL_TYPE = IB_POLL_UNBOUND_WORKQUEUE,
1557
+
1558
+ IB_POLL_DIRECT, /* caller context, no hw completions */
15841559 };
15851560
15861561 struct ib_cq {
15871562 struct ib_device *device;
1588
- struct ib_uobject *uobject;
1563
+ struct ib_ucq_object *uobject;
15891564 ib_comp_handler comp_handler;
15901565 void (*event_handler)(struct ib_event *, void *);
15911566 void *cq_context;
15921567 int cqe;
1568
+ unsigned int cqe_used;
15931569 atomic_t usecnt; /* count number of work queues */
15941570 enum ib_poll_context poll_ctx;
15951571 struct ib_wc *wc;
1572
+ struct list_head pool_entry;
15961573 union {
15971574 struct irq_poll iop;
15981575 struct work_struct work;
15991576 };
16001577 struct workqueue_struct *comp_wq;
1578
+ struct dim *dim;
1579
+
1580
+ /* updated only by trace points */
1581
+ ktime_t timestamp;
1582
+ u8 interrupt:1;
1583
+ u8 shared:1;
1584
+ unsigned int comp_vector;
1585
+
16011586 /*
16021587 * Implementation details of the RDMA core, don't use in drivers:
16031588 */
....@@ -1607,7 +1592,7 @@
16071592 struct ib_srq {
16081593 struct ib_device *device;
16091594 struct ib_pd *pd;
1610
- struct ib_uobject *uobject;
1595
+ struct ib_usrq_object *uobject;
16111596 void (*event_handler)(struct ib_event *, void *);
16121597 void *srq_context;
16131598 enum ib_srq_type srq_type;
....@@ -1641,7 +1626,7 @@
16411626 };
16421627
16431628 enum ib_wq_type {
1644
- IB_WQT_RQ
1629
+ IB_WQT_RQ = IB_UVERBS_WQT_RQ,
16451630 };
16461631
16471632 enum ib_wq_state {
....@@ -1652,7 +1637,7 @@
16521637
16531638 struct ib_wq {
16541639 struct ib_device *device;
1655
- struct ib_uobject *uobject;
1640
+ struct ib_uwq_object *uobject;
16561641 void *wq_context;
16571642 void (*event_handler)(struct ib_event *, void *);
16581643 struct ib_pd *pd;
....@@ -1664,10 +1649,11 @@
16641649 };
16651650
16661651 enum ib_wq_flags {
1667
- IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0,
1668
- IB_WQ_FLAGS_SCATTER_FCS = 1 << 1,
1669
- IB_WQ_FLAGS_DELAY_DROP = 1 << 2,
1670
- IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3,
1652
+ IB_WQ_FLAGS_CVLAN_STRIPPING = IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING,
1653
+ IB_WQ_FLAGS_SCATTER_FCS = IB_UVERBS_WQ_FLAGS_SCATTER_FCS,
1654
+ IB_WQ_FLAGS_DELAY_DROP = IB_UVERBS_WQ_FLAGS_DELAY_DROP,
1655
+ IB_WQ_FLAGS_PCI_WRITE_END_PADDING =
1656
+ IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING,
16711657 };
16721658
16731659 struct ib_wq_init_attr {
....@@ -1768,7 +1754,7 @@
17681754 atomic_t usecnt;
17691755 struct list_head open_list;
17701756 struct ib_qp *real_qp;
1771
- struct ib_uobject *uobject;
1757
+ struct ib_uqp_object *uobject;
17721758 void (*event_handler)(struct ib_event *, void *);
17731759 void *qp_context;
17741760 /* sgid_attrs associated with the AV's */
....@@ -1782,10 +1768,14 @@
17821768 struct ib_qp_security *qp_sec;
17831769 u8 port;
17841770
1771
+ bool integrity_en;
17851772 /*
17861773 * Implementation details of the RDMA core, don't use in drivers:
17871774 */
17881775 struct rdma_restrack_entry res;
1776
+
1777
+ /* The counter the qp is bind to */
1778
+ struct rdma_counter *counter;
17891779 };
17901780
17911781 struct ib_dm {
....@@ -1804,6 +1794,7 @@
18041794 u64 iova;
18051795 u64 length;
18061796 unsigned int page_size;
1797
+ enum ib_mr_type type;
18071798 bool need_inval;
18081799 union {
18091800 struct ib_uobject *uobject; /* user */
....@@ -1811,7 +1802,7 @@
18111802 };
18121803
18131804 struct ib_dm *dm;
1814
-
1805
+ struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
18151806 /*
18161807 * Implementation details of the RDMA core, don't use in drivers:
18171808 */
....@@ -1824,14 +1815,6 @@
18241815 struct ib_uobject *uobject;
18251816 u32 rkey;
18261817 enum ib_mw_type type;
1827
-};
1828
-
1829
-struct ib_fmr {
1830
- struct ib_device *device;
1831
- struct ib_pd *pd;
1832
- struct list_head list;
1833
- u32 lkey;
1834
- u32 rkey;
18351818 };
18361819
18371820 /* Supported steering options */
....@@ -1875,17 +1858,6 @@
18751858 #define IB_FLOW_SPEC_LAYER_MASK 0xF0
18761859 #define IB_FLOW_SPEC_SUPPORT_LAYERS 10
18771860
1878
-/* Flow steering rule priority is set according to it's domain.
1879
- * Lower domain value means higher priority.
1880
- */
1881
-enum ib_flow_domain {
1882
- IB_FLOW_DOMAIN_USER,
1883
- IB_FLOW_DOMAIN_ETHTOOL,
1884
- IB_FLOW_DOMAIN_RFS,
1885
- IB_FLOW_DOMAIN_NIC,
1886
- IB_FLOW_DOMAIN_NUM /* Must be last */
1887
-};
1888
-
18891861 enum ib_flow_flags {
18901862 IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
18911863 IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
....@@ -1898,7 +1870,7 @@
18981870 __be16 ether_type;
18991871 __be16 vlan_tag;
19001872 /* Must be last */
1901
- u8 real_sz[0];
1873
+ u8 real_sz[];
19021874 };
19031875
19041876 struct ib_flow_spec_eth {
....@@ -1912,7 +1884,7 @@
19121884 __be16 dlid;
19131885 __u8 sl;
19141886 /* Must be last */
1915
- u8 real_sz[0];
1887
+ u8 real_sz[];
19161888 };
19171889
19181890 struct ib_flow_spec_ib {
....@@ -1937,7 +1909,7 @@
19371909 u8 ttl;
19381910 u8 flags;
19391911 /* Must be last */
1940
- u8 real_sz[0];
1912
+ u8 real_sz[];
19411913 };
19421914
19431915 struct ib_flow_spec_ipv4 {
....@@ -1955,7 +1927,7 @@
19551927 u8 traffic_class;
19561928 u8 hop_limit;
19571929 /* Must be last */
1958
- u8 real_sz[0];
1930
+ u8 real_sz[];
19591931 };
19601932
19611933 struct ib_flow_spec_ipv6 {
....@@ -1969,7 +1941,7 @@
19691941 __be16 dst_port;
19701942 __be16 src_port;
19711943 /* Must be last */
1972
- u8 real_sz[0];
1944
+ u8 real_sz[];
19731945 };
19741946
19751947 struct ib_flow_spec_tcp_udp {
....@@ -1981,7 +1953,7 @@
19811953
19821954 struct ib_flow_tunnel_filter {
19831955 __be32 tunnel_id;
1984
- u8 real_sz[0];
1956
+ u8 real_sz[];
19851957 };
19861958
19871959 /* ib_flow_spec_tunnel describes the Vxlan tunnel
....@@ -1998,7 +1970,7 @@
19981970 __be32 spi;
19991971 __be32 seq;
20001972 /* Must be last */
2001
- u8 real_sz[0];
1973
+ u8 real_sz[];
20021974 };
20031975
20041976 struct ib_flow_spec_esp {
....@@ -2013,7 +1985,7 @@
20131985 __be16 protocol;
20141986 __be32 key;
20151987 /* Must be last */
2016
- u8 real_sz[0];
1988
+ u8 real_sz[];
20171989 };
20181990
20191991 struct ib_flow_spec_gre {
....@@ -2026,7 +1998,7 @@
20261998 struct ib_flow_mpls_filter {
20271999 __be32 tag;
20282000 /* Must be last */
2029
- u8 real_sz[0];
2001
+ u8 real_sz[];
20302002 };
20312003
20322004 struct ib_flow_spec_mpls {
....@@ -2158,7 +2130,7 @@
21582130 atomic_t usecnt;
21592131 };
21602132
2161
-struct ib_mad_hdr;
2133
+struct ib_mad;
21622134 struct ib_grh;
21632135
21642136 enum ib_process_mad_flags {
....@@ -2182,19 +2154,28 @@
21822154 enum ib_port_state port_state;
21832155 };
21842156
2185
-struct ib_cache {
2186
- rwlock_t lock;
2187
- struct ib_event_handler event_handler;
2188
- struct ib_port_cache *ports;
2189
-};
2190
-
2191
-struct iw_cm_verbs;
2192
-
21932157 struct ib_port_immutable {
21942158 int pkey_tbl_len;
21952159 int gid_tbl_len;
21962160 u32 core_cap_flags;
21972161 u32 max_mad_size;
2162
+};
2163
+
2164
+struct ib_port_data {
2165
+ struct ib_device *ib_dev;
2166
+
2167
+ struct ib_port_immutable immutable;
2168
+
2169
+ spinlock_t pkey_list_lock;
2170
+ struct list_head pkey_list;
2171
+
2172
+ struct ib_port_cache cache;
2173
+
2174
+ spinlock_t netdev_lock;
2175
+ struct net_device __rcu *netdev;
2176
+ struct hlist_node ndev_hash_link;
2177
+ struct rdma_port_counter port_counter;
2178
+ struct rdma_hw_stats *hw_stats;
21982179 };
21992180
22002181 /* rdma netdev type - specifies protocol type */
....@@ -2211,6 +2192,7 @@
22112192 void *clnt_priv;
22122193 struct ib_device *hca;
22132194 u8 port_num;
2195
+ int mtu;
22142196
22152197 /*
22162198 * cleanup function must be specified.
....@@ -2232,10 +2214,20 @@
22322214 union ib_gid *gid, u16 mlid);
22332215 };
22342216
2235
-struct ib_port_pkey_list {
2236
- /* Lock to hold while modifying the list. */
2237
- spinlock_t list_lock;
2238
- struct list_head pkey_list;
2217
+struct rdma_netdev_alloc_params {
2218
+ size_t sizeof_priv;
2219
+ unsigned int txqs;
2220
+ unsigned int rxqs;
2221
+ void *param;
2222
+
2223
+ int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num,
2224
+ struct net_device *netdev, void *param);
2225
+};
2226
+
2227
+struct ib_odp_counters {
2228
+ atomic64_t faults;
2229
+ atomic64_t invalidations;
2230
+ atomic64_t prefetch;
22392231 };
22402232
22412233 struct ib_counters {
....@@ -2252,33 +2244,266 @@
22522244 };
22532245
22542246 struct uverbs_attr_bundle;
2247
+struct iw_cm_id;
2248
+struct iw_cm_conn_param;
22552249
2256
-struct ib_device {
2257
- /* Do not access @dma_device directly from ULP nor from HW drivers. */
2258
- struct device *dma_device;
2250
+#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \
2251
+ .size_##ib_struct = \
2252
+ (sizeof(struct drv_struct) + \
2253
+ BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) + \
2254
+ BUILD_BUG_ON_ZERO( \
2255
+ !__same_type(((struct drv_struct *)NULL)->member, \
2256
+ struct ib_struct)))
22592257
2260
- char name[IB_DEVICE_NAME_MAX];
2258
+#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \
2259
+ ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
22612260
2262
- struct list_head event_handler_list;
2263
- spinlock_t event_handler_lock;
2261
+#define rdma_zalloc_drv_obj(ib_dev, ib_type) \
2262
+ rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
22642263
2265
- spinlock_t client_data_lock;
2266
- struct list_head core_list;
2267
- /* Access to the client_data_list is protected by the client_data_lock
2268
- * spinlock and the lists_rwsem read-write semaphore */
2269
- struct list_head client_data_list;
2264
+#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
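Minimal sketch of how a provider is expected to use these size helpers, not part of the patch (the driver structures are hypothetical and would live in a provider source file): the core object must sit at offset zero of the driver structure, which the offsetof() check in INIT_RDMA_OBJ_SIZE enforces, so that the core can allocate the full driver object through rdma_zalloc_drv_obj().

struct example_pd {
	struct ib_pd ibpd;	/* must be first: offsetof() check in the macro */
	u32 pdn;		/* hypothetical driver-private state */
};

static const struct ib_device_ops example_dev_ops = {
	/* fills .size_ib_pd so rdma_zalloc_drv_obj(dev, ib_pd) knows how much to allocate */
	INIT_RDMA_OBJ_SIZE(ib_pd, example_pd, ibpd),
};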
22702265
2271
- struct ib_cache cache;
2266
+struct rdma_user_mmap_entry {
2267
+ struct kref ref;
2268
+ struct ib_ucontext *ucontext;
2269
+ unsigned long start_pgoff;
2270
+ size_t npages;
2271
+ bool driver_removed;
2272
+};
2273
+
2274
+/* Return the offset (in bytes) the user should pass to libc's mmap() */
2275
+static inline u64
2276
+rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry)
2277
+{
2278
+ return (u64)entry->start_pgoff << PAGE_SHIFT;
2279
+}
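Sketch of the intended flow, not part of the patch (the response struct and field name are hypothetical): a driver reports this offset to userspace, and userspace passes it back unchanged as the offset argument of mmap(2) on the uverbs device file.

struct example_mmap_resp {
	__aligned_u64 mmap_offset;	/* hypothetical driver/user ABI field */
};

static void example_fill_mmap_resp(struct rdma_user_mmap_entry *entry,
				   struct example_mmap_resp *resp)
{
	/* value userspace later hands to mmap(2) as the offset */
	resp->mmap_offset = rdma_user_mmap_get_offset(entry);
}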
2280
+
2281
+/**
2282
+ * struct ib_device_ops - InfiniBand device operations
2283
+ * This structure defines all the InfiniBand device operations, providers will
2284
+ * need to define the supported operations, otherwise they will be set to null.
2285
+ */
2286
+struct ib_device_ops {
2287
+ struct module *owner;
2288
+ enum rdma_driver_id driver_id;
2289
+ u32 uverbs_abi_ver;
2290
+ unsigned int uverbs_no_driver_id_binding:1;
2291
+
2292
+ int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
2293
+ const struct ib_send_wr **bad_send_wr);
2294
+ int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
2295
+ const struct ib_recv_wr **bad_recv_wr);
2296
+ void (*drain_rq)(struct ib_qp *qp);
2297
+ void (*drain_sq)(struct ib_qp *qp);
2298
+ int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
2299
+ int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
2300
+ int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
2301
+ int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
2302
+ int (*post_srq_recv)(struct ib_srq *srq,
2303
+ const struct ib_recv_wr *recv_wr,
2304
+ const struct ib_recv_wr **bad_recv_wr);
2305
+ int (*process_mad)(struct ib_device *device, int process_mad_flags,
2306
+ u8 port_num, const struct ib_wc *in_wc,
2307
+ const struct ib_grh *in_grh,
2308
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
2309
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
2310
+ int (*query_device)(struct ib_device *device,
2311
+ struct ib_device_attr *device_attr,
2312
+ struct ib_udata *udata);
2313
+ int (*modify_device)(struct ib_device *device, int device_modify_mask,
2314
+ struct ib_device_modify *device_modify);
2315
+ void (*get_dev_fw_str)(struct ib_device *device, char *str);
2316
+ const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
2317
+ int comp_vector);
2318
+ int (*query_port)(struct ib_device *device, u8 port_num,
2319
+ struct ib_port_attr *port_attr);
2320
+ int (*modify_port)(struct ib_device *device, u8 port_num,
2321
+ int port_modify_mask,
2322
+ struct ib_port_modify *port_modify);
22722323 /**
2273
- * port_immutable is indexed by port number
2324
+ * The following mandatory functions are used only at device
2325
+ * registration. Keep functions such as these at the end of this
2326
+ * structure to avoid cache line misses when accessing struct ib_device
2327
+ * in fast paths.
22742328 */
2275
- struct ib_port_immutable *port_immutable;
2329
+ int (*get_port_immutable)(struct ib_device *device, u8 port_num,
2330
+ struct ib_port_immutable *immutable);
2331
+ enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
2332
+ u8 port_num);
2333
+ /**
2334
+ * When calling get_netdev, the HW vendor's driver should return the
2335
+ * net device of device @device at port @port_num or NULL if such
2336
+ * a net device doesn't exist. The vendor driver should call dev_hold
2337
+ * on this net device. The HW vendor's device driver must guarantee
2338
+ * that this function returns NULL before the net device has finished
2339
+ * NETDEV_UNREGISTER state.
2340
+ */
2341
+ struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
2342
+ /**
2343
+ * rdma netdev operation
2344
+ *
2345
+ * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
2346
+ * must return -EOPNOTSUPP if it doesn't support the specified type.
2347
+ */
2348
+ struct net_device *(*alloc_rdma_netdev)(
2349
+ struct ib_device *device, u8 port_num, enum rdma_netdev_t type,
2350
+ const char *name, unsigned char name_assign_type,
2351
+ void (*setup)(struct net_device *));
22762352
2277
- int num_comp_vectors;
2278
-
2279
- struct ib_port_pkey_list *port_pkey_list;
2280
-
2281
- struct iw_cm_verbs *iwcm;
2353
+ int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
2354
+ enum rdma_netdev_t type,
2355
+ struct rdma_netdev_alloc_params *params);
2356
+ /**
2357
+ * query_gid should be return GID value for @device, when @port_num
2358
+ * link layer is either IB or iWarp. It is no-op if @port_num port
2359
+ * is RoCE link layer.
2360
+ */
2361
+ int (*query_gid)(struct ib_device *device, u8 port_num, int index,
2362
+ union ib_gid *gid);
2363
+ /**
2364
+ * When calling add_gid, the HW vendor's driver should add the gid
2365
+ * of device of port at gid index available at @attr. Meta-info of
2366
+ * that gid (for example, the network device related to this gid) is
2367
+ * available at @attr. @context allows the HW vendor driver to store
2368
+ * extra information together with a GID entry. The HW vendor driver may
2369
+ * allocate memory to contain this information and store it in @context
2370
+ * when a new GID entry is written to. Params are consistent until the
2371
+ * next call of add_gid or delete_gid. The function should return 0 on
2372
+ * success or error otherwise. The function could be called
2373
+ * concurrently for different ports. This function is only called when
2374
+ * roce_gid_table is used.
2375
+ */
2376
+ int (*add_gid)(const struct ib_gid_attr *attr, void **context);
2377
+ /**
2378
+ * When calling del_gid, the HW vendor's driver should delete the
2379
+ * gid of device @device at gid index gid_index of port port_num
2380
+ * available in @attr.
2381
+ * Upon the deletion of a GID entry, the HW vendor must free any
2382
+ * allocated memory. The caller will clear @context afterwards.
2383
+ * This function is only called when roce_gid_table is used.
2384
+ */
2385
+ int (*del_gid)(const struct ib_gid_attr *attr, void **context);
2386
+ int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
2387
+ u16 *pkey);
2388
+ int (*alloc_ucontext)(struct ib_ucontext *context,
2389
+ struct ib_udata *udata);
2390
+ void (*dealloc_ucontext)(struct ib_ucontext *context);
2391
+ int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
2392
+ /**
2393
+ * This will be called once refcount of an entry in mmap_xa reaches
2394
+ * zero. The type of the memory that was mapped may differ between
2395
+ * entries and is opaque to the rdma_user_mmap interface.
2396
+ * Therefore needs to be implemented by the driver in mmap_free.
2397
+ */
2398
+ void (*mmap_free)(struct rdma_user_mmap_entry *entry);
2399
+ void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
2400
+ int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
2401
+ int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
2402
+ int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
2403
+ struct ib_udata *udata);
2404
+ int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
2405
+ int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
2406
+ int (*destroy_ah)(struct ib_ah *ah, u32 flags);
2407
+ int (*create_srq)(struct ib_srq *srq,
2408
+ struct ib_srq_init_attr *srq_init_attr,
2409
+ struct ib_udata *udata);
2410
+ int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
2411
+ enum ib_srq_attr_mask srq_attr_mask,
2412
+ struct ib_udata *udata);
2413
+ int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
2414
+ int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
2415
+ struct ib_qp *(*create_qp)(struct ib_pd *pd,
2416
+ struct ib_qp_init_attr *qp_init_attr,
2417
+ struct ib_udata *udata);
2418
+ int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
2419
+ int qp_attr_mask, struct ib_udata *udata);
2420
+ int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
2421
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
2422
+ int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
2423
+ int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
2424
+ struct ib_udata *udata);
2425
+ int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
2426
+ int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
2427
+ int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
2428
+ struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
2429
+ struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
2430
+ u64 virt_addr, int mr_access_flags,
2431
+ struct ib_udata *udata);
2432
+ int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
2433
+ u64 virt_addr, int mr_access_flags,
2434
+ struct ib_pd *pd, struct ib_udata *udata);
2435
+ int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
2436
+ struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
2437
+ u32 max_num_sg);
2438
+ struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd,
2439
+ u32 max_num_data_sg,
2440
+ u32 max_num_meta_sg);
2441
+ int (*advise_mr)(struct ib_pd *pd,
2442
+ enum ib_uverbs_advise_mr_advice advice, u32 flags,
2443
+ struct ib_sge *sg_list, u32 num_sge,
2444
+ struct uverbs_attr_bundle *attrs);
2445
+ int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
2446
+ unsigned int *sg_offset);
2447
+ int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
2448
+ struct ib_mr_status *mr_status);
2449
+ int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata);
2450
+ int (*dealloc_mw)(struct ib_mw *mw);
2451
+ int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
2452
+ int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
2453
+ int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
2454
+ int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
2455
+ struct ib_flow *(*create_flow)(struct ib_qp *qp,
2456
+ struct ib_flow_attr *flow_attr,
2457
+ struct ib_udata *udata);
2458
+ int (*destroy_flow)(struct ib_flow *flow_id);
2459
+ struct ib_flow_action *(*create_flow_action_esp)(
2460
+ struct ib_device *device,
2461
+ const struct ib_flow_action_attrs_esp *attr,
2462
+ struct uverbs_attr_bundle *attrs);
2463
+ int (*destroy_flow_action)(struct ib_flow_action *action);
2464
+ int (*modify_flow_action_esp)(
2465
+ struct ib_flow_action *action,
2466
+ const struct ib_flow_action_attrs_esp *attr,
2467
+ struct uverbs_attr_bundle *attrs);
2468
+ int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
2469
+ int state);
2470
+ int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
2471
+ struct ifla_vf_info *ivf);
2472
+ int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
2473
+ struct ifla_vf_stats *stats);
2474
+ int (*get_vf_guid)(struct ib_device *device, int vf, u8 port,
2475
+ struct ifla_vf_guid *node_guid,
2476
+ struct ifla_vf_guid *port_guid);
2477
+ int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
2478
+ int type);
2479
+ struct ib_wq *(*create_wq)(struct ib_pd *pd,
2480
+ struct ib_wq_init_attr *init_attr,
2481
+ struct ib_udata *udata);
2482
+ int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
2483
+ int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
2484
+ u32 wq_attr_mask, struct ib_udata *udata);
2485
+ int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table,
2486
+ struct ib_rwq_ind_table_init_attr *init_attr,
2487
+ struct ib_udata *udata);
2488
+ int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
2489
+ struct ib_dm *(*alloc_dm)(struct ib_device *device,
2490
+ struct ib_ucontext *context,
2491
+ struct ib_dm_alloc_attr *attr,
2492
+ struct uverbs_attr_bundle *attrs);
2493
+ int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
2494
+ struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
2495
+ struct ib_dm_mr_attr *attr,
2496
+ struct uverbs_attr_bundle *attrs);
2497
+ int (*create_counters)(struct ib_counters *counters,
2498
+ struct uverbs_attr_bundle *attrs);
2499
+ int (*destroy_counters)(struct ib_counters *counters);
2500
+ int (*read_counters)(struct ib_counters *counters,
2501
+ struct ib_counters_read_attr *counters_read_attr,
2502
+ struct uverbs_attr_bundle *attrs);
2503
+ int (*map_mr_sg_pi)(struct ib_mr *mr, struct scatterlist *data_sg,
2504
+ int data_sg_nents, unsigned int *data_sg_offset,
2505
+ struct scatterlist *meta_sg, int meta_sg_nents,
2506
+ unsigned int *meta_sg_offset);
22822507
22832508 /**
22842509 * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
....@@ -2286,8 +2511,8 @@
22862511 * core when the device is removed. A lifespan of -1 in the return
22872512 * struct tells the core to set a default lifespan.
22882513 */
2289
- struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
2290
- u8 port_num);
2514
+ struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
2515
+ u8 port_num);
22912516 /**
22922517 * get_hw_stats - Fill in the counter value(s) in the stats struct.
22932518 * @index - The index in the value array we wish to have updated, or
....@@ -2300,261 +2525,145 @@
23002525 * Drivers are allowed to update all counters in leiu of just the
23012526 * one given in index at their option
23022527 */
2303
- int (*get_hw_stats)(struct ib_device *device,
2304
- struct rdma_hw_stats *stats,
2305
- u8 port, int index);
2306
- int (*query_device)(struct ib_device *device,
2307
- struct ib_device_attr *device_attr,
2308
- struct ib_udata *udata);
2309
- int (*query_port)(struct ib_device *device,
2310
- u8 port_num,
2311
- struct ib_port_attr *port_attr);
2312
- enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
2313
- u8 port_num);
2314
- /* When calling get_netdev, the HW vendor's driver should return the
2315
- * net device of device @device at port @port_num or NULL if such
2316
- * a net device doesn't exist. The vendor driver should call dev_hold
2317
- * on this net device. The HW vendor's device driver must guarantee
2318
- * that this function returns NULL before the net device has finished
2319
- * NETDEV_UNREGISTER state.
2528
+ int (*get_hw_stats)(struct ib_device *device,
2529
+ struct rdma_hw_stats *stats, u8 port, int index);
2530
+ /*
2531
+ * This function is called once for each port when a ib device is
2532
+ * registered.
23202533 */
2321
- struct net_device *(*get_netdev)(struct ib_device *device,
2322
- u8 port_num);
2323
- /* query_gid should be return GID value for @device, when @port_num
2324
- * link layer is either IB or iWarp. It is no-op if @port_num port
2325
- * is RoCE link layer.
2534
+ int (*init_port)(struct ib_device *device, u8 port_num,
2535
+ struct kobject *port_sysfs);
2536
+ /**
2537
+ * Allows rdma drivers to add their own restrack attributes.
23262538 */
2327
- int (*query_gid)(struct ib_device *device,
2328
- u8 port_num, int index,
2329
- union ib_gid *gid);
2330
- /* When calling add_gid, the HW vendor's driver should add the gid
2331
- * of device of port at gid index available at @attr. Meta-info of
2332
- * that gid (for example, the network device related to this gid) is
2333
- * available at @attr. @context allows the HW vendor driver to store
2334
- * extra information together with a GID entry. The HW vendor driver may
2335
- * allocate memory to contain this information and store it in @context
2336
- * when a new GID entry is written to. Params are consistent until the
2337
- * next call of add_gid or delete_gid. The function should return 0 on
2338
- * success or error otherwise. The function could be called
2339
- * concurrently for different ports. This function is only called when
2340
- * roce_gid_table is used.
2539
+ int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
2540
+ int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr);
2541
+ int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq);
2542
+ int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq);
2543
+ int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp);
2544
+ int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp);
2545
+ int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id);
2546
+
2547
+ /* Device lifecycle callbacks */
2548
+ /*
2549
+ * Called after the device becomes registered, before clients are
2550
+ * attached
23412551 */
2342
- int (*add_gid)(const struct ib_gid_attr *attr,
2343
- void **context);
2344
- /* When calling del_gid, the HW vendor's driver should delete the
2345
- * gid of device @device at gid index gid_index of port port_num
2346
- * available in @attr.
2347
- * Upon the deletion of a GID entry, the HW vendor must free any
2348
- * allocated memory. The caller will clear @context afterwards.
2349
- * This function is only called when roce_gid_table is used.
2552
+ int (*enable_driver)(struct ib_device *dev);
2553
+ /*
2554
+ * This is called as part of ib_dealloc_device().
23502555 */
2351
- int (*del_gid)(const struct ib_gid_attr *attr,
2352
- void **context);
2353
- int (*query_pkey)(struct ib_device *device,
2354
- u8 port_num, u16 index, u16 *pkey);
2355
- int (*modify_device)(struct ib_device *device,
2356
- int device_modify_mask,
2357
- struct ib_device_modify *device_modify);
2358
- int (*modify_port)(struct ib_device *device,
2359
- u8 port_num, int port_modify_mask,
2360
- struct ib_port_modify *port_modify);
2361
- struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
2362
- struct ib_udata *udata);
2363
- int (*dealloc_ucontext)(struct ib_ucontext *context);
2364
- int (*mmap)(struct ib_ucontext *context,
2365
- struct vm_area_struct *vma);
2366
- struct ib_pd * (*alloc_pd)(struct ib_device *device,
2367
- struct ib_ucontext *context,
2368
- struct ib_udata *udata);
2369
- int (*dealloc_pd)(struct ib_pd *pd);
2370
- struct ib_ah * (*create_ah)(struct ib_pd *pd,
2371
- struct rdma_ah_attr *ah_attr,
2372
- struct ib_udata *udata);
2373
- int (*modify_ah)(struct ib_ah *ah,
2374
- struct rdma_ah_attr *ah_attr);
2375
- int (*query_ah)(struct ib_ah *ah,
2376
- struct rdma_ah_attr *ah_attr);
2377
- int (*destroy_ah)(struct ib_ah *ah);
2378
- struct ib_srq * (*create_srq)(struct ib_pd *pd,
2379
- struct ib_srq_init_attr *srq_init_attr,
2380
- struct ib_udata *udata);
2381
- int (*modify_srq)(struct ib_srq *srq,
2382
- struct ib_srq_attr *srq_attr,
2383
- enum ib_srq_attr_mask srq_attr_mask,
2384
- struct ib_udata *udata);
2385
- int (*query_srq)(struct ib_srq *srq,
2386
- struct ib_srq_attr *srq_attr);
2387
- int (*destroy_srq)(struct ib_srq *srq);
2388
- int (*post_srq_recv)(struct ib_srq *srq,
2389
- const struct ib_recv_wr *recv_wr,
2390
- const struct ib_recv_wr **bad_recv_wr);
2391
- struct ib_qp * (*create_qp)(struct ib_pd *pd,
2392
- struct ib_qp_init_attr *qp_init_attr,
2393
- struct ib_udata *udata);
2394
- int (*modify_qp)(struct ib_qp *qp,
2395
- struct ib_qp_attr *qp_attr,
2396
- int qp_attr_mask,
2397
- struct ib_udata *udata);
2398
- int (*query_qp)(struct ib_qp *qp,
2399
- struct ib_qp_attr *qp_attr,
2400
- int qp_attr_mask,
2401
- struct ib_qp_init_attr *qp_init_attr);
2402
- int (*destroy_qp)(struct ib_qp *qp);
2403
- int (*post_send)(struct ib_qp *qp,
2404
- const struct ib_send_wr *send_wr,
2405
- const struct ib_send_wr **bad_send_wr);
2406
- int (*post_recv)(struct ib_qp *qp,
2407
- const struct ib_recv_wr *recv_wr,
2408
- const struct ib_recv_wr **bad_recv_wr);
2409
- struct ib_cq * (*create_cq)(struct ib_device *device,
2410
- const struct ib_cq_init_attr *attr,
2411
- struct ib_ucontext *context,
2412
- struct ib_udata *udata);
2413
- int (*modify_cq)(struct ib_cq *cq, u16 cq_count,
2414
- u16 cq_period);
2415
- int (*destroy_cq)(struct ib_cq *cq);
2416
- int (*resize_cq)(struct ib_cq *cq, int cqe,
2417
- struct ib_udata *udata);
2418
- int (*poll_cq)(struct ib_cq *cq, int num_entries,
2419
- struct ib_wc *wc);
2420
- int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
2421
- int (*req_notify_cq)(struct ib_cq *cq,
2422
- enum ib_cq_notify_flags flags);
2423
- int (*req_ncomp_notif)(struct ib_cq *cq,
2424
- int wc_cnt);
2425
- struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
2426
- int mr_access_flags);
2427
- struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
2428
- u64 start, u64 length,
2429
- u64 virt_addr,
2430
- int mr_access_flags,
2431
- struct ib_udata *udata);
2432
- int (*rereg_user_mr)(struct ib_mr *mr,
2433
- int flags,
2434
- u64 start, u64 length,
2435
- u64 virt_addr,
2436
- int mr_access_flags,
2437
- struct ib_pd *pd,
2438
- struct ib_udata *udata);
2439
- int (*dereg_mr)(struct ib_mr *mr);
2440
- struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
2441
- enum ib_mr_type mr_type,
2442
- u32 max_num_sg);
2443
- int (*map_mr_sg)(struct ib_mr *mr,
2444
- struct scatterlist *sg,
2445
- int sg_nents,
2446
- unsigned int *sg_offset);
2447
- struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
2448
- enum ib_mw_type type,
2449
- struct ib_udata *udata);
2450
- int (*dealloc_mw)(struct ib_mw *mw);
2451
- struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
2452
- int mr_access_flags,
2453
- struct ib_fmr_attr *fmr_attr);
2454
- int (*map_phys_fmr)(struct ib_fmr *fmr,
2455
- u64 *page_list, int list_len,
2456
- u64 iova);
2457
- int (*unmap_fmr)(struct list_head *fmr_list);
2458
- int (*dealloc_fmr)(struct ib_fmr *fmr);
2459
- int (*attach_mcast)(struct ib_qp *qp,
2460
- union ib_gid *gid,
2461
- u16 lid);
2462
- int (*detach_mcast)(struct ib_qp *qp,
2463
- union ib_gid *gid,
2464
- u16 lid);
2465
- int (*process_mad)(struct ib_device *device,
2466
- int process_mad_flags,
2467
- u8 port_num,
2468
- const struct ib_wc *in_wc,
2469
- const struct ib_grh *in_grh,
2470
- const struct ib_mad_hdr *in_mad,
2471
- size_t in_mad_size,
2472
- struct ib_mad_hdr *out_mad,
2473
- size_t *out_mad_size,
2474
- u16 *out_mad_pkey_index);
2475
- struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
2476
- struct ib_ucontext *ucontext,
2477
- struct ib_udata *udata);
2478
- int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
2479
- struct ib_flow * (*create_flow)(struct ib_qp *qp,
2480
- struct ib_flow_attr
2481
- *flow_attr,
2482
- int domain,
2483
- struct ib_udata *udata);
2484
- int (*destroy_flow)(struct ib_flow *flow_id);
2485
- int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
2486
- struct ib_mr_status *mr_status);
2487
- void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
2488
- void (*drain_rq)(struct ib_qp *qp);
2489
- void (*drain_sq)(struct ib_qp *qp);
2490
- int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
2491
- int state);
2492
- int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
2493
- struct ifla_vf_info *ivf);
2494
- int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
2495
- struct ifla_vf_stats *stats);
2496
- int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
2497
- int type);
2498
- struct ib_wq * (*create_wq)(struct ib_pd *pd,
2499
- struct ib_wq_init_attr *init_attr,
2500
- struct ib_udata *udata);
2501
- int (*destroy_wq)(struct ib_wq *wq);
2502
- int (*modify_wq)(struct ib_wq *wq,
2503
- struct ib_wq_attr *attr,
2504
- u32 wq_attr_mask,
2505
- struct ib_udata *udata);
2506
- struct ib_rwq_ind_table * (*create_rwq_ind_table)(struct ib_device *device,
2507
- struct ib_rwq_ind_table_init_attr *init_attr,
2508
- struct ib_udata *udata);
2509
- int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
2510
- struct ib_flow_action * (*create_flow_action_esp)(struct ib_device *device,
2511
- const struct ib_flow_action_attrs_esp *attr,
2512
- struct uverbs_attr_bundle *attrs);
2513
- int (*destroy_flow_action)(struct ib_flow_action *action);
2514
- int (*modify_flow_action_esp)(struct ib_flow_action *action,
2515
- const struct ib_flow_action_attrs_esp *attr,
2516
- struct uverbs_attr_bundle *attrs);
2517
- struct ib_dm * (*alloc_dm)(struct ib_device *device,
2518
- struct ib_ucontext *context,
2519
- struct ib_dm_alloc_attr *attr,
2520
- struct uverbs_attr_bundle *attrs);
2521
- int (*dealloc_dm)(struct ib_dm *dm);
2522
- struct ib_mr * (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
2523
- struct ib_dm_mr_attr *attr,
2524
- struct uverbs_attr_bundle *attrs);
2525
- struct ib_counters * (*create_counters)(struct ib_device *device,
2526
- struct uverbs_attr_bundle *attrs);
2527
- int (*destroy_counters)(struct ib_counters *counters);
2528
- int (*read_counters)(struct ib_counters *counters,
2529
- struct ib_counters_read_attr *counters_read_attr,
2530
- struct uverbs_attr_bundle *attrs);
2556
+ void (*dealloc_driver)(struct ib_device *dev);
2557
+
2558
+ /* iWarp CM callbacks */
2559
+ void (*iw_add_ref)(struct ib_qp *qp);
2560
+ void (*iw_rem_ref)(struct ib_qp *qp);
2561
+ struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn);
2562
+ int (*iw_connect)(struct iw_cm_id *cm_id,
2563
+ struct iw_cm_conn_param *conn_param);
2564
+ int (*iw_accept)(struct iw_cm_id *cm_id,
2565
+ struct iw_cm_conn_param *conn_param);
2566
+ int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata,
2567
+ u8 pdata_len);
2568
+ int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
2569
+ int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
2570
+ /**
2571
+ * counter_bind_qp - Bind a QP to a counter.
2572
+ * @counter - The counter to be bound. If counter->id is zero then
2573
+ * the driver needs to allocate a new counter and set counter->id
2574
+ */
2575
+ int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
2576
+ /**
2577
+ * counter_unbind_qp - Unbind the qp from the dynamically-allocated
2578
+ * counter and bind it onto the default one
2579
+ */
2580
+ int (*counter_unbind_qp)(struct ib_qp *qp);
2581
+ /**
2582
+ * counter_dealloc - De-allocate the hw counter
2583
+ */
2584
+ int (*counter_dealloc)(struct rdma_counter *counter);
2585
+ /**
2586
+ * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in
2587
+ * the driver initialized data.
2588
+ */
2589
+ struct rdma_hw_stats *(*counter_alloc_stats)(
2590
+ struct rdma_counter *counter);
2591
+ /**
2592
+ * counter_update_stats - Query the stats value of this counter
2593
+ */
2594
+ int (*counter_update_stats)(struct rdma_counter *counter);
25312595
25322596 /**
2533
- * rdma netdev operation
2534
- *
2535
- * Driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it
2536
- * doesn't support the specified rdma netdev type.
2597
+ * Allows rdma drivers to add their own restrack attributes
2598
+ * dumped via the 'rdma stat' iproute2 command.
25372599 */
2538
- struct net_device *(*alloc_rdma_netdev)(
2539
- struct ib_device *device,
2540
- u8 port_num,
2541
- enum rdma_netdev_t type,
2542
- const char *name,
2543
- unsigned char name_assign_type,
2544
- void (*setup)(struct net_device *));
2600
+ int (*fill_stat_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
25452601
2546
- struct module *owner;
2547
- struct device dev;
2548
- struct kobject *ports_parent;
2549
- struct list_head port_list;
2602
+ /* query driver for its ucontext properties */
2603
+ int (*query_ucontext)(struct ib_ucontext *context,
2604
+ struct uverbs_attr_bundle *attrs);
25502605
2551
- enum {
2552
- IB_DEV_UNINITIALIZED,
2553
- IB_DEV_REGISTERED,
2554
- IB_DEV_UNREGISTERED
2555
- } reg_state;
2606
+ DECLARE_RDMA_OBJ_SIZE(ib_ah);
2607
+ DECLARE_RDMA_OBJ_SIZE(ib_counters);
2608
+ DECLARE_RDMA_OBJ_SIZE(ib_cq);
2609
+ DECLARE_RDMA_OBJ_SIZE(ib_mw);
2610
+ DECLARE_RDMA_OBJ_SIZE(ib_pd);
2611
+ DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table);
2612
+ DECLARE_RDMA_OBJ_SIZE(ib_srq);
2613
+ DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
2614
+ DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
2615
+};
25562616
2557
- int uverbs_abi_ver;
2617
+struct ib_core_device {
2618
+ /* device must be the first element in the structure as long as the
2619
+ * union of ib_core_device and device exists in ib_device.
2620
+ */
2621
+ struct device dev;
2622
+ possible_net_t rdma_net;
2623
+ struct kobject *ports_kobj;
2624
+ struct list_head port_list;
2625
+ struct ib_device *owner; /* reach back to owner ib_device */
2626
+};
2627
+
2628
+struct rdma_restrack_root;
2629
+struct ib_device {
2630
+ /* Do not access @dma_device directly from ULP nor from HW drivers. */
2631
+ struct device *dma_device;
2632
+ struct ib_device_ops ops;
2633
+ char name[IB_DEVICE_NAME_MAX];
2634
+ struct rcu_head rcu_head;
2635
+
2636
+ struct list_head event_handler_list;
2637
+ /* Protects event_handler_list */
2638
+ struct rw_semaphore event_handler_rwsem;
2639
+
2640
+ /* Protects QP's event_handler calls and open_qp list */
2641
+ spinlock_t qp_open_list_lock;
2642
+
2643
+ struct rw_semaphore client_data_rwsem;
2644
+ struct xarray client_data;
2645
+ struct mutex unregistration_lock;
2646
+
2647
+ /* Synchronize GID, Pkey cache entries, subnet prefix, LMC */
2648
+ rwlock_t cache_lock;
2649
+ /**
2650
+ * port_data is indexed by port number
2651
+ */
2652
+ struct ib_port_data *port_data;
2653
+
2654
+ int num_comp_vectors;
2655
+
2656
+ union {
2657
+ struct device dev;
2658
+ struct ib_core_device coredev;
2659
+ };
2660
+
2661
+ /* First group for device attributes,
2662
+ * Second group for driver provided attributes (optional).
2663
+ * It is a NULL-terminated array.
2664
+ */
2665
+ const struct attribute_group *groups[3];
2666
+
25582667 u64 uverbs_cmd_mask;
25592668 u64 uverbs_ex_cmd_mask;
25602669
....@@ -2562,6 +2671,10 @@
25622671 __be64 node_guid;
25632672 u32 local_dma_lkey;
25642673 u16 is_switch:1;
2674
+ /* Indicates kernel verbs support, should not be used in drivers */
2675
+ u16 kverbs_provider:1;
2676
+ /* CQ adaptive moderation (RDMA DIM) */
2677
+ u16 use_cq_dim:1;
25652678 u8 node_type;
25662679 u8 phys_port_cnt;
25672680 struct ib_device_attr attrs;
....@@ -2573,30 +2686,44 @@
25732686 #endif
25742687
25752688 u32 index;
2689
+
2690
+ spinlock_t cq_pools_lock;
2691
+ struct list_head cq_pools[IB_POLL_LAST_POOL_TYPE + 1];
2692
+
2693
+ struct rdma_restrack_root *res;
2694
+
2695
+ const struct uapi_definition *driver_def;
2696
+
25762697 /*
2577
- * Implementation details of the RDMA core, don't use in drivers
2698
+ * Positive refcount indicates that the device is currently
2699
+ * registered and cannot be unregistered.
25782700 */
2579
- struct rdma_restrack_root res;
2701
+ refcount_t refcount;
2702
+ struct completion unreg_completion;
2703
+ struct work_struct unregistration_work;
25802704
2581
- /**
2582
- * The following mandatory functions are used only at device
2583
- * registration. Keep functions such as these at the end of this
2584
- * structure to avoid cache line misses when accessing struct ib_device
2585
- * in fast paths.
2586
- */
2587
- int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
2588
- void (*get_dev_fw_str)(struct ib_device *, char *str);
2589
- const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
2590
- int comp_vector);
2705
+ const struct rdma_link_ops *link_ops;
25912706
2592
- const struct uverbs_object_tree_def *const *driver_specs;
2593
- enum rdma_driver_id driver_id;
2707
+ /* Protects compat_devs xarray modifications */
2708
+ struct mutex compat_devs_mutex;
2709
+ /* Maintains compat devices for each net namespace */
2710
+ struct xarray compat_devs;
2711
+
2712
+ /* Used by iWarp CM */
2713
+ char iw_ifname[IFNAMSIZ];
2714
+ u32 iw_driver_flags;
2715
+ u32 lag_flags;
25942716 };
25952717
2718
+struct ib_client_nl_info;
25962719 struct ib_client {
2597
- char *name;
2598
- void (*add) (struct ib_device *);
2720
+ const char *name;
2721
+ int (*add)(struct ib_device *ibdev);
25992722 void (*remove)(struct ib_device *, void *client_data);
2723
+ void (*rename)(struct ib_device *dev, void *client_data);
2724
+ int (*get_nl_info)(struct ib_device *ibdev, void *client_data,
2725
+ struct ib_client_nl_info *res);
2726
+ int (*get_global_nl_info)(struct ib_client_nl_info *res);
26002727
26012728 /* Returns the net_dev belonging to this ib_client and matching the
26022729 * given parameters.
....@@ -2620,25 +2747,123 @@
26202747 const union ib_gid *gid,
26212748 const struct sockaddr *addr,
26222749 void *client_data);
2623
- struct list_head list;
2750
+
2751
+ refcount_t uses;
2752
+ struct completion uses_zero;
2753
+ u32 client_id;
2754
+
2755
+ /* kverbs are not required by the client */
2756
+ u8 no_kverbs_req:1;
26242757 };
26252758
2626
-struct ib_device *ib_alloc_device(size_t size);
2759
+/*
2760
+ * IB block DMA iterator
2761
+ *
2762
+ * Iterates the DMA-mapped SGL in contiguous memory blocks aligned
2763
+ * to a HW supported page size.
2764
+ */
2765
+struct ib_block_iter {
2766
+ /* internal states */
2767
+ struct scatterlist *__sg; /* sg holding the current aligned block */
2768
+ dma_addr_t __dma_addr; /* unaligned DMA address of this block */
2769
+ unsigned int __sg_nents; /* number of SG entries */
2770
+ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */
2771
+ unsigned int __pg_bit; /* alignment of current block */
2772
+};
2773
+
2774
+struct ib_device *_ib_alloc_device(size_t size);
2775
+#define ib_alloc_device(drv_struct, member) \
2776
+ container_of(_ib_alloc_device(sizeof(struct drv_struct) + \
2777
+ BUILD_BUG_ON_ZERO(offsetof( \
2778
+ struct drv_struct, member))), \
2779
+ struct drv_struct, member)
2780
+
26272781 void ib_dealloc_device(struct ib_device *device);
26282782
26292783 void ib_get_device_fw_str(struct ib_device *device, char *str);
26302784
2631
-int ib_register_device(struct ib_device *device,
2632
- int (*port_callback)(struct ib_device *,
2633
- u8, struct kobject *));
2785
+int ib_register_device(struct ib_device *device, const char *name,
2786
+ struct device *dma_device);
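/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): the usual
 * allocate/register/free sequence for a driver-private structure that embeds
 * struct ib_device. "my_dev", "my_probe" and the "ibdev" member are
 * hypothetical names; real drivers also install an ib_device_ops table with
 * ib_set_device_ops() before registering.
 */
struct my_dev {
	struct ib_device ibdev;	/* must be the first member for ib_alloc_device() */
	int my_private_state;
};

static int my_probe(struct device *dma_device)
{
	struct my_dev *mdev;
	int ret;

	mdev = ib_alloc_device(my_dev, ibdev);
	if (!mdev)
		return -ENOMEM;

	/* ... ib_set_device_ops(&mdev->ibdev, &my_ops), set up ports, etc. ... */

	ret = ib_register_device(&mdev->ibdev, "my%d", dma_device);
	if (ret) {
		ib_dealloc_device(&mdev->ibdev);
		return ret;
	}
	return 0;
}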
26342787 void ib_unregister_device(struct ib_device *device);
2788
+void ib_unregister_driver(enum rdma_driver_id driver_id);
2789
+void ib_unregister_device_and_put(struct ib_device *device);
2790
+void ib_unregister_device_queued(struct ib_device *ib_dev);
26352791
26362792 int ib_register_client (struct ib_client *client);
26372793 void ib_unregister_client(struct ib_client *client);
26382794
2639
-void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
2795
+void __rdma_block_iter_start(struct ib_block_iter *biter,
2796
+ struct scatterlist *sglist,
2797
+ unsigned int nents,
2798
+ unsigned long pgsz);
2799
+bool __rdma_block_iter_next(struct ib_block_iter *biter);
2800
+
2801
+/**
2802
+ * rdma_block_iter_dma_address - get the aligned dma address of the current
2803
+ * block held by the block iterator.
2804
+ * @biter: block iterator holding the memory block
2805
+ */
2806
+static inline dma_addr_t
2807
+rdma_block_iter_dma_address(struct ib_block_iter *biter)
2808
+{
2809
+ return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1);
2810
+}
2811
+
2812
+/**
2813
+ * rdma_for_each_block - iterate over contiguous memory blocks of the sg list
2814
+ * @sglist: sglist to iterate over
2815
+ * @biter: block iterator holding the memory block
2816
+ * @nents: maximum number of sg entries to iterate over
2817
+ * @pgsz: best HW supported page size to use
2818
+ *
2819
+ * Callers may use rdma_block_iter_dma_address() to get each
2820
+ * block's aligned DMA address.
2821
+ */
2822
+#define rdma_for_each_block(sglist, biter, nents, pgsz) \
2823
+ for (__rdma_block_iter_start(biter, sglist, nents, \
2824
+ pgsz); \
2825
+ __rdma_block_iter_next(biter);)
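/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): programming
 * every pgsz-aligned block of an already DMA-mapped scatterlist, e.g. into a
 * device page table. "sgl", "nents" and "pg_sz" stand for whatever mapped SGL
 * the caller owns; "my_set_page" is a hypothetical driver helper.
 */
static inline void my_map_blocks(struct scatterlist *sgl, unsigned int nents,
				 unsigned long pg_sz,
				 void (*my_set_page)(dma_addr_t addr))
{
	struct ib_block_iter biter;

	rdma_for_each_block(sgl, &biter, nents, pg_sz)
		my_set_page(rdma_block_iter_dma_address(&biter));
}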
2826
+
2827
+/**
2828
+ * ib_get_client_data - Get IB client context
2829
+ * @device:Device to get context for
2830
+ * @client:Client to get context for
2831
+ *
2832
+ * ib_get_client_data() returns the client context data set with
2833
+ * ib_set_client_data(). This can only be called while the client is
2834
+ * registered to the device; once the ib_client remove() callback returns, this
2835
+ * cannot be called.
2836
+ */
2837
+static inline void *ib_get_client_data(struct ib_device *device,
2838
+ struct ib_client *client)
2839
+{
2840
+ return xa_load(&device->client_data, client->client_id);
2841
+}
26402842 void ib_set_client_data(struct ib_device *device, struct ib_client *client,
26412843 void *data);
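/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): a minimal
 * client that attaches per-device private data in its add() callback and frees
 * it in remove(). "my_client" and "struct my_client_data" are hypothetical;
 * the client would be registered from module init with
 * ib_register_client(&my_client).
 */
struct my_client_data {
	struct ib_device *ibdev;
};

static struct ib_client my_client;

static int my_client_add(struct ib_device *ibdev)
{
	struct my_client_data *priv = kzalloc(sizeof(*priv), GFP_KERNEL);

	if (!priv)
		return -ENOMEM;
	priv->ibdev = ibdev;
	ib_set_client_data(ibdev, &my_client, priv);
	return 0;
}

static void my_client_remove(struct ib_device *ibdev, void *client_data)
{
	kfree(client_data);	/* same pointer ib_get_client_data() returns */
}

static struct ib_client my_client = {
	.name	= "my_client",
	.add	= my_client_add,
	.remove	= my_client_remove,
};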
2844
+void ib_set_device_ops(struct ib_device *device,
2845
+ const struct ib_device_ops *ops);
2846
+
2847
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
2848
+ unsigned long pfn, unsigned long size, pgprot_t prot,
2849
+ struct rdma_user_mmap_entry *entry);
2850
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
2851
+ struct rdma_user_mmap_entry *entry,
2852
+ size_t length);
2853
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
2854
+ struct rdma_user_mmap_entry *entry,
2855
+ size_t length, u32 min_pgoff,
2856
+ u32 max_pgoff);
2857
+
2858
+struct rdma_user_mmap_entry *
2859
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
2860
+ unsigned long pgoff);
2861
+struct rdma_user_mmap_entry *
2862
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
2863
+ struct vm_area_struct *vma);
2864
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry);
2865
+
2866
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry);
26422867
26432868 static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
26442869 {
....@@ -2723,7 +2948,6 @@
27232948 * @next_state: Next QP state
27242949 * @type: QP type
27252950 * @mask: Mask of supplied QP attributes
2726
- * @ll : link layer of port
27272951 *
27282952 * This function is a helper function that a low-level driver's
27292953 * modify_qp method can use to validate the consumer's input. It
....@@ -2732,12 +2956,11 @@
27322956 * and that the attribute mask supplied is allowed for the transition.
27332957 */
27342958 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
2735
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
2736
- enum rdma_link_layer ll);
2959
+ enum ib_qp_type type, enum ib_qp_attr_mask mask);
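/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): how a
 * driver's modify_qp handler might use the helper to validate a requested
 * state transition before touching hardware. "cur_state" and "new_state" are
 * assumed to have been resolved by the caller from the QP and qp_attr.
 */
static inline int my_check_qp_transition(struct ib_qp *ibqp,
					 enum ib_qp_state cur_state,
					 enum ib_qp_state new_state,
					 int attr_mask)
{
	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
		return -EINVAL;
	return 0;
}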
27372960
27382961 void ib_register_event_handler(struct ib_event_handler *event_handler);
27392962 void ib_unregister_event_handler(struct ib_event_handler *event_handler);
2740
-void ib_dispatch_event(struct ib_event *event);
2963
+void ib_dispatch_event(const struct ib_event *event);
27412964
27422965 int ib_query_port(struct ib_device *device,
27432966 u8 port_num, struct ib_port_attr *port_attr);
....@@ -2773,6 +2996,16 @@
27732996 }
27742997
27752998 /**
2999
+ * rdma_for_each_port - Iterate over all valid port numbers of the IB device
3000
+ * @device - The struct ib_device * to iterate over
3001
+ * @iter - The unsigned int to store the port number
3002
+ */
3003
+#define rdma_for_each_port(device, iter) \
3004
+ for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type( \
3005
+ unsigned int, iter))); \
3006
+ iter <= rdma_end_port(device); (iter)++)
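/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): the
 * iterator must be an unsigned int, which the BUILD_BUG_ON_ZERO() above
 * enforces. Here it is used to count the InfiniBand-protocol ports of a
 * device.
 */
static inline unsigned int my_count_ib_ports(struct ib_device *ibdev)
{
	unsigned int port, n = 0;

	rdma_for_each_port(ibdev, port)
		if (rdma_protocol_ib(ibdev, port))
			n++;
	return n;
}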
3007
+
3008
+/**
27763009 * rdma_end_port - Return the last valid port number for the device
27773010 * specified
27783011 *
....@@ -2795,34 +3028,38 @@
27953028 static inline bool rdma_is_grh_required(const struct ib_device *device,
27963029 u8 port_num)
27973030 {
2798
- return device->port_immutable[port_num].core_cap_flags &
2799
- RDMA_CORE_PORT_IB_GRH_REQUIRED;
3031
+ return device->port_data[port_num].immutable.core_cap_flags &
3032
+ RDMA_CORE_PORT_IB_GRH_REQUIRED;
28003033 }
28013034
28023035 static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
28033036 {
2804
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
3037
+ return device->port_data[port_num].immutable.core_cap_flags &
3038
+ RDMA_CORE_CAP_PROT_IB;
28053039 }
28063040
28073041 static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
28083042 {
2809
- return device->port_immutable[port_num].core_cap_flags &
2810
- (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
3043
+ return device->port_data[port_num].immutable.core_cap_flags &
3044
+ (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
28113045 }
28123046
28133047 static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
28143048 {
2815
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
3049
+ return device->port_data[port_num].immutable.core_cap_flags &
3050
+ RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
28163051 }
28173052
28183053 static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
28193054 {
2820
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
3055
+ return device->port_data[port_num].immutable.core_cap_flags &
3056
+ RDMA_CORE_CAP_PROT_ROCE;
28213057 }
28223058
28233059 static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
28243060 {
2825
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP;
3061
+ return device->port_data[port_num].immutable.core_cap_flags &
3062
+ RDMA_CORE_CAP_PROT_IWARP;
28263063 }
28273064
28283065 static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
....@@ -2833,12 +3070,14 @@
28333070
28343071 static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
28353072 {
2836
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
3073
+ return device->port_data[port_num].immutable.core_cap_flags &
3074
+ RDMA_CORE_CAP_PROT_RAW_PACKET;
28373075 }
28383076
28393077 static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
28403078 {
2841
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
3079
+ return device->port_data[port_num].immutable.core_cap_flags &
3080
+ RDMA_CORE_CAP_PROT_USNIC;
28423081 }
28433082
28443083 /**
....@@ -2855,7 +3094,8 @@
28553094 */
28563095 static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
28573096 {
2858
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD;
3097
+ return device->port_data[port_num].immutable.core_cap_flags &
3098
+ RDMA_CORE_CAP_IB_MAD;
28593099 }
28603100
28613101 /**
....@@ -2879,8 +3119,8 @@
28793119 */
28803120 static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
28813121 {
2882
- return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD)
2883
- == RDMA_CORE_CAP_OPA_MAD;
3122
+ return device->port_data[port_num].immutable.core_cap_flags &
3123
+ RDMA_CORE_CAP_OPA_MAD;
28843124 }
28853125
28863126 /**
....@@ -2905,7 +3145,8 @@
29053145 */
29063146 static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
29073147 {
2908
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI;
3148
+ return device->port_data[port_num].immutable.core_cap_flags &
3149
+ RDMA_CORE_CAP_IB_SMI;
29093150 }
29103151
29113152 /**
....@@ -2925,7 +3166,8 @@
29253166 */
29263167 static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
29273168 {
2928
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM;
3169
+ return device->port_data[port_num].immutable.core_cap_flags &
3170
+ RDMA_CORE_CAP_IB_CM;
29293171 }
29303172
29313173 /**
....@@ -2942,7 +3184,8 @@
29423184 */
29433185 static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
29443186 {
2945
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM;
3187
+ return device->port_data[port_num].immutable.core_cap_flags &
3188
+ RDMA_CORE_CAP_IW_CM;
29463189 }
29473190
29483191 /**
....@@ -2962,7 +3205,8 @@
29623205 */
29633206 static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
29643207 {
2965
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA;
3208
+ return device->port_data[port_num].immutable.core_cap_flags &
3209
+ RDMA_CORE_CAP_IB_SA;
29663210 }
29673211
29683212 /**
....@@ -3002,7 +3246,8 @@
30023246 */
30033247 static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
30043248 {
3005
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB;
3249
+ return device->port_data[port_num].immutable.core_cap_flags &
3250
+ RDMA_CORE_CAP_AF_IB;
30063251 }
30073252
30083253 /**
....@@ -3023,7 +3268,8 @@
30233268 */
30243269 static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
30253270 {
3026
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH;
3271
+ return device->port_data[port_num].immutable.core_cap_flags &
3272
+ RDMA_CORE_CAP_ETH_AH;
30273273 }
30283274
30293275 /**
....@@ -3037,7 +3283,7 @@
30373283 */
30383284 static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
30393285 {
3040
- return (device->port_immutable[port_num].core_cap_flags &
3286
+ return (device->port_data[port_num].immutable.core_cap_flags &
30413287 RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
30423288 }
30433289
....@@ -3055,7 +3301,7 @@
30553301 */
30563302 static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
30573303 {
3058
- return device->port_immutable[port_num].max_mad_size;
3304
+ return device->port_data[port_num].immutable.max_mad_size;
30593305 }
30603306
30613307 /**
....@@ -3075,7 +3321,7 @@
30753321 u8 port_num)
30763322 {
30773323 return rdma_protocol_roce(device, port_num) &&
3078
- device->add_gid && device->del_gid;
3324
+ device->ops.add_gid && device->ops.del_gid;
30793325 }
30803326
30813327 /*
....@@ -3090,12 +3336,64 @@
30903336 return rdma_protocol_iwarp(dev, port_num);
30913337 }
30923338
3339
+/**
3340
+ * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
3341
+ * @device: Device
3342
+ * @port_num: 1 based Port number
3343
+ *
3344
+ * Return true if port is an Intel OPA port, false if not
3345
+ */
3346
+static inline bool rdma_core_cap_opa_port(struct ib_device *device,
3347
+ u32 port_num)
3348
+{
3349
+ return (device->port_data[port_num].immutable.core_cap_flags &
3350
+ RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA;
3351
+}
3352
+
3353
+/**
3354
+ * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
3355
+ * @device: Device
3356
+ * @port_num: Port number
3357
+ * @mtu: enum value of MTU
3358
+ *
3359
+ * Return the MTU size supported by the port as an integer value. Will return
3360
+ * -1 if the enum value of mtu is not supported.
3361
+ */
3362
+static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port,
3363
+ int mtu)
3364
+{
3365
+ if (rdma_core_cap_opa_port(device, port))
3366
+ return opa_mtu_enum_to_int((enum opa_mtu)mtu);
3367
+ else
3368
+ return ib_mtu_enum_to_int((enum ib_mtu)mtu);
3369
+}
3370
+
3371
+/**
3372
+ * rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
3373
+ * @device: Device
3374
+ * @port_num: Port number
3375
+ * @attr: port attribute
3376
+ *
3377
+ * Return the MTU size supported by the port as an integer value.
3378
+ */
3379
+static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port,
3380
+ struct ib_port_attr *attr)
3381
+{
3382
+ if (rdma_core_cap_opa_port(device, port))
3383
+ return attr->phys_mtu;
3384
+ else
3385
+ return ib_mtu_enum_to_int(attr->max_mtu);
3386
+}
3387
+
30933388 int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
30943389 int state);
30953390 int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
30963391 struct ifla_vf_info *info);
30973392 int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
30983393 struct ifla_vf_stats *stats);
3394
+int ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
3395
+ struct ifla_vf_guid *node_guid,
3396
+ struct ifla_vf_guid *port_guid);
30993397 int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
31003398 int type);
31013399
....@@ -3131,19 +3429,41 @@
31313429
31323430 struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
31333431 const char *caller);
3432
+
31343433 #define ib_alloc_pd(device, flags) \
31353434 __ib_alloc_pd((device), (flags), KBUILD_MODNAME)
3136
-void ib_dealloc_pd(struct ib_pd *pd);
3435
+
3436
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
3437
+
3438
+/**
3439
+ * ib_dealloc_pd - Deallocate kernel PD
3440
+ * @pd: The protection domain
3441
+ *
3442
+ * NOTE: for user PD use ib_dealloc_pd_user with valid udata!
3443
+ */
3444
+static inline void ib_dealloc_pd(struct ib_pd *pd)
3445
+{
3446
+ int ret = ib_dealloc_pd_user(pd, NULL);
3447
+
3448
+ WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail");
3449
+}
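/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): typical
 * kernel-ULP protection domain lifetime. Error handling uses the ERR_PTR
 * convention that ib_alloc_pd() follows.
 */
static inline int my_pd_lifetime_example(struct ib_device *ibdev)
{
	struct ib_pd *pd = ib_alloc_pd(ibdev, 0);

	if (IS_ERR(pd))
		return PTR_ERR(pd);

	/* ... create CQs/QPs/MRs that reference this PD ... */

	ib_dealloc_pd(pd);
	return 0;
}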
3450
+
3451
+enum rdma_create_ah_flags {
3452
+ /* In a sleepable context */
3453
+ RDMA_CREATE_AH_SLEEPABLE = BIT(0),
3454
+};
31373455
31383456 /**
31393457 * rdma_create_ah - Creates an address handle for the given address vector.
31403458 * @pd: The protection domain associated with the address handle.
31413459 * @ah_attr: The attributes of the address vector.
3460
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
31423461 *
31433462 * The address handle is used to reference a local or global destination
31443463 * in all UD QP post sends.
31453464 */
3146
-struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr);
3465
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
3466
+ u32 flags);
31473467
31483468 /**
31493469 * rdma_create_user_ah - Creates an address handle for the given address vector.
....@@ -3233,27 +3553,45 @@
32333553 */
32343554 int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
32353555
3236
-/**
3237
- * rdma_destroy_ah - Destroys an address handle.
3238
- * @ah: The address handle to destroy.
3239
- */
3240
-int rdma_destroy_ah(struct ib_ah *ah);
3556
+enum rdma_destroy_ah_flags {
3557
+ /* In a sleepable context */
3558
+ RDMA_DESTROY_AH_SLEEPABLE = BIT(0),
3559
+};
32413560
32423561 /**
3243
- * ib_create_srq - Creates a SRQ associated with the specified protection
3244
- * domain.
3245
- * @pd: The protection domain associated with the SRQ.
3246
- * @srq_init_attr: A list of initial attributes required to create the
3247
- * SRQ. If SRQ creation succeeds, then the attributes are updated to
3248
- * the actual capabilities of the created SRQ.
3249
- *
3250
- * srq_attr->max_wr and srq_attr->max_sge are read the determine the
3251
- * requested size of the SRQ, and set to the actual values allocated
3252
- * on return. If ib_create_srq() succeeds, then max_wr and max_sge
3253
- * will always be at least as large as the requested values.
3562
+ * rdma_destroy_ah_user - Destroys an address handle.
3563
+ * @ah: The address handle to destroy.
3564
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
3565
+ * @udata: Valid user data or NULL for kernel objects
32543566 */
3255
-struct ib_srq *ib_create_srq(struct ib_pd *pd,
3256
- struct ib_srq_init_attr *srq_init_attr);
3567
+int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
3568
+
3569
+/**
3570
+ * rdma_destroy_ah - Destroys a kernel address handle.
3571
+ * @ah: The address handle to destroy.
3572
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
3573
+ *
3574
+ * NOTE: for user ah use rdma_destroy_ah_user with valid udata!
3575
+ */
3576
+static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags)
3577
+{
3578
+ int ret = rdma_destroy_ah_user(ah, flags, NULL);
3579
+
3580
+ WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail");
3581
+}
3582
+
3583
+struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
3584
+ struct ib_srq_init_attr *srq_init_attr,
3585
+ struct ib_usrq_object *uobject,
3586
+ struct ib_udata *udata);
3587
+static inline struct ib_srq *
3588
+ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr)
3589
+{
3590
+ if (!pd->device->ops.create_srq)
3591
+ return ERR_PTR(-EOPNOTSUPP);
3592
+
3593
+ return ib_create_srq_user(pd, srq_init_attr, NULL, NULL);
3594
+}
32573595
32583596 /**
32593597 * ib_modify_srq - Modifies the attributes for the specified SRQ.
....@@ -3281,10 +3619,24 @@
32813619 struct ib_srq_attr *srq_attr);
32823620
32833621 /**
3284
- * ib_destroy_srq - Destroys the specified SRQ.
3622
+ * ib_destroy_srq_user - Destroys the specified SRQ.
32853623 * @srq: The SRQ to destroy.
3624
+ * @udata: Valid user data or NULL for kernel objects
32863625 */
3287
-int ib_destroy_srq(struct ib_srq *srq);
3626
+int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
3627
+
3628
+/**
3629
+ * ib_destroy_srq - Destroys the specified kernel SRQ.
3630
+ * @srq: The SRQ to destroy.
3631
+ *
3632
+ * NOTE: for user srq use ib_destroy_srq_user with valid udata!
3633
+ */
3634
+static inline void ib_destroy_srq(struct ib_srq *srq)
3635
+{
3636
+ int ret = ib_destroy_srq_user(srq, NULL);
3637
+
3638
+ WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail");
3639
+}
32883640
32893641 /**
32903642 * ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
....@@ -3299,17 +3651,10 @@
32993651 {
33003652 const struct ib_recv_wr *dummy;
33013653
3302
- return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
3654
+ return srq->device->ops.post_srq_recv(srq, recv_wr,
3655
+ bad_recv_wr ? : &dummy);
33033656 }
33043657
3305
-/**
3306
- * ib_create_qp - Creates a QP associated with the specified protection
3307
- * domain.
3308
- * @pd: The protection domain associated with the QP.
3309
- * @qp_init_attr: A list of initial attributes required to create the
3310
- * QP. If QP creation succeeds, then the attributes are updated to
3311
- * the actual capabilities of the created QP.
3312
- */
33133658 struct ib_qp *ib_create_qp(struct ib_pd *pd,
33143659 struct ib_qp_init_attr *qp_init_attr);
33153660
....@@ -3361,8 +3706,20 @@
33613706 /**
33623707 * ib_destroy_qp - Destroys the specified QP.
33633708 * @qp: The QP to destroy.
3709
+ * @udata: Valid udata or NULL for kernel objects
33643710 */
3365
-int ib_destroy_qp(struct ib_qp *qp);
3711
+int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata);
3712
+
3713
+/**
3714
+ * ib_destroy_qp - Destroys the specified kernel QP.
3715
+ * @qp: The QP to destroy.
3716
+ *
3717
+ * NOTE: for user qp use ib_destroy_qp_user with valid udata!
3718
+ */
3719
+static inline int ib_destroy_qp(struct ib_qp *qp)
3720
+{
3721
+ return ib_destroy_qp_user(qp, NULL);
3722
+}
33663723
33673724 /**
33683725 * ib_open_qp - Obtain a reference to an existing sharable QP.
....@@ -3402,7 +3759,7 @@
34023759 {
34033760 const struct ib_send_wr *dummy;
34043761
3405
- return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
3762
+ return qp->device->ops.post_send(qp, send_wr, bad_send_wr ? : &dummy);
34063763 }
34073764
34083765 /**
....@@ -3419,14 +3776,38 @@
34193776 {
34203777 const struct ib_recv_wr *dummy;
34213778
3422
- return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
3779
+ return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
34233780 }
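/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): posting one
 * receive buffer that was already DMA-mapped by the caller. "dma_addr" and
 * "len" are assumed to describe that buffer; the lkey comes from the PD's
 * local DMA MR. Real users also set wr.wr_id or wr.wr_cqe so they can
 * recognise the completion.
 */
static inline int my_post_one_recv(struct ib_qp *qp, u64 dma_addr, u32 len)
{
	struct ib_sge sge = {
		.addr	= dma_addr,
		.length	= len,
		.lkey	= qp->pd->local_dma_lkey,
	};
	struct ib_recv_wr wr = {
		.sg_list = &sge,
		.num_sge = 1,
	};
	const struct ib_recv_wr *bad_wr;

	return ib_post_recv(qp, &wr, &bad_wr);
}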
34243781
3425
-struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
3426
- int nr_cqe, int comp_vector,
3427
- enum ib_poll_context poll_ctx, const char *caller);
3428
-#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \
3429
- __ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME)
3782
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
3783
+ int comp_vector, enum ib_poll_context poll_ctx,
3784
+ const char *caller);
3785
+static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
3786
+ int nr_cqe, int comp_vector,
3787
+ enum ib_poll_context poll_ctx)
3788
+{
3789
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
3790
+ KBUILD_MODNAME);
3791
+}
3792
+
3793
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
3794
+ int nr_cqe, enum ib_poll_context poll_ctx,
3795
+ const char *caller);
3796
+
3797
+/**
3798
+ * ib_alloc_cq_any: Allocate a kernel CQ
3799
+ * @dev: The IB device
3800
+ * @private: Private data attached to the CQE
3801
+ * @nr_cqe: Number of CQEs in the CQ
3802
+ * @poll_ctx: Context used for polling the CQ
3803
+ */
3804
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
3805
+ void *private, int nr_cqe,
3806
+ enum ib_poll_context poll_ctx)
3807
+{
3808
+ return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
3809
+ KBUILD_MODNAME);
3810
+}
34303811
34313812 void ib_free_cq(struct ib_cq *cq);
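/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): allocating
 * a CQ when the caller has no completion-vector preference, then releasing it.
 * IB_POLL_SOFTIRQ is one of the standard ib_poll_context values; 128 CQEs is
 * an arbitrary example size.
 */
static inline int my_cq_example(struct ib_device *ibdev)
{
	struct ib_cq *cq = ib_alloc_cq_any(ibdev, NULL, 128, IB_POLL_SOFTIRQ);

	if (IS_ERR(cq))
		return PTR_ERR(cq);

	/* ... attach QPs and let the poll context run the CQE done callbacks ... */

	ib_free_cq(cq);
	return 0;
}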
34323813 int ib_process_cq_direct(struct ib_cq *cq, int budget);
....@@ -3472,10 +3853,24 @@
34723853 int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period);
34733854
34743855 /**
3475
- * ib_destroy_cq - Destroys the specified CQ.
3856
+ * ib_destroy_cq_user - Destroys the specified CQ.
34763857 * @cq: The CQ to destroy.
3858
+ * @udata: Valid user data or NULL for kernel objects
34773859 */
3478
-int ib_destroy_cq(struct ib_cq *cq);
3860
+int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
3861
+
3862
+/**
3863
+ * ib_destroy_cq - Destroys the specified kernel CQ.
3864
+ * @cq: The CQ to destroy.
3865
+ *
3866
+ * NOTE: for user cq use ib_destroy_cq_user with valid udata!
3867
+ */
3868
+static inline void ib_destroy_cq(struct ib_cq *cq)
3869
+{
3870
+ int ret = ib_destroy_cq_user(cq, NULL);
3871
+
3872
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
3873
+}
34793874
34803875 /**
34813876 * ib_poll_cq - poll a CQ for completion(s)
....@@ -3492,7 +3887,7 @@
34923887 static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
34933888 struct ib_wc *wc)
34943889 {
3495
- return cq->device->poll_cq(cq, num_entries, wc);
3890
+ return cq->device->ops.poll_cq(cq, num_entries, wc);
34963891 }
34973892
34983893 /**
....@@ -3525,8 +3920,14 @@
35253920 static inline int ib_req_notify_cq(struct ib_cq *cq,
35263921 enum ib_cq_notify_flags flags)
35273922 {
3528
- return cq->device->req_notify_cq(cq, flags);
3923
+ return cq->device->ops.req_notify_cq(cq, flags);
35293924 }
3925
+
3926
+struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
3927
+ int comp_vector_hint,
3928
+ enum ib_poll_context poll_ctx);
3929
+
3930
+void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe);
35303931
35313932 /**
35323933 * ib_req_ncomp_notif - Request completion notification when there are
....@@ -3537,9 +3938,19 @@
35373938 */
35383939 static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
35393940 {
3540
- return cq->device->req_ncomp_notif ?
3541
- cq->device->req_ncomp_notif(cq, wc_cnt) :
3941
+ return cq->device->ops.req_ncomp_notif ?
3942
+ cq->device->ops.req_ncomp_notif(cq, wc_cnt) :
35423943 -ENOSYS;
3944
+}
3945
+
3946
+/*
3947
+ * Drivers that don't need a DMA mapping at the RDMA layer set dma_device to
3948
+ * NULL. This causes the ib_dma* helpers to just stash the kernel virtual
3949
+ * address into the dma address.
3950
+ */
3951
+static inline bool ib_uses_virt_dma(struct ib_device *dev)
3952
+{
3953
+ return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
35433954 }
35443955
35453956 /**
....@@ -3549,6 +3960,8 @@
35493960 */
35503961 static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
35513962 {
3963
+ if (ib_uses_virt_dma(dev))
3964
+ return 0;
35523965 return dma_mapping_error(dev->dma_device, dma_addr);
35533966 }
35543967
....@@ -3563,6 +3976,8 @@
35633976 void *cpu_addr, size_t size,
35643977 enum dma_data_direction direction)
35653978 {
3979
+ if (ib_uses_virt_dma(dev))
3980
+ return (uintptr_t)cpu_addr;
35663981 return dma_map_single(dev->dma_device, cpu_addr, size, direction);
35673982 }
35683983
....@@ -3577,7 +3992,8 @@
35773992 u64 addr, size_t size,
35783993 enum dma_data_direction direction)
35793994 {
3580
- dma_unmap_single(dev->dma_device, addr, size, direction);
3995
+ if (!ib_uses_virt_dma(dev))
3996
+ dma_unmap_single(dev->dma_device, addr, size, direction);
35813997 }
35823998
35833999 /**
....@@ -3594,6 +4010,8 @@
35944010 size_t size,
35954011 enum dma_data_direction direction)
35964012 {
4013
+ if (ib_uses_virt_dma(dev))
4014
+ return (uintptr_t)(page_address(page) + offset);
35974015 return dma_map_page(dev->dma_device, page, offset, size, direction);
35984016 }
35994017
....@@ -3608,7 +4026,30 @@
36084026 u64 addr, size_t size,
36094027 enum dma_data_direction direction)
36104028 {
3611
- dma_unmap_page(dev->dma_device, addr, size, direction);
4029
+ if (!ib_uses_virt_dma(dev))
4030
+ dma_unmap_page(dev->dma_device, addr, size, direction);
4031
+}
4032
+
4033
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
4034
+static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
4035
+ struct scatterlist *sg, int nents,
4036
+ enum dma_data_direction direction,
4037
+ unsigned long dma_attrs)
4038
+{
4039
+ if (ib_uses_virt_dma(dev))
4040
+ return ib_dma_virt_map_sg(dev, sg, nents);
4041
+ return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
4042
+ dma_attrs);
4043
+}
4044
+
4045
+static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
4046
+ struct scatterlist *sg, int nents,
4047
+ enum dma_data_direction direction,
4048
+ unsigned long dma_attrs)
4049
+{
4050
+ if (!ib_uses_virt_dma(dev))
4051
+ dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
4052
+ dma_attrs);
36124053 }
36134054
36144055 /**
....@@ -3622,7 +4063,7 @@
36224063 struct scatterlist *sg, int nents,
36234064 enum dma_data_direction direction)
36244065 {
3625
- return dma_map_sg(dev->dma_device, sg, nents, direction);
4066
+ return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0);
36264067 }
36274068
36284069 /**
....@@ -3636,51 +4077,20 @@
36364077 struct scatterlist *sg, int nents,
36374078 enum dma_data_direction direction)
36384079 {
3639
- dma_unmap_sg(dev->dma_device, sg, nents, direction);
3640
-}
3641
-
3642
-static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
3643
- struct scatterlist *sg, int nents,
3644
- enum dma_data_direction direction,
3645
- unsigned long dma_attrs)
3646
-{
3647
- return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
3648
- dma_attrs);
3649
-}
3650
-
3651
-static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
3652
- struct scatterlist *sg, int nents,
3653
- enum dma_data_direction direction,
3654
- unsigned long dma_attrs)
3655
-{
3656
- dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
3657
-}
3658
-/**
3659
- * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
3660
- * @dev: The device for which the DMA addresses were created
3661
- * @sg: The scatter/gather entry
3662
- *
3663
- * Note: this function is obsolete. To do: change all occurrences of
3664
- * ib_sg_dma_address() into sg_dma_address().
3665
- */
3666
-static inline u64 ib_sg_dma_address(struct ib_device *dev,
3667
- struct scatterlist *sg)
3668
-{
3669
- return sg_dma_address(sg);
4080
+ ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0);
36704081 }
36714082
36724083 /**
3673
- * ib_sg_dma_len - Return the DMA length from a scatter/gather entry
3674
- * @dev: The device for which the DMA addresses were created
3675
- * @sg: The scatter/gather entry
4084
+ * ib_dma_max_seg_size - Return the size limit of a single DMA transfer
4085
+ * @dev: The device to query
36764086 *
3677
- * Note: this function is obsolete. To do: change all occurrences of
3678
- * ib_sg_dma_len() into sg_dma_len().
4087
+ * The returned value represents a size in bytes.
36794088 */
3680
-static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
3681
- struct scatterlist *sg)
4089
+static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
36824090 {
3683
- return sg_dma_len(sg);
4091
+ if (ib_uses_virt_dma(dev))
4092
+ return UINT_MAX;
4093
+ return dma_get_max_seg_size(dev->dma_device);
36844094 }
36854095
36864096 /**
....@@ -3695,7 +4105,8 @@
36954105 size_t size,
36964106 enum dma_data_direction dir)
36974107 {
3698
- dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
4108
+ if (!ib_uses_virt_dma(dev))
4109
+ dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
36994110 }
37004111
37014112 /**
....@@ -3710,7 +4121,8 @@
37104121 size_t size,
37114122 enum dma_data_direction dir)
37124123 {
3713
- dma_sync_single_for_device(dev->dma_device, addr, size, dir);
4124
+ if (!ib_uses_virt_dma(dev))
4125
+ dma_sync_single_for_device(dev->dma_device, addr, size, dir);
37144126 }
37154127
37164128 /**
....@@ -3742,18 +4154,45 @@
37424154 dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
37434155 }
37444156
4157
+/* ib_reg_user_mr - register a memory region for virtual addresses from kernel
4158
+ * space. This function should be called when 'current' is the owning MM.
4159
+ */
4160
+struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
4161
+ u64 virt_addr, int mr_access_flags);
4162
+
4163
+/* ib_advise_mr - give an advice about an address range in a memory region */
4164
+int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
4165
+ u32 flags, struct ib_sge *sg_list, u32 num_sge);
37454166 /**
3746
- * ib_dereg_mr - Deregisters a memory region and removes it from the
4167
+ * ib_dereg_mr_user - Deregisters a memory region and removes it from the
4168
+ * HCA translation table.
4169
+ * @mr: The memory region to deregister.
4170
+ * @udata: Valid user data or NULL for kernel object
4171
+ *
4172
+ * This function can fail, if the memory region has memory windows bound to it.
4173
+ */
4174
+int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata);
4175
+
4176
+/**
4177
+ * ib_dereg_mr - Deregisters a kernel memory region and removes it from the
37474178 * HCA translation table.
37484179 * @mr: The memory region to deregister.
37494180 *
37504181 * This function can fail, if the memory region has memory windows bound to it.
4182
+ *
4183
+ * NOTE: for user mr use ib_dereg_mr_user with valid udata!
37514184 */
3752
-int ib_dereg_mr(struct ib_mr *mr);
4185
+static inline int ib_dereg_mr(struct ib_mr *mr)
4186
+{
4187
+ return ib_dereg_mr_user(mr, NULL);
4188
+}
37534189
3754
-struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
3755
- enum ib_mr_type mr_type,
4190
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
37564191 u32 max_num_sg);
4192
+
4193
+struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
4194
+ u32 max_num_data_sg,
4195
+ u32 max_num_meta_sg);
37574196
37584197 /**
37594198 * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
....@@ -3779,45 +4218,6 @@
37794218 }
37804219
37814220 /**
3782
- * ib_alloc_fmr - Allocates a unmapped fast memory region.
3783
- * @pd: The protection domain associated with the unmapped region.
3784
- * @mr_access_flags: Specifies the memory access rights.
3785
- * @fmr_attr: Attributes of the unmapped region.
3786
- *
3787
- * A fast memory region must be mapped before it can be used as part of
3788
- * a work request.
3789
- */
3790
-struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
3791
- int mr_access_flags,
3792
- struct ib_fmr_attr *fmr_attr);
3793
-
3794
-/**
3795
- * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region.
3796
- * @fmr: The fast memory region to associate with the pages.
3797
- * @page_list: An array of physical pages to map to the fast memory region.
3798
- * @list_len: The number of pages in page_list.
3799
- * @iova: The I/O virtual address to use with the mapped region.
3800
- */
3801
-static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
3802
- u64 *page_list, int list_len,
3803
- u64 iova)
3804
-{
3805
- return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
3806
-}
3807
-
3808
-/**
3809
- * ib_unmap_fmr - Removes the mapping from a list of fast memory regions.
3810
- * @fmr_list: A linked list of fast memory regions to unmap.
3811
- */
3812
-int ib_unmap_fmr(struct list_head *fmr_list);
3813
-
3814
-/**
3815
- * ib_dealloc_fmr - Deallocates a fast memory region.
3816
- * @fmr: The fast memory region to deallocate.
3817
- */
3818
-int ib_dealloc_fmr(struct ib_fmr *fmr);
3819
-
3820
-/**
38214221 * ib_attach_mcast - Attaches the specified QP to a multicast group.
38224222 * @qp: QP to attach to the multicast group. The QP must be type
38234223 * IB_QPT_UD.
....@@ -3839,20 +4239,9 @@
38394239 */
38404240 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
38414241
3842
-/**
3843
- * ib_alloc_xrcd - Allocates an XRC domain.
3844
- * @device: The device on which to allocate the XRC domain.
3845
- * @caller: Module name for kernel consumers
3846
- */
3847
-struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
3848
-#define ib_alloc_xrcd(device) \
3849
- __ib_alloc_xrcd((device), KBUILD_MODNAME)
3850
-
3851
-/**
3852
- * ib_dealloc_xrcd - Deallocates an XRC domain.
3853
- * @xrcd: The XRC domain to deallocate.
3854
- */
3855
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
4242
+struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
4243
+ struct inode *inode, struct ib_udata *udata);
4244
+int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
38564245
38574246 static inline int ib_check_mr_access(int flags)
38584247 {
....@@ -3899,21 +4288,48 @@
38994288 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
39004289 struct ib_mr_status *mr_status);
39014290
4291
+/**
4292
+ * ib_device_try_get: Hold a registration lock
4293
+ * @device: The device to lock
4294
+ *
4295
+ * A device under an active registration lock cannot become unregistered. It
4296
+ * is only possible to obtain a registration lock on a device that is fully
4297
+ * registered, otherwise this function returns false.
4298
+ *
4299
+ * The registration lock is only necessary for actions which require the
4300
+ * device to still be registered. Uses that only require the device pointer to
4301
+ * be valid should use get_device(&ibdev->dev) to hold the memory.
4302
+ *
4303
+ */
4304
+static inline bool ib_device_try_get(struct ib_device *dev)
4305
+{
4306
+ return refcount_inc_not_zero(&dev->refcount);
4307
+}
4308
+
4309
+void ib_device_put(struct ib_device *device);
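/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): the
 * try-get/put pattern around work that is only valid while the device stays
 * registered. "my_do_work" is a hypothetical helper.
 */
static inline void my_run_if_registered(struct ib_device *ibdev,
					void (*my_do_work)(struct ib_device *))
{
	if (!ib_device_try_get(ibdev))
		return;		/* device is going away; skip the work */

	my_do_work(ibdev);	/* cannot race with unregistration here */

	ib_device_put(ibdev);
}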
4310
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
4311
+ enum rdma_driver_id driver_id);
4312
+struct ib_device *ib_device_get_by_name(const char *name,
4313
+ enum rdma_driver_id driver_id);
39024314 struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
39034315 u16 pkey, const union ib_gid *gid,
39044316 const struct sockaddr *addr);
4317
+int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
4318
+ unsigned int port);
4319
+struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
4320
+
39054321 struct ib_wq *ib_create_wq(struct ib_pd *pd,
39064322 struct ib_wq_init_attr *init_attr);
3907
-int ib_destroy_wq(struct ib_wq *wq);
4323
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
39084324 int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
39094325 u32 wq_attr_mask);
3910
-struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
3911
- struct ib_rwq_ind_table_init_attr*
3912
- wq_ind_table_init_attr);
3913
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
39144326
39154327 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
39164328 unsigned int *sg_offset, unsigned int page_size);
4329
+int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
4330
+ int data_sg_nents, unsigned int *data_sg_offset,
4331
+ struct scatterlist *meta_sg, int meta_sg_nents,
4332
+ unsigned int *meta_sg_offset, unsigned int page_size);
39174333
39184334 static inline int
39194335 ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
....@@ -3934,7 +4350,7 @@
39344350 void ib_drain_sq(struct ib_qp *qp);
39354351 void ib_drain_qp(struct ib_qp *qp);
39364352
3937
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width);
4353
+int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width);
39384354
39394355 static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr)
39404356 {
....@@ -4158,25 +4574,11 @@
41584574 ib_get_vector_affinity(struct ib_device *device, int comp_vector)
41594575 {
41604576 if (comp_vector < 0 || comp_vector >= device->num_comp_vectors ||
4161
- !device->get_vector_affinity)
4577
+ !device->ops.get_vector_affinity)
41624578 return NULL;
41634579
4164
- return device->get_vector_affinity(device, comp_vector);
4580
+ return device->ops.get_vector_affinity(device, comp_vector);
41654581
4166
-}
4167
-
4168
-static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
4169
- struct ib_qp *qp, struct ib_device *device)
4170
-{
4171
- uobj->object = ibflow;
4172
- ibflow->uobject = uobj;
4173
-
4174
- if (qp) {
4175
- atomic_inc(&qp->usecnt);
4176
- ibflow->qp = qp;
4177
- }
4178
-
4179
- ibflow->device = device;
41804582 }
41814583
41824584 /**
....@@ -4187,8 +4589,128 @@
41874589 */
41884590 void rdma_roce_rescan_device(struct ib_device *ibdev);
41894591
4190
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
4592
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
41914593
4192
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
4193
- struct uverbs_attr_bundle *attrs);
4594
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
4595
+
4596
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
4597
+ enum rdma_netdev_t type, const char *name,
4598
+ unsigned char name_assign_type,
4599
+ void (*setup)(struct net_device *));
4600
+
4601
+int rdma_init_netdev(struct ib_device *device, u8 port_num,
4602
+ enum rdma_netdev_t type, const char *name,
4603
+ unsigned char name_assign_type,
4604
+ void (*setup)(struct net_device *),
4605
+ struct net_device *netdev);
4606
+
4607
+/**
4608
+ * rdma_set_device_sysfs_group - Set device attributes group to have
4609
+ * driver-specific sysfs entries
4610
+ * for the infiniband class.
4611
+ *
4612
+ * @device: device pointer for which attributes to be created
4613
+ * @group: Pointer to group which should be added when device
4614
+ * is registered with sysfs.
4615
+ * rdma_set_device_sysfs_group() allows existing drivers to expose one
4616
+ * group per device to have sysfs attributes.
4617
+ *
4618
+ * NOTE: New drivers should not make use of this API; instead new device
4619
+ * parameters should be exposed via netlink commands. This API and mechanism
4620
+ * exist only for existing drivers.
4621
+ */
4622
+static inline void
4623
+rdma_set_device_sysfs_group(struct ib_device *dev,
4624
+ const struct attribute_group *group)
4625
+{
4626
+ dev->groups[1] = group;
4627
+}
4628
+
4629
+/**
4630
+ * rdma_device_to_ibdev - Get ib_device pointer from device pointer
4631
+ *
4632
+ * @device: device pointer from which to retrieve the ib_device pointer
4633
+ *
4634
+ * rdma_device_to_ibdev() retrieves ib_device pointer from device.
4635
+ *
4636
+ */
4637
+static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
4638
+{
4639
+ struct ib_core_device *coredev =
4640
+ container_of(device, struct ib_core_device, dev);
4641
+
4642
+ return coredev->owner;
4643
+}
4644
+
4645
+/**
4646
+ * ibdev_to_node - return the NUMA node for a given ib_device
4647
+ * @dev: device to get the NUMA node for.
4648
+ */
4649
+static inline int ibdev_to_node(struct ib_device *ibdev)
4650
+{
4651
+ struct device *parent = ibdev->dev.parent;
4652
+
4653
+ if (!parent)
4654
+ return NUMA_NO_NODE;
4655
+ return dev_to_node(parent);
4656
+}
4657
+
4658
+/**
4659
+ * rdma_device_to_drv_device - Helper macro to reach back to driver's
4660
+ * ib_device holder structure from device pointer.
4661
+ *
4662
+ * NOTE: New drivers should not make use of this API; This API is only for
4663
+ * existing drivers who have exposed sysfs entries using
4664
+ * rdma_set_device_sysfs_group().
4665
+ */
4666
+#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \
4667
+ container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
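/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): a legacy
 * sysfs show() callback reaching back from the core's struct device to the
 * driver structure that embeds the ib_device. "struct my_drv_dev", its
 * "ibdev" member and "hw_rev" are hypothetical.
 */
struct my_drv_dev {
	struct ib_device ibdev;
	u32 hw_rev;
};

static ssize_t hw_rev_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	struct my_drv_dev *mdev =
		rdma_device_to_drv_device(device, struct my_drv_dev, ibdev);

	return scnprintf(buf, PAGE_SIZE, "%u\n", mdev->hw_rev);
}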
4668
+
4669
+bool rdma_dev_access_netns(const struct ib_device *device,
4670
+ const struct net *net);
4671
+
4672
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
4673
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF)
4674
+#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
4675
+
4676
+/**
4677
+ * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based
4678
+ * on the flow_label
4679
+ *
4680
+ * This function converts the 20-bit flow_label input to a valid 14-bit RoCE v2
4681
+ * UDP source port value. All RoCE v2 drivers should use this same
4682
+ * convention.
4683
+ */
4684
+static inline u16 rdma_flow_label_to_udp_sport(u32 fl)
4685
+{
4686
+ u32 fl_low = fl & 0x03fff, fl_high = fl & 0xFC000;
4687
+
4688
+ fl_low ^= fl_high >> 14;
4689
+ return (u16)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN);
4690
+}
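/*
 * Editor's worked example (not part of ib_verbs.h): for fl = 0x12345 the
 * helper computes fl_low = 0x2345 and fl_high = 0x10000, so
 * fl_low ^= (0x10000 >> 14) gives 0x2341, and the returned UDP source port is
 * 0x2341 | 0xC000 = 0xE341, which lies inside the RoCE v2 valid range
 * 0xC000-0xFFFF.
 */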
4691
+
4692
+/**
4693
+ * rdma_calc_flow_label - generate an RDMA symmetric flow label value based on
4694
+ * local and remote qpn values
4695
+ *
4696
+ * This function folds the product of the two 24-bit QPN
4697
+ * fields and converts it to a 20-bit result.
4698
+ *
4699
+ * This function creates a symmetric flow_label value based on the local
4700
+ * and remote qpn values. This allows both the requester and the responder
4701
+ * to calculate the same flow_label for a given connection.
4702
+ *
4703
+ * This helper function should be used by drivers when the upper layer
4704
+ * provides a zero flow_label value. This improves the entropy of RDMA
4705
+ * traffic in the network.
4706
+ */
4707
+static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn)
4708
+{
4709
+ u64 v = (u64)lqpn * rqpn;
4710
+
4711
+ v ^= v >> 20;
4712
+ v ^= v >> 40;
4713
+
4714
+ return (u32)(v & IB_GRH_FLOWLABEL_MASK);
4715
+}
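/*
 * Illustrative sketch (editor's addition, not part of ib_verbs.h): how a
 * RoCE v2 driver might combine the two helpers above when the upper layer
 * passed a zero flow label. "lqpn" and "rqpn" are the local and remote QP
 * numbers known to the caller.
 */
static inline u16 my_roce_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
{
	if (!fl)
		fl = rdma_calc_flow_label(lqpn, rqpn);

	return rdma_flow_label_to_udp_sport(fl);
}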
41944716 #endif /* IB_VERBS_H */