From 244b2c5ca8b14627e4a17755e5922221e121c771 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:15:07 +0000
Subject: [PATCH] rdma: update kernel/include/rdma/ib_verbs.h to the newer core verbs API
---
kernel/include/rdma/ib_verbs.h | 2092 +++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 1307 insertions(+), 785 deletions(-)
diff --git a/kernel/include/rdma/ib_verbs.h b/kernel/include/rdma/ib_verbs.h
index 874cd6e..ac6ffa5 100644
--- a/kernel/include/rdma/ib_verbs.h
+++ b/kernel/include/rdma/ib_verbs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
@@ -6,49 +7,18 @@
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
-#if !defined(IB_VERBS_H)
+#ifndef IB_VERBS_H
#define IB_VERBS_H
#include <linux/types.h>
#include <linux/device.h>
-#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/rwsem.h>
-#include <linux/scatterlist.h>
#include <linux/workqueue.h>
-#include <linux/socket.h>
#include <linux/irq_poll.h>
#include <uapi/linux/if_ether.h>
#include <net/ipv6.h>
@@ -56,22 +26,106 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
-
+#include <linux/refcount.h>
#include <linux/if_link.h>
#include <linux/atomic.h>
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/cgroup_rdma.h>
+#include <linux/irqflags.h>
+#include <linux/preempt.h>
+#include <linux/dim.h>
#include <uapi/rdma/ib_user_verbs.h>
+#include <rdma/rdma_counter.h>
#include <rdma/restrack.h>
+#include <rdma/signature.h>
#include <uapi/rdma/rdma_user_ioctl.h>
#include <uapi/rdma/ib_user_ioctl_verbs.h>
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
+struct ib_umem_odp;
+struct ib_uqp_object;
+struct ib_usrq_object;
+struct ib_uwq_object;
+struct rdma_cm_id;
+
extern struct workqueue_struct *ib_wq;
extern struct workqueue_struct *ib_comp_wq;
extern struct workqueue_struct *ib_comp_unbound_wq;
+
+struct ib_ucq_object;
+
+__printf(3, 4) __cold
+void ibdev_printk(const char *level, const struct ib_device *ibdev,
+ const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_alert(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_crit(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_err(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_warn(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_notice(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
+
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+#define ibdev_dbg(__dev, format, args...) \
+ dynamic_ibdev_dbg(__dev, format, ##args)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
+#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ if (__ratelimit(&_rs)) \
+ ibdev_level(ibdev, fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_err_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_info_ratelimited(ibdev, fmt, ...) \
+ ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+ (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define ibdev_dbg_ratelimited(ibdev, fmt, ...) \
+do { \
+ static DEFINE_RATELIMIT_STATE(_rs, \
+ DEFAULT_RATELIMIT_INTERVAL, \
+ DEFAULT_RATELIMIT_BURST); \
+ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
+ if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs)) \
+ __dynamic_ibdev_dbg(&descriptor, ibdev, fmt, \
+ ##__VA_ARGS__); \
+} while (0)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
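
For illustration, a minimal sketch of how a provider might use these device-scoped loggers; foo_create_cq() and its parameters are hypothetical, and only ibdev_err() and ibdev_dbg_ratelimited() are taken from the declarations above.

    /* Hypothetical provider code; assumes "ibdev" is a valid struct ib_device. */
    static int foo_create_cq(struct ib_device *ibdev, int cqe)
    {
    	if (cqe <= 0) {
    		ibdev_err(ibdev, "invalid CQE count %d\n", cqe);
    		return -EINVAL;
    	}
    	/* Noisy fast-path diagnostics: rate limited, and a no-op stub
    	 * unless dynamic debug support is configured. */
    	ibdev_dbg_ratelimited(ibdev, "creating CQ with %d entries\n", cqe);
    	return 0;
    }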
union ib_gid {
u8 raw[16];
@@ -84,31 +138,20 @@
extern union ib_gid zgid;
enum ib_gid_type {
- /* If link layer is Ethernet, this is RoCE V1 */
- IB_GID_TYPE_IB = 0,
- IB_GID_TYPE_ROCE = 0,
- IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+ IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB,
+ IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1,
+ IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2,
IB_GID_TYPE_SIZE
};
#define ROCE_V2_UDP_DPORT 4791
struct ib_gid_attr {
- struct net_device *ndev;
+ struct net_device __rcu *ndev;
struct ib_device *device;
union ib_gid gid;
enum ib_gid_type gid_type;
u16 index;
u8 port_num;
-};
-
-enum rdma_node_type {
- /* IB values map to NodeInfo:NodeType. */
- RDMA_NODE_IB_CA = 1,
- RDMA_NODE_IB_SWITCH,
- RDMA_NODE_IB_ROUTER,
- RDMA_NODE_RNIC,
- RDMA_NODE_USNIC,
- RDMA_NODE_USNIC_UDP,
};
enum {
@@ -120,7 +163,8 @@
RDMA_TRANSPORT_IB,
RDMA_TRANSPORT_IWARP,
RDMA_TRANSPORT_USNIC,
- RDMA_TRANSPORT_USNIC_UDP
+ RDMA_TRANSPORT_USNIC_UDP,
+ RDMA_TRANSPORT_UNSPECIFIED,
};
enum rdma_protocol_type {
@@ -131,11 +175,11 @@
};
__attribute_const__ enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type);
+rdma_node_get_transport(unsigned int node_type);
enum rdma_network_type {
RDMA_NETWORK_IB,
- RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
+ RDMA_NETWORK_ROCE_V1,
RDMA_NETWORK_IPV4,
RDMA_NETWORK_IPV6
};
@@ -145,9 +189,10 @@
if (network_type == RDMA_NETWORK_IPV4 ||
network_type == RDMA_NETWORK_IPV6)
return IB_GID_TYPE_ROCE_UDP_ENCAP;
-
- /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
- return IB_GID_TYPE_IB;
+ else if (network_type == RDMA_NETWORK_ROCE_V1)
+ return IB_GID_TYPE_ROCE;
+ else
+ return IB_GID_TYPE_IB;
}
static inline enum rdma_network_type
@@ -155,6 +200,9 @@
{
if (attr->gid_type == IB_GID_TYPE_IB)
return RDMA_NETWORK_IB;
+
+ if (attr->gid_type == IB_GID_TYPE_ROCE)
+ return RDMA_NETWORK_ROCE_V1;
if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
return RDMA_NETWORK_IPV4;
@@ -230,26 +278,16 @@
*/
IB_DEVICE_CROSS_CHANNEL = (1 << 27),
IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
- IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
+ IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30),
IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31),
IB_DEVICE_SG_GAPS_REG = (1ULL << 32),
IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33),
/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34),
- IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35),
+ IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35),
/* The device supports padding incoming writes to cacheline. */
IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36),
-};
-
-enum ib_signature_prot_cap {
- IB_PROT_T10DIF_TYPE_1 = 1,
- IB_PROT_T10DIF_TYPE_2 = 1 << 1,
- IB_PROT_T10DIF_TYPE_3 = 1 << 2,
-};
-
-enum ib_signature_guard_cap {
- IB_GUARD_T10DIF_CRC = 1,
- IB_GUARD_T10DIF_CSUM = 1 << 1,
+ IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37),
};
enum ib_atomic_cap {
@@ -269,6 +307,7 @@
IB_ODP_SUPPORT_WRITE = 1 << 2,
IB_ODP_SUPPORT_READ = 1 << 3,
IB_ODP_SUPPORT_ATOMIC = 1 << 4,
+ IB_ODP_SUPPORT_SRQ_RECV = 1 << 5,
};
struct ib_odp_caps {
@@ -277,6 +316,7 @@
uint32_t rc_odp_caps;
uint32_t uc_odp_caps;
uint32_t ud_odp_caps;
+ uint32_t xrc_odp_caps;
} per_transport_caps;
};
@@ -369,12 +409,11 @@
int max_mcast_qp_attach;
int max_total_mcast_qp_attach;
int max_ah;
- int max_fmr;
- int max_map_per_fmr;
int max_srq;
int max_srq_wr;
int max_srq_sge;
unsigned int max_fast_reg_page_list_len;
+ unsigned int max_pi_fast_reg_page_list_len;
u16 max_pkeys;
u8 local_ca_ack_delay;
int sig_prot_cap;
@@ -388,6 +427,8 @@
struct ib_tm_caps tm_caps;
struct ib_cq_caps cq_caps;
u64 max_dm_size;
+ /* Max entries for sgl for optimized performance per READ */
+ u32 max_sgl_rd;
};
enum ib_mtu {
@@ -396,6 +437,11 @@
IB_MTU_1024 = 3,
IB_MTU_2048 = 4,
IB_MTU_4096 = 5
+};
+
+enum opa_mtu {
+ OPA_MTU_8192 = 6,
+ OPA_MTU_10240 = 7
};
static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
@@ -424,6 +470,28 @@
return IB_MTU_256;
}
+static inline int opa_mtu_enum_to_int(enum opa_mtu mtu)
+{
+ switch (mtu) {
+ case OPA_MTU_8192:
+ return 8192;
+ case OPA_MTU_10240:
+ return 10240;
+ default:
+ return(ib_mtu_enum_to_int((enum ib_mtu)mtu));
+ }
+}
+
+static inline enum opa_mtu opa_mtu_int_to_enum(int mtu)
+{
+ if (mtu >= 10240)
+ return OPA_MTU_10240;
+ else if (mtu >= 8192)
+ return OPA_MTU_8192;
+ else
+ return ((enum opa_mtu)ib_mtu_int_to_enum(mtu));
+}
+
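
A small usage sketch (values illustrative): the OPA enum extends ib_mtu, so both helpers fall back to the IB conversions for MTUs of 4096 bytes and below.

    static int opa_mtu_example(void)
    {
    	enum opa_mtu m = opa_mtu_int_to_enum(9000);	/* rounds down to OPA_MTU_8192 */

    	/* 8192; anything at or below IB_MTU_4096 goes via ib_mtu_enum_to_int() */
    	return opa_mtu_enum_to_int(m);
    }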
enum ib_port_state {
IB_PORT_NOP = 0,
IB_PORT_DOWN = 1,
@@ -433,8 +501,19 @@
IB_PORT_ACTIVE_DEFER = 5
};
+enum ib_port_phys_state {
+ IB_PORT_PHYS_STATE_SLEEP = 1,
+ IB_PORT_PHYS_STATE_POLLING = 2,
+ IB_PORT_PHYS_STATE_DISABLED = 3,
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
+ IB_PORT_PHYS_STATE_LINK_UP = 5,
+ IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
+ IB_PORT_PHYS_STATE_PHY_TEST = 7,
+};
+
enum ib_port_width {
IB_WIDTH_1X = 1,
+ IB_WIDTH_2X = 16,
IB_WIDTH_4X = 2,
IB_WIDTH_8X = 4,
IB_WIDTH_12X = 8
@@ -444,6 +523,7 @@
{
switch (width) {
case IB_WIDTH_1X: return 1;
+ case IB_WIDTH_2X: return 2;
case IB_WIDTH_4X: return 4;
case IB_WIDTH_8X: return 8;
case IB_WIDTH_12X: return 12;
@@ -458,7 +538,8 @@
IB_SPEED_FDR10 = 8,
IB_SPEED_FDR = 16,
IB_SPEED_EDR = 32,
- IB_SPEED_HDR = 64
+ IB_SPEED_HDR = 64,
+ IB_SPEED_NDR = 128,
};
/**
@@ -575,6 +656,7 @@
enum ib_port_state state;
enum ib_mtu max_mtu;
enum ib_mtu active_mtu;
+ u32 phys_mtu;
int gid_tbl_len;
unsigned int ip_gids:1;
/* This is the value from PortInfo CapabilityMask, defined by IBA */
@@ -591,8 +673,9 @@
u8 subnet_timeout;
u8 init_type_reply;
u8 active_width;
- u8 active_speed;
+ u16 active_speed;
u8 phys_state;
+ u16 port_cap_flags2;
};
enum ib_device_modify_flags {
@@ -730,7 +813,11 @@
IB_RATE_25_GBPS = 15,
IB_RATE_100_GBPS = 16,
IB_RATE_200_GBPS = 17,
- IB_RATE_300_GBPS = 18
+ IB_RATE_300_GBPS = 18,
+ IB_RATE_28_GBPS = 19,
+ IB_RATE_50_GBPS = 20,
+ IB_RATE_400_GBPS = 21,
+ IB_RATE_600_GBPS = 22,
};
/**
@@ -753,118 +840,26 @@
* enum ib_mr_type - memory region type
* @IB_MR_TYPE_MEM_REG: memory region that is used for
* normal registration
- * @IB_MR_TYPE_SIGNATURE: memory region that is used for
- * signature operations (data-integrity
- * capable regions)
* @IB_MR_TYPE_SG_GAPS: memory region that is capable to
* register any arbitrary sg lists (without
* the normal mr constraints - see
* ib_map_mr_sg)
+ * @IB_MR_TYPE_DM: memory region that is used for device
+ * memory registration
+ * @IB_MR_TYPE_USER: memory region that is used for the user-space
+ * application
+ * @IB_MR_TYPE_DMA: memory region that is used for DMA operations
+ * without address translations (VA=PA)
+ * @IB_MR_TYPE_INTEGRITY: memory region that is used for
+ * data integrity operations
*/
enum ib_mr_type {
IB_MR_TYPE_MEM_REG,
- IB_MR_TYPE_SIGNATURE,
IB_MR_TYPE_SG_GAPS,
-};
-
-/**
- * Signature types
- * IB_SIG_TYPE_NONE: Unprotected.
- * IB_SIG_TYPE_T10_DIF: Type T10-DIF
- */
-enum ib_signature_type {
- IB_SIG_TYPE_NONE,
- IB_SIG_TYPE_T10_DIF,
-};
-
-/**
- * Signature T10-DIF block-guard types
- * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
- * IB_T10DIF_CSUM: Corresponds to IP checksum rules.
- */
-enum ib_t10_dif_bg_type {
- IB_T10DIF_CRC,
- IB_T10DIF_CSUM
-};
-
-/**
- * struct ib_t10_dif_domain - Parameters specific for T10-DIF
- * domain.
- * @bg_type: T10-DIF block guard type (CRC|CSUM)
- * @pi_interval: protection information interval.
- * @bg: seed of guard computation.
- * @app_tag: application tag of guard block
- * @ref_tag: initial guard block reference tag.
- * @ref_remap: Indicate wethear the reftag increments each block
- * @app_escape: Indicate to skip block check if apptag=0xffff
- * @ref_escape: Indicate to skip block check if reftag=0xffffffff
- * @apptag_check_mask: check bitmask of application tag.
- */
-struct ib_t10_dif_domain {
- enum ib_t10_dif_bg_type bg_type;
- u16 pi_interval;
- u16 bg;
- u16 app_tag;
- u32 ref_tag;
- bool ref_remap;
- bool app_escape;
- bool ref_escape;
- u16 apptag_check_mask;
-};
-
-/**
- * struct ib_sig_domain - Parameters for signature domain
- * @sig_type: specific signauture type
- * @sig: union of all signature domain attributes that may
- * be used to set domain layout.
- */
-struct ib_sig_domain {
- enum ib_signature_type sig_type;
- union {
- struct ib_t10_dif_domain dif;
- } sig;
-};
-
-/**
- * struct ib_sig_attrs - Parameters for signature handover operation
- * @check_mask: bitmask for signature byte check (8 bytes)
- * @mem: memory domain layout desciptor.
- * @wire: wire domain layout desciptor.
- */
-struct ib_sig_attrs {
- u8 check_mask;
- struct ib_sig_domain mem;
- struct ib_sig_domain wire;
-};
-
-enum ib_sig_err_type {
- IB_SIG_BAD_GUARD,
- IB_SIG_BAD_REFTAG,
- IB_SIG_BAD_APPTAG,
-};
-
-/**
- * Signature check masks (8 bytes in total) according to the T10-PI standard:
- * -------- -------- ------------
- * | GUARD | APPTAG | REFTAG |
- * | 2B | 2B | 4B |
- * -------- -------- ------------
- */
-enum {
- IB_SIG_CHECK_GUARD = 0xc0,
- IB_SIG_CHECK_APPTAG = 0x30,
- IB_SIG_CHECK_REFTAG = 0x0f,
-};
-
-/**
- * struct ib_sig_err - signature error descriptor
- */
-struct ib_sig_err {
- enum ib_sig_err_type err_type;
- u32 expected;
- u32 actual;
- u64 sig_err_offset;
- u32 key;
+ IB_MR_TYPE_DM,
+ IB_MR_TYPE_USER,
+ IB_MR_TYPE_DMA,
+ IB_MR_TYPE_INTEGRITY,
};
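
As a hedged example of how this enum is consumed: ib_alloc_mr() is the long-standing kernel helper that takes an ib_mr_type (it is not declared in this hunk, so treat its use here as an assumption), while integrity MRs instead come from the alloc_mr_integrity() device op added further down. The pd argument and foo_* name are hypothetical.

    static int foo_mr_type_example(struct ib_pd *pd)
    {
    	struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 32 /* max_num_sg */);

    	if (IS_ERR(mr))
    		return PTR_ERR(mr);
    	/* IB_MR_TYPE_INTEGRITY regions are not created here; they come from
    	 * the driver's alloc_mr_integrity() op (see struct ib_device_ops below). */
    	return ib_dereg_mr(mr);
    }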
enum ib_mr_status_check {
@@ -890,6 +885,12 @@
* @mult: multiple to convert.
*/
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
+
+struct rdma_ah_init_attr {
+ struct rdma_ah_attr *ah_attr;
+ u32 flags;
+ struct net_device *xmit_slave;
+};
enum rdma_ah_attr_type {
RDMA_AH_ATTR_TYPE_UNDEFINED,
@@ -955,13 +956,14 @@
const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
enum ib_wc_opcode {
- IB_WC_SEND,
- IB_WC_RDMA_WRITE,
- IB_WC_RDMA_READ,
- IB_WC_COMP_SWAP,
- IB_WC_FETCH_ADD,
- IB_WC_LSO,
- IB_WC_LOCAL_INV,
+ IB_WC_SEND = IB_UVERBS_WC_SEND,
+ IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
+ IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
+ IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
+ IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
+ IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
+ IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
+ IB_WC_LSO = IB_UVERBS_WC_TSO,
IB_WC_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
@@ -1017,9 +1019,9 @@
};
enum ib_srq_type {
- IB_SRQT_BASIC,
- IB_SRQT_XRC,
- IB_SRQT_TM,
+ IB_SRQT_BASIC = IB_UVERBS_SRQT_BASIC,
+ IB_SRQT_XRC = IB_UVERBS_SRQT_XRC,
+ IB_SRQT_TM = IB_UVERBS_SRQT_TM,
};
static inline bool ib_srq_has_cq(enum ib_srq_type srq_type)
@@ -1088,16 +1090,16 @@
IB_QPT_SMI,
IB_QPT_GSI,
- IB_QPT_RC,
- IB_QPT_UC,
- IB_QPT_UD,
+ IB_QPT_RC = IB_UVERBS_QPT_RC,
+ IB_QPT_UC = IB_UVERBS_QPT_UC,
+ IB_QPT_UD = IB_UVERBS_QPT_UD,
IB_QPT_RAW_IPV6,
IB_QPT_RAW_ETHERTYPE,
- IB_QPT_RAW_PACKET = 8,
- IB_QPT_XRC_INI = 9,
- IB_QPT_XRC_TGT,
+ IB_QPT_RAW_PACKET = IB_UVERBS_QPT_RAW_PACKET,
+ IB_QPT_XRC_INI = IB_UVERBS_QPT_XRC_INI,
+ IB_QPT_XRC_TGT = IB_UVERBS_QPT_XRC_TGT,
IB_QPT_MAX,
- IB_QPT_DRIVER = 0xFF,
+ IB_QPT_DRIVER = IB_UVERBS_QPT_DRIVER,
/* Reserve a range for qp types internal to the low level driver.
* These qp types will not be visible at the IB core layer, so the
* IB_QPT_MAX usages should not be affected in the core layer
@@ -1116,17 +1118,21 @@
enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK =
+ IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
IB_QP_CREATE_CROSS_CHANNEL = 1 << 2,
IB_QP_CREATE_MANAGED_SEND = 1 << 3,
IB_QP_CREATE_MANAGED_RECV = 1 << 4,
IB_QP_CREATE_NETIF_QP = 1 << 5,
- IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
- /* FREE = 1 << 7, */
- IB_QP_CREATE_SCATTER_FCS = 1 << 8,
- IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9,
+ IB_QP_CREATE_INTEGRITY_EN = 1 << 6,
+ IB_QP_CREATE_NETDEV_USE = 1 << 7,
+ IB_QP_CREATE_SCATTER_FCS =
+ IB_UVERBS_QP_CREATE_SCATTER_FCS,
+ IB_QP_CREATE_CVLAN_STRIPPING =
+ IB_UVERBS_QP_CREATE_CVLAN_STRIPPING,
IB_QP_CREATE_SOURCE_QPN = 1 << 10,
- IB_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11,
+ IB_QP_CREATE_PCI_WRITE_END_PADDING =
+ IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
@@ -1138,7 +1144,9 @@
*/
struct ib_qp_init_attr {
+ /* Consumer's event_handler callback must not block */
void (*event_handler)(struct ib_event *, void *);
+
void *qp_context;
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
@@ -1276,6 +1284,7 @@
u8 alt_port_num;
u8 alt_timeout;
u32 rate_limit;
+ struct net_device *xmit_slave;
};
enum ib_wr_opcode {
@@ -1287,6 +1296,7 @@
IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+ IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
IB_WR_LSO = IB_UVERBS_WR_TSO,
IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
@@ -1298,7 +1308,7 @@
/* These are kernel only and can not be issued by userspace */
IB_WR_REG_MR = 0x20,
- IB_WR_REG_SIG_MR,
+ IB_WR_REG_MR_INTEGRITY,
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
@@ -1408,20 +1418,6 @@
return container_of(wr, struct ib_reg_wr, wr);
}
-struct ib_sig_handover_wr {
- struct ib_send_wr wr;
- struct ib_sig_attrs *sig_attrs;
- struct ib_mr *sig_mr;
- int access_flags;
- struct ib_sge *prot;
-};
-
-static inline const struct ib_sig_handover_wr *
-sig_handover_wr(const struct ib_send_wr *wr)
-{
- return container_of(wr, struct ib_sig_handover_wr, wr);
-}
-
struct ib_recv_wr {
struct ib_recv_wr *next;
union {
@@ -1441,8 +1437,11 @@
IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
+ IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
- IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
+ IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
+ IB_ACCESS_SUPPORTED =
+ ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
};
/*
@@ -1454,12 +1453,6 @@
IB_MR_REREG_PD = (1<<1),
IB_MR_REREG_ACCESS = (1<<2),
IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1)
-};
-
-struct ib_fmr_attr {
- int max_pages;
- int max_maps;
- u8 page_shift;
};
struct ib_umem;
@@ -1487,34 +1480,15 @@
struct ib_ucontext {
struct ib_device *device;
struct ib_uverbs_file *ufile;
- /*
- * 'closing' can be read by the driver only during a destroy callback,
- * it is set when we are closing the file descriptor and indicates
- * that mm_sem may be locked.
- */
- int closing;
bool cleanup_retryable;
- struct pid *tgid;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- struct rb_root_cached umem_tree;
- /*
- * Protects .umem_rbroot and tree, as well as odp_mrs_count and
- * mmu notifiers registration.
- */
- struct rw_semaphore umem_rwsem;
- void (*invalidate_range)(struct ib_umem *umem,
- unsigned long start, unsigned long end);
-
- struct mmu_notifier mn;
- atomic_t notifier_count;
- /* A list of umems that don't have private mmu notifier counters yet. */
- struct list_head no_private_counters;
- int odp_mrs_count;
-#endif
-
struct ib_rdmacg_object cg_obj;
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
+ struct xarray mmap_xa;
};
struct ib_uobject {
@@ -1561,9 +1535,8 @@
struct ib_device *device;
atomic_t usecnt; /* count all exposed resources */
struct inode *inode;
-
- struct mutex tgt_qp_mutex;
- struct list_head tgt_qp_list;
+ struct rw_semaphore tgt_qps_rwsem;
+ struct xarray tgt_qps;
};
struct ib_ah {
@@ -1577,27 +1550,39 @@
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
enum ib_poll_context {
- IB_POLL_DIRECT, /* caller context, no hw completions */
IB_POLL_SOFTIRQ, /* poll from softirq context */
IB_POLL_WORKQUEUE, /* poll from workqueue */
IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
+ IB_POLL_LAST_POOL_TYPE = IB_POLL_UNBOUND_WORKQUEUE,
+
+ IB_POLL_DIRECT, /* caller context, no hw completions */
};
struct ib_cq {
struct ib_device *device;
- struct ib_uobject *uobject;
+ struct ib_ucq_object *uobject;
ib_comp_handler comp_handler;
void (*event_handler)(struct ib_event *, void *);
void *cq_context;
int cqe;
+ unsigned int cqe_used;
atomic_t usecnt; /* count number of work queues */
enum ib_poll_context poll_ctx;
struct ib_wc *wc;
+ struct list_head pool_entry;
union {
struct irq_poll iop;
struct work_struct work;
};
struct workqueue_struct *comp_wq;
+ struct dim *dim;
+
+ /* updated only by trace points */
+ ktime_t timestamp;
+ u8 interrupt:1;
+ u8 shared:1;
+ unsigned int comp_vector;
+
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -1607,7 +1592,7 @@
struct ib_srq {
struct ib_device *device;
struct ib_pd *pd;
- struct ib_uobject *uobject;
+ struct ib_usrq_object *uobject;
void (*event_handler)(struct ib_event *, void *);
void *srq_context;
enum ib_srq_type srq_type;
@@ -1641,7 +1626,7 @@
};
enum ib_wq_type {
- IB_WQT_RQ
+ IB_WQT_RQ = IB_UVERBS_WQT_RQ,
};
enum ib_wq_state {
@@ -1652,7 +1637,7 @@
struct ib_wq {
struct ib_device *device;
- struct ib_uobject *uobject;
+ struct ib_uwq_object *uobject;
void *wq_context;
void (*event_handler)(struct ib_event *, void *);
struct ib_pd *pd;
@@ -1664,10 +1649,11 @@
};
enum ib_wq_flags {
- IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0,
- IB_WQ_FLAGS_SCATTER_FCS = 1 << 1,
- IB_WQ_FLAGS_DELAY_DROP = 1 << 2,
- IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3,
+ IB_WQ_FLAGS_CVLAN_STRIPPING = IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING,
+ IB_WQ_FLAGS_SCATTER_FCS = IB_UVERBS_WQ_FLAGS_SCATTER_FCS,
+ IB_WQ_FLAGS_DELAY_DROP = IB_UVERBS_WQ_FLAGS_DELAY_DROP,
+ IB_WQ_FLAGS_PCI_WRITE_END_PADDING =
+ IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING,
};
struct ib_wq_init_attr {
@@ -1768,7 +1754,7 @@
atomic_t usecnt;
struct list_head open_list;
struct ib_qp *real_qp;
- struct ib_uobject *uobject;
+ struct ib_uqp_object *uobject;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
/* sgid_attrs associated with the AV's */
@@ -1782,10 +1768,14 @@
struct ib_qp_security *qp_sec;
u8 port;
+ bool integrity_en;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
struct rdma_restrack_entry res;
+
+ /* The counter the qp is bind to */
+ struct rdma_counter *counter;
};
struct ib_dm {
@@ -1804,6 +1794,7 @@
u64 iova;
u64 length;
unsigned int page_size;
+ enum ib_mr_type type;
bool need_inval;
union {
struct ib_uobject *uobject; /* user */
@@ -1811,7 +1802,7 @@
};
struct ib_dm *dm;
-
+ struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -1824,14 +1815,6 @@
struct ib_uobject *uobject;
u32 rkey;
enum ib_mw_type type;
-};
-
-struct ib_fmr {
- struct ib_device *device;
- struct ib_pd *pd;
- struct list_head list;
- u32 lkey;
- u32 rkey;
};
/* Supported steering options */
@@ -1875,17 +1858,6 @@
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 10
-/* Flow steering rule priority is set according to it's domain.
- * Lower domain value means higher priority.
- */
-enum ib_flow_domain {
- IB_FLOW_DOMAIN_USER,
- IB_FLOW_DOMAIN_ETHTOOL,
- IB_FLOW_DOMAIN_RFS,
- IB_FLOW_DOMAIN_NIC,
- IB_FLOW_DOMAIN_NUM /* Must be last */
-};
-
enum ib_flow_flags {
IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
@@ -1898,7 +1870,7 @@
__be16 ether_type;
__be16 vlan_tag;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_eth {
@@ -1912,7 +1884,7 @@
__be16 dlid;
__u8 sl;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_ib {
@@ -1937,7 +1909,7 @@
u8 ttl;
u8 flags;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_ipv4 {
@@ -1955,7 +1927,7 @@
u8 traffic_class;
u8 hop_limit;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_ipv6 {
@@ -1969,7 +1941,7 @@
__be16 dst_port;
__be16 src_port;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_tcp_udp {
@@ -1981,7 +1953,7 @@
struct ib_flow_tunnel_filter {
__be32 tunnel_id;
- u8 real_sz[0];
+ u8 real_sz[];
};
/* ib_flow_spec_tunnel describes the Vxlan tunnel
@@ -1998,7 +1970,7 @@
__be32 spi;
__be32 seq;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_esp {
@@ -2013,7 +1985,7 @@
__be16 protocol;
__be32 key;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_gre {
@@ -2026,7 +1998,7 @@
struct ib_flow_mpls_filter {
__be32 tag;
/* Must be last */
- u8 real_sz[0];
+ u8 real_sz[];
};
struct ib_flow_spec_mpls {
@@ -2158,7 +2130,7 @@
atomic_t usecnt;
};
-struct ib_mad_hdr;
+struct ib_mad;
struct ib_grh;
enum ib_process_mad_flags {
@@ -2182,19 +2154,28 @@
enum ib_port_state port_state;
};
-struct ib_cache {
- rwlock_t lock;
- struct ib_event_handler event_handler;
- struct ib_port_cache *ports;
-};
-
-struct iw_cm_verbs;
-
struct ib_port_immutable {
int pkey_tbl_len;
int gid_tbl_len;
u32 core_cap_flags;
u32 max_mad_size;
+};
+
+struct ib_port_data {
+ struct ib_device *ib_dev;
+
+ struct ib_port_immutable immutable;
+
+ spinlock_t pkey_list_lock;
+ struct list_head pkey_list;
+
+ struct ib_port_cache cache;
+
+ spinlock_t netdev_lock;
+ struct net_device __rcu *netdev;
+ struct hlist_node ndev_hash_link;
+ struct rdma_port_counter port_counter;
+ struct rdma_hw_stats *hw_stats;
};
/* rdma netdev type - specifies protocol type */
@@ -2211,6 +2192,7 @@
void *clnt_priv;
struct ib_device *hca;
u8 port_num;
+ int mtu;
/*
* cleanup function must be specified.
@@ -2232,10 +2214,20 @@
union ib_gid *gid, u16 mlid);
};
-struct ib_port_pkey_list {
- /* Lock to hold while modifying the list. */
- spinlock_t list_lock;
- struct list_head pkey_list;
+struct rdma_netdev_alloc_params {
+ size_t sizeof_priv;
+ unsigned int txqs;
+ unsigned int rxqs;
+ void *param;
+
+ int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num,
+ struct net_device *netdev, void *param);
+};
+
+struct ib_odp_counters {
+ atomic64_t faults;
+ atomic64_t invalidations;
+ atomic64_t prefetch;
};
struct ib_counters {
@@ -2252,33 +2244,266 @@
};
struct uverbs_attr_bundle;
+struct iw_cm_id;
+struct iw_cm_conn_param;
-struct ib_device {
- /* Do not access @dma_device directly from ULP nor from HW drivers. */
- struct device *dma_device;
+#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member) \
+ .size_##ib_struct = \
+ (sizeof(struct drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) + \
+ BUILD_BUG_ON_ZERO( \
+ !__same_type(((struct drv_struct *)NULL)->member, \
+ struct ib_struct)))
- char name[IB_DEVICE_NAME_MAX];
+#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp) \
+ ((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
- struct list_head event_handler_list;
- spinlock_t event_handler_lock;
+#define rdma_zalloc_drv_obj(ib_dev, ib_type) \
+ rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
- spinlock_t client_data_lock;
- struct list_head core_list;
- /* Access to the client_data_list is protected by the client_data_lock
- * spinlock and the lists_rwsem read-write semaphore */
- struct list_head client_data_list;
+#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
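
A hedged sketch of the embedding convention these macros encode: the hypothetical driver struct foo_pd places the core ib_pd at offset zero (enforced at build time by INIT_RDMA_OBJ_SIZE()), and the core then sizes the allocation through rdma_zalloc_drv_obj().

    struct foo_pd {
    	struct ib_pd ibpd;	/* core object must be the first member */
    	u32 pdn;		/* hypothetical driver-private state */
    };

    static struct ib_pd *foo_alloc_pd_obj(struct ib_device *ib_dev)
    {
    	/* ops.size_ib_pd is filled in by INIT_RDMA_OBJ_SIZE(ib_pd, foo_pd, ibpd)
    	 * in the driver's ib_device_ops (defined further down). */
    	return rdma_zalloc_drv_obj(ib_dev, ib_pd);
    }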
- struct ib_cache cache;
+struct rdma_user_mmap_entry {
+ struct kref ref;
+ struct ib_ucontext *ucontext;
+ unsigned long start_pgoff;
+ size_t npages;
+ bool driver_removed;
+};
+
+/* Return the offset (in bytes) the user should pass to libc's mmap() */
+static inline u64
+rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry)
+{
+ return (u64)entry->start_pgoff << PAGE_SHIFT;
+}
+
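A hedged sketch of the intended use: a driver inserts an entry into the ucontext's mmap_xa and reports this offset back to userspace, which passes it to mmap(). rdma_user_mmap_entry_insert() is the mainline helper assumed here (not part of this hunk), and foo_expose_db() is hypothetical.

    static int foo_expose_db(struct ib_ucontext *uctx,
    			     struct rdma_user_mmap_entry *entry, u64 *offset)
    {
    	int ret = rdma_user_mmap_entry_insert(uctx, entry, PAGE_SIZE);

    	if (ret)
    		return ret;
    	*offset = rdma_user_mmap_get_offset(entry);	/* returned to userspace */
    	return 0;
    }
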
+/**
+ * struct ib_device_ops - InfiniBand device operations
+ * This structure defines all the InfiniBand device operations, providers will
+ * need to define the supported operations, otherwise they will be set to null.
+ */
+struct ib_device_ops {
+ struct module *owner;
+ enum rdma_driver_id driver_id;
+ u32 uverbs_abi_ver;
+ unsigned int uverbs_no_driver_id_binding:1;
+
+ int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr);
+ int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+ void (*drain_rq)(struct ib_qp *qp);
+ void (*drain_sq)(struct ib_qp *qp);
+ int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+ int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
+ int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+ int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
+ int (*post_srq_recv)(struct ib_srq *srq,
+ const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+ int (*process_mad)(struct ib_device *device, int process_mad_flags,
+ u8 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
+ int (*query_device)(struct ib_device *device,
+ struct ib_device_attr *device_attr,
+ struct ib_udata *udata);
+ int (*modify_device)(struct ib_device *device, int device_modify_mask,
+ struct ib_device_modify *device_modify);
+ void (*get_dev_fw_str)(struct ib_device *device, char *str);
+ const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
+ int comp_vector);
+ int (*query_port)(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *port_attr);
+ int (*modify_port)(struct ib_device *device, u8 port_num,
+ int port_modify_mask,
+ struct ib_port_modify *port_modify);
/**
- * port_immutable is indexed by port number
+ * The following mandatory functions are used only at device
+ * registration. Keep functions such as these at the end of this
+ * structure to avoid cache line misses when accessing struct ib_device
+ * in fast paths.
*/
- struct ib_port_immutable *port_immutable;
+ int (*get_port_immutable)(struct ib_device *device, u8 port_num,
+ struct ib_port_immutable *immutable);
+ enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
+ u8 port_num);
+ /**
+ * When calling get_netdev, the HW vendor's driver should return the
+ * net device of device @device at port @port_num or NULL if such
+ * a net device doesn't exist. The vendor driver should call dev_hold
+ * on this net device. The HW vendor's device driver must guarantee
+ * that this function returns NULL before the net device has finished
+ * NETDEV_UNREGISTER state.
+ */
+ struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
+ /**
+ * rdma netdev operation
+ *
+ * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
+ * must return -EOPNOTSUPP if it doesn't support the specified type.
+ */
+ struct net_device *(*alloc_rdma_netdev)(
+ struct ib_device *device, u8 port_num, enum rdma_netdev_t type,
+ const char *name, unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
- int num_comp_vectors;
-
- struct ib_port_pkey_list *port_pkey_list;
-
- struct iw_cm_verbs *iwcm;
+ int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params);
+ /**
+ * query_gid should return the GID value for @device when the @port_num
+ * link layer is either IB or iWARP. It is a no-op if the @port_num port
+ * is the RoCE link layer.
+ */
+ int (*query_gid)(struct ib_device *device, u8 port_num, int index,
+ union ib_gid *gid);
+ /**
+ * When calling add_gid, the HW vendor's driver should add the gid
+ * of device of port at gid index available at @attr. Meta-info of
+ * that gid (for example, the network device related to this gid) is
+ * available at @attr. @context allows the HW vendor driver to store
+ * extra information together with a GID entry. The HW vendor driver may
+ * allocate memory to contain this information and store it in @context
+ * when a new GID entry is written to. Params are consistent until the
+ * next call of add_gid or delete_gid. The function should return 0 on
+ * success or error otherwise. The function could be called
+ * concurrently for different ports. This function is only called when
+ * roce_gid_table is used.
+ */
+ int (*add_gid)(const struct ib_gid_attr *attr, void **context);
+ /**
+ * When calling del_gid, the HW vendor's driver should delete the
+ * gid of device @device at gid index gid_index of port port_num
+ * available in @attr.
+ * Upon the deletion of a GID entry, the HW vendor must free any
+ * allocated memory. The caller will clear @context afterwards.
+ * This function is only called when roce_gid_table is used.
+ */
+ int (*del_gid)(const struct ib_gid_attr *attr, void **context);
+ int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
+ u16 *pkey);
+ int (*alloc_ucontext)(struct ib_ucontext *context,
+ struct ib_udata *udata);
+ void (*dealloc_ucontext)(struct ib_ucontext *context);
+ int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
+ /**
+ * This will be called once refcount of an entry in mmap_xa reaches
+ * zero. The type of the memory that was mapped may differ between
+ * entries and is opaque to the rdma_user_mmap interface.
+ * Therefore it needs to be implemented by the driver in mmap_free.
+ */
+ void (*mmap_free)(struct rdma_user_mmap_entry *entry);
+ void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
+ int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+ int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata);
+ int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+ int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+ int (*destroy_ah)(struct ib_ah *ah, u32 flags);
+ int (*create_srq)(struct ib_srq *srq,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+ int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+ int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+ int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
+ struct ib_qp *(*create_qp)(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+ int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata);
+ int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+ int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
+ int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
+ struct ib_udata *udata);
+ int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+ int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
+ int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+ struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
+ struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata);
+ int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_pd *pd, struct ib_udata *udata);
+ int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
+ struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+ struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd,
+ u32 max_num_data_sg,
+ u32 max_num_meta_sg);
+ int (*advise_mr)(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice, u32 flags,
+ struct ib_sge *sg_list, u32 num_sge,
+ struct uverbs_attr_bundle *attrs);
+ int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+ int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
+ struct ib_mr_status *mr_status);
+ int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata);
+ int (*dealloc_mw)(struct ib_mw *mw);
+ int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+ int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+ int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
+ int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
+ struct ib_flow *(*create_flow)(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ struct ib_udata *udata);
+ int (*destroy_flow)(struct ib_flow *flow_id);
+ struct ib_flow_action *(*create_flow_action_esp)(
+ struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*destroy_flow_action)(struct ib_flow_action *action);
+ int (*modify_flow_action_esp)(
+ struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+ int state);
+ int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_info *ivf);
+ int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_stats *stats);
+ int (*get_vf_guid)(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid);
+ int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+ int type);
+ struct ib_wq *(*create_wq)(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+ int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
+ int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+ int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+ int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
+ struct ib_dm *(*alloc_dm)(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
+ struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*create_counters)(struct ib_counters *counters,
+ struct uverbs_attr_bundle *attrs);
+ int (*destroy_counters)(struct ib_counters *counters);
+ int (*read_counters)(struct ib_counters *counters,
+ struct ib_counters_read_attr *counters_read_attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*map_mr_sg_pi)(struct ib_mr *mr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset);
/**
* alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
@@ -2286,8 +2511,8 @@
* core when the device is removed. A lifespan of -1 in the return
* struct tells the core to set a default lifespan.
*/
- struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
- u8 port_num);
+ struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
+ u8 port_num);
/**
* get_hw_stats - Fill in the counter value(s) in the stats struct.
* @index - The index in the value array we wish to have updated, or
@@ -2300,261 +2525,145 @@
 * Drivers are allowed to update all counters in lieu of just the
* one given in index at their option
*/
- int (*get_hw_stats)(struct ib_device *device,
- struct rdma_hw_stats *stats,
- u8 port, int index);
- int (*query_device)(struct ib_device *device,
- struct ib_device_attr *device_attr,
- struct ib_udata *udata);
- int (*query_port)(struct ib_device *device,
- u8 port_num,
- struct ib_port_attr *port_attr);
- enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
- u8 port_num);
- /* When calling get_netdev, the HW vendor's driver should return the
- * net device of device @device at port @port_num or NULL if such
- * a net device doesn't exist. The vendor driver should call dev_hold
- * on this net device. The HW vendor's device driver must guarantee
- * that this function returns NULL before the net device has finished
- * NETDEV_UNREGISTER state.
+ int (*get_hw_stats)(struct ib_device *device,
+ struct rdma_hw_stats *stats, u8 port, int index);
+ /*
+ * This function is called once for each port when an ib device is
+ * registered.
*/
- struct net_device *(*get_netdev)(struct ib_device *device,
- u8 port_num);
- /* query_gid should be return GID value for @device, when @port_num
- * link layer is either IB or iWarp. It is no-op if @port_num port
- * is RoCE link layer.
+ int (*init_port)(struct ib_device *device, u8 port_num,
+ struct kobject *port_sysfs);
+ /**
+ * Allows rdma drivers to add their own restrack attributes.
*/
- int (*query_gid)(struct ib_device *device,
- u8 port_num, int index,
- union ib_gid *gid);
- /* When calling add_gid, the HW vendor's driver should add the gid
- * of device of port at gid index available at @attr. Meta-info of
- * that gid (for example, the network device related to this gid) is
- * available at @attr. @context allows the HW vendor driver to store
- * extra information together with a GID entry. The HW vendor driver may
- * allocate memory to contain this information and store it in @context
- * when a new GID entry is written to. Params are consistent until the
- * next call of add_gid or delete_gid. The function should return 0 on
- * success or error otherwise. The function could be called
- * concurrently for different ports. This function is only called when
- * roce_gid_table is used.
+ int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
+ int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr);
+ int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq);
+ int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq);
+ int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp);
+ int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp);
+ int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id);
+
+ /* Device lifecycle callbacks */
+ /*
+ * Called after the device becomes registered, before clients are
+ * attached
*/
- int (*add_gid)(const struct ib_gid_attr *attr,
- void **context);
- /* When calling del_gid, the HW vendor's driver should delete the
- * gid of device @device at gid index gid_index of port port_num
- * available in @attr.
- * Upon the deletion of a GID entry, the HW vendor must free any
- * allocated memory. The caller will clear @context afterwards.
- * This function is only called when roce_gid_table is used.
+ int (*enable_driver)(struct ib_device *dev);
+ /*
+ * This is called as part of ib_dealloc_device().
*/
- int (*del_gid)(const struct ib_gid_attr *attr,
- void **context);
- int (*query_pkey)(struct ib_device *device,
- u8 port_num, u16 index, u16 *pkey);
- int (*modify_device)(struct ib_device *device,
- int device_modify_mask,
- struct ib_device_modify *device_modify);
- int (*modify_port)(struct ib_device *device,
- u8 port_num, int port_modify_mask,
- struct ib_port_modify *port_modify);
- struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device,
- struct ib_udata *udata);
- int (*dealloc_ucontext)(struct ib_ucontext *context);
- int (*mmap)(struct ib_ucontext *context,
- struct vm_area_struct *vma);
- struct ib_pd * (*alloc_pd)(struct ib_device *device,
- struct ib_ucontext *context,
- struct ib_udata *udata);
- int (*dealloc_pd)(struct ib_pd *pd);
- struct ib_ah * (*create_ah)(struct ib_pd *pd,
- struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
- int (*modify_ah)(struct ib_ah *ah,
- struct rdma_ah_attr *ah_attr);
- int (*query_ah)(struct ib_ah *ah,
- struct rdma_ah_attr *ah_attr);
- int (*destroy_ah)(struct ib_ah *ah);
- struct ib_srq * (*create_srq)(struct ib_pd *pd,
- struct ib_srq_init_attr *srq_init_attr,
- struct ib_udata *udata);
- int (*modify_srq)(struct ib_srq *srq,
- struct ib_srq_attr *srq_attr,
- enum ib_srq_attr_mask srq_attr_mask,
- struct ib_udata *udata);
- int (*query_srq)(struct ib_srq *srq,
- struct ib_srq_attr *srq_attr);
- int (*destroy_srq)(struct ib_srq *srq);
- int (*post_srq_recv)(struct ib_srq *srq,
- const struct ib_recv_wr *recv_wr,
- const struct ib_recv_wr **bad_recv_wr);
- struct ib_qp * (*create_qp)(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata);
- int (*modify_qp)(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_udata *udata);
- int (*query_qp)(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr);
- int (*destroy_qp)(struct ib_qp *qp);
- int (*post_send)(struct ib_qp *qp,
- const struct ib_send_wr *send_wr,
- const struct ib_send_wr **bad_send_wr);
- int (*post_recv)(struct ib_qp *qp,
- const struct ib_recv_wr *recv_wr,
- const struct ib_recv_wr **bad_recv_wr);
- struct ib_cq * (*create_cq)(struct ib_device *device,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata);
- int (*modify_cq)(struct ib_cq *cq, u16 cq_count,
- u16 cq_period);
- int (*destroy_cq)(struct ib_cq *cq);
- int (*resize_cq)(struct ib_cq *cq, int cqe,
- struct ib_udata *udata);
- int (*poll_cq)(struct ib_cq *cq, int num_entries,
- struct ib_wc *wc);
- int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
- int (*req_notify_cq)(struct ib_cq *cq,
- enum ib_cq_notify_flags flags);
- int (*req_ncomp_notif)(struct ib_cq *cq,
- int wc_cnt);
- struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
- int mr_access_flags);
- struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
- u64 start, u64 length,
- u64 virt_addr,
- int mr_access_flags,
- struct ib_udata *udata);
- int (*rereg_user_mr)(struct ib_mr *mr,
- int flags,
- u64 start, u64 length,
- u64 virt_addr,
- int mr_access_flags,
- struct ib_pd *pd,
- struct ib_udata *udata);
- int (*dereg_mr)(struct ib_mr *mr);
- struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg);
- int (*map_mr_sg)(struct ib_mr *mr,
- struct scatterlist *sg,
- int sg_nents,
- unsigned int *sg_offset);
- struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
- enum ib_mw_type type,
- struct ib_udata *udata);
- int (*dealloc_mw)(struct ib_mw *mw);
- struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
- int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
- int (*map_phys_fmr)(struct ib_fmr *fmr,
- u64 *page_list, int list_len,
- u64 iova);
- int (*unmap_fmr)(struct list_head *fmr_list);
- int (*dealloc_fmr)(struct ib_fmr *fmr);
- int (*attach_mcast)(struct ib_qp *qp,
- union ib_gid *gid,
- u16 lid);
- int (*detach_mcast)(struct ib_qp *qp,
- union ib_gid *gid,
- u16 lid);
- int (*process_mad)(struct ib_device *device,
- int process_mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad,
- size_t in_mad_size,
- struct ib_mad_hdr *out_mad,
- size_t *out_mad_size,
- u16 *out_mad_pkey_index);
- struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device,
- struct ib_ucontext *ucontext,
- struct ib_udata *udata);
- int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
- struct ib_flow * (*create_flow)(struct ib_qp *qp,
- struct ib_flow_attr
- *flow_attr,
- int domain,
- struct ib_udata *udata);
- int (*destroy_flow)(struct ib_flow *flow_id);
- int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
- struct ib_mr_status *mr_status);
- void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
- void (*drain_rq)(struct ib_qp *qp);
- void (*drain_sq)(struct ib_qp *qp);
- int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
- int state);
- int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
- struct ifla_vf_info *ivf);
- int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
- struct ifla_vf_stats *stats);
- int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
- int type);
- struct ib_wq * (*create_wq)(struct ib_pd *pd,
- struct ib_wq_init_attr *init_attr,
- struct ib_udata *udata);
- int (*destroy_wq)(struct ib_wq *wq);
- int (*modify_wq)(struct ib_wq *wq,
- struct ib_wq_attr *attr,
- u32 wq_attr_mask,
- struct ib_udata *udata);
- struct ib_rwq_ind_table * (*create_rwq_ind_table)(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
- int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
- struct ib_flow_action * (*create_flow_action_esp)(struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
- int (*destroy_flow_action)(struct ib_flow_action *action);
- int (*modify_flow_action_esp)(struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs);
- struct ib_dm * (*alloc_dm)(struct ib_device *device,
- struct ib_ucontext *context,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs);
- int (*dealloc_dm)(struct ib_dm *dm);
- struct ib_mr * (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
- struct ib_dm_mr_attr *attr,
- struct uverbs_attr_bundle *attrs);
- struct ib_counters * (*create_counters)(struct ib_device *device,
- struct uverbs_attr_bundle *attrs);
- int (*destroy_counters)(struct ib_counters *counters);
- int (*read_counters)(struct ib_counters *counters,
- struct ib_counters_read_attr *counters_read_attr,
- struct uverbs_attr_bundle *attrs);
+ void (*dealloc_driver)(struct ib_device *dev);
+
+ /* iWarp CM callbacks */
+ void (*iw_add_ref)(struct ib_qp *qp);
+ void (*iw_rem_ref)(struct ib_qp *qp);
+ struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn);
+ int (*iw_connect)(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *conn_param);
+ int (*iw_accept)(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *conn_param);
+ int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata,
+ u8 pdata_len);
+ int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
+ int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
+ /**
+ * counter_bind_qp - Bind a QP to a counter.
+ * @counter - The counter to be bound. If counter->id is zero then
+ * the driver needs to allocate a new counter and set counter->id
+ */
+ int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
+ /**
+ * counter_unbind_qp - Unbind the qp from the dynamically-allocated
+ * counter and bind it onto the default one
+ */
+ int (*counter_unbind_qp)(struct ib_qp *qp);
+ /**
+ * counter_dealloc - De-allocate the hw counter
+ */
+ int (*counter_dealloc)(struct rdma_counter *counter);
+ /**
+ * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in
+ * the driver initialized data.
+ */
+ struct rdma_hw_stats *(*counter_alloc_stats)(
+ struct rdma_counter *counter);
+ /**
+ * counter_update_stats - Query the stats value of this counter
+ */
+ int (*counter_update_stats)(struct rdma_counter *counter);
/**
- * rdma netdev operation
- *
- * Driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it
- * doesn't support the specified rdma netdev type.
+ * Allows rdma drivers to add their own restrack attributes
+ * dumped via 'rdma stat' iproute2 command.
*/
- struct net_device *(*alloc_rdma_netdev)(
- struct ib_device *device,
- u8 port_num,
- enum rdma_netdev_t type,
- const char *name,
- unsigned char name_assign_type,
- void (*setup)(struct net_device *));
+ int (*fill_stat_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
- struct module *owner;
- struct device dev;
- struct kobject *ports_parent;
- struct list_head port_list;
+ /* query driver for its ucontext properties */
+ int (*query_ucontext)(struct ib_ucontext *context,
+ struct uverbs_attr_bundle *attrs);
- enum {
- IB_DEV_UNINITIALIZED,
- IB_DEV_REGISTERED,
- IB_DEV_UNREGISTERED
- } reg_state;
+ DECLARE_RDMA_OBJ_SIZE(ib_ah);
+ DECLARE_RDMA_OBJ_SIZE(ib_counters);
+ DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_mw);
+ DECLARE_RDMA_OBJ_SIZE(ib_pd);
+ DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table);
+ DECLARE_RDMA_OBJ_SIZE(ib_srq);
+ DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
+ DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
+};
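
For context, a hedged sketch of a provider wiring this structure; all foo_* callbacks and objects are hypothetical, and RDMA_DRIVER_UNKNOWN is only a placeholder. In mainline the table is normally installed with ib_set_device_ops() before registration, but that helper is not part of this hunk.

    static const struct ib_device_ops foo_dev_ops = {
    	.owner = THIS_MODULE,
    	.driver_id = RDMA_DRIVER_UNKNOWN,	/* placeholder */
    	.uverbs_abi_ver = 1,

    	.alloc_pd = foo_alloc_pd,
    	.dealloc_pd = foo_dealloc_pd,
    	.create_cq = foo_create_cq,
    	.destroy_cq = foo_destroy_cq,
    	.query_device = foo_query_device,
    	.query_port = foo_query_port,

    	/* let the core allocate driver objects of the right size */
    	INIT_RDMA_OBJ_SIZE(ib_pd, foo_pd, ibpd),
    	INIT_RDMA_OBJ_SIZE(ib_cq, foo_cq, ibcq),
    };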
- int uverbs_abi_ver;
+struct ib_core_device {
+ /* device must be the first element in the structure until a
+ * union of ib_core_device and device exists in ib_device.
+ */
+ struct device dev;
+ possible_net_t rdma_net;
+ struct kobject *ports_kobj;
+ struct list_head port_list;
+ struct ib_device *owner; /* reach back to owner ib_device */
+};
+
+struct rdma_restrack_root;
+struct ib_device {
+ /* Do not access @dma_device directly from ULP nor from HW drivers. */
+ struct device *dma_device;
+ struct ib_device_ops ops;
+ char name[IB_DEVICE_NAME_MAX];
+ struct rcu_head rcu_head;
+
+ struct list_head event_handler_list;
+ /* Protects event_handler_list */
+ struct rw_semaphore event_handler_rwsem;
+
+ /* Protects QP's event_handler calls and open_qp list */
+ spinlock_t qp_open_list_lock;
+
+ struct rw_semaphore client_data_rwsem;
+ struct xarray client_data;
+ struct mutex unregistration_lock;
+
+ /* Synchronize GID, Pkey cache entries, subnet prefix, LMC */
+ rwlock_t cache_lock;
+ /**
+ * port_data is indexed by port number
+ */
+ struct ib_port_data *port_data;
+
+ int num_comp_vectors;
+
+ union {
+ struct device dev;
+ struct ib_core_device coredev;
+ };
+
+ /* First group for device attributes,
+ * Second group for driver provided attributes (optional).
+ * It is a NULL-terminated array.
+ */
+ const struct attribute_group *groups[3];
+
u64 uverbs_cmd_mask;
u64 uverbs_ex_cmd_mask;
@@ -2562,6 +2671,10 @@
__be64 node_guid;
u32 local_dma_lkey;
u16 is_switch:1;
+ /* Indicates kernel verbs support, should not be used in drivers */
+ u16 kverbs_provider:1;
+ /* CQ adaptive moderation (RDMA DIM) */
+ u16 use_cq_dim:1;
u8 node_type;
u8 phys_port_cnt;
struct ib_device_attr attrs;
@@ -2573,30 +2686,44 @@
#endif
u32 index;
+
+ spinlock_t cq_pools_lock;
+ struct list_head cq_pools[IB_POLL_LAST_POOL_TYPE + 1];
+
+ struct rdma_restrack_root *res;
+
+ const struct uapi_definition *driver_def;
+
/*
- * Implementation details of the RDMA core, don't use in drivers
+ * Positive refcount indicates that the device is currently
+ * registered and cannot be unregistered.
*/
- struct rdma_restrack_root res;
+ refcount_t refcount;
+ struct completion unreg_completion;
+ struct work_struct unregistration_work;
- /**
- * The following mandatory functions are used only at device
- * registration. Keep functions such as these at the end of this
- * structure to avoid cache line misses when accessing struct ib_device
- * in fast paths.
- */
- int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *);
- void (*get_dev_fw_str)(struct ib_device *, char *str);
- const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
- int comp_vector);
+ const struct rdma_link_ops *link_ops;
- const struct uverbs_object_tree_def *const *driver_specs;
- enum rdma_driver_id driver_id;
+ /* Protects compat_devs xarray modifications */
+ struct mutex compat_devs_mutex;
+ /* Maintains compat devices for each net namespace */
+ struct xarray compat_devs;
+
+ /* Used by iWarp CM */
+ char iw_ifname[IFNAMSIZ];
+ u32 iw_driver_flags;
+ u32 lag_flags;
};
+struct ib_client_nl_info;
struct ib_client {
- char *name;
- void (*add) (struct ib_device *);
+ const char *name;
+ int (*add)(struct ib_device *ibdev);
void (*remove)(struct ib_device *, void *client_data);
+ void (*rename)(struct ib_device *dev, void *client_data);
+ int (*get_nl_info)(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res);
+ int (*get_global_nl_info)(struct ib_client_nl_info *res);
/* Returns the net_dev belonging to this ib_client and matching the
* given parameters.
@@ -2620,25 +2747,123 @@
const union ib_gid *gid,
const struct sockaddr *addr,
void *client_data);
- struct list_head list;
+
+ refcount_t uses;
+ struct completion uses_zero;
+ u32 client_id;
+
+ /* kverbs are not required by the client */
+ u8 no_kverbs_req:1;
};
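/*
 * Example usage (minimal sketch; the "example" client name and the handler
 * bodies are hypothetical): a kernel ULP registers an ib_client and is
 * called back for every IB device that appears or goes away.
 */
static int example_add_one(struct ib_device *ibdev)
{
	/* allocate per-device state and publish it with ib_set_client_data() */
	return 0;
}

static void example_remove_one(struct ib_device *ibdev, void *client_data)
{
	/* client_data is what example_add_one() stored for this device */
}

static struct ib_client example_client = {
	.name	= "example",
	.add	= example_add_one,
	.remove	= example_remove_one,
};
/* module init/exit: ib_register_client(&example_client) /
 * ib_unregister_client(&example_client)
 */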
-struct ib_device *ib_alloc_device(size_t size);
+/*
+ * IB block DMA iterator
+ *
+ * Iterates the DMA-mapped SGL in contiguous memory blocks aligned
+ * to a HW supported page size.
+ */
+struct ib_block_iter {
+ /* internal states */
+ struct scatterlist *__sg; /* sg holding the current aligned block */
+ dma_addr_t __dma_addr; /* unaligned DMA address of this block */
+ unsigned int __sg_nents; /* number of SG entries */
+ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */
+ unsigned int __pg_bit; /* alignment of current block */
+};
+
+struct ib_device *_ib_alloc_device(size_t size);
+#define ib_alloc_device(drv_struct, member) \
+ container_of(_ib_alloc_device(sizeof(struct drv_struct) + \
+ BUILD_BUG_ON_ZERO(offsetof( \
+ struct drv_struct, member))), \
+ struct drv_struct, member)
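/*
 * Example usage (minimal sketch; "struct example_dev" is hypothetical): a HW
 * driver embeds struct ib_device as the first member of its own device
 * structure (the BUILD_BUG_ON_ZERO() above enforces offset zero) and lets
 * ib_alloc_device() size and type the allocation.
 */
struct example_dev {
	struct ib_device ibdev;		/* must be the first member */
	/* driver private state follows */
};
/*
 *	struct example_dev *edev = ib_alloc_device(example_dev, ibdev);
 *	...
 *	ib_dealloc_device(&edev->ibdev);
 */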
+
void ib_dealloc_device(struct ib_device *device);
void ib_get_device_fw_str(struct ib_device *device, char *str);
-int ib_register_device(struct ib_device *device,
- int (*port_callback)(struct ib_device *,
- u8, struct kobject *));
+int ib_register_device(struct ib_device *device, const char *name,
+ struct device *dma_device);
void ib_unregister_device(struct ib_device *device);
+void ib_unregister_driver(enum rdma_driver_id driver_id);
+void ib_unregister_device_and_put(struct ib_device *device);
+void ib_unregister_device_queued(struct ib_device *ib_dev);
int ib_register_client (struct ib_client *client);
void ib_unregister_client(struct ib_client *client);
-void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
+void __rdma_block_iter_start(struct ib_block_iter *biter,
+ struct scatterlist *sglist,
+ unsigned int nents,
+ unsigned long pgsz);
+bool __rdma_block_iter_next(struct ib_block_iter *biter);
+
+/**
+ * rdma_block_iter_dma_address - get the aligned dma address of the current
+ * block held by the block iterator.
+ * @biter: block iterator holding the memory block
+ */
+static inline dma_addr_t
+rdma_block_iter_dma_address(struct ib_block_iter *biter)
+{
+ return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1);
+}
+
+/**
+ * rdma_for_each_block - iterate over contiguous memory blocks of the sg list
+ * @sglist: sglist to iterate over
+ * @biter: block iterator holding the memory block
+ * @nents: maximum number of sg entries to iterate over
+ * @pgsz: best HW supported page size to use
+ *
+ * Callers may use rdma_block_iter_dma_address() to get each
+ * block's aligned DMA address.
+ */
+#define rdma_for_each_block(sglist, biter, nents, pgsz) \
+ for (__rdma_block_iter_start(biter, sglist, nents, \
+ pgsz); \
+ __rdma_block_iter_next(biter);)
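/*
 * Example usage (minimal sketch; the SG list and page size are assumed to be
 * prepared by the caller): walk a DMA-mapped SG list in HW-page-sized blocks
 * and record each aligned block address, e.g. to build a HW page list.
 */
static inline void example_fill_page_array(struct scatterlist *sgl,
					   unsigned int nents,
					   unsigned long pgsz, u64 *pas)
{
	struct ib_block_iter biter;

	rdma_for_each_block(sgl, &biter, nents, pgsz)
		*pas++ = rdma_block_iter_dma_address(&biter);
}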
+
+/**
+ * ib_get_client_data - Get IB client context
+ * @device: Device to get context for
+ * @client: Client to get context for
+ *
+ * ib_get_client_data() returns the client context data set with
+ * ib_set_client_data(). This can only be called while the client is
+ * registered to the device; once the ib_client remove() callback returns,
+ * it can no longer be called.
+ */
+static inline void *ib_get_client_data(struct ib_device *device,
+ struct ib_client *client)
+{
+ return xa_load(&device->client_data, client->client_id);
+}
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data);
+void ib_set_device_ops(struct ib_device *device,
+ const struct ib_device_ops *ops);
+
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size, pgprot_t prot,
+ struct rdma_user_mmap_entry *entry);
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length);
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 min_pgoff,
+ u32 max_pgoff);
+
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
+ unsigned long pgoff);
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma);
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry);
+
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry);
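/*
 * Example flow (minimal sketch; the pfn below is a placeholder and error
 * handling is trimmed): a driver's mmap verb looks up the entry user space
 * asked for by pgoff, maps it, and drops the reference again.
 */
static inline int example_mmap(struct ib_ucontext *ucontext,
			       struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *entry;
	unsigned long pfn = 0;	/* driver-specific BAR pfn, placeholder */
	int ret;

	entry = rdma_user_mmap_entry_get(ucontext, vma);
	if (!entry)
		return -EINVAL;

	ret = rdma_user_mmap_io(ucontext, vma, pfn, vma->vm_end - vma->vm_start,
				pgprot_noncached(vma->vm_page_prot), entry);
	rdma_user_mmap_entry_put(entry);
	return ret;
}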
static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
{
@@ -2723,7 +2948,6 @@
* @next_state: Next QP state
* @type: QP type
* @mask: Mask of supplied QP attributes
- * @ll : link layer of port
*
* This function is a helper function that a low-level driver's
* modify_qp method can use to validate the consumer's input. It
@@ -2732,12 +2956,11 @@
* and that the attribute mask supplied is allowed for the transition.
*/
bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll);
+ enum ib_qp_type type, enum ib_qp_attr_mask mask);
void ib_register_event_handler(struct ib_event_handler *event_handler);
void ib_unregister_event_handler(struct ib_event_handler *event_handler);
-void ib_dispatch_event(struct ib_event *event);
+void ib_dispatch_event(const struct ib_event *event);
int ib_query_port(struct ib_device *device,
u8 port_num, struct ib_port_attr *port_attr);
@@ -2773,6 +2996,16 @@
}
/**
+ * rdma_for_each_port - Iterate over all valid port numbers of the IB device
+ * @device: The struct ib_device * to iterate over
+ * @iter: The unsigned int to store the port number
+ */
+#define rdma_for_each_port(device, iter) \
+ for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type( \
+ unsigned int, iter))); \
+ iter <= rdma_end_port(device); (iter)++)
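/*
 * Example usage (minimal sketch): count the RoCE ports of a device;
 * rdma_protocol_roce() is declared further below in this header.
 */
static inline unsigned int example_count_roce_ports(struct ib_device *dev)
{
	unsigned int port, cnt = 0;

	rdma_for_each_port(dev, port)
		if (rdma_protocol_roce(dev, port))
			cnt++;
	return cnt;
}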
+
+/**
* rdma_end_port - Return the last valid port number for the device
* specified
*
@@ -2795,34 +3028,38 @@
static inline bool rdma_is_grh_required(const struct ib_device *device,
u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags &
- RDMA_CORE_PORT_IB_GRH_REQUIRED;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_PORT_IB_GRH_REQUIRED;
}
static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_IB;
}
static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags &
- (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+ return device->port_data[port_num].immutable.core_cap_flags &
+ (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
}
static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
}
static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_ROCE;
}
static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_IWARP;
}
static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
@@ -2833,12 +3070,14 @@
static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_RAW_PACKET;
}
static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_USNIC;
}
/**
@@ -2855,7 +3094,8 @@
*/
static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_MAD;
}
/**
@@ -2879,8 +3119,8 @@
*/
static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
{
- return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD)
- == RDMA_CORE_CAP_OPA_MAD;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_OPA_MAD;
}
/**
@@ -2905,7 +3145,8 @@
*/
static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_SMI;
}
/**
@@ -2925,7 +3166,8 @@
*/
static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_CM;
}
/**
@@ -2942,7 +3184,8 @@
*/
static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IW_CM;
}
/**
@@ -2962,7 +3205,8 @@
*/
static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_IB_SA;
}
/**
@@ -3002,7 +3246,8 @@
*/
static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_AF_IB;
}
/**
@@ -3023,7 +3268,8 @@
*/
static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH;
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_ETH_AH;
}
/**
@@ -3037,7 +3283,7 @@
*/
static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
{
- return (device->port_immutable[port_num].core_cap_flags &
+ return (device->port_data[port_num].immutable.core_cap_flags &
RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
}
@@ -3055,7 +3301,7 @@
*/
static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].max_mad_size;
+ return device->port_data[port_num].immutable.max_mad_size;
}
/**
@@ -3075,7 +3321,7 @@
u8 port_num)
{
return rdma_protocol_roce(device, port_num) &&
- device->add_gid && device->del_gid;
+ device->ops.add_gid && device->ops.del_gid;
}
/*
@@ -3090,12 +3336,64 @@
return rdma_protocol_iwarp(dev, port_num);
}
+/**
+ * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
+ * @device: Device
+ * @port_num: 1 based Port number
+ *
+ * Return true if the port is an Intel OPA port, false if not.
+ */
+static inline bool rdma_core_cap_opa_port(struct ib_device *device,
+ u32 port_num)
+{
+ return (device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA;
+}
+
+/**
+ * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
+ * @device: Device
+ * @port: Port number
+ * @mtu: enum value of MTU
+ *
+ * Return the MTU size supported by the port as an integer value. Returns
+ * -1 if the mtu enum value is not supported.
+ */
+static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port,
+ int mtu)
+{
+ if (rdma_core_cap_opa_port(device, port))
+ return opa_mtu_enum_to_int((enum opa_mtu)mtu);
+ else
+ return ib_mtu_enum_to_int((enum ib_mtu)mtu);
+}
+
+/**
+ * rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
+ * @device: Device
+ * @port: Port number
+ * @attr: port attribute
+ *
+ * Return the MTU size supported by the port as an integer value.
+ */
+static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port,
+ struct ib_port_attr *attr)
+{
+ if (rdma_core_cap_opa_port(device, port))
+ return attr->phys_mtu;
+ else
+ return ib_mtu_enum_to_int(attr->max_mtu);
+}
+
int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
int state);
int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
struct ifla_vf_info *info);
int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
struct ifla_vf_stats *stats);
+int ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid);
int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
int type);
@@ -3131,19 +3429,41 @@
struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
const char *caller);
+
#define ib_alloc_pd(device, flags) \
__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
-void ib_dealloc_pd(struct ib_pd *pd);
+
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+
+/**
+ * ib_dealloc_pd - Deallocate kernel PD
+ * @pd: The protection domain
+ *
+ * NOTE: for user PD use ib_dealloc_pd_user with valid udata!
+ */
+static inline void ib_dealloc_pd(struct ib_pd *pd)
+{
+ int ret = ib_dealloc_pd_user(pd, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail");
+}
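/*
 * Example usage (minimal sketch; error handling shortened): a kernel ULP
 * allocates a PD once per device and releases it on teardown.
 */
static inline int example_setup_pd(struct ib_device *dev, struct ib_pd **pd_out)
{
	struct ib_pd *pd = ib_alloc_pd(dev, 0);

	if (IS_ERR(pd))
		return PTR_ERR(pd);

	/* ... create QPs/CQs/MRs under this pd ... */
	*pd_out = pd;		/* pair with ib_dealloc_pd(pd) on teardown */
	return 0;
}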
+
+enum rdma_create_ah_flags {
+ /* In a sleepable context */
+ RDMA_CREATE_AH_SLEEPABLE = BIT(0),
+};
/**
* rdma_create_ah - Creates an address handle for the given address vector.
* @pd: The protection domain associated with the address handle.
* @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
*
* The address handle is used to reference a local or global destination
* in all UD QP post sends.
*/
-struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr);
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
+ u32 flags);
/**
* rdma_create_user_ah - Creates an address handle for the given address vector.
@@ -3233,27 +3553,45 @@
*/
int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-/**
- * rdma_destroy_ah - Destroys an address handle.
- * @ah: The address handle to destroy.
- */
-int rdma_destroy_ah(struct ib_ah *ah);
+enum rdma_destroy_ah_flags {
+ /* In a sleepable context */
+ RDMA_DESTROY_AH_SLEEPABLE = BIT(0),
+};
/**
- * ib_create_srq - Creates a SRQ associated with the specified protection
- * domain.
- * @pd: The protection domain associated with the SRQ.
- * @srq_init_attr: A list of initial attributes required to create the
- * SRQ. If SRQ creation succeeds, then the attributes are updated to
- * the actual capabilities of the created SRQ.
- *
- * srq_attr->max_wr and srq_attr->max_sge are read the determine the
- * requested size of the SRQ, and set to the actual values allocated
- * on return. If ib_create_srq() succeeds, then max_wr and max_sge
- * will always be at least as large as the requested values.
+ * rdma_destroy_ah_user - Destroys an address handle.
+ * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ * @udata: Valid user data or NULL for kernel objects
*/
-struct ib_srq *ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *srq_init_attr);
+int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
+
+/**
+ * rdma_destroy_ah - Destroys a kernel address handle.
+ * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ *
+ * NOTE: for user ah use rdma_destroy_ah_user with valid udata!
+ */
+static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ int ret = rdma_destroy_ah_user(ah, flags, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail");
+}
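/*
 * Example usage (minimal sketch; the ah_attr is assumed to be filled in by
 * the caller): create and destroy a kernel AH from a sleepable context.
 */
static inline void example_ah_cycle(struct ib_pd *pd,
				    struct rdma_ah_attr *ah_attr)
{
	struct ib_ah *ah = rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE);

	if (IS_ERR(ah))
		return;
	/* ... reference ah in UD post sends ... */
	rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
}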
+
+struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_usrq_object *uobject,
+ struct ib_udata *udata);
+static inline struct ib_srq *
+ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr)
+{
+ if (!pd->device->ops.create_srq)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return ib_create_srq_user(pd, srq_init_attr, NULL, NULL);
+}
/**
* ib_modify_srq - Modifies the attributes for the specified SRQ.
@@ -3281,10 +3619,24 @@
struct ib_srq_attr *srq_attr);
/**
- * ib_destroy_srq - Destroys the specified SRQ.
+ * ib_destroy_srq_user - Destroys the specified SRQ.
* @srq: The SRQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
*/
-int ib_destroy_srq(struct ib_srq *srq);
+int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_srq - Destroys the specified kernel SRQ.
+ * @srq: The SRQ to destroy.
+ *
+ * NOTE: for user srq use ib_destroy_srq_user with valid udata!
+ */
+static inline void ib_destroy_srq(struct ib_srq *srq)
+{
+ int ret = ib_destroy_srq_user(srq, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail");
+}
/**
* ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
@@ -3299,17 +3651,10 @@
{
const struct ib_recv_wr *dummy;
- return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
+ return srq->device->ops.post_srq_recv(srq, recv_wr,
+ bad_recv_wr ? : &dummy);
}
-/**
- * ib_create_qp - Creates a QP associated with the specified protection
- * domain.
- * @pd: The protection domain associated with the QP.
- * @qp_init_attr: A list of initial attributes required to create the
- * QP. If QP creation succeeds, then the attributes are updated to
- * the actual capabilities of the created QP.
- */
struct ib_qp *ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr);
@@ -3361,8 +3706,20 @@
/**
- * ib_destroy_qp - Destroys the specified QP.
+ * ib_destroy_qp_user - Destroys the specified QP.
* @qp: The QP to destroy.
+ * @udata: Valid udata or NULL for kernel objects
*/
-int ib_destroy_qp(struct ib_qp *qp);
+int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata);
+
+/**
+ * ib_destroy_qp - Destroys the specified kernel QP.
+ * @qp: The QP to destroy.
+ *
+ * NOTE: for user qp use ib_destroy_qp_user with valid udata!
+ */
+static inline int ib_destroy_qp(struct ib_qp *qp)
+{
+ return ib_destroy_qp_user(qp, NULL);
+}
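/*
 * Example usage (minimal sketch; the queue depths are placeholders): create
 * an RC QP on a PD, to be paired with ib_destroy_qp() on teardown.
 */
static inline struct ib_qp *example_create_rc_qp(struct ib_pd *pd,
						 struct ib_cq *cq)
{
	struct ib_qp_init_attr attr = {
		.qp_type  = IB_QPT_RC,
		.send_cq  = cq,
		.recv_cq  = cq,
		.cap = {
			.max_send_wr  = 16,
			.max_recv_wr  = 16,
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
	};

	return ib_create_qp(pd, &attr);
}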
/**
* ib_open_qp - Obtain a reference to an existing sharable QP.
@@ -3402,7 +3759,7 @@
{
const struct ib_send_wr *dummy;
- return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
+ return qp->device->ops.post_send(qp, send_wr, bad_send_wr ? : &dummy);
}
/**
@@ -3419,14 +3776,38 @@
{
const struct ib_recv_wr *dummy;
- return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
+ return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
}
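/*
 * Example usage (minimal sketch; the SGE is assumed to describe an already
 * DMA-mapped buffer): post a single receive work request.
 */
static inline int example_post_one_recv(struct ib_qp *qp, struct ib_sge *sge,
					u64 wr_id)
{
	struct ib_recv_wr wr = {
		.wr_id	 = wr_id,
		.sg_list = sge,
		.num_sge = 1,
	};
	const struct ib_recv_wr *bad_wr;

	return ib_post_recv(qp, &wr, &bad_wr);
}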
-struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx, const char *caller);
-#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \
- __ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller);
+static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector,
+ enum ib_poll_context poll_ctx)
+{
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ KBUILD_MODNAME);
+}
+
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+ int nr_cqe, enum ib_poll_context poll_ctx,
+ const char *caller);
+
+/**
+ * ib_alloc_cq_any - Allocate a kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+ void *private, int nr_cqe,
+ enum ib_poll_context poll_ctx)
+{
+ return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+ KBUILD_MODNAME);
+}
void ib_free_cq(struct ib_cq *cq);
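/*
 * Example usage (minimal sketch; the CQE count is a placeholder): allocate a
 * kernel CQ without binding it to a specific completion vector; pair with
 * ib_free_cq() on teardown.
 */
static inline struct ib_cq *example_alloc_cq(struct ib_device *dev, void *priv)
{
	return ib_alloc_cq_any(dev, priv, 256, IB_POLL_SOFTIRQ);
}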
int ib_process_cq_direct(struct ib_cq *cq, int budget);
@@ -3472,10 +3853,24 @@
int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period);
/**
- * ib_destroy_cq - Destroys the specified CQ.
+ * ib_destroy_cq_user - Destroys the specified CQ.
* @cq: The CQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
*/
-int ib_destroy_cq(struct ib_cq *cq);
+int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_cq - Destroys the specified kernel CQ.
+ * @cq: The CQ to destroy.
+ *
+ * NOTE: for user cq use ib_destroy_cq_user with valid udata!
+ */
+static inline void ib_destroy_cq(struct ib_cq *cq)
+{
+ int ret = ib_destroy_cq_user(cq, NULL);
+
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
+}
/**
* ib_poll_cq - poll a CQ for completion(s)
@@ -3492,7 +3887,7 @@
static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
struct ib_wc *wc)
{
- return cq->device->poll_cq(cq, num_entries, wc);
+ return cq->device->ops.poll_cq(cq, num_entries, wc);
}
/**
@@ -3525,8 +3920,14 @@
static inline int ib_req_notify_cq(struct ib_cq *cq,
enum ib_cq_notify_flags flags)
{
- return cq->device->req_notify_cq(cq, flags);
+ return cq->device->ops.req_notify_cq(cq, flags);
}
+
+struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
+ int comp_vector_hint,
+ enum ib_poll_context poll_ctx);
+
+void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe);
/**
* ib_req_ncomp_notif - Request completion notification when there are
@@ -3537,9 +3938,19 @@
*/
static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
{
- return cq->device->req_ncomp_notif ?
- cq->device->req_ncomp_notif(cq, wc_cnt) :
+ return cq->device->ops.req_ncomp_notif ?
+ cq->device->ops.req_ncomp_notif(cq, wc_cnt) :
-ENOSYS;
+}
+
+/*
+ * Drivers that don't need a DMA mapping at the RDMA layer set dma_device to
+ * NULL. This causes the ib_dma* helpers to just stash the kernel virtual
+ * address into the dma address.
+ */
+static inline bool ib_uses_virt_dma(struct ib_device *dev)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
}
/**
@@ -3549,6 +3960,8 @@
*/
static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
+ if (ib_uses_virt_dma(dev))
+ return 0;
return dma_mapping_error(dev->dma_device, dma_addr);
}
@@ -3563,6 +3976,8 @@
void *cpu_addr, size_t size,
enum dma_data_direction direction)
{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)cpu_addr;
return dma_map_single(dev->dma_device, cpu_addr, size, direction);
}
@@ -3577,7 +3992,8 @@
u64 addr, size_t size,
enum dma_data_direction direction)
{
- dma_unmap_single(dev->dma_device, addr, size, direction);
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_single(dev->dma_device, addr, size, direction);
}
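/*
 * Example usage (minimal sketch; buffer ownership and unmapping on the
 * completion path are omitted): map a kernel buffer for DMA and verify the
 * mapping before handing the address to the HW.
 */
static inline u64 example_map_buf(struct ib_device *dev, void *buf, size_t len)
{
	u64 dma = ib_dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	if (ib_dma_mapping_error(dev, dma))
		return 0;	/* 0 means "not mapped" in this sketch */
	/* later: ib_dma_unmap_single(dev, dma, len, DMA_TO_DEVICE) */
	return dma;
}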
/**
@@ -3594,6 +4010,8 @@
size_t size,
enum dma_data_direction direction)
{
+ if (ib_uses_virt_dma(dev))
+ return (uintptr_t)(page_address(page) + offset);
return dma_map_page(dev->dma_device, page, offset, size, direction);
}
@@ -3608,7 +4026,30 @@
u64 addr, size_t size,
enum dma_data_direction direction)
{
- dma_unmap_page(dev->dma_device, addr, size, direction);
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_page(dev->dma_device, addr, size, direction);
+}
+
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
+static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (ib_uses_virt_dma(dev))
+ return ib_dma_virt_map_sg(dev, sg, nents);
+ return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
+ dma_attrs);
+}
+
+static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long dma_attrs)
+{
+ if (!ib_uses_virt_dma(dev))
+ dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
+ dma_attrs);
}
/**
@@ -3622,7 +4063,7 @@
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- return dma_map_sg(dev->dma_device, sg, nents, direction);
+ return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0);
}
/**
@@ -3636,51 +4077,20 @@
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- dma_unmap_sg(dev->dma_device, sg, nents, direction);
-}
-
-static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
-{
- return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
- dma_attrs);
-}
-
-static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long dma_attrs)
-{
- dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
-}
-/**
- * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
- * @dev: The device for which the DMA addresses were created
- * @sg: The scatter/gather entry
- *
- * Note: this function is obsolete. To do: change all occurrences of
- * ib_sg_dma_address() into sg_dma_address().
- */
-static inline u64 ib_sg_dma_address(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg_dma_address(sg);
+ ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0);
}
/**
- * ib_sg_dma_len - Return the DMA length from a scatter/gather entry
- * @dev: The device for which the DMA addresses were created
- * @sg: The scatter/gather entry
+ * ib_dma_max_seg_size - Return the size limit of a single DMA transfer
+ * @dev: The device to query
*
- * Note: this function is obsolete. To do: change all occurrences of
- * ib_sg_dma_len() into sg_dma_len().
+ * The returned value represents a size in bytes.
*/
-static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
+static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
{
- return sg_dma_len(sg);
+ if (ib_uses_virt_dma(dev))
+ return UINT_MAX;
+ return dma_get_max_seg_size(dev->dma_device);
}
/**
@@ -3695,7 +4105,8 @@
size_t size,
enum dma_data_direction dir)
{
- dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+ if (!ib_uses_virt_dma(dev))
+ dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
}
/**
@@ -3710,7 +4121,8 @@
size_t size,
enum dma_data_direction dir)
{
- dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+ if (!ib_uses_virt_dma(dev))
+ dma_sync_single_for_device(dev->dma_device, addr, size, dir);
}
/**
@@ -3742,18 +4154,45 @@
dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
}
+/* ib_reg_user_mr - register a memory region for virtual addresses from kernel
+ * space. This function should be called when 'current' is the owning MM.
+ */
+struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags);
+
+/* ib_advise_mr - give an advice about an address range in a memory region */
+int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge);
/**
- * ib_dereg_mr - Deregisters a memory region and removes it from the
+ * ib_dereg_mr_user - Deregisters a memory region and removes it from the
+ * HCA translation table.
+ * @mr: The memory region to deregister.
+ * @udata: Valid user data or NULL for kernel object
+ *
+ * This function can fail, if the memory region has memory windows bound to it.
+ */
+int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata);
+
+/**
+ * ib_dereg_mr - Deregisters a kernel memory region and removes it from the
* HCA translation table.
* @mr: The memory region to deregister.
*
* This function can fail, if the memory region has memory windows bound to it.
+ *
+ * NOTE: for user mr use ib_dereg_mr_user with valid udata!
*/
-int ib_dereg_mr(struct ib_mr *mr);
+static inline int ib_dereg_mr(struct ib_mr *mr)
+{
+ return ib_dereg_mr_user(mr, NULL);
+}
-struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
+
+struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_data_sg,
+ u32 max_num_meta_sg);
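/*
 * Example usage (minimal sketch; SG list setup and page size selection are
 * assumed to happen elsewhere): allocate a fast-registration MR and map an
 * SG list into it with ib_map_mr_sg(), declared further below.
 */
static inline struct ib_mr *example_map_mr(struct ib_pd *pd,
					   struct scatterlist *sg, int sg_nents,
					   unsigned int page_size)
{
	struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, sg_nents);
	int n;

	if (IS_ERR(mr))
		return mr;

	n = ib_map_mr_sg(mr, sg, sg_nents, NULL, page_size);
	if (n != sg_nents) {
		ib_dereg_mr(mr);
		return ERR_PTR(n < 0 ? n : -EINVAL);
	}
	return mr;		/* pair with ib_dereg_mr(mr) when done */
}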
/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
@@ -3779,45 +4218,6 @@
}
/**
- * ib_alloc_fmr - Allocates a unmapped fast memory region.
- * @pd: The protection domain associated with the unmapped region.
- * @mr_access_flags: Specifies the memory access rights.
- * @fmr_attr: Attributes of the unmapped region.
- *
- * A fast memory region must be mapped before it can be used as part of
- * a work request.
- */
-struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
- int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
-
-/**
- * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region.
- * @fmr: The fast memory region to associate with the pages.
- * @page_list: An array of physical pages to map to the fast memory region.
- * @list_len: The number of pages in page_list.
- * @iova: The I/O virtual address to use with the mapped region.
- */
-static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
- u64 *page_list, int list_len,
- u64 iova)
-{
- return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
-}
-
-/**
- * ib_unmap_fmr - Removes the mapping from a list of fast memory regions.
- * @fmr_list: A linked list of fast memory regions to unmap.
- */
-int ib_unmap_fmr(struct list_head *fmr_list);
-
-/**
- * ib_dealloc_fmr - Deallocates a fast memory region.
- * @fmr: The fast memory region to deallocate.
- */
-int ib_dealloc_fmr(struct ib_fmr *fmr);
-
-/**
* ib_attach_mcast - Attaches the specified QP to a multicast group.
* @qp: QP to attach to the multicast group. The QP must be type
* IB_QPT_UD.
@@ -3839,20 +4239,9 @@
*/
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
-/**
- * ib_alloc_xrcd - Allocates an XRC domain.
- * @device: The device on which to allocate the XRC domain.
- * @caller: Module name for kernel consumers
- */
-struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
-#define ib_alloc_xrcd(device) \
- __ib_alloc_xrcd((device), KBUILD_MODNAME)
-
-/**
- * ib_dealloc_xrcd - Deallocates an XRC domain.
- * @xrcd: The XRC domain to deallocate.
- */
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
+ struct inode *inode, struct ib_udata *udata);
+int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
static inline int ib_check_mr_access(int flags)
{
@@ -3899,21 +4288,48 @@
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+/**
+ * ib_device_try_get - Hold a registration lock
+ * @device: The device to lock
+ *
+ * A device under an active registration lock cannot become unregistered. It
+ * is only possible to obtain a registration lock on a device that is fully
+ * registered; otherwise this function returns false.
+ *
+ * The registration lock is only necessary for actions which require the
+ * device to still be registered. Uses that only require the device pointer to
+ * be valid should use get_device(&ibdev->dev) to hold the memory.
+ *
+ */
+static inline bool ib_device_try_get(struct ib_device *dev)
+{
+ return refcount_inc_not_zero(&dev->refcount);
+}
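/*
 * Example usage (minimal sketch): hold the registration lock around work
 * that requires the device to stay registered.
 */
static inline void example_with_reg_lock(struct ib_device *dev)
{
	if (!ib_device_try_get(dev))
		return;		/* device is being unregistered */
	/* ... the device is guaranteed to stay registered here ... */
	ib_device_put(dev);
}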
+
+void ib_device_put(struct ib_device *device);
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+ enum rdma_driver_id driver_id);
+struct ib_device *ib_device_get_by_name(const char *name,
+ enum rdma_driver_id driver_id);
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
u16 pkey, const union ib_gid *gid,
const struct sockaddr *addr);
+int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
+ unsigned int port);
+struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
+
struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr);
-int ib_destroy_wq(struct ib_wq *wq);
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
u32 wq_attr_mask);
-struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr*
- wq_ind_table_init_attr);
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size);
+int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset, unsigned int page_size);
static inline int
ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
@@ -3934,7 +4350,7 @@
void ib_drain_sq(struct ib_qp *qp);
void ib_drain_qp(struct ib_qp *qp);
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width);
+int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width);
static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr)
{
@@ -4158,25 +4574,11 @@
ib_get_vector_affinity(struct ib_device *device, int comp_vector)
{
if (comp_vector < 0 || comp_vector >= device->num_comp_vectors ||
- !device->get_vector_affinity)
+ !device->ops.get_vector_affinity)
return NULL;
- return device->get_vector_affinity(device, comp_vector);
+ return device->ops.get_vector_affinity(device, comp_vector);
-}
-
-static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
- struct ib_qp *qp, struct ib_device *device)
-{
- uobj->object = ibflow;
- ibflow->uobject = uobj;
-
- if (qp) {
- atomic_inc(&qp->usecnt);
- ibflow->qp = qp;
- }
-
- ibflow->device = device;
}
/**
@@ -4187,8 +4589,128 @@
*/
void rdma_roce_rescan_device(struct ib_device *ibdev);
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs);
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
+
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
+ enum rdma_netdev_t type, const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
+
+int rdma_init_netdev(struct ib_device *device, u8 port_num,
+ enum rdma_netdev_t type, const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *),
+ struct net_device *netdev);
+
+/**
+ * rdma_set_device_sysfs_group - Set the device attributes group used to
+ *				 expose driver specific sysfs entries for
+ *				 the infiniband class.
+ *
+ * @dev: device pointer for which the attributes are to be created
+ * @group: Pointer to the group which should be added when the device
+ *	   is registered with sysfs.
+ *
+ * rdma_set_device_sysfs_group() allows existing drivers to expose one
+ * group per device with sysfs attributes.
+ *
+ * NOTE: New drivers should not make use of this API; instead, new device
+ * parameters should be exposed via the netlink interface. This API and
+ * mechanism exist only for existing drivers.
+ */
+static inline void
+rdma_set_device_sysfs_group(struct ib_device *dev,
+ const struct attribute_group *group)
+{
+ dev->groups[1] = group;
+}
+
+/**
+ * rdma_device_to_ibdev - Get ib_device pointer from device pointer
+ *
+ * @device: device pointer for which the ib_device pointer is to be retrieved
+ *
+ * rdma_device_to_ibdev() retrieves the ib_device pointer that owns the given
+ * struct device.
+ */
+static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
+{
+ struct ib_core_device *coredev =
+ container_of(device, struct ib_core_device, dev);
+
+ return coredev->owner;
+}
+
+/**
+ * ibdev_to_node - return the NUMA node for a given ib_device
+ * @dev: device to get the NUMA node for.
+ */
+static inline int ibdev_to_node(struct ib_device *ibdev)
+{
+ struct device *parent = ibdev->dev.parent;
+
+ if (!parent)
+ return NUMA_NO_NODE;
+ return dev_to_node(parent);
+}
+
+/**
+ * rdma_device_to_drv_device - Helper macro to reach back to driver's
+ * ib_device holder structure from device pointer.
+ *
+ * NOTE: New drivers should not make use of this API; this API is only for
+ * existing drivers that have exposed sysfs entries using
+ * rdma_set_device_sysfs_group().
+ */
+#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member) \
+ container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
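/*
 * Example usage (minimal sketch; "struct example_dev"/"ibdev" refer to the
 * hypothetical driver structure sketched next to ib_alloc_device() above): a
 * driver sysfs show() callback reaches its own structure from the struct
 * device that sysfs hands it.
 */
static inline struct example_dev *example_from_sysfs_dev(struct device *device)
{
	return rdma_device_to_drv_device(device, struct example_dev, ibdev);
}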
+
+bool rdma_dev_access_netns(const struct ib_device *device,
+ const struct net *net);
+
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF)
+#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
+
+/**
+ * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based
+ * on the flow_label
+ *
+ * This function converts the 20 bit flow_label input to a valid 14 bit
+ * RoCE v2 UDP src port value. All RoCE v2 drivers should use this same
+ * convention.
+ */
+static inline u16 rdma_flow_label_to_udp_sport(u32 fl)
+{
+ u32 fl_low = fl & 0x03fff, fl_high = fl & 0xFC000;
+
+ fl_low ^= fl_high >> 14;
+ return (u16)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN);
+}
+
+/**
+ * rdma_calc_flow_label - generate an RDMA symmetric flow label value based on
+ * local and remote qpn values
+ *
+ * This function folds the product of the two 24 bit qpn fields and converts
+ * the result to a 20 bit value.
+ *
+ * The resulting flow_label is symmetric in the local and remote qpn values,
+ * which allows both the requester and the responder to calculate the same
+ * flow_label for a given connection.
+ *
+ * This helper should be used by drivers when the upper layer provides a zero
+ * flow_label value, to improve the entropy of RDMA traffic in the network.
+ */
+static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn)
+{
+ u64 v = (u64)lqpn * rqpn;
+
+ v ^= v >> 20;
+ v ^= v >> 40;
+
+ return (u32)(v & IB_GRH_FLOWLABEL_MASK);
+}
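/*
 * Example usage (minimal sketch): when the ULP supplies a zero flow label,
 * derive a symmetric one from the QPNs and fold it into the UDP source port.
 */
static inline u16 example_udp_sport(u32 fl, u32 lqpn, u32 rqpn)
{
	if (!fl)
		fl = rdma_calc_flow_label(lqpn, rqpn);
	return rdma_flow_label_to_udp_sport(fl);
}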
#endif /* IB_VERBS_H */
--
Gitblit v1.6.2