2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/drivers/infiniband/hw/mlx5/main.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
  */
 
 #include <linux/debugfs.h>
@@ -39,9 +12,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/bitmap.h>
-#if defined(CONFIG_X86)
-#include <asm/pat.h>
-#endif
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
@@ -52,40 +22,45 @@
 #include <linux/mlx5/port.h>
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
+#include <linux/mlx5/eswitch.h>
 #include <linux/list.h>
 #include <rdma/ib_smi.h>
 #include <rdma/ib_umem.h>
+#include <rdma/lag.h>
 #include <linux/in.h>
 #include <linux/etherdevice.h>
 #include "mlx5_ib.h"
 #include "ib_rep.h"
 #include "cmd.h"
-#include <linux/mlx5/fs_helpers.h>
+#include "devx.h"
+#include "fs.h"
+#include "srq.h"
+#include "qp.h"
+#include "wr.h"
+#include "restrack.h"
+#include "counters.h"
 #include <linux/mlx5/accel.h>
 #include <rdma/uverbs_std_types.h>
 #include <rdma/mlx5_user_ioctl_verbs.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_umem_odp.h>
 
 #define UVERBS_MODULE_NAME mlx5_ib
 #include <rdma/uverbs_named_ioctl.h>
 
-#define DRIVER_NAME "mlx5_ib"
-#define DRIVER_VERSION "5.0-0"
-
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
-MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
+MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) IB driver");
 MODULE_LICENSE("Dual BSD/GPL");
-
-static char mlx5_version[] =
-	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
-	DRIVER_VERSION "\n";
 
 struct mlx5_ib_event_work {
 	struct work_struct	work;
-	struct mlx5_core_dev	*dev;
-	void			*context;
-	enum mlx5_dev_event	event;
-	unsigned long		param;
+	union {
+		struct mlx5_ib_dev	      *dev;
+		struct mlx5_ib_multiport_info *mpi;
+	};
+	bool is_slave;
+	unsigned int event;
+	void *param;
 };
 
 enum {
@@ -146,10 +121,38 @@
 	int ret;
 
 	memset(&attr, 0, sizeof(attr));
-	ret = ibdev->query_port(ibdev, port_num, &attr);
+	ret = ibdev->ops.query_port(ibdev, port_num, &attr);
 	if (!ret)
 		*state = attr.state;
 	return ret;
+}
+
+static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
+					   struct net_device *ndev,
+					   u8 *port_num)
+{
+	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+	struct net_device *rep_ndev;
+	struct mlx5_ib_port *port;
+	int i;
+
+	for (i = 0; i < dev->num_ports; i++) {
+		port = &dev->port[i];
+		if (!port->rep)
+			continue;
+
+		read_lock(&port->roce.netdev_lock);
+		rep_ndev = mlx5_ib_get_rep_netdev(esw,
+						  port->rep->vport);
+		if (rep_ndev == ndev) {
+			read_unlock(&port->roce.netdev_lock);
+			*port_num = i + 1;
+			return &port->roce;
+		}
+		read_unlock(&port->roce.netdev_lock);
+	}
+
+	return NULL;
 }
 
 static int mlx5_netdev_event(struct notifier_block *this,
@@ -168,21 +171,20 @@
 
 	switch (event) {
 	case NETDEV_REGISTER:
-	case NETDEV_UNREGISTER:
+		/* Should already be registered during the load */
+		if (ibdev->is_rep)
+			break;
 		write_lock(&roce->netdev_lock);
-		if (ibdev->rep) {
-			struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
-			struct net_device *rep_ndev;
+		if (ndev->dev.parent == mdev->device)
+			roce->netdev = ndev;
+		write_unlock(&roce->netdev_lock);
+		break;
 
-			rep_ndev = mlx5_ib_get_rep_netdev(esw,
-							  ibdev->rep->vport);
-			if (rep_ndev == ndev)
-				roce->netdev = (event == NETDEV_UNREGISTER) ?
-					NULL : ndev;
-		} else if (ndev->dev.parent == &mdev->pdev->dev) {
-			roce->netdev = (event == NETDEV_UNREGISTER) ?
-				NULL : ndev;
-		}
+	case NETDEV_UNREGISTER:
+		/* In case of reps, ib device goes away before the netdevs */
+		write_lock(&roce->netdev_lock);
+		if (roce->netdev == ndev)
+			roce->netdev = NULL;
 		write_unlock(&roce->netdev_lock);
 		break;
 
@@ -197,6 +199,10 @@
 		dev_put(lag_ndev);
 	}
 
+	if (ibdev->is_rep)
+		roce = mlx5_get_rep_roce(ibdev, ndev, &port_num);
+	if (!roce)
+		return NOTIFY_DONE;
 	if ((upper == ndev || (!upper && ndev == roce->netdev))
 	    && ibdev->ib_active) {
 		struct ib_event ibev = { };
@@ -249,11 +255,11 @@
 
 	/* Ensure ndev does not disappear before we invoke dev_hold()
 	 */
-	read_lock(&ibdev->roce[port_num - 1].netdev_lock);
-	ndev = ibdev->roce[port_num - 1].netdev;
+	read_lock(&ibdev->port[port_num - 1].roce.netdev_lock);
+	ndev = ibdev->port[port_num - 1].roce.netdev;
 	if (ndev)
 		dev_hold(ndev);
-	read_unlock(&ibdev->roce[port_num - 1].netdev_lock);
+	read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock);
 
 out:
 	mlx5_ib_put_native_port_mdev(ibdev, port_num);
@@ -281,9 +287,6 @@
 	*native_port_num = 1;
 
 	port = &ibdev->port[ib_port_num - 1];
-	if (!port)
-		return NULL;
-
 	spin_lock(&port->mp.mpi_lock);
 	mpi = ibdev->port[ib_port_num - 1].mp.mpi;
 	if (mpi && !mpi->unaffiliate) {
@@ -323,8 +326,8 @@
 		spin_unlock(&port->mp.mpi_lock);
 }
 
-static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
-				    u8 *active_width)
+static int translate_eth_legacy_proto_oper(u32 eth_proto_oper,
+					   u16 *active_speed, u8 *active_width)
 {
 	switch (eth_proto_oper) {
 	case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
@@ -381,10 +384,73 @@
 	return 0;
 }
 
+static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
+					u8 *active_width)
+{
+	switch (eth_proto_oper) {
+	case MLX5E_PROT_MASK(MLX5E_SGMII_100M):
+	case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII):
+		*active_width = IB_WIDTH_1X;
+		*active_speed = IB_SPEED_SDR;
+		break;
+	case MLX5E_PROT_MASK(MLX5E_5GBASE_R):
+		*active_width = IB_WIDTH_1X;
+		*active_speed = IB_SPEED_DDR;
+		break;
+	case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1):
+		*active_width = IB_WIDTH_1X;
+		*active_speed = IB_SPEED_QDR;
+		break;
+	case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4):
+		*active_width = IB_WIDTH_4X;
+		*active_speed = IB_SPEED_QDR;
+		break;
+	case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR):
+		*active_width = IB_WIDTH_1X;
+		*active_speed = IB_SPEED_EDR;
+		break;
+	case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2):
+		*active_width = IB_WIDTH_2X;
+		*active_speed = IB_SPEED_EDR;
+		break;
+	case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR):
+		*active_width = IB_WIDTH_1X;
+		*active_speed = IB_SPEED_HDR;
+		break;
+	case MLX5E_PROT_MASK(MLX5E_CAUI_4_100GBASE_CR4_KR4):
+		*active_width = IB_WIDTH_4X;
+		*active_speed = IB_SPEED_EDR;
+		break;
+	case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2):
+		*active_width = IB_WIDTH_2X;
+		*active_speed = IB_SPEED_HDR;
+		break;
+	case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4):
+		*active_width = IB_WIDTH_4X;
+		*active_speed = IB_SPEED_HDR;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed,
+				    u8 *active_width, bool ext)
+{
+	return ext ?
+		translate_eth_ext_proto_oper(eth_proto_oper, active_speed,
+					     active_width) :
+		translate_eth_legacy_proto_oper(eth_proto_oper, active_speed,
+						active_width);
+}
+
 static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 				struct ib_port_attr *props)
 {
 	struct mlx5_ib_dev *dev = to_mdev(device);
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
 	struct mlx5_core_dev *mdev;
 	struct net_device *ndev, *upper;
 	enum ib_mtu ndev_ib_mtu;
@@ -392,6 +458,7 @@
 	u16 qkey_viol_cntr;
 	u32 eth_prot_oper;
 	u8 mdev_port_num;
+	bool ext;
 	int err;
 
 	mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
@@ -407,17 +474,24 @@
 
 	/* Possible bad flows are checked before filling out props so in case
 	 * of an error it will still be zeroed out.
+	 * Use native port in case of reps
 	 */
-	err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper,
-					     mdev_port_num);
+	if (dev->is_rep)
+		err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
+					   1);
+	else
+		err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
+					   mdev_port_num);
 	if (err)
 		goto out;
+	ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability);
+	eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
 
 	props->active_width     = IB_WIDTH_4X;
 	props->active_speed     = IB_SPEED_QDR;
 
 	translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
-				 &props->active_width);
+				 &props->active_width, ext);
 
 	props->port_cap_flags |= IB_PORT_CM_SUP;
 	props->ip_gids = true;
@@ -428,7 +502,7 @@
 	props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
 	props->pkey_tbl_len = 1;
 	props->state = IB_PORT_DOWN;
-	props->phys_state = 3;
+	props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
 
 	mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
 	props->qkey_viol_cntr = qkey_viol_cntr;
@@ -441,7 +515,7 @@
 	if (!ndev)
 		goto out;
 
-	if (mlx5_lag_is_active(dev->mdev)) {
+	if (dev->lag_active) {
 		rcu_read_lock();
 		upper = netdev_master_upper_dev_get_rcu(ndev);
 		if (upper) {
@@ -454,7 +528,7 @@
 
 	if (netif_running(ndev) && netif_carrier_ok(ndev)) {
 		props->state      = IB_PORT_ACTIVE;
-		props->phys_state = 5;
+		props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
 	}
 
 	ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
@@ -472,25 +546,22 @@
 			 unsigned int index, const union ib_gid *gid,
 			 const struct ib_gid_attr *attr)
 {
-	enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+	enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
+	u16 vlan_id = 0xffff;
 	u8 roce_version = 0;
 	u8 roce_l3_type = 0;
-	bool vlan = false;
 	u8 mac[ETH_ALEN];
-	u16 vlan_id = 0;
+	int ret;
 
 	if (gid) {
 		gid_type = attr->gid_type;
-		ether_addr_copy(mac, attr->ndev->dev_addr);
-
-		if (is_vlan_dev(attr->ndev)) {
-			vlan = true;
-			vlan_id = vlan_dev_vlan_id(attr->ndev);
-		}
+		ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
+		if (ret)
+			return ret;
 	}
 
 	switch (gid_type) {
-	case IB_GID_TYPE_IB:
+	case IB_GID_TYPE_ROCE:
 		roce_version = MLX5_ROCE_VERSION_1;
 		break;
 	case IB_GID_TYPE_ROCE_UDP_ENCAP:
@@ -506,8 +577,9 @@
 	}
 
 	return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
-				      roce_l3_type, gid->raw, mac, vlan,
-				      vlan_id, port_num);
+				      roce_l3_type, gid->raw, mac,
+				      vlan_id < VLAN_CFI_MASK, vlan_id,
+				      port_num);
 }
 
 static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
@@ -524,8 +596,8 @@
 			     attr->index, NULL, NULL);
 }
 
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
-			       const struct ib_gid_attr *attr)
+__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
+				   const struct ib_gid_attr *attr)
 {
 	if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
 		return 0;
@@ -588,21 +660,6 @@
 	get_atomic_caps(dev, atomic_size_qp, props);
 }
 
-static void get_atomic_caps_dc(struct mlx5_ib_dev *dev,
-			       struct ib_device_attr *props)
-{
-	u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
-
-	get_atomic_caps(dev, atomic_size_qp, props);
-}
-
-bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev)
-{
-	struct ib_device_attr props = {};
-
-	get_atomic_caps_dc(dev, &props);
-	return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false;
-}
 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
 					__be64 *sys_image_guid)
 {
@@ -724,6 +781,7 @@
 				struct ib_device_attr *props,
 				struct ib_udata *uhw)
 {
+	size_t uhw_outlen = (uhw) ? uhw->outlen : 0;
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	struct mlx5_core_dev *mdev = dev->mdev;
 	int err = -ENOMEM;
@@ -737,12 +795,12 @@
 	u64 max_tso;
 
 	resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
-	if (uhw->outlen && uhw->outlen < resp_len)
+	if (uhw_outlen && uhw_outlen < resp_len)
 		return -EINVAL;
-	else
-		resp.response_length = resp_len;
 
-	if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
+	resp.response_length = resp_len;
+
+	if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
 		return -EINVAL;
 
 	memset(props, 0, sizeof(*props));
@@ -782,9 +840,11 @@
 		/* We support 'Gappy' memory registration too */
 		props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
 	}
-	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+	/* IB_WR_REG_MR always requires changing the entity size with UMR */
+	if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
 	if (MLX5_CAP_GEN(mdev, sho)) {
-		props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+		props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
 		/* At this stage no support for signature handover */
 		props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
 			IB_PROT_T10DIF_TYPE_2 |
....@@ -806,7 +866,7 @@
806866 props->raw_packet_caps |=
807867 IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
808868
809
- if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
869
+ if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen) {
810870 max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
811871 if (max_tso) {
812872 resp.tso_caps.max_tso = 1 << max_tso;
....@@ -816,7 +876,7 @@
816876 }
817877 }
818878
819
- if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
879
+ if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen) {
820880 resp.rss_caps.rx_hash_function =
821881 MLX5_RX_HASH_FUNC_TOEPLITZ;
822882 resp.rss_caps.rx_hash_fields_mask =
....@@ -836,9 +896,9 @@
836896 resp.response_length += sizeof(resp.rss_caps);
837897 }
838898 } else {
839
- if (field_avail(typeof(resp), tso_caps, uhw->outlen))
899
+ if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen)
840900 resp.response_length += sizeof(resp.tso_caps);
841
- if (field_avail(typeof(resp), rss_caps, uhw->outlen))
901
+ if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen)
842902 resp.response_length += sizeof(resp.rss_caps);
843903 }
844904
....@@ -904,27 +964,47 @@
904964 props->max_srq_sge = max_rq_sg - 1;
905965 props->max_fast_reg_page_list_len =
906966 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
967
+ props->max_pi_fast_reg_page_list_len =
968
+ props->max_fast_reg_page_list_len / 2;
969
+ props->max_sgl_rd =
970
+ MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance);
907971 get_atomic_caps_qp(dev, props);
908972 props->masked_atomic_cap = IB_ATOMIC_NONE;
909973 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
910974 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
911975 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
912976 props->max_mcast_grp;
913
- props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
914977 props->max_ah = INT_MAX;
915978 props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
916979 props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
917980
918
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
919
- if (MLX5_CAP_GEN(mdev, pg))
920
- props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
921
- props->odp_caps = dev->odp_caps;
922
-#endif
981
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
982
+ if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
983
+ props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
984
+ props->odp_caps = dev->odp_caps;
985
+ if (!uhw) {
986
+ /* ODP for kernel QPs is not implemented for receive
987
+ * WQEs and SRQ WQEs
988
+ */
989
+ props->odp_caps.per_transport_caps.rc_odp_caps &=
990
+ ~(IB_ODP_SUPPORT_READ |
991
+ IB_ODP_SUPPORT_SRQ_RECV);
992
+ props->odp_caps.per_transport_caps.uc_odp_caps &=
993
+ ~(IB_ODP_SUPPORT_READ |
994
+ IB_ODP_SUPPORT_SRQ_RECV);
995
+ props->odp_caps.per_transport_caps.ud_odp_caps &=
996
+ ~(IB_ODP_SUPPORT_READ |
997
+ IB_ODP_SUPPORT_SRQ_RECV);
998
+ props->odp_caps.per_transport_caps.xrc_odp_caps &=
999
+ ~(IB_ODP_SUPPORT_READ |
1000
+ IB_ODP_SUPPORT_SRQ_RECV);
1001
+ }
1002
+ }
9231003
9241004 if (MLX5_CAP_GEN(mdev, cd))
9251005 props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
9261006
927
- if (!mlx5_core_is_pf(mdev))
1007
+ if (mlx5_core_is_vf(mdev))
9281008 props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
9291009
9301010 if (mlx5_ib_port_link_layer(ibdev, 1) ==
....@@ -959,7 +1039,7 @@
9591039 MLX5_MAX_CQ_PERIOD;
9601040 }
9611041
962
- if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
1042
+ if (offsetofend(typeof(resp), cqe_comp_caps) <= uhw_outlen) {
9631043 resp.response_length += sizeof(resp.cqe_comp_caps);
9641044
9651045 if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
....@@ -977,7 +1057,7 @@
9771057 }
9781058 }
9791059
980
- if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
1060
+ if (offsetofend(typeof(resp), packet_pacing_caps) <= uhw_outlen &&
9811061 raw_support) {
9821062 if (MLX5_CAP_QOS(mdev, packet_pacing) &&
9831063 MLX5_CAP_GEN(mdev, qos)) {
....@@ -995,8 +1075,8 @@
9951075 resp.response_length += sizeof(resp.packet_pacing_caps);
9961076 }
9971077
998
- if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
999
- uhw->outlen)) {
1078
+ if (offsetofend(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes) <=
1079
+ uhw_outlen) {
10001080 if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
10011081 resp.mlx5_ib_support_multi_pkt_send_wqes =
10021082 MLX5_IB_ALLOW_MPW;
....@@ -1009,7 +1089,7 @@
10091089 sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
10101090 }
10111091
1012
- if (field_avail(typeof(resp), flags, uhw->outlen)) {
1092
+ if (offsetofend(typeof(resp), flags) <= uhw_outlen) {
10131093 resp.response_length += sizeof(resp.flags);
10141094
10151095 if (MLX5_CAP_GEN(mdev, cqe_compression_128))
....@@ -1018,10 +1098,14 @@
10181098
10191099 if (MLX5_CAP_GEN(mdev, cqe_128_always))
10201100 resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
1101
+ if (MLX5_CAP_GEN(mdev, qp_packet_based))
1102
+ resp.flags |=
1103
+ MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
1104
+
1105
+ resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
10211106 }
10221107
1023
- if (field_avail(typeof(resp), sw_parsing_caps,
1024
- uhw->outlen)) {
1108
+ if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) {
10251109 resp.response_length += sizeof(resp.sw_parsing_caps);
10261110 if (MLX5_CAP_ETH(mdev, swp)) {
10271111 resp.sw_parsing_caps.sw_parsing_offloads |=
....@@ -1041,7 +1125,7 @@
10411125 }
10421126 }
10431127
1044
- if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) &&
1128
+ if (offsetofend(typeof(resp), striding_rq_caps) <= uhw_outlen &&
10451129 raw_support) {
10461130 resp.response_length += sizeof(resp.striding_rq_caps);
10471131 if (MLX5_CAP_GEN(mdev, striding_rq)) {
....@@ -1049,8 +1133,14 @@
10491133 MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
10501134 resp.striding_rq_caps.max_single_stride_log_num_of_bytes =
10511135 MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
1052
- resp.striding_rq_caps.min_single_wqe_log_num_of_strides =
1053
- MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
1136
+ if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range))
1137
+ resp.striding_rq_caps
1138
+ .min_single_wqe_log_num_of_strides =
1139
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
1140
+ else
1141
+ resp.striding_rq_caps
1142
+ .min_single_wqe_log_num_of_strides =
1143
+ MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
10541144 resp.striding_rq_caps.max_single_wqe_log_num_of_strides =
10551145 MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
10561146 resp.striding_rq_caps.supported_qpts =
....@@ -1058,8 +1148,7 @@
10581148 }
10591149 }
10601150
1061
- if (field_avail(typeof(resp), tunnel_offloads_caps,
1062
- uhw->outlen)) {
1151
+ if (offsetofend(typeof(resp), tunnel_offloads_caps) <= uhw_outlen) {
10631152 resp.response_length += sizeof(resp.tunnel_offloads_caps);
10641153 if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan))
10651154 resp.tunnel_offloads_caps |=
....@@ -1078,7 +1167,7 @@
10781167 MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
10791168 }
10801169
1081
- if (uhw->outlen) {
1170
+ if (uhw_outlen) {
10821171 err = ib_copy_to_udata(uhw, &resp, resp.response_length);
10831172
10841173 if (err)
....@@ -1088,30 +1177,24 @@
10881177 return 0;
10891178 }
10901179
1091
-enum mlx5_ib_width {
1092
- MLX5_IB_WIDTH_1X = 1 << 0,
1093
- MLX5_IB_WIDTH_2X = 1 << 1,
1094
- MLX5_IB_WIDTH_4X = 1 << 2,
1095
- MLX5_IB_WIDTH_8X = 1 << 3,
1096
- MLX5_IB_WIDTH_12X = 1 << 4
1097
-};
1098
-
1099
-static void translate_active_width(struct ib_device *ibdev, u8 active_width,
1100
- u8 *ib_width)
1180
+static void translate_active_width(struct ib_device *ibdev, u16 active_width,
1181
+ u8 *ib_width)
11011182 {
11021183 struct mlx5_ib_dev *dev = to_mdev(ibdev);
11031184
1104
- if (active_width & MLX5_IB_WIDTH_1X)
1185
+ if (active_width & MLX5_PTYS_WIDTH_1X)
11051186 *ib_width = IB_WIDTH_1X;
1106
- else if (active_width & MLX5_IB_WIDTH_4X)
1187
+ else if (active_width & MLX5_PTYS_WIDTH_2X)
1188
+ *ib_width = IB_WIDTH_2X;
1189
+ else if (active_width & MLX5_PTYS_WIDTH_4X)
11071190 *ib_width = IB_WIDTH_4X;
1108
- else if (active_width & MLX5_IB_WIDTH_8X)
1191
+ else if (active_width & MLX5_PTYS_WIDTH_8X)
11091192 *ib_width = IB_WIDTH_8X;
1110
- else if (active_width & MLX5_IB_WIDTH_12X)
1193
+ else if (active_width & MLX5_PTYS_WIDTH_12X)
11111194 *ib_width = IB_WIDTH_12X;
11121195 else {
11131196 mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n",
1114
- (int)active_width);
1197
+ active_width);
11151198 *ib_width = IB_WIDTH_4X;
11161199 }
11171200
....@@ -1188,7 +1271,7 @@
11881271 u16 max_mtu;
11891272 u16 oper_mtu;
11901273 int err;
1191
- u8 ib_link_width_oper;
1274
+ u16 ib_link_width_oper;
11921275 u8 vl_hw_cap;
11931276
11941277 rep = kzalloc(sizeof(*rep), GFP_KERNEL);
....@@ -1218,15 +1301,15 @@
12181301 props->subnet_timeout = rep->subnet_timeout;
12191302 props->init_type_reply = rep->init_type_reply;
12201303
1221
- err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
1304
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
1305
+ props->port_cap_flags2 = rep->cap_mask2;
1306
+
1307
+ err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper,
1308
+ &props->active_speed, port);
12221309 if (err)
12231310 goto out;
12241311
12251312 translate_active_width(ibdev, ib_link_width_oper, &props->active_width);
1226
-
1227
- err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
1228
- if (err)
1229
- goto out;
12301313
12311314 mlx5_query_port_max_mtu(mdev, &max_mtu, port);
12321315
....@@ -1297,7 +1380,9 @@
12971380 {
12981381 int ret;
12991382
1300
- /* Only link layer == ethernet is valid for representors */
1383
+ /* Only link layer == ethernet is valid for representors
1384
+ * and we always use port 1
1385
+ */
13011386 ret = mlx5_query_port_roce(ibdev, port, props);
13021387 if (ret || !props)
13031388 return ret;
....@@ -1566,14 +1651,57 @@
15661651 mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
15671652 }
15681653
1569
-static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
1654
+int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
1655
+{
1656
+ int err = 0;
1657
+
1658
+ mutex_lock(&dev->lb.mutex);
1659
+ if (td)
1660
+ dev->lb.user_td++;
1661
+ if (qp)
1662
+ dev->lb.qps++;
1663
+
1664
+ if (dev->lb.user_td == 2 ||
1665
+ dev->lb.qps == 1) {
1666
+ if (!dev->lb.enabled) {
1667
+ err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
1668
+ dev->lb.enabled = true;
1669
+ }
1670
+ }
1671
+
1672
+ mutex_unlock(&dev->lb.mutex);
1673
+
1674
+ return err;
1675
+}
1676
+
1677
+void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
1678
+{
1679
+ mutex_lock(&dev->lb.mutex);
1680
+ if (td)
1681
+ dev->lb.user_td--;
1682
+ if (qp)
1683
+ dev->lb.qps--;
1684
+
1685
+ if (dev->lb.user_td == 1 &&
1686
+ dev->lb.qps == 0) {
1687
+ if (dev->lb.enabled) {
1688
+ mlx5_nic_vport_update_local_lb(dev->mdev, false);
1689
+ dev->lb.enabled = false;
1690
+ }
1691
+ }
1692
+
1693
+ mutex_unlock(&dev->lb.mutex);
1694
+}
1695
+
1696
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
1697
+ u16 uid)
15701698 {
15711699 int err;
15721700
15731701 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
15741702 return 0;
15751703
1576
- err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
1704
+ err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid);
15771705 if (err)
15781706 return err;
15791707
....@@ -1582,115 +1710,160 @@
15821710 !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
15831711 return err;
15841712
1585
- mutex_lock(&dev->lb_mutex);
1586
- dev->user_td++;
1587
-
1588
- if (dev->user_td == 2)
1589
- err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
1590
-
1591
- mutex_unlock(&dev->lb_mutex);
1592
- return err;
1713
+ return mlx5_ib_enable_lb(dev, true, false);
15931714 }
15941715
1595
-static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
1716
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
1717
+ u16 uid)
15961718 {
15971719 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
15981720 return;
15991721
1600
- mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
1722
+ mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid);
16011723
16021724 if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
16031725 (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
16041726 !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
16051727 return;
16061728
1607
- mutex_lock(&dev->lb_mutex);
1608
- dev->user_td--;
1609
-
1610
- if (dev->user_td < 2)
1611
- mlx5_nic_vport_update_local_lb(dev->mdev, false);
1612
-
1613
- mutex_unlock(&dev->lb_mutex);
1729
+ mlx5_ib_disable_lb(dev, true, false);
16141730 }
16151731
1616
-static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1617
- struct ib_udata *udata)
1732
+static int set_ucontext_resp(struct ib_ucontext *uctx,
1733
+ struct mlx5_ib_alloc_ucontext_resp *resp)
16181734 {
1735
+ struct ib_device *ibdev = uctx->device;
1736
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
1737
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
1738
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
1739
+ int err;
1740
+
1741
+ if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
1742
+ err = mlx5_cmd_dump_fill_mkey(dev->mdev,
1743
+ &resp->dump_fill_mkey);
1744
+ if (err)
1745
+ return err;
1746
+ resp->comp_mask |=
1747
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
1748
+ }
1749
+
1750
+ resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
1751
+ if (dev->wc_support)
1752
+ resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
1753
+ log_bf_reg_size);
1754
+ resp->cache_line_size = cache_line_size();
1755
+ resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
1756
+ resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
1757
+ resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
1758
+ resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
1759
+ resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
1760
+ resp->cqe_version = context->cqe_version;
1761
+ resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
1762
+ MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
1763
+ resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
1764
+ MLX5_CAP_GEN(dev->mdev,
1765
+ num_of_uars_per_page) : 1;
1766
+
1767
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
1768
+ MLX5_ACCEL_IPSEC_CAP_DEVICE) {
1769
+ if (mlx5_get_flow_namespace(dev->mdev,
1770
+ MLX5_FLOW_NAMESPACE_EGRESS))
1771
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
1772
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
1773
+ MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
1774
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
1775
+ if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
1776
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
1777
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
1778
+ MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
1779
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
1780
+ /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
1781
+ }
1782
+
1783
+ resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 :
1784
+ bfregi->total_num_bfregs - bfregi->num_dyn_bfregs;
1785
+ resp->num_ports = dev->num_ports;
1786
+ resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
1787
+ MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
1788
+
1789
+ if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
1790
+ mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline);
1791
+ resp->eth_min_inline++;
1792
+ }
1793
+
1794
+ if (dev->mdev->clock_info)
1795
+ resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
1796
+
1797
+ /*
1798
+ * We don't want to expose information from the PCI bar that is located
1799
+ * after 4096 bytes, so if the arch only supports larger pages, let's
1800
+ * pretend we don't support reading the HCA's core clock. This is also
1801
+ * forced by mmap function.
1802
+ */
1803
+ if (PAGE_SIZE <= 4096) {
1804
+ resp->comp_mask |=
1805
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
1806
+ resp->hca_core_clock_offset =
1807
+ offsetof(struct mlx5_init_seg,
1808
+ internal_timer_h) % PAGE_SIZE;
1809
+ }
1810
+
1811
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
1812
+ resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
1813
+
1814
+ resp->num_dyn_bfregs = bfregi->num_dyn_bfregs;
1815
+ return 0;
1816
+}
1817
+
1818
+static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
1819
+ struct ib_udata *udata)
1820
+{
1821
+ struct ib_device *ibdev = uctx->device;
16191822 struct mlx5_ib_dev *dev = to_mdev(ibdev);
16201823 struct mlx5_ib_alloc_ucontext_req_v2 req = {};
16211824 struct mlx5_ib_alloc_ucontext_resp resp = {};
1622
- struct mlx5_core_dev *mdev = dev->mdev;
1623
- struct mlx5_ib_ucontext *context;
1825
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
16241826 struct mlx5_bfreg_info *bfregi;
16251827 int ver;
16261828 int err;
16271829 size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
16281830 max_cqe_version);
1629
- u32 dump_fill_mkey;
16301831 bool lib_uar_4k;
1832
+ bool lib_uar_dyn;
16311833
16321834 if (!dev->ib_active)
1633
- return ERR_PTR(-EAGAIN);
1835
+ return -EAGAIN;
16341836
16351837 if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
16361838 ver = 0;
16371839 else if (udata->inlen >= min_req_v2)
16381840 ver = 2;
16391841 else
1640
- return ERR_PTR(-EINVAL);
1842
+ return -EINVAL;
16411843
16421844 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
16431845 if (err)
1644
- return ERR_PTR(err);
1846
+ return err;
16451847
16461848 if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
1647
- return ERR_PTR(-EOPNOTSUPP);
1849
+ return -EOPNOTSUPP;
16481850
16491851 if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
1650
- return ERR_PTR(-EOPNOTSUPP);
1852
+ return -EOPNOTSUPP;
16511853
16521854 req.total_num_bfregs = ALIGN(req.total_num_bfregs,
16531855 MLX5_NON_FP_BFREGS_PER_UAR);
16541856 if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
1655
- return ERR_PTR(-EINVAL);
1656
-
1657
- resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
1658
- if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
1659
- resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
1660
- resp.cache_line_size = cache_line_size();
1661
- resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
1662
- resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
1663
- resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
1664
- resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
1665
- resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
1666
- resp.cqe_version = min_t(__u8,
1667
- (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
1668
- req.max_cqe_version);
1669
- resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
1670
- MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
1671
- resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
1672
- MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
1673
- resp.response_length = min(offsetof(typeof(resp), response_length) +
1674
- sizeof(resp.response_length), udata->outlen);
1675
-
1676
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
1677
- if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
1678
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
1679
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
1680
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
1681
- if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
1682
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
1683
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
1684
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
1685
- /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
1686
- }
1687
-
1688
- context = kzalloc(sizeof(*context), GFP_KERNEL);
1689
- if (!context)
1690
- return ERR_PTR(-ENOMEM);
1857
+ return -EINVAL;
16911858
16921859 lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
1860
+ lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
16931861 bfregi = &context->bfregi;
1862
+
1863
+ if (lib_uar_dyn) {
1864
+ bfregi->lib_uar_dyn = lib_uar_dyn;
1865
+ goto uar_done;
1866
+ }
16941867
16951868 /* updates req->total_num_bfregs */
16961869 err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
....@@ -1718,124 +1891,55 @@
17181891 if (err)
17191892 goto out_sys_pages;
17201893
1721
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1722
- context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
1723
-#endif
1724
-
1725
- err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
1726
- if (err)
1727
- goto out_uars;
1728
-
1894
+uar_done:
17291895 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
1730
- /* Block DEVX on Infiniband as of SELinux */
1731
- if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) {
1732
- err = -EPERM;
1733
- goto out_td;
1734
- }
1735
-
1736
- err = mlx5_ib_devx_create(dev, context);
1737
- if (err)
1738
- goto out_td;
1896
+ err = mlx5_ib_devx_create(dev, true);
1897
+ if (err < 0)
1898
+ goto out_uars;
1899
+ context->devx_uid = err;
17391900 }
17401901
1741
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
1742
- err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
1743
- if (err)
1744
- goto out_mdev;
1745
- }
1902
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
1903
+ context->devx_uid);
1904
+ if (err)
1905
+ goto out_devx;
17461906
1747
- INIT_LIST_HEAD(&context->vma_private_list);
1748
- mutex_init(&context->vma_private_list_mutex);
17491907 INIT_LIST_HEAD(&context->db_page_list);
17501908 mutex_init(&context->db_page_mutex);
17511909
1752
- resp.tot_bfregs = req.total_num_bfregs;
1753
- resp.num_ports = dev->num_ports;
1910
+ context->cqe_version = min_t(__u8,
1911
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
1912
+ req.max_cqe_version);
17541913
1755
- if (field_avail(typeof(resp), cqe_version, udata->outlen))
1756
- resp.response_length += sizeof(resp.cqe_version);
1914
+ err = set_ucontext_resp(uctx, &resp);
1915
+ if (err)
1916
+ goto out_mdev;
17571917
1758
- if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
1759
- resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
1760
- MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
1761
- resp.response_length += sizeof(resp.cmds_supp_uhw);
1762
- }
1763
-
1764
- if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
1765
- if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
1766
- mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
1767
- resp.eth_min_inline++;
1768
- }
1769
- resp.response_length += sizeof(resp.eth_min_inline);
1770
- }
1771
-
1772
- if (field_avail(typeof(resp), clock_info_versions, udata->outlen)) {
1773
- if (mdev->clock_info)
1774
- resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
1775
- resp.response_length += sizeof(resp.clock_info_versions);
1776
- }
1777
-
1778
- /*
1779
- * We don't want to expose information from the PCI bar that is located
1780
- * after 4096 bytes, so if the arch only supports larger pages, let's
1781
- * pretend we don't support reading the HCA's core clock. This is also
1782
- * forced by mmap function.
1783
- */
1784
- if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
1785
- if (PAGE_SIZE <= 4096) {
1786
- resp.comp_mask |=
1787
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
1788
- resp.hca_core_clock_offset =
1789
- offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
1790
- }
1791
- resp.response_length += sizeof(resp.hca_core_clock_offset);
1792
- }
1793
-
1794
- if (field_avail(typeof(resp), log_uar_size, udata->outlen))
1795
- resp.response_length += sizeof(resp.log_uar_size);
1796
-
1797
- if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
1798
- resp.response_length += sizeof(resp.num_uars_per_page);
1799
-
1800
- if (field_avail(typeof(resp), num_dyn_bfregs, udata->outlen)) {
1801
- resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
1802
- resp.response_length += sizeof(resp.num_dyn_bfregs);
1803
- }
1804
-
1805
- if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) {
1806
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
1807
- resp.dump_fill_mkey = dump_fill_mkey;
1808
- resp.comp_mask |=
1809
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
1810
- }
1811
- resp.response_length += sizeof(resp.dump_fill_mkey);
1812
- }
1813
-
1918
+ resp.response_length = min(udata->outlen, sizeof(resp));
18141919 err = ib_copy_to_udata(udata, &resp, resp.response_length);
18151920 if (err)
18161921 goto out_mdev;
18171922
18181923 bfregi->ver = ver;
18191924 bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
1820
- context->cqe_version = resp.cqe_version;
18211925 context->lib_caps = req.lib_caps;
18221926 print_lib_caps(dev, context->lib_caps);
18231927
1824
- if (mlx5_lag_is_active(dev->mdev)) {
1825
- u8 port = mlx5_core_native_port_num(dev->mdev);
1928
+ if (mlx5_ib_lag_should_assign_affinity(dev)) {
1929
+ u8 port = mlx5_core_native_port_num(dev->mdev) - 1;
18261930
18271931 atomic_set(&context->tx_port_affinity,
18281932 atomic_add_return(
1829
- 1, &dev->roce[port].tx_port_affinity));
1933
+ 1, &dev->port[port].roce.tx_port_affinity));
18301934 }
18311935
1832
- return &context->ibucontext;
1936
+ return 0;
18331937
18341938 out_mdev:
1939
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
1940
+out_devx:
18351941 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
1836
- mlx5_ib_devx_destroy(dev, context);
1837
-out_td:
1838
- mlx5_ib_dealloc_transport_domain(dev, context->tdn);
1942
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
18391943
18401944 out_uars:
18411945 deallocate_uars(dev, context);
....@@ -1847,29 +1951,47 @@
18471951 kfree(bfregi->count);
18481952
18491953 out_ctx:
1850
- kfree(context);
1851
-
1852
- return ERR_PTR(err);
1954
+ return err;
18531955 }
18541956
1855
-static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1957
+static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext,
1958
+ struct uverbs_attr_bundle *attrs)
1959
+{
1960
+ struct mlx5_ib_alloc_ucontext_resp uctx_resp = {};
1961
+ int ret;
1962
+
1963
+ ret = set_ucontext_resp(ibcontext, &uctx_resp);
1964
+ if (ret)
1965
+ return ret;
1966
+
1967
+ uctx_resp.response_length =
1968
+ min_t(size_t,
1969
+ uverbs_attr_get_len(attrs,
1970
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX),
1971
+ sizeof(uctx_resp));
1972
+
1973
+ ret = uverbs_copy_to_struct_or_zero(attrs,
1974
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
1975
+ &uctx_resp,
1976
+ sizeof(uctx_resp));
1977
+ return ret;
1978
+}
1979
+
1980
+static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
18561981 {
18571982 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
18581983 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
18591984 struct mlx5_bfreg_info *bfregi;
18601985
1861
- if (context->devx_uid)
1862
- mlx5_ib_devx_destroy(dev, context);
1863
-
18641986 bfregi = &context->bfregi;
1865
- mlx5_ib_dealloc_transport_domain(dev, context->tdn);
1987
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
1988
+
1989
+ if (context->devx_uid)
1990
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
18661991
18671992 deallocate_uars(dev, context);
18681993 kfree(bfregi->sys_pages);
18691994 kfree(bfregi->count);
1870
- kfree(context);
1871
-
1872
- return 0;
18731995 }
18741996
18751997 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
....@@ -1879,7 +2001,18 @@
18792001
18802002 fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
18812003
1882
- return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
2004
+ return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
2005
+}
2006
+
2007
+static u64 uar_index2paddress(struct mlx5_ib_dev *dev,
2008
+ int uar_idx)
2009
+{
2010
+ unsigned int fw_uars_per_page;
2011
+
2012
+ fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
2013
+ MLX5_UARS_IN_PAGE : 1;
2014
+
2015
+ return (dev->mdev->bar_addr + (uar_idx / fw_uars_per_page) * PAGE_SIZE);
18832016 }
18842017
18852018 static int get_command(unsigned long offset)
....@@ -1903,94 +2036,9 @@
19032036 return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
19042037 }
19052038
1906
-static void mlx5_ib_vma_open(struct vm_area_struct *area)
1907
-{
1908
- /* vma_open is called when a new VMA is created on top of our VMA. This
1909
- * is done through either mremap flow or split_vma (usually due to
1910
- * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
1911
- * as this VMA is strongly hardware related. Therefore we set the
1912
- * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
1913
- * calling us again and trying to do incorrect actions. We assume that
1914
- * the original VMA size is exactly a single page, and therefore all
1915
- * "splitting" operation will not happen to it.
1916
- */
1917
- area->vm_ops = NULL;
1918
-}
1919
-
1920
-static void mlx5_ib_vma_close(struct vm_area_struct *area)
1921
-{
1922
- struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
1923
-
1924
- /* It's guaranteed that all VMAs opened on a FD are closed before the
1925
- * file itself is closed, therefore no sync is needed with the regular
1926
- * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
1927
- * However need a sync with accessing the vma as part of
1928
- * mlx5_ib_disassociate_ucontext.
1929
- * The close operation is usually called under mm->mmap_sem except when
1930
- * process is exiting.
1931
- * The exiting case is handled explicitly as part of
1932
- * mlx5_ib_disassociate_ucontext.
1933
- */
1934
- mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
1935
-
1936
- /* setting the vma context pointer to null in the mlx5_ib driver's
1937
- * private data, to protect a race condition in
1938
- * mlx5_ib_disassociate_ucontext().
1939
- */
1940
- mlx5_ib_vma_priv_data->vma = NULL;
1941
- mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
1942
- list_del(&mlx5_ib_vma_priv_data->list);
1943
- mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
1944
- kfree(mlx5_ib_vma_priv_data);
1945
-}
1946
-
1947
-static const struct vm_operations_struct mlx5_ib_vm_ops = {
1948
- .open = mlx5_ib_vma_open,
1949
- .close = mlx5_ib_vma_close
1950
-};
1951
-
1952
-static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
1953
- struct mlx5_ib_ucontext *ctx)
1954
-{
1955
- struct mlx5_ib_vma_private_data *vma_prv;
1956
- struct list_head *vma_head = &ctx->vma_private_list;
1957
-
1958
- vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
1959
- if (!vma_prv)
1960
- return -ENOMEM;
1961
-
1962
- vma_prv->vma = vma;
1963
- vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
1964
- vma->vm_private_data = vma_prv;
1965
- vma->vm_ops = &mlx5_ib_vm_ops;
1966
-
1967
- mutex_lock(&ctx->vma_private_list_mutex);
1968
- list_add(&vma_prv->list, vma_head);
1969
- mutex_unlock(&ctx->vma_private_list_mutex);
1970
-
1971
- return 0;
1972
-}
19732039
19742040 static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
19752041 {
1976
- struct vm_area_struct *vma;
1977
- struct mlx5_ib_vma_private_data *vma_private, *n;
1978
- struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1979
-
1980
- mutex_lock(&context->vma_private_list_mutex);
1981
- list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
1982
- list) {
1983
- vma = vma_private->vma;
1984
- zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
1985
- /* context going to be destroyed, should
1986
- * not access ops any more.
1987
- */
1988
- vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
1989
- vma->vm_ops = NULL;
1990
- list_del(&vma_private->list);
1991
- kfree(vma_private);
1992
- }
1993
- mutex_unlock(&context->vma_private_list_mutex);
19942042 }
19952043
19962044 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
....@@ -2013,29 +2061,52 @@
20132061 struct vm_area_struct *vma,
20142062 struct mlx5_ib_ucontext *context)
20152063 {
2016
- phys_addr_t pfn;
2017
- int err;
2018
-
2019
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
2064
+ if ((vma->vm_end - vma->vm_start != PAGE_SIZE) ||
2065
+ !(vma->vm_flags & VM_SHARED))
20202066 return -EINVAL;
20212067
20222068 if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1)
20232069 return -EOPNOTSUPP;
20242070
2025
- if (vma->vm_flags & VM_WRITE)
2071
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
20262072 return -EPERM;
20272073 vma->vm_flags &= ~VM_MAYWRITE;
20282074
2029
- if (!dev->mdev->clock_info_page)
2075
+ if (!dev->mdev->clock_info)
20302076 return -EOPNOTSUPP;
20312077
2032
- pfn = page_to_pfn(dev->mdev->clock_info_page);
2033
- err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
2034
- vma->vm_page_prot);
2035
- if (err)
2036
- return err;
2078
+ return vm_insert_page(vma, vma->vm_start,
2079
+ virt_to_page(dev->mdev->clock_info));
2080
+}
20372081
2038
- return mlx5_ib_set_vma_data(vma, context);
2082
+static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
2083
+{
2084
+ struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
2085
+ struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
2086
+ struct mlx5_var_table *var_table = &dev->var_table;
2087
+ struct mlx5_ib_dm *mdm;
2088
+
2089
+ switch (mentry->mmap_flag) {
2090
+ case MLX5_IB_MMAP_TYPE_MEMIC:
2091
+ mdm = container_of(mentry, struct mlx5_ib_dm, mentry);
2092
+ mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr,
2093
+ mdm->size);
2094
+ kfree(mdm);
2095
+ break;
2096
+ case MLX5_IB_MMAP_TYPE_VAR:
2097
+ mutex_lock(&var_table->bitmap_lock);
2098
+ clear_bit(mentry->page_idx, var_table->bitmap);
2099
+ mutex_unlock(&var_table->bitmap_lock);
2100
+ kfree(mentry);
2101
+ break;
2102
+ case MLX5_IB_MMAP_TYPE_UAR_WC:
2103
+ case MLX5_IB_MMAP_TYPE_UAR_NC:
2104
+ mlx5_cmd_free_uar(dev->mdev, mentry->page_idx);
2105
+ kfree(mentry);
2106
+ break;
2107
+ default:
2108
+ WARN_ON(true);
2109
+ }
20392110 }
20402111
20412112 static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
....@@ -2052,6 +2123,9 @@
20522123 int dyn_uar = (cmd == MLX5_IB_MMAP_ALLOC_WC);
20532124 int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
20542125 bfregi->num_static_sys_pages;
2126
+
2127
+ if (bfregi->lib_uar_dyn)
2128
+ return -EINVAL;
20552129
20562130 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
20572131 return -EINVAL;
....@@ -2070,14 +2144,6 @@
20702144 switch (cmd) {
20712145 case MLX5_IB_MMAP_WC_PAGE:
20722146 case MLX5_IB_MMAP_ALLOC_WC:
2073
-/* Some architectures don't support WC memory */
2074
-#if defined(CONFIG_X86)
2075
- if (!pat_enabled())
2076
- return -EPERM;
2077
-#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
2078
- return -EPERM;
2079
-#endif
2080
- /* fall through */
20812147 case MLX5_IB_MMAP_REGULAR_PAGE:
20822148 /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
20832149 prot = pgprot_writecombine(vma->vm_page_prot);
....@@ -2125,20 +2191,14 @@
21252191 pfn = uar_index2pfn(dev, uar_index);
21262192 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
21272193
2128
- vma->vm_page_prot = prot;
2129
- err = io_remap_pfn_range(vma, vma->vm_start, pfn,
2130
- PAGE_SIZE, vma->vm_page_prot);
2194
+ err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
2195
+ prot, NULL);
21312196 if (err) {
21322197 mlx5_ib_err(dev,
2133
- "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n",
2198
+ "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
21342199 err, mmap_cmd2str(cmd));
2135
- err = -EAGAIN;
21362200 goto err;
21372201 }
2138
-
2139
- err = mlx5_ib_set_vma_data(vma, context);
2140
- if (err)
2141
- goto err;
21422202
21432203 if (dyn_uar)
21442204 bfregi->sys_pages[idx] = uar_index;
....@@ -2156,32 +2216,68 @@
21562216 return err;
21572217 }
21582218
2159
-static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
2219
+static int add_dm_mmap_entry(struct ib_ucontext *context,
2220
+ struct mlx5_ib_dm *mdm,
2221
+ u64 address)
21602222 {
2161
- struct mlx5_ib_ucontext *mctx = to_mucontext(context);
2162
- struct mlx5_ib_dev *dev = to_mdev(context->device);
2163
- u16 page_idx = get_extended_index(vma->vm_pgoff);
2164
- size_t map_size = vma->vm_end - vma->vm_start;
2165
- u32 npages = map_size >> PAGE_SHIFT;
2166
- phys_addr_t pfn;
2167
- pgprot_t prot;
2223
+ mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC;
2224
+ mdm->mentry.address = address;
2225
+ return rdma_user_mmap_entry_insert_range(
2226
+ context, &mdm->mentry.rdma_entry,
2227
+ mdm->size,
2228
+ MLX5_IB_MMAP_DEVICE_MEM << 16,
2229
+ (MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
2230
+}
21682231
2169
- if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
2170
- page_idx + npages)
2232
+static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
2233
+{
2234
+ unsigned long idx;
2235
+ u8 command;
2236
+
2237
+ command = get_command(vma->vm_pgoff);
2238
+ idx = get_extended_index(vma->vm_pgoff);
2239
+
2240
+ return (command << 16 | idx);
2241
+}
2242
+
2243
+static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
2244
+ struct vm_area_struct *vma,
2245
+ struct ib_ucontext *ucontext)
2246
+{
2247
+ struct mlx5_user_mmap_entry *mentry;
2248
+ struct rdma_user_mmap_entry *entry;
2249
+ unsigned long pgoff;
2250
+ pgprot_t prot;
2251
+ phys_addr_t pfn;
2252
+ int ret;
2253
+
2254
+ pgoff = mlx5_vma_to_pgoff(vma);
2255
+ entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
2256
+ if (!entry)
21712257 return -EINVAL;
21722258
2173
- pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
2174
- MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
2175
- PAGE_SHIFT) +
2176
- page_idx;
2177
- prot = pgprot_writecombine(vma->vm_page_prot);
2178
- vma->vm_page_prot = prot;
2259
+ mentry = to_mmmap(entry);
2260
+ pfn = (mentry->address >> PAGE_SHIFT);
2261
+ if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR ||
2262
+ mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC)
2263
+ prot = pgprot_noncached(vma->vm_page_prot);
2264
+ else
2265
+ prot = pgprot_writecombine(vma->vm_page_prot);
2266
+ ret = rdma_user_mmap_io(ucontext, vma, pfn,
2267
+ entry->npages * PAGE_SIZE,
2268
+ prot,
2269
+ entry);
2270
+ rdma_user_mmap_entry_put(&mentry->rdma_entry);
2271
+ return ret;
2272
+}
21792273
2180
- if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
2181
- vma->vm_page_prot))
2182
- return -EAGAIN;
2274
+static u64 mlx5_entry_to_mmap_offset(struct mlx5_user_mmap_entry *entry)
2275
+{
2276
+ u64 cmd = (entry->rdma_entry.start_pgoff >> 16) & 0xFFFF;
2277
+ u64 index = entry->rdma_entry.start_pgoff & 0xFFFF;
21832278
2184
- return mlx5_ib_set_vma_data(vma, mctx);
2279
+ return (((index >> 8) << 16) | (cmd << MLX5_IB_MMAP_CMD_SHIFT) |
2280
+ (index & 0xFF)) << PAGE_SHIFT;
21852281 }
21862282
21872283 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
....@@ -2194,9 +2290,12 @@
21942290 command = get_command(vma->vm_pgoff);
21952291 switch (command) {
21962292 case MLX5_IB_MMAP_WC_PAGE:
2293
+ case MLX5_IB_MMAP_ALLOC_WC:
2294
+ if (!dev->wc_support)
2295
+ return -EPERM;
2296
+ fallthrough;
21972297 case MLX5_IB_MMAP_NC_PAGE:
21982298 case MLX5_IB_MMAP_REGULAR_PAGE:
2199
- case MLX5_IB_MMAP_ALLOC_WC:
22002299 return uar_mmap(dev, command, vma, context);
22012300
22022301 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
....@@ -2214,25 +2313,128 @@
22142313 if (PAGE_SIZE > 4096)
22152314 return -EOPNOTSUPP;
22162315
2217
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
22182316 pfn = (dev->mdev->iseg_base +
22192317 offsetof(struct mlx5_init_seg, internal_timer_h)) >>
22202318 PAGE_SHIFT;
2221
- if (io_remap_pfn_range(vma, vma->vm_start, pfn,
2222
- PAGE_SIZE, vma->vm_page_prot))
2223
- return -EAGAIN;
2224
- break;
2319
+ return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
2320
+ PAGE_SIZE,
2321
+ pgprot_noncached(vma->vm_page_prot),
2322
+ NULL);
22252323 case MLX5_IB_MMAP_CLOCK_INFO:
22262324 return mlx5_ib_mmap_clock_info_page(dev, vma, context);
22272325
2228
- case MLX5_IB_MMAP_DEVICE_MEM:
2229
- return dm_mmap(ibcontext, vma);
2230
-
22312326 default:
2232
- return -EINVAL;
2327
+ return mlx5_ib_mmap_offset(dev, vma, ibcontext);
22332328 }
22342329
22352330 return 0;
2331
+}
2332
+
2333
+static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
2334
+ u32 type)
2335
+{
2336
+ switch (type) {
2337
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
2338
+ if (!MLX5_CAP_DEV_MEM(dev->mdev, memic))
2339
+ return -EOPNOTSUPP;
2340
+ break;
2341
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
2342
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
2343
+ if (!capable(CAP_SYS_RAWIO) ||
2344
+ !capable(CAP_NET_RAW))
2345
+ return -EPERM;
2346
+
2347
+ if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
2348
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner) ||
2349
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2) ||
2350
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner_v2)))
2351
+ return -EOPNOTSUPP;
2352
+ break;
2353
+ }
2354
+
2355
+ return 0;
2356
+}
2357
+
2358
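+/* MEMIC path: round the request up to the MEMIC block size, allocate the
+ * device memory, expose it through an rdma mmap entry, and return the page
+ * index and intra-page start offset to userspace.
+ */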
+static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
2359
+ struct mlx5_ib_dm *dm,
2360
+ struct ib_dm_alloc_attr *attr,
2361
+ struct uverbs_attr_bundle *attrs)
2362
+{
2363
+ struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
2364
+ u64 start_offset;
2365
+ u16 page_idx;
2366
+ int err;
2367
+ u64 address;
2368
+
2369
+ dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
2370
+
2371
+ err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr,
2372
+ dm->size, attr->alignment);
2373
+ if (err)
2374
+ return err;
2375
+
2376
+ address = dm->dev_addr & PAGE_MASK;
2377
+ err = add_dm_mmap_entry(ctx, dm, address);
2378
+ if (err)
2379
+ goto err_dealloc;
2380
+
2381
+ page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
2382
+ err = uverbs_copy_to(attrs,
2383
+ MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
2384
+ &page_idx,
2385
+ sizeof(page_idx));
2386
+ if (err)
2387
+ goto err_copy;
2388
+
2389
+ start_offset = dm->dev_addr & ~PAGE_MASK;
2390
+ err = uverbs_copy_to(attrs,
2391
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
2392
+ &start_offset, sizeof(start_offset));
2393
+ if (err)
2394
+ goto err_copy;
2395
+
2396
+ return 0;
2397
+
2398
+err_copy:
2399
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
2400
+err_dealloc:
2401
+ mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
2402
+
2403
+ return err;
2404
+}
2405
+
2406
+static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
2407
+ struct mlx5_ib_dm *dm,
2408
+ struct ib_dm_alloc_attr *attr,
2409
+ struct uverbs_attr_bundle *attrs,
2410
+ int type)
2411
+{
2412
+ struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
2413
+ u64 act_size;
2414
+ int err;
2415
+
2416
+ /* Allocation size must be a multiple of the basic block size
2417
+ * and a power of 2.
2418
+ */
2419
+ act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
2420
+ act_size = roundup_pow_of_two(act_size);
2421
+
2422
+ dm->size = act_size;
2423
+ err = mlx5_dm_sw_icm_alloc(dev, type, act_size, attr->alignment,
2424
+ to_mucontext(ctx)->devx_uid, &dm->dev_addr,
2425
+ &dm->icm_dm.obj_id);
2426
+ if (err)
2427
+ return err;
2428
+
2429
+ err = uverbs_copy_to(attrs,
2430
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
2431
+ &dm->dev_addr, sizeof(dm->dev_addr));
2432
+ if (err)
2433
+ mlx5_dm_sw_icm_dealloc(dev, type, dm->size,
2434
+ to_mucontext(ctx)->devx_uid, dm->dev_addr,
2435
+ dm->icm_dm.obj_id);
2436
+
2437
+ return err;
22362438 }
22372439
22382440 struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
....@@ -2240,1793 +2442,132 @@
22402442 struct ib_dm_alloc_attr *attr,
22412443 struct uverbs_attr_bundle *attrs)
22422444 {
2243
- u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
2244
- struct mlx5_memic *memic = &to_mdev(ibdev)->memic;
2245
- phys_addr_t memic_addr;
22462445 struct mlx5_ib_dm *dm;
2247
- u64 start_offset;
2248
- u32 page_idx;
2446
+ enum mlx5_ib_uapi_dm_type type;
22492447 int err;
2448
+
2449
+ err = uverbs_get_const_default(&type, attrs,
2450
+ MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
2451
+ MLX5_IB_UAPI_DM_TYPE_MEMIC);
2452
+ if (err)
2453
+ return ERR_PTR(err);
2454
+
2455
+ mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
2456
+ type, attr->length, attr->alignment);
2457
+
2458
+ err = check_dm_type_support(to_mdev(ibdev), type);
2459
+ if (err)
2460
+ return ERR_PTR(err);
22502461
22512462 dm = kzalloc(sizeof(*dm), GFP_KERNEL);
22522463 if (!dm)
22532464 return ERR_PTR(-ENOMEM);
22542465
2255
- mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n",
2256
- attr->length, act_size, attr->alignment);
2466
+ dm->type = type;
22572467
2258
- err = mlx5_cmd_alloc_memic(memic, &memic_addr,
2259
- act_size, attr->alignment);
2468
+ switch (type) {
2469
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
2470
+ err = handle_alloc_dm_memic(context, dm,
2471
+ attr,
2472
+ attrs);
2473
+ break;
2474
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
2475
+ err = handle_alloc_dm_sw_icm(context, dm,
2476
+ attr, attrs,
2477
+ MLX5_SW_ICM_TYPE_STEERING);
2478
+ break;
2479
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
2480
+ err = handle_alloc_dm_sw_icm(context, dm,
2481
+ attr, attrs,
2482
+ MLX5_SW_ICM_TYPE_HEADER_MODIFY);
2483
+ break;
2484
+ default:
2485
+ err = -EOPNOTSUPP;
2486
+ }
2487
+
22602488 if (err)
22612489 goto err_free;
22622490
2263
- start_offset = memic_addr & ~PAGE_MASK;
2264
- page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
2265
- MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
2266
- PAGE_SHIFT;
2267
-
2268
- err = uverbs_copy_to(attrs,
2269
- MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
2270
- &start_offset, sizeof(start_offset));
2271
- if (err)
2272
- goto err_dealloc;
2273
-
2274
- err = uverbs_copy_to(attrs,
2275
- MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
2276
- &page_idx, sizeof(page_idx));
2277
- if (err)
2278
- goto err_dealloc;
2279
-
2280
- bitmap_set(to_mucontext(context)->dm_pages, page_idx,
2281
- DIV_ROUND_UP(act_size, PAGE_SIZE));
2282
-
2283
- dm->dev_addr = memic_addr;
2284
-
22852491 return &dm->ibdm;
22862492
2287
-err_dealloc:
2288
- mlx5_cmd_dealloc_memic(memic, memic_addr,
2289
- act_size);
22902493 err_free:
22912494 kfree(dm);
22922495 return ERR_PTR(err);
22932496 }
22942497
2295
-int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
2498
+int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
22962499 {
2297
- struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic;
2500
+ struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
2501
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
2502
+ struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
22982503 struct mlx5_ib_dm *dm = to_mdm(ibdm);
2299
- u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE);
2300
- u32 page_idx;
23012504 int ret;
23022505
2303
- ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size);
2304
- if (ret)
2305
- return ret;
2306
-
2307
- page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
2308
- MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
2309
- PAGE_SHIFT;
2310
- bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
2311
- page_idx,
2312
- DIV_ROUND_UP(act_size, PAGE_SIZE));
2506
+ switch (dm->type) {
2507
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
2508
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
2509
+ return 0;
2510
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
2511
+ ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
2512
+ dm->size, ctx->devx_uid, dm->dev_addr,
2513
+ dm->icm_dm.obj_id);
2514
+ if (ret)
2515
+ return ret;
2516
+ break;
2517
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
2518
+ ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY,
2519
+ dm->size, ctx->devx_uid, dm->dev_addr,
2520
+ dm->icm_dm.obj_id);
2521
+ if (ret)
2522
+ return ret;
2523
+ break;
2524
+ default:
2525
+ return -EOPNOTSUPP;
2526
+ }
23132527
23142528 kfree(dm);
23152529
23162530 return 0;
23172531 }
23182532
2319
-static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
2320
- struct ib_ucontext *context,
2321
- struct ib_udata *udata)
2533
+static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
23222534 {
2535
+ struct mlx5_ib_pd *pd = to_mpd(ibpd);
2536
+ struct ib_device *ibdev = ibpd->device;
23232537 struct mlx5_ib_alloc_pd_resp resp;
2324
- struct mlx5_ib_pd *pd;
23252538 int err;
2539
+ u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
2540
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
2541
+ u16 uid = 0;
2542
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
2543
+ udata, struct mlx5_ib_ucontext, ibucontext);
23262544
2327
- pd = kmalloc(sizeof(*pd), GFP_KERNEL);
2328
- if (!pd)
2329
- return ERR_PTR(-ENOMEM);
2545
+ uid = context ? context->devx_uid : 0;
2546
+ MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
2547
+ MLX5_SET(alloc_pd_in, in, uid, uid);
2548
+ err = mlx5_cmd_exec_inout(to_mdev(ibdev)->mdev, alloc_pd, in, out);
2549
+ if (err)
2550
+ return err;
23302551
2331
- err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
2332
- if (err) {
2333
- kfree(pd);
2334
- return ERR_PTR(err);
2335
- }
2336
-
2337
- if (context) {
2552
+ pd->pdn = MLX5_GET(alloc_pd_out, out, pd);
2553
+ pd->uid = uid;
2554
+ if (udata) {
23382555 resp.pdn = pd->pdn;
23392556 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
2340
- mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
2341
- kfree(pd);
2342
- return ERR_PTR(-EFAULT);
2557
+ mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
2558
+ return -EFAULT;
23432559 }
23442560 }
23452561
2346
- return &pd->ibpd;
2562
+ return 0;
23472563 }
23482564
2349
-static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
2565
+static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
23502566 {
23512567 struct mlx5_ib_dev *mdev = to_mdev(pd->device);
23522568 struct mlx5_ib_pd *mpd = to_mpd(pd);
23532569
2354
- mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
2355
- kfree(mpd);
2356
-
2357
- return 0;
2358
-}
2359
-
2360
-enum {
2361
- MATCH_CRITERIA_ENABLE_OUTER_BIT,
2362
- MATCH_CRITERIA_ENABLE_MISC_BIT,
2363
- MATCH_CRITERIA_ENABLE_INNER_BIT,
2364
- MATCH_CRITERIA_ENABLE_MISC2_BIT
2365
-};
2366
-
2367
-#define HEADER_IS_ZERO(match_criteria, headers) \
2368
- !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
2369
- 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
2370
-
2371
-static u8 get_match_criteria_enable(u32 *match_criteria)
2372
-{
2373
- u8 match_criteria_enable;
2374
-
2375
- match_criteria_enable =
2376
- (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
2377
- MATCH_CRITERIA_ENABLE_OUTER_BIT;
2378
- match_criteria_enable |=
2379
- (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
2380
- MATCH_CRITERIA_ENABLE_MISC_BIT;
2381
- match_criteria_enable |=
2382
- (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
2383
- MATCH_CRITERIA_ENABLE_INNER_BIT;
2384
- match_criteria_enable |=
2385
- (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
2386
- MATCH_CRITERIA_ENABLE_MISC2_BIT;
2387
-
2388
- return match_criteria_enable;
2389
-}
2390
-
2391
-static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
2392
-{
2393
- u8 entry_mask;
2394
- u8 entry_val;
2395
- int err = 0;
2396
-
2397
- if (!mask)
2398
- goto out;
2399
-
2400
- entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
2401
- ip_protocol);
2402
- entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
2403
- ip_protocol);
2404
- if (!entry_mask) {
2405
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
2406
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
2407
- goto out;
2408
- }
2409
- /* Don't override existing ip protocol */
2410
- if (mask != entry_mask || val != entry_val)
2411
- err = -EINVAL;
2412
-out:
2413
- return err;
2414
-}
2415
-
2416
-static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
2417
- bool inner)
2418
-{
2419
- if (inner) {
2420
- MLX5_SET(fte_match_set_misc,
2421
- misc_c, inner_ipv6_flow_label, mask);
2422
- MLX5_SET(fte_match_set_misc,
2423
- misc_v, inner_ipv6_flow_label, val);
2424
- } else {
2425
- MLX5_SET(fte_match_set_misc,
2426
- misc_c, outer_ipv6_flow_label, mask);
2427
- MLX5_SET(fte_match_set_misc,
2428
- misc_v, outer_ipv6_flow_label, val);
2429
- }
2430
-}
2431
-
2432
-static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
2433
-{
2434
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
2435
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
2436
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
2437
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
2438
-}
2439
-
2440
-static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
2441
-{
2442
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
2443
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
2444
- return -EOPNOTSUPP;
2445
-
2446
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
2447
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
2448
- return -EOPNOTSUPP;
2449
-
2450
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
2451
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
2452
- return -EOPNOTSUPP;
2453
-
2454
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
2455
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
2456
- return -EOPNOTSUPP;
2457
-
2458
- return 0;
2459
-}
2460
-
2461
-#define LAST_ETH_FIELD vlan_tag
2462
-#define LAST_IB_FIELD sl
2463
-#define LAST_IPV4_FIELD tos
2464
-#define LAST_IPV6_FIELD traffic_class
2465
-#define LAST_TCP_UDP_FIELD src_port
2466
-#define LAST_TUNNEL_FIELD tunnel_id
2467
-#define LAST_FLOW_TAG_FIELD tag_id
2468
-#define LAST_DROP_FIELD size
2469
-#define LAST_COUNTERS_FIELD counters
2470
-
2471
-/* Field is the last supported field */
2472
-#define FIELDS_NOT_SUPPORTED(filter, field)\
2473
- memchr_inv((void *)&filter.field +\
2474
- sizeof(filter.field), 0,\
2475
- sizeof(filter) -\
2476
- offsetof(typeof(filter), field) -\
2477
- sizeof(filter.field))
2478
-
2479
-static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
2480
- const struct ib_flow_attr *flow_attr,
2481
- struct mlx5_flow_act *action)
2482
-{
2483
- struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act);
2484
-
2485
- switch (maction->ib_action.type) {
2486
- case IB_FLOW_ACTION_ESP:
2487
- /* Currently only AES_GCM keymat is supported by the driver */
2488
- action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
2489
- action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ?
2490
- MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
2491
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
2492
- return 0;
2493
- default:
2494
- return -EOPNOTSUPP;
2495
- }
2496
-}
2497
-
2498
-static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
2499
- u32 *match_v, const union ib_flow_spec *ib_spec,
2500
- const struct ib_flow_attr *flow_attr,
2501
- struct mlx5_flow_act *action, u32 prev_type)
2502
-{
2503
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
2504
- misc_parameters);
2505
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
2506
- misc_parameters);
2507
- void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
2508
- misc_parameters_2);
2509
- void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
2510
- misc_parameters_2);
2511
- void *headers_c;
2512
- void *headers_v;
2513
- int match_ipv;
2514
- int ret;
2515
-
2516
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
2517
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
2518
- inner_headers);
2519
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
2520
- inner_headers);
2521
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2522
- ft_field_support.inner_ip_version);
2523
- } else {
2524
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
2525
- outer_headers);
2526
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
2527
- outer_headers);
2528
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2529
- ft_field_support.outer_ip_version);
2530
- }
2531
-
2532
- switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
2533
- case IB_FLOW_SPEC_ETH:
2534
- if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
2535
- return -EOPNOTSUPP;
2536
-
2537
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2538
- dmac_47_16),
2539
- ib_spec->eth.mask.dst_mac);
2540
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2541
- dmac_47_16),
2542
- ib_spec->eth.val.dst_mac);
2543
-
2544
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2545
- smac_47_16),
2546
- ib_spec->eth.mask.src_mac);
2547
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2548
- smac_47_16),
2549
- ib_spec->eth.val.src_mac);
2550
-
2551
- if (ib_spec->eth.mask.vlan_tag) {
2552
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2553
- cvlan_tag, 1);
2554
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2555
- cvlan_tag, 1);
2556
-
2557
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2558
- first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
2559
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2560
- first_vid, ntohs(ib_spec->eth.val.vlan_tag));
2561
-
2562
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2563
- first_cfi,
2564
- ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
2565
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2566
- first_cfi,
2567
- ntohs(ib_spec->eth.val.vlan_tag) >> 12);
2568
-
2569
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2570
- first_prio,
2571
- ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
2572
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2573
- first_prio,
2574
- ntohs(ib_spec->eth.val.vlan_tag) >> 13);
2575
- }
2576
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2577
- ethertype, ntohs(ib_spec->eth.mask.ether_type));
2578
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2579
- ethertype, ntohs(ib_spec->eth.val.ether_type));
2580
- break;
2581
- case IB_FLOW_SPEC_IPV4:
2582
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
2583
- return -EOPNOTSUPP;
2584
-
2585
- if (match_ipv) {
2586
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2587
- ip_version, 0xf);
2588
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2589
- ip_version, MLX5_FS_IPV4_VERSION);
2590
- } else {
2591
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2592
- ethertype, 0xffff);
2593
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2594
- ethertype, ETH_P_IP);
2595
- }
2596
-
2597
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2598
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
2599
- &ib_spec->ipv4.mask.src_ip,
2600
- sizeof(ib_spec->ipv4.mask.src_ip));
2601
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2602
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
2603
- &ib_spec->ipv4.val.src_ip,
2604
- sizeof(ib_spec->ipv4.val.src_ip));
2605
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2606
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2607
- &ib_spec->ipv4.mask.dst_ip,
2608
- sizeof(ib_spec->ipv4.mask.dst_ip));
2609
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2610
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2611
- &ib_spec->ipv4.val.dst_ip,
2612
- sizeof(ib_spec->ipv4.val.dst_ip));
2613
-
2614
- set_tos(headers_c, headers_v,
2615
- ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
2616
-
2617
- if (set_proto(headers_c, headers_v,
2618
- ib_spec->ipv4.mask.proto,
2619
- ib_spec->ipv4.val.proto))
2620
- return -EINVAL;
2621
- break;
2622
- case IB_FLOW_SPEC_IPV6:
2623
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
2624
- return -EOPNOTSUPP;
2625
-
2626
- if (match_ipv) {
2627
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2628
- ip_version, 0xf);
2629
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2630
- ip_version, MLX5_FS_IPV6_VERSION);
2631
- } else {
2632
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2633
- ethertype, 0xffff);
2634
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2635
- ethertype, ETH_P_IPV6);
2636
- }
2637
-
2638
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2639
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
2640
- &ib_spec->ipv6.mask.src_ip,
2641
- sizeof(ib_spec->ipv6.mask.src_ip));
2642
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2643
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
2644
- &ib_spec->ipv6.val.src_ip,
2645
- sizeof(ib_spec->ipv6.val.src_ip));
2646
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2647
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2648
- &ib_spec->ipv6.mask.dst_ip,
2649
- sizeof(ib_spec->ipv6.mask.dst_ip));
2650
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2651
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2652
- &ib_spec->ipv6.val.dst_ip,
2653
- sizeof(ib_spec->ipv6.val.dst_ip));
2654
-
2655
- set_tos(headers_c, headers_v,
2656
- ib_spec->ipv6.mask.traffic_class,
2657
- ib_spec->ipv6.val.traffic_class);
2658
-
2659
- if (set_proto(headers_c, headers_v,
2660
- ib_spec->ipv6.mask.next_hdr,
2661
- ib_spec->ipv6.val.next_hdr))
2662
- return -EINVAL;
2663
-
2664
- set_flow_label(misc_params_c, misc_params_v,
2665
- ntohl(ib_spec->ipv6.mask.flow_label),
2666
- ntohl(ib_spec->ipv6.val.flow_label),
2667
- ib_spec->type & IB_FLOW_SPEC_INNER);
2668
- break;
2669
- case IB_FLOW_SPEC_ESP:
2670
- if (ib_spec->esp.mask.seq)
2671
- return -EOPNOTSUPP;
2672
-
2673
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
2674
- ntohl(ib_spec->esp.mask.spi));
2675
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
2676
- ntohl(ib_spec->esp.val.spi));
2677
- break;
2678
- case IB_FLOW_SPEC_TCP:
2679
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
2680
- LAST_TCP_UDP_FIELD))
2681
- return -EOPNOTSUPP;
2682
-
2683
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
2684
- return -EINVAL;
2685
-
2686
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
2687
- ntohs(ib_spec->tcp_udp.mask.src_port));
2688
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
2689
- ntohs(ib_spec->tcp_udp.val.src_port));
2690
-
2691
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
2692
- ntohs(ib_spec->tcp_udp.mask.dst_port));
2693
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
2694
- ntohs(ib_spec->tcp_udp.val.dst_port));
2695
- break;
2696
- case IB_FLOW_SPEC_UDP:
2697
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
2698
- LAST_TCP_UDP_FIELD))
2699
- return -EOPNOTSUPP;
2700
-
2701
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
2702
- return -EINVAL;
2703
-
2704
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
2705
- ntohs(ib_spec->tcp_udp.mask.src_port));
2706
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
2707
- ntohs(ib_spec->tcp_udp.val.src_port));
2708
-
2709
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
2710
- ntohs(ib_spec->tcp_udp.mask.dst_port));
2711
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
2712
- ntohs(ib_spec->tcp_udp.val.dst_port));
2713
- break;
2714
- case IB_FLOW_SPEC_GRE:
2715
- if (ib_spec->gre.mask.c_ks_res0_ver)
2716
- return -EOPNOTSUPP;
2717
-
2718
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
2719
- return -EINVAL;
2720
-
2721
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2722
- 0xff);
2723
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2724
- IPPROTO_GRE);
2725
-
2726
- MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
2727
- ntohs(ib_spec->gre.mask.protocol));
2728
- MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
2729
- ntohs(ib_spec->gre.val.protocol));
2730
-
2731
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
2732
- gre_key_h),
2733
- &ib_spec->gre.mask.key,
2734
- sizeof(ib_spec->gre.mask.key));
2735
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
2736
- gre_key_h),
2737
- &ib_spec->gre.val.key,
2738
- sizeof(ib_spec->gre.val.key));
2739
- break;
2740
- case IB_FLOW_SPEC_MPLS:
2741
- switch (prev_type) {
2742
- case IB_FLOW_SPEC_UDP:
2743
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2744
- ft_field_support.outer_first_mpls_over_udp),
2745
- &ib_spec->mpls.mask.tag))
2746
- return -EOPNOTSUPP;
2747
-
2748
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
2749
- outer_first_mpls_over_udp),
2750
- &ib_spec->mpls.val.tag,
2751
- sizeof(ib_spec->mpls.val.tag));
2752
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
2753
- outer_first_mpls_over_udp),
2754
- &ib_spec->mpls.mask.tag,
2755
- sizeof(ib_spec->mpls.mask.tag));
2756
- break;
2757
- case IB_FLOW_SPEC_GRE:
2758
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2759
- ft_field_support.outer_first_mpls_over_gre),
2760
- &ib_spec->mpls.mask.tag))
2761
- return -EOPNOTSUPP;
2762
-
2763
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
2764
- outer_first_mpls_over_gre),
2765
- &ib_spec->mpls.val.tag,
2766
- sizeof(ib_spec->mpls.val.tag));
2767
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
2768
- outer_first_mpls_over_gre),
2769
- &ib_spec->mpls.mask.tag,
2770
- sizeof(ib_spec->mpls.mask.tag));
2771
- break;
2772
- default:
2773
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
2774
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2775
- ft_field_support.inner_first_mpls),
2776
- &ib_spec->mpls.mask.tag))
2777
- return -EOPNOTSUPP;
2778
-
2779
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
2780
- inner_first_mpls),
2781
- &ib_spec->mpls.val.tag,
2782
- sizeof(ib_spec->mpls.val.tag));
2783
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
2784
- inner_first_mpls),
2785
- &ib_spec->mpls.mask.tag,
2786
- sizeof(ib_spec->mpls.mask.tag));
2787
- } else {
2788
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2789
- ft_field_support.outer_first_mpls),
2790
- &ib_spec->mpls.mask.tag))
2791
- return -EOPNOTSUPP;
2792
-
2793
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
2794
- outer_first_mpls),
2795
- &ib_spec->mpls.val.tag,
2796
- sizeof(ib_spec->mpls.val.tag));
2797
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
2798
- outer_first_mpls),
2799
- &ib_spec->mpls.mask.tag,
2800
- sizeof(ib_spec->mpls.mask.tag));
2801
- }
2802
- }
2803
- break;
2804
- case IB_FLOW_SPEC_VXLAN_TUNNEL:
2805
- if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
2806
- LAST_TUNNEL_FIELD))
2807
- return -EOPNOTSUPP;
2808
-
2809
- MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
2810
- ntohl(ib_spec->tunnel.mask.tunnel_id));
2811
- MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
2812
- ntohl(ib_spec->tunnel.val.tunnel_id));
2813
- break;
2814
- case IB_FLOW_SPEC_ACTION_TAG:
2815
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
2816
- LAST_FLOW_TAG_FIELD))
2817
- return -EOPNOTSUPP;
2818
- if (ib_spec->flow_tag.tag_id >= BIT(24))
2819
- return -EINVAL;
2820
-
2821
- action->flow_tag = ib_spec->flow_tag.tag_id;
2822
- action->has_flow_tag = true;
2823
- break;
2824
- case IB_FLOW_SPEC_ACTION_DROP:
2825
- if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
2826
- LAST_DROP_FIELD))
2827
- return -EOPNOTSUPP;
2828
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
2829
- break;
2830
- case IB_FLOW_SPEC_ACTION_HANDLE:
2831
- ret = parse_flow_flow_action(ib_spec, flow_attr, action);
2832
- if (ret)
2833
- return ret;
2834
- break;
2835
- case IB_FLOW_SPEC_ACTION_COUNT:
2836
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
2837
- LAST_COUNTERS_FIELD))
2838
- return -EOPNOTSUPP;
2839
-
2840
- /* for now support only one counters spec per flow */
2841
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
2842
- return -EINVAL;
2843
-
2844
- action->counters = ib_spec->flow_count.counters;
2845
- action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
2846
- break;
2847
- default:
2848
- return -EINVAL;
2849
- }
2850
-
2851
- return 0;
2852
-}
2853
-
2854
-/* If a flow could catch both multicast and unicast packets,
2855
- * it won't fall into the multicast flow steering table and this rule
2856
- * could steal other multicast packets.
2857
- */
2858
-static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
2859
-{
2860
- union ib_flow_spec *flow_spec;
2861
-
2862
- if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
2863
- ib_attr->num_of_specs < 1)
2864
- return false;
2865
-
2866
- flow_spec = (union ib_flow_spec *)(ib_attr + 1);
2867
- if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
2868
- struct ib_flow_spec_ipv4 *ipv4_spec;
2869
-
2870
- ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
2871
- if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
2872
- return true;
2873
-
2874
- return false;
2875
- }
2876
-
2877
- if (flow_spec->type == IB_FLOW_SPEC_ETH) {
2878
- struct ib_flow_spec_eth *eth_spec;
2879
-
2880
- eth_spec = (struct ib_flow_spec_eth *)flow_spec;
2881
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
2882
- is_multicast_ether_addr(eth_spec->val.dst_mac);
2883
- }
2884
-
2885
- return false;
2886
-}
2887
-
2888
-enum valid_spec {
2889
- VALID_SPEC_INVALID,
2890
- VALID_SPEC_VALID,
2891
- VALID_SPEC_NA,
2892
-};
2893
-
2894
-static enum valid_spec
2895
-is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
2896
- const struct mlx5_flow_spec *spec,
2897
- const struct mlx5_flow_act *flow_act,
2898
- bool egress)
2899
-{
2900
- const u32 *match_c = spec->match_criteria;
2901
- bool is_crypto =
2902
- (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
2903
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
2904
- bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
2905
- bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
2906
-
2907
- /*
2908
- * Currently only crypto is supported in egress, when regular egress
2909
- * rules would be supported, always return VALID_SPEC_NA.
2910
- */
2911
- if (!is_crypto)
2912
- return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
2913
-
2914
- return is_crypto && is_ipsec &&
2915
- (!egress || (!is_drop && !flow_act->has_flow_tag)) ?
2916
- VALID_SPEC_VALID : VALID_SPEC_INVALID;
2917
-}
2918
-
2919
-static bool is_valid_spec(struct mlx5_core_dev *mdev,
2920
- const struct mlx5_flow_spec *spec,
2921
- const struct mlx5_flow_act *flow_act,
2922
- bool egress)
2923
-{
2924
- /* We curretly only support ipsec egress flow */
2925
- return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
2926
-}
2927
-
2928
-static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
2929
- const struct ib_flow_attr *flow_attr,
2930
- bool check_inner)
2931
-{
2932
- union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
2933
- int match_ipv = check_inner ?
2934
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2935
- ft_field_support.inner_ip_version) :
2936
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2937
- ft_field_support.outer_ip_version);
2938
- int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
2939
- bool ipv4_spec_valid, ipv6_spec_valid;
2940
- unsigned int ip_spec_type = 0;
2941
- bool has_ethertype = false;
2942
- unsigned int spec_index;
2943
- bool mask_valid = true;
2944
- u16 eth_type = 0;
2945
- bool type_valid;
2946
-
2947
- /* Validate that ethertype is correct */
2948
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
2949
- if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
2950
- ib_spec->eth.mask.ether_type) {
2951
- mask_valid = (ib_spec->eth.mask.ether_type ==
2952
- htons(0xffff));
2953
- has_ethertype = true;
2954
- eth_type = ntohs(ib_spec->eth.val.ether_type);
2955
- } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
2956
- (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
2957
- ip_spec_type = ib_spec->type;
2958
- }
2959
- ib_spec = (void *)ib_spec + ib_spec->size;
2960
- }
2961
-
2962
- type_valid = (!has_ethertype) || (!ip_spec_type);
2963
- if (!type_valid && mask_valid) {
2964
- ipv4_spec_valid = (eth_type == ETH_P_IP) &&
2965
- (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
2966
- ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
2967
- (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
2968
-
2969
- type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
2970
- (((eth_type == ETH_P_MPLS_UC) ||
2971
- (eth_type == ETH_P_MPLS_MC)) && match_ipv);
2972
- }
2973
-
2974
- return type_valid;
2975
-}
2976
-
2977
-static bool is_valid_attr(struct mlx5_core_dev *mdev,
2978
- const struct ib_flow_attr *flow_attr)
2979
-{
2980
- return is_valid_ethertype(mdev, flow_attr, false) &&
2981
- is_valid_ethertype(mdev, flow_attr, true);
2982
-}
2983
-
2984
-static void put_flow_table(struct mlx5_ib_dev *dev,
2985
- struct mlx5_ib_flow_prio *prio, bool ft_added)
2986
-{
2987
- prio->refcount -= !!ft_added;
2988
- if (!prio->refcount) {
2989
- mlx5_destroy_flow_table(prio->flow_table);
2990
- prio->flow_table = NULL;
2991
- }
2992
-}
2993
-
2994
-static void counters_clear_description(struct ib_counters *counters)
2995
-{
2996
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
2997
-
2998
- mutex_lock(&mcounters->mcntrs_mutex);
2999
- kfree(mcounters->counters_data);
3000
- mcounters->counters_data = NULL;
3001
- mcounters->cntrs_max_index = 0;
3002
- mutex_unlock(&mcounters->mcntrs_mutex);
3003
-}
3004
-
3005
-static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
3006
-{
3007
- struct mlx5_ib_flow_handler *handler = container_of(flow_id,
3008
- struct mlx5_ib_flow_handler,
3009
- ibflow);
3010
- struct mlx5_ib_flow_handler *iter, *tmp;
3011
- struct mlx5_ib_dev *dev = handler->dev;
3012
-
3013
- mutex_lock(&dev->flow_db->lock);
3014
-
3015
- list_for_each_entry_safe(iter, tmp, &handler->list, list) {
3016
- mlx5_del_flow_rules(iter->rule);
3017
- put_flow_table(dev, iter->prio, true);
3018
- list_del(&iter->list);
3019
- kfree(iter);
3020
- }
3021
-
3022
- mlx5_del_flow_rules(handler->rule);
3023
- put_flow_table(dev, handler->prio, true);
3024
- if (handler->ibcounters &&
3025
- atomic_read(&handler->ibcounters->usecnt) == 1)
3026
- counters_clear_description(handler->ibcounters);
3027
-
3028
- mutex_unlock(&dev->flow_db->lock);
3029
- if (handler->flow_matcher)
3030
- atomic_dec(&handler->flow_matcher->usecnt);
3031
- kfree(handler);
3032
-
3033
- return 0;
3034
-}
3035
-
3036
-static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
3037
-{
3038
- priority *= 2;
3039
- if (!dont_trap)
3040
- priority++;
3041
- return priority;
3042
-}
3043
-
3044
-enum flow_table_type {
3045
- MLX5_IB_FT_RX,
3046
- MLX5_IB_FT_TX
3047
-};
3048
-
3049
-#define MLX5_FS_MAX_TYPES 6
3050
-#define MLX5_FS_MAX_ENTRIES BIT(16)
3051
-
3052
-static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
3053
- struct mlx5_ib_flow_prio *prio,
3054
- int priority,
3055
- int num_entries, int num_groups)
3056
-{
3057
- struct mlx5_flow_table *ft;
3058
-
3059
- ft = mlx5_create_auto_grouped_flow_table(ns, priority,
3060
- num_entries,
3061
- num_groups,
3062
- 0, 0);
3063
- if (IS_ERR(ft))
3064
- return ERR_CAST(ft);
3065
-
3066
- prio->flow_table = ft;
3067
- prio->refcount = 0;
3068
- return prio;
3069
-}
3070
-
3071
-static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
3072
- struct ib_flow_attr *flow_attr,
3073
- enum flow_table_type ft_type)
3074
-{
3075
- bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
3076
- struct mlx5_flow_namespace *ns = NULL;
3077
- struct mlx5_ib_flow_prio *prio;
3078
- struct mlx5_flow_table *ft;
3079
- int max_table_size;
3080
- int num_entries;
3081
- int num_groups;
3082
- int priority;
3083
-
3084
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3085
- log_max_ft_size));
3086
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
3087
- if (ft_type == MLX5_IB_FT_TX)
3088
- priority = 0;
3089
- else if (flow_is_multicast_only(flow_attr) &&
3090
- !dont_trap)
3091
- priority = MLX5_IB_FLOW_MCAST_PRIO;
3092
- else
3093
- priority = ib_prio_to_core_prio(flow_attr->priority,
3094
- dont_trap);
3095
- ns = mlx5_get_flow_namespace(dev->mdev,
3096
- ft_type == MLX5_IB_FT_TX ?
3097
- MLX5_FLOW_NAMESPACE_EGRESS :
3098
- MLX5_FLOW_NAMESPACE_BYPASS);
3099
- num_entries = MLX5_FS_MAX_ENTRIES;
3100
- num_groups = MLX5_FS_MAX_TYPES;
3101
- prio = &dev->flow_db->prios[priority];
3102
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
3103
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
3104
- ns = mlx5_get_flow_namespace(dev->mdev,
3105
- MLX5_FLOW_NAMESPACE_LEFTOVERS);
3106
- build_leftovers_ft_param(&priority,
3107
- &num_entries,
3108
- &num_groups);
3109
- prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
3110
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
3111
- if (!MLX5_CAP_FLOWTABLE(dev->mdev,
3112
- allow_sniffer_and_nic_rx_shared_tir))
3113
- return ERR_PTR(-ENOTSUPP);
3114
-
3115
- ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
3116
- MLX5_FLOW_NAMESPACE_SNIFFER_RX :
3117
- MLX5_FLOW_NAMESPACE_SNIFFER_TX);
3118
-
3119
- prio = &dev->flow_db->sniffer[ft_type];
3120
- priority = 0;
3121
- num_entries = 1;
3122
- num_groups = 1;
3123
- }
3124
-
3125
- if (!ns)
3126
- return ERR_PTR(-ENOTSUPP);
3127
-
3128
- if (num_entries > max_table_size)
3129
- return ERR_PTR(-ENOMEM);
3130
-
3131
- ft = prio->flow_table;
3132
- if (!ft)
3133
- return _get_prio(ns, prio, priority, num_entries, num_groups);
3134
-
3135
- return prio;
3136
-}
3137
-
3138
-static void set_underlay_qp(struct mlx5_ib_dev *dev,
3139
- struct mlx5_flow_spec *spec,
3140
- u32 underlay_qpn)
3141
-{
3142
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
3143
- spec->match_criteria,
3144
- misc_parameters);
3145
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
3146
- misc_parameters);
3147
-
3148
- if (underlay_qpn &&
3149
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3150
- ft_field_support.bth_dst_qp)) {
3151
- MLX5_SET(fte_match_set_misc,
3152
- misc_params_v, bth_dst_qp, underlay_qpn);
3153
- MLX5_SET(fte_match_set_misc,
3154
- misc_params_c, bth_dst_qp, 0xffffff);
3155
- }
3156
-}
3157
-
3158
-static int read_flow_counters(struct ib_device *ibdev,
3159
- struct mlx5_read_counters_attr *read_attr)
3160
-{
3161
- struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
3162
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
3163
-
3164
- return mlx5_fc_query(dev->mdev, fc,
3165
- &read_attr->out[IB_COUNTER_PACKETS],
3166
- &read_attr->out[IB_COUNTER_BYTES]);
3167
-}
3168
-
3169
-/* flow counters currently expose two counters packets and bytes */
3170
-#define FLOW_COUNTERS_NUM 2
3171
-static int counters_set_description(struct ib_counters *counters,
3172
- enum mlx5_ib_counters_type counters_type,
3173
- struct mlx5_ib_flow_counters_desc *desc_data,
3174
- u32 ncounters)
3175
-{
3176
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
3177
- u32 cntrs_max_index = 0;
3178
- int i;
3179
-
3180
- if (counters_type != MLX5_IB_COUNTERS_FLOW)
3181
- return -EINVAL;
3182
-
3183
- /* init the fields for the object */
3184
- mcounters->type = counters_type;
3185
- mcounters->read_counters = read_flow_counters;
3186
- mcounters->counters_num = FLOW_COUNTERS_NUM;
3187
- mcounters->ncounters = ncounters;
3188
- /* each counter entry have both description and index pair */
3189
- for (i = 0; i < ncounters; i++) {
3190
- if (desc_data[i].description > IB_COUNTER_BYTES)
3191
- return -EINVAL;
3192
-
3193
- if (cntrs_max_index <= desc_data[i].index)
3194
- cntrs_max_index = desc_data[i].index + 1;
3195
- }
3196
-
3197
- mutex_lock(&mcounters->mcntrs_mutex);
3198
- mcounters->counters_data = desc_data;
3199
- mcounters->cntrs_max_index = cntrs_max_index;
3200
- mutex_unlock(&mcounters->mcntrs_mutex);
3201
-
3202
- return 0;
3203
-}
3204
-
3205
-#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
3206
-static int flow_counters_set_data(struct ib_counters *ibcounters,
3207
- struct mlx5_ib_create_flow *ucmd)
3208
-{
3209
- struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
3210
- struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
3211
- struct mlx5_ib_flow_counters_desc *desc_data = NULL;
3212
- bool hw_hndl = false;
3213
- int ret = 0;
3214
-
3215
- if (ucmd && ucmd->ncounters_data != 0) {
3216
- cntrs_data = ucmd->data;
3217
- if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
3218
- return -EINVAL;
3219
-
3220
- desc_data = kcalloc(cntrs_data->ncounters,
3221
- sizeof(*desc_data),
3222
- GFP_KERNEL);
3223
- if (!desc_data)
3224
- return -ENOMEM;
3225
-
3226
- if (copy_from_user(desc_data,
3227
- u64_to_user_ptr(cntrs_data->counters_data),
3228
- sizeof(*desc_data) * cntrs_data->ncounters)) {
3229
- ret = -EFAULT;
3230
- goto free;
3231
- }
3232
- }
3233
-
3234
- if (!mcounters->hw_cntrs_hndl) {
3235
- mcounters->hw_cntrs_hndl = mlx5_fc_create(
3236
- to_mdev(ibcounters->device)->mdev, false);
3237
- if (IS_ERR(mcounters->hw_cntrs_hndl)) {
3238
- ret = PTR_ERR(mcounters->hw_cntrs_hndl);
3239
- goto free;
3240
- }
3241
- hw_hndl = true;
3242
- }
3243
-
3244
- if (desc_data) {
3245
- /* counters already bound to at least one flow */
3246
- if (mcounters->cntrs_max_index) {
3247
- ret = -EINVAL;
3248
- goto free_hndl;
3249
- }
3250
-
3251
- ret = counters_set_description(ibcounters,
3252
- MLX5_IB_COUNTERS_FLOW,
3253
- desc_data,
3254
- cntrs_data->ncounters);
3255
- if (ret)
3256
- goto free_hndl;
3257
-
3258
- } else if (!mcounters->cntrs_max_index) {
3259
- /* counters not bound yet, must have udata passed */
3260
- ret = -EINVAL;
3261
- goto free_hndl;
3262
- }
3263
-
3264
- return 0;
3265
-
3266
-free_hndl:
3267
- if (hw_hndl) {
3268
- mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
3269
- mcounters->hw_cntrs_hndl);
3270
- mcounters->hw_cntrs_hndl = NULL;
3271
- }
3272
-free:
3273
- kfree(desc_data);
3274
- return ret;
3275
-}
3276
-
3277
-static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
3278
- struct mlx5_ib_flow_prio *ft_prio,
3279
- const struct ib_flow_attr *flow_attr,
3280
- struct mlx5_flow_destination *dst,
3281
- u32 underlay_qpn,
3282
- struct mlx5_ib_create_flow *ucmd)
3283
-{
3284
- struct mlx5_flow_table *ft = ft_prio->flow_table;
3285
- struct mlx5_ib_flow_handler *handler;
3286
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
3287
- struct mlx5_flow_spec *spec;
3288
- struct mlx5_flow_destination dest_arr[2] = {};
3289
- struct mlx5_flow_destination *rule_dst = dest_arr;
3290
- const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
3291
- unsigned int spec_index;
3292
- u32 prev_type = 0;
3293
- int err = 0;
3294
- int dest_num = 0;
3295
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
3296
-
3297
- if (!is_valid_attr(dev->mdev, flow_attr))
3298
- return ERR_PTR(-EINVAL);
3299
-
3300
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
3301
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
3302
- if (!handler || !spec) {
3303
- err = -ENOMEM;
3304
- goto free;
3305
- }
3306
-
3307
- INIT_LIST_HEAD(&handler->list);
3308
-
3309
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
3310
- err = parse_flow_attr(dev->mdev, spec->match_criteria,
3311
- spec->match_value,
3312
- ib_flow, flow_attr, &flow_act,
3313
- prev_type);
3314
- if (err < 0)
3315
- goto free;
3316
-
3317
- prev_type = ((union ib_flow_spec *)ib_flow)->type;
3318
- ib_flow += ((union ib_flow_spec *)ib_flow)->size;
3319
- }
3320
-
3321
- if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
3322
- memcpy(&dest_arr[0], dst, sizeof(*dst));
3323
- dest_num++;
3324
- }
3325
-
3326
- if (!flow_is_multicast_only(flow_attr))
3327
- set_underlay_qp(dev, spec, underlay_qpn);
3328
-
3329
- if (dev->rep) {
3330
- void *misc;
3331
-
3332
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
3333
- misc_parameters);
3334
- MLX5_SET(fte_match_set_misc, misc, source_port,
3335
- dev->rep->vport);
3336
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
3337
- misc_parameters);
3338
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
3339
- }
3340
-
3341
- spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
3342
-
3343
- if (is_egress &&
3344
- !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
3345
- err = -EINVAL;
3346
- goto free;
3347
- }
3348
-
3349
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
3350
- err = flow_counters_set_data(flow_act.counters, ucmd);
3351
- if (err)
3352
- goto free;
3353
-
3354
- handler->ibcounters = flow_act.counters;
3355
- dest_arr[dest_num].type =
3356
- MLX5_FLOW_DESTINATION_TYPE_COUNTER;
3357
- dest_arr[dest_num].counter =
3358
- to_mcounters(flow_act.counters)->hw_cntrs_hndl;
3359
- dest_num++;
3360
- }
3361
-
3362
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
3363
- if (!dest_num)
3364
- rule_dst = NULL;
3365
- } else {
3366
- if (is_egress)
3367
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
3368
- else
3369
- flow_act.action |=
3370
- dest_num ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
3371
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
3372
- }
3373
-
3374
- if (flow_act.has_flow_tag &&
3375
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
3376
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
3377
- mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
3378
- flow_act.flow_tag, flow_attr->type);
3379
- err = -EINVAL;
3380
- goto free;
3381
- }
3382
- handler->rule = mlx5_add_flow_rules(ft, spec,
3383
- &flow_act,
3384
- rule_dst, dest_num);
3385
-
3386
- if (IS_ERR(handler->rule)) {
3387
- err = PTR_ERR(handler->rule);
3388
- goto free;
3389
- }
3390
-
3391
- ft_prio->refcount++;
3392
- handler->prio = ft_prio;
3393
- handler->dev = dev;
3394
-
3395
- ft_prio->flow_table = ft;
3396
-free:
3397
- if (err && handler) {
3398
- if (handler->ibcounters &&
3399
- atomic_read(&handler->ibcounters->usecnt) == 1)
3400
- counters_clear_description(handler->ibcounters);
3401
- kfree(handler);
3402
- }
3403
- kvfree(spec);
3404
- return err ? ERR_PTR(err) : handler;
3405
-}
3406
-
3407
-static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
3408
- struct mlx5_ib_flow_prio *ft_prio,
3409
- const struct ib_flow_attr *flow_attr,
3410
- struct mlx5_flow_destination *dst)
3411
-{
3412
- return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
3413
-}
3414
-
3415
-static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
3416
- struct mlx5_ib_flow_prio *ft_prio,
3417
- struct ib_flow_attr *flow_attr,
3418
- struct mlx5_flow_destination *dst)
3419
-{
3420
- struct mlx5_ib_flow_handler *handler_dst = NULL;
3421
- struct mlx5_ib_flow_handler *handler = NULL;
3422
-
3423
- handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
3424
- if (!IS_ERR(handler)) {
3425
- handler_dst = create_flow_rule(dev, ft_prio,
3426
- flow_attr, dst);
3427
- if (IS_ERR(handler_dst)) {
3428
- mlx5_del_flow_rules(handler->rule);
3429
- ft_prio->refcount--;
3430
- kfree(handler);
3431
- handler = handler_dst;
3432
- } else {
3433
- list_add(&handler_dst->list, &handler->list);
3434
- }
3435
- }
3436
-
3437
- return handler;
3438
-}
3439
-enum {
3440
- LEFTOVERS_MC,
3441
- LEFTOVERS_UC,
3442
-};
3443
-
3444
-static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
3445
- struct mlx5_ib_flow_prio *ft_prio,
3446
- struct ib_flow_attr *flow_attr,
3447
- struct mlx5_flow_destination *dst)
3448
-{
3449
- struct mlx5_ib_flow_handler *handler_ucast = NULL;
3450
- struct mlx5_ib_flow_handler *handler = NULL;
3451
-
3452
- static struct {
3453
- struct ib_flow_attr flow_attr;
3454
- struct ib_flow_spec_eth eth_flow;
3455
- } leftovers_specs[] = {
3456
- [LEFTOVERS_MC] = {
3457
- .flow_attr = {
3458
- .num_of_specs = 1,
3459
- .size = sizeof(leftovers_specs[0])
3460
- },
3461
- .eth_flow = {
3462
- .type = IB_FLOW_SPEC_ETH,
3463
- .size = sizeof(struct ib_flow_spec_eth),
3464
- .mask = {.dst_mac = {0x1} },
3465
- .val = {.dst_mac = {0x1} }
3466
- }
3467
- },
3468
- [LEFTOVERS_UC] = {
3469
- .flow_attr = {
3470
- .num_of_specs = 1,
3471
- .size = sizeof(leftovers_specs[0])
3472
- },
3473
- .eth_flow = {
3474
- .type = IB_FLOW_SPEC_ETH,
3475
- .size = sizeof(struct ib_flow_spec_eth),
3476
- .mask = {.dst_mac = {0x1} },
3477
- .val = {.dst_mac = {} }
3478
- }
3479
- }
3480
- };
3481
-
3482
- handler = create_flow_rule(dev, ft_prio,
3483
- &leftovers_specs[LEFTOVERS_MC].flow_attr,
3484
- dst);
3485
- if (!IS_ERR(handler) &&
3486
- flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
3487
- handler_ucast = create_flow_rule(dev, ft_prio,
3488
- &leftovers_specs[LEFTOVERS_UC].flow_attr,
3489
- dst);
3490
- if (IS_ERR(handler_ucast)) {
3491
- mlx5_del_flow_rules(handler->rule);
3492
- ft_prio->refcount--;
3493
- kfree(handler);
3494
- handler = handler_ucast;
3495
- } else {
3496
- list_add(&handler_ucast->list, &handler->list);
3497
- }
3498
- }
3499
-
3500
- return handler;
3501
-}
3502
-
3503
-static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
3504
- struct mlx5_ib_flow_prio *ft_rx,
3505
- struct mlx5_ib_flow_prio *ft_tx,
3506
- struct mlx5_flow_destination *dst)
3507
-{
3508
- struct mlx5_ib_flow_handler *handler_rx;
3509
- struct mlx5_ib_flow_handler *handler_tx;
3510
- int err;
3511
- static const struct ib_flow_attr flow_attr = {
3512
- .num_of_specs = 0,
3513
- .size = sizeof(flow_attr)
3514
- };
3515
-
3516
- handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
3517
- if (IS_ERR(handler_rx)) {
3518
- err = PTR_ERR(handler_rx);
3519
- goto err;
3520
- }
3521
-
3522
- handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
3523
- if (IS_ERR(handler_tx)) {
3524
- err = PTR_ERR(handler_tx);
3525
- goto err_tx;
3526
- }
3527
-
3528
- list_add(&handler_tx->list, &handler_rx->list);
3529
-
3530
- return handler_rx;
3531
-
3532
-err_tx:
3533
- mlx5_del_flow_rules(handler_rx->rule);
3534
- ft_rx->refcount--;
3535
- kfree(handler_rx);
3536
-err:
3537
- return ERR_PTR(err);
3538
-}
3539
-
3540
-static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
3541
- struct ib_flow_attr *flow_attr,
3542
- int domain,
3543
- struct ib_udata *udata)
3544
-{
3545
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
3546
- struct mlx5_ib_qp *mqp = to_mqp(qp);
3547
- struct mlx5_ib_flow_handler *handler = NULL;
3548
- struct mlx5_flow_destination *dst = NULL;
3549
- struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
3550
- struct mlx5_ib_flow_prio *ft_prio;
3551
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
3552
- struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
3553
- size_t min_ucmd_sz, required_ucmd_sz;
3554
- int err;
3555
- int underlay_qpn;
3556
-
3557
- if (udata && udata->inlen) {
3558
- min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
3559
- sizeof(ucmd_hdr.reserved);
3560
- if (udata->inlen < min_ucmd_sz)
3561
- return ERR_PTR(-EOPNOTSUPP);
3562
-
3563
- err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
3564
- if (err)
3565
- return ERR_PTR(err);
3566
-
3567
- /* currently supports only one counters data */
3568
- if (ucmd_hdr.ncounters_data > 1)
3569
- return ERR_PTR(-EINVAL);
3570
-
3571
- required_ucmd_sz = min_ucmd_sz +
3572
- sizeof(struct mlx5_ib_flow_counters_data) *
3573
- ucmd_hdr.ncounters_data;
3574
- if (udata->inlen > required_ucmd_sz &&
3575
- !ib_is_udata_cleared(udata, required_ucmd_sz,
3576
- udata->inlen - required_ucmd_sz))
3577
- return ERR_PTR(-EOPNOTSUPP);
3578
-
3579
- ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
3580
- if (!ucmd)
3581
- return ERR_PTR(-ENOMEM);
3582
-
3583
- err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
3584
- if (err)
3585
- goto free_ucmd;
3586
- }
3587
-
3588
- if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
3589
- err = -ENOMEM;
3590
- goto free_ucmd;
3591
- }
3592
-
3593
- if (domain != IB_FLOW_DOMAIN_USER ||
3594
- flow_attr->port > dev->num_ports ||
3595
- (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
3596
- IB_FLOW_ATTR_FLAGS_EGRESS))) {
3597
- err = -EINVAL;
3598
- goto free_ucmd;
3599
- }
3600
-
3601
- if (is_egress &&
3602
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
3603
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
3604
- err = -EINVAL;
3605
- goto free_ucmd;
3606
- }
3607
-
3608
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
3609
- if (!dst) {
3610
- err = -ENOMEM;
3611
- goto free_ucmd;
3612
- }
3613
-
3614
- mutex_lock(&dev->flow_db->lock);
3615
-
3616
- ft_prio = get_flow_table(dev, flow_attr,
3617
- is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
3618
- if (IS_ERR(ft_prio)) {
3619
- err = PTR_ERR(ft_prio);
3620
- goto unlock;
3621
- }
3622
- if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
3623
- ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
3624
- if (IS_ERR(ft_prio_tx)) {
3625
- err = PTR_ERR(ft_prio_tx);
3626
- ft_prio_tx = NULL;
3627
- goto destroy_ft;
3628
- }
3629
- }
3630
-
3631
- if (is_egress) {
3632
- dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
3633
- } else {
3634
- dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
3635
- if (mqp->flags & MLX5_IB_QP_RSS)
3636
- dst->tir_num = mqp->rss_qp.tirn;
3637
- else
3638
- dst->tir_num = mqp->raw_packet_qp.rq.tirn;
3639
- }
3640
-
3641
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
3642
- if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
3643
- handler = create_dont_trap_rule(dev, ft_prio,
3644
- flow_attr, dst);
3645
- } else {
3646
- underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
3647
- mqp->underlay_qpn : 0;
3648
- handler = _create_flow_rule(dev, ft_prio, flow_attr,
3649
- dst, underlay_qpn, ucmd);
3650
- }
3651
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
3652
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
3653
- handler = create_leftovers_rule(dev, ft_prio, flow_attr,
3654
- dst);
3655
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
3656
- handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
3657
- } else {
3658
- err = -EINVAL;
3659
- goto destroy_ft;
3660
- }
3661
-
3662
- if (IS_ERR(handler)) {
3663
- err = PTR_ERR(handler);
3664
- handler = NULL;
3665
- goto destroy_ft;
3666
- }
3667
-
3668
- mutex_unlock(&dev->flow_db->lock);
3669
- kfree(dst);
3670
- kfree(ucmd);
3671
-
3672
- return &handler->ibflow;
3673
-
3674
-destroy_ft:
3675
- put_flow_table(dev, ft_prio, false);
3676
- if (ft_prio_tx)
3677
- put_flow_table(dev, ft_prio_tx, false);
3678
-unlock:
3679
- mutex_unlock(&dev->flow_db->lock);
3680
- kfree(dst);
3681
-free_ucmd:
3682
- kfree(ucmd);
3683
- return ERR_PTR(err);
3684
-}
3685
-
3686
-static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev,
3687
- int priority, bool mcast)
3688
-{
3689
- int max_table_size;
3690
- struct mlx5_flow_namespace *ns = NULL;
3691
- struct mlx5_ib_flow_prio *prio;
3692
-
3693
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3694
- log_max_ft_size));
3695
- if (max_table_size < MLX5_FS_MAX_ENTRIES)
3696
- return ERR_PTR(-ENOMEM);
3697
-
3698
- if (mcast)
3699
- priority = MLX5_IB_FLOW_MCAST_PRIO;
3700
- else
3701
- priority = ib_prio_to_core_prio(priority, false);
3702
-
3703
- ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS);
3704
- if (!ns)
3705
- return ERR_PTR(-ENOTSUPP);
3706
-
3707
- prio = &dev->flow_db->prios[priority];
3708
-
3709
- if (prio->flow_table)
3710
- return prio;
3711
-
3712
- return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES,
3713
- MLX5_FS_MAX_TYPES);
3714
-}
3715
-
3716
-static struct mlx5_ib_flow_handler *
3717
-_create_raw_flow_rule(struct mlx5_ib_dev *dev,
3718
- struct mlx5_ib_flow_prio *ft_prio,
3719
- struct mlx5_flow_destination *dst,
3720
- struct mlx5_ib_flow_matcher *fs_matcher,
3721
- void *cmd_in, int inlen)
3722
-{
3723
- struct mlx5_ib_flow_handler *handler;
3724
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
3725
- struct mlx5_flow_spec *spec;
3726
- struct mlx5_flow_table *ft = ft_prio->flow_table;
3727
- int err = 0;
3728
-
3729
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
3730
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
3731
- if (!handler || !spec) {
3732
- err = -ENOMEM;
3733
- goto free;
3734
- }
3735
-
3736
- INIT_LIST_HEAD(&handler->list);
3737
-
3738
- memcpy(spec->match_value, cmd_in, inlen);
3739
- memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
3740
- fs_matcher->mask_len);
3741
- spec->match_criteria_enable = fs_matcher->match_criteria_enable;
3742
-
3743
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3744
- handler->rule = mlx5_add_flow_rules(ft, spec,
3745
- &flow_act, dst, 1);
3746
-
3747
- if (IS_ERR(handler->rule)) {
3748
- err = PTR_ERR(handler->rule);
3749
- goto free;
3750
- }
3751
-
3752
- ft_prio->refcount++;
3753
- handler->prio = ft_prio;
3754
- handler->dev = dev;
3755
- ft_prio->flow_table = ft;
3756
-
3757
-free:
3758
- if (err)
3759
- kfree(handler);
3760
- kvfree(spec);
3761
- return err ? ERR_PTR(err) : handler;
3762
-}
3763
-
3764
-static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
3765
- void *match_v)
3766
-{
3767
- void *match_c;
3768
- void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
3769
- void *dmac, *dmac_mask;
3770
- void *ipv4, *ipv4_mask;
3771
-
3772
- if (!(fs_matcher->match_criteria_enable &
3773
- (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
3774
- return false;
3775
-
3776
- match_c = fs_matcher->matcher_mask.match_params;
3777
- match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
3778
- outer_headers);
3779
- match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
3780
- outer_headers);
3781
-
3782
- dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
3783
- dmac_47_16);
3784
- dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
3785
- dmac_47_16);
3786
-
3787
- if (is_multicast_ether_addr(dmac) &&
3788
- is_multicast_ether_addr(dmac_mask))
3789
- return true;
3790
-
3791
- ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
3792
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
3793
-
3794
- ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
3795
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
3796
-
3797
- if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
3798
- ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
3799
- return true;
3800
-
3801
- return false;
3802
-}
3803
-
3804
-struct mlx5_ib_flow_handler *
3805
-mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
3806
- struct mlx5_ib_flow_matcher *fs_matcher,
3807
- void *cmd_in, int inlen, int dest_id,
3808
- int dest_type)
3809
-{
3810
- struct mlx5_flow_destination *dst;
3811
- struct mlx5_ib_flow_prio *ft_prio;
3812
- int priority = fs_matcher->priority;
3813
- struct mlx5_ib_flow_handler *handler;
3814
- bool mcast;
3815
- int err;
3816
-
3817
- if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
3818
- return ERR_PTR(-EOPNOTSUPP);
3819
-
3820
- if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
3821
- return ERR_PTR(-ENOMEM);
3822
-
3823
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
3824
- if (!dst)
3825
- return ERR_PTR(-ENOMEM);
3826
-
3827
- mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
3828
- mutex_lock(&dev->flow_db->lock);
3829
-
3830
- ft_prio = _get_flow_table(dev, priority, mcast);
3831
- if (IS_ERR(ft_prio)) {
3832
- err = PTR_ERR(ft_prio);
3833
- goto unlock;
3834
- }
3835
-
3836
- if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
3837
- dst->type = dest_type;
3838
- dst->tir_num = dest_id;
3839
- } else {
3840
- dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
3841
- dst->ft_num = dest_id;
3842
- }
3843
-
3844
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in,
3845
- inlen);
3846
-
3847
- if (IS_ERR(handler)) {
3848
- err = PTR_ERR(handler);
3849
- goto destroy_ft;
3850
- }
3851
-
3852
- mutex_unlock(&dev->flow_db->lock);
3853
- atomic_inc(&fs_matcher->usecnt);
3854
- handler->flow_matcher = fs_matcher;
3855
-
3856
- kfree(dst);
3857
-
3858
- return handler;
3859
-
3860
-destroy_ft:
3861
- put_flow_table(dev, ft_prio, false);
3862
-unlock:
3863
- mutex_unlock(&dev->flow_db->lock);
3864
- kfree(dst);
3865
-
3866
- return ERR_PTR(err);
3867
-}
3868
-
3869
-static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
3870
-{
3871
- u32 flags = 0;
3872
-
3873
- if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
3874
- flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
3875
-
3876
- return flags;
3877
-}
3878
-
3879
-#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
3880
-static struct ib_flow_action *
3881
-mlx5_ib_create_flow_action_esp(struct ib_device *device,
3882
- const struct ib_flow_action_attrs_esp *attr,
3883
- struct uverbs_attr_bundle *attrs)
3884
-{
3885
- struct mlx5_ib_dev *mdev = to_mdev(device);
3886
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
3887
- struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
3888
- struct mlx5_ib_flow_action *action;
3889
- u64 action_flags;
3890
- u64 flags;
3891
- int err = 0;
3892
-
3893
- err = uverbs_get_flags64(
3894
- &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
3895
- ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
3896
- if (err)
3897
- return ERR_PTR(err);
3898
-
3899
- flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
3900
-
3901
- /* We current only support a subset of the standard features. Only a
3902
- * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
3903
- * (with overlap). Full offload mode isn't supported.
3904
- */
3905
- if (!attr->keymat || attr->replay || attr->encap ||
3906
- attr->spi || attr->seq || attr->tfc_pad ||
3907
- attr->hard_limit_pkts ||
3908
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
3909
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
3910
- return ERR_PTR(-EOPNOTSUPP);
3911
-
3912
- if (attr->keymat->protocol !=
3913
- IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
3914
- return ERR_PTR(-EOPNOTSUPP);
3915
-
3916
- aes_gcm = &attr->keymat->keymat.aes_gcm;
3917
-
3918
- if (aes_gcm->icv_len != 16 ||
3919
- aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
3920
- return ERR_PTR(-EOPNOTSUPP);
3921
-
3922
- action = kmalloc(sizeof(*action), GFP_KERNEL);
3923
- if (!action)
3924
- return ERR_PTR(-ENOMEM);
3925
-
3926
- action->esp_aes_gcm.ib_flags = attr->flags;
3927
- memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
3928
- sizeof(accel_attrs.keymat.aes_gcm.aes_key));
3929
- accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
3930
- memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
3931
- sizeof(accel_attrs.keymat.aes_gcm.salt));
3932
- memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
3933
- sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
3934
- accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
3935
- accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
3936
- accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
3937
-
3938
- accel_attrs.esn = attr->esn;
3939
- if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
3940
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
3941
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
3942
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
3943
-
3944
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
3945
- accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
3946
-
3947
- action->esp_aes_gcm.ctx =
3948
- mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
3949
- if (IS_ERR(action->esp_aes_gcm.ctx)) {
3950
- err = PTR_ERR(action->esp_aes_gcm.ctx);
3951
- goto err_parse;
3952
- }
3953
-
3954
- action->esp_aes_gcm.ib_flags = attr->flags;
3955
-
3956
- return &action->ib_action;
3957
-
3958
-err_parse:
3959
- kfree(action);
3960
- return ERR_PTR(err);
3961
-}
3962
-
3963
-static int
3964
-mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
3965
- const struct ib_flow_action_attrs_esp *attr,
3966
- struct uverbs_attr_bundle *attrs)
3967
-{
3968
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
3969
- struct mlx5_accel_esp_xfrm_attrs accel_attrs;
3970
- int err = 0;
3971
-
3972
- if (attr->keymat || attr->replay || attr->encap ||
3973
- attr->spi || attr->seq || attr->tfc_pad ||
3974
- attr->hard_limit_pkts ||
3975
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
3976
- IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
3977
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
3978
- return -EOPNOTSUPP;
3979
-
3980
- /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
3981
- * be modified.
3982
- */
3983
- if (!(maction->esp_aes_gcm.ib_flags &
3984
- IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
3985
- attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
3986
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
3987
- return -EINVAL;
3988
-
3989
- memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
3990
- sizeof(accel_attrs));
3991
-
3992
- accel_attrs.esn = attr->esn;
3993
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
3994
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
3995
- else
3996
- accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
3997
-
3998
- err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
3999
- &accel_attrs);
4000
- if (err)
4001
- return err;
4002
-
4003
- maction->esp_aes_gcm.ib_flags &=
4004
- ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
4005
- maction->esp_aes_gcm.ib_flags |=
4006
- attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
4007
-
4008
- return 0;
4009
-}
4010
-
4011
-static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
4012
-{
4013
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
4014
-
4015
- switch (action->type) {
4016
- case IB_FLOW_ACTION_ESP:
4017
- /*
4018
- * We only support aes_gcm by now, so we implicitly know this is
4019
- * the underline crypto.
4020
- */
4021
- mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
4022
- break;
4023
- default:
4024
- WARN_ON(true);
4025
- break;
4026
- }
4027
-
4028
- kfree(maction);
4029
- return 0;
2570
+ return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
40302571 }
40312572
40322573 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
....@@ -4034,13 +2575,17 @@
40342575 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
40352576 struct mlx5_ib_qp *mqp = to_mqp(ibqp);
40362577 int err;
2578
+ u16 uid;
40372579
4038
- if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
2580
+ uid = ibqp->pd ?
2581
+ to_mpd(ibqp->pd)->uid : 0;
2582
+
2583
+ if (mqp->flags & IB_QP_CREATE_SOURCE_QPN) {
40392584 mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
40402585 return -EOPNOTSUPP;
40412586 }
40422587
4043
- err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
2588
+ err = mlx5_cmd_attach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
40442589 if (err)
40452590 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
40462591 ibqp->qp_num, gid->raw);
....@@ -4052,8 +2597,11 @@
40522597 {
40532598 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
40542599 int err;
2600
+ u16 uid;
40552601
4056
- err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
2602
+ uid = ibqp->pd ?
2603
+ to_mpd(ibqp->pd)->uid : 0;
2604
+ err = mlx5_cmd_detach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
40572605 if (err)
40582606 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
40592607 ibqp->qp_num, gid->raw);
....@@ -4074,61 +2622,68 @@
40742622 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
40752623 }
40762624
4077
-static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
4078
- char *buf)
2625
+static ssize_t fw_pages_show(struct device *device,
2626
+ struct device_attribute *attr, char *buf)
40792627 {
40802628 struct mlx5_ib_dev *dev =
4081
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
2629
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
40822630
40832631 return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
40842632 }
2633
+static DEVICE_ATTR_RO(fw_pages);
40852634
4086
-static ssize_t show_reg_pages(struct device *device,
2635
+static ssize_t reg_pages_show(struct device *device,
40872636 struct device_attribute *attr, char *buf)
40882637 {
40892638 struct mlx5_ib_dev *dev =
4090
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
2639
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
40912640
40922641 return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
40932642 }
2643
+static DEVICE_ATTR_RO(reg_pages);
40942644
4095
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
4096
- char *buf)
2645
+static ssize_t hca_type_show(struct device *device,
2646
+ struct device_attribute *attr, char *buf)
40972647 {
40982648 struct mlx5_ib_dev *dev =
4099
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
2649
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
2650
+
41002651 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
41012652 }
2653
+static DEVICE_ATTR_RO(hca_type);
41022654
4103
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
4104
- char *buf)
2655
+static ssize_t hw_rev_show(struct device *device,
2656
+ struct device_attribute *attr, char *buf)
41052657 {
41062658 struct mlx5_ib_dev *dev =
4107
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
2659
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
2660
+
41082661 return sprintf(buf, "%x\n", dev->mdev->rev_id);
41092662 }
2663
+static DEVICE_ATTR_RO(hw_rev);
41102664
4111
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
4112
- char *buf)
2665
+static ssize_t board_id_show(struct device *device,
2666
+ struct device_attribute *attr, char *buf)
41132667 {
41142668 struct mlx5_ib_dev *dev =
4115
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
2669
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
2670
+
41162671 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
41172672 dev->mdev->board_id);
41182673 }
2674
+static DEVICE_ATTR_RO(board_id);
41192675
4120
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
4121
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
4122
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
4123
-static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
4124
-static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
2676
+static struct attribute *mlx5_class_attributes[] = {
2677
+ &dev_attr_hw_rev.attr,
2678
+ &dev_attr_hca_type.attr,
2679
+ &dev_attr_board_id.attr,
2680
+ &dev_attr_fw_pages.attr,
2681
+ &dev_attr_reg_pages.attr,
2682
+ NULL,
2683
+};
41252684
4126
-static struct device_attribute *mlx5_class_attributes[] = {
4127
- &dev_attr_hw_rev,
4128
- &dev_attr_hca_type,
4129
- &dev_attr_board_id,
4130
- &dev_attr_fw_pages,
4131
- &dev_attr_reg_pages,
2685
+static const struct attribute_group mlx5_attr_group = {
2686
+ .attrs = mlx5_class_attributes,
41322687 };
41332688
41342689 static void pkey_change_handler(struct work_struct *work)
....@@ -4137,9 +2692,7 @@
41372692 container_of(work, struct mlx5_ib_port_resources,
41382693 pkey_change_work);
41392694
4140
- mutex_lock(&ports->devr->mutex);
41412695 mlx5_ib_gsi_pkey_change(ports->gsi);
4142
- mutex_unlock(&ports->devr->mutex);
41432696 }
41442697
41452698 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
....@@ -4196,7 +2749,7 @@
41962749 * lock/unlock above locks. Now we need to arm all involved CQs.
41972750 */
41982751 list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
4199
- mcq->comp(mcq);
2752
+ mcq->comp(mcq, NULL);
42002753 }
42012754 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
42022755 }
....@@ -4211,14 +2764,74 @@
42112764 atomic_inc(&delay_drop->events_cnt);
42122765
42132766 mutex_lock(&delay_drop->lock);
4214
- err = mlx5_core_set_delay_drop(delay_drop->dev->mdev,
4215
- delay_drop->timeout);
2767
+ err = mlx5_core_set_delay_drop(delay_drop->dev, delay_drop->timeout);
42162768 if (err) {
42172769 mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
42182770 delay_drop->timeout);
42192771 delay_drop->activate = false;
42202772 }
42212773 mutex_unlock(&delay_drop->lock);
2774
+}
2775
+
2776
+static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
2777
+ struct ib_event *ibev)
2778
+{
2779
+ u8 port = (eqe->data.port.port >> 4) & 0xf;
2780
+
2781
+ switch (eqe->sub_type) {
2782
+ case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
2783
+ if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
2784
+ IB_LINK_LAYER_ETHERNET)
2785
+ schedule_work(&ibdev->delay_drop.delay_drop_work);
2786
+ break;
2787
+ default: /* do nothing */
2788
+ return;
2789
+ }
2790
+}
2791
+
2792
+static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
2793
+ struct ib_event *ibev)
2794
+{
2795
+ u8 port = (eqe->data.port.port >> 4) & 0xf;
2796
+
2797
+ ibev->element.port_num = port;
2798
+
2799
+ switch (eqe->sub_type) {
2800
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2801
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2802
+ case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
2803
+ /* In RoCE, port up/down events are handled in
2804
+ * mlx5_netdev_event().
2805
+ */
2806
+ if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
2807
+ IB_LINK_LAYER_ETHERNET)
2808
+ return -EINVAL;
2809
+
2810
+ ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ?
2811
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
2812
+ break;
2813
+
2814
+ case MLX5_PORT_CHANGE_SUBTYPE_LID:
2815
+ ibev->event = IB_EVENT_LID_CHANGE;
2816
+ break;
2817
+
2818
+ case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
2819
+ ibev->event = IB_EVENT_PKEY_CHANGE;
2820
+ schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
2821
+ break;
2822
+
2823
+ case MLX5_PORT_CHANGE_SUBTYPE_GUID:
2824
+ ibev->event = IB_EVENT_GID_CHANGE;
2825
+ break;
2826
+
2827
+ case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
2828
+ ibev->event = IB_EVENT_CLIENT_REREGISTER;
2829
+ break;
2830
+ default:
2831
+ return -EINVAL;
2832
+ }
2833
+
2834
+ return 0;
42222835 }
42232836
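A small worked example for the port decoding shared by the two handlers above; the EQE value used is an assumption for illustration.

/*
 * eqe->data.port.port carries the port number in its upper nibble: a value
 * of 0x20 decodes as (0x20 >> 4) & 0xf == 2, i.e. port 2, and
 * devr.ports[port - 1] then selects that port's resources.
 */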
42242837 static void mlx5_ib_handle_event(struct work_struct *_work)
....@@ -4228,65 +2841,37 @@
42282841 struct mlx5_ib_dev *ibdev;
42292842 struct ib_event ibev;
42302843 bool fatal = false;
4231
- u8 port = (u8)work->param;
42322844
4233
- if (mlx5_core_is_mp_slave(work->dev)) {
4234
- ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
2845
+ if (work->is_slave) {
2846
+ ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
42352847 if (!ibdev)
42362848 goto out;
42372849 } else {
4238
- ibdev = work->context;
2850
+ ibdev = work->dev;
42392851 }
42402852
42412853 switch (work->event) {
42422854 case MLX5_DEV_EVENT_SYS_ERROR:
42432855 ibev.event = IB_EVENT_DEVICE_FATAL;
42442856 mlx5_ib_handle_internal_error(ibdev);
2857
+ ibev.element.port_num = (u8)(unsigned long)work->param;
42452858 fatal = true;
42462859 break;
4247
-
4248
- case MLX5_DEV_EVENT_PORT_UP:
4249
- case MLX5_DEV_EVENT_PORT_DOWN:
4250
- case MLX5_DEV_EVENT_PORT_INITIALIZED:
4251
- /* In RoCE, port up/down events are handled in
4252
- * mlx5_netdev_event().
4253
- */
4254
- if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
4255
- IB_LINK_LAYER_ETHERNET)
2860
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
2861
+ if (handle_port_change(ibdev, work->param, &ibev))
42562862 goto out;
4257
-
4258
- ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ?
4259
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
42602863 break;
4261
-
4262
- case MLX5_DEV_EVENT_LID_CHANGE:
4263
- ibev.event = IB_EVENT_LID_CHANGE;
4264
- break;
4265
-
4266
- case MLX5_DEV_EVENT_PKEY_CHANGE:
4267
- ibev.event = IB_EVENT_PKEY_CHANGE;
4268
- schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
4269
- break;
4270
-
4271
- case MLX5_DEV_EVENT_GUID_CHANGE:
4272
- ibev.event = IB_EVENT_GID_CHANGE;
4273
- break;
4274
-
4275
- case MLX5_DEV_EVENT_CLIENT_REREG:
4276
- ibev.event = IB_EVENT_CLIENT_REREGISTER;
4277
- break;
4278
- case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
4279
- schedule_work(&ibdev->delay_drop.delay_drop_work);
4280
- goto out;
2864
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
2865
+ handle_general_event(ibdev, work->param, &ibev);
2866
+ fallthrough;
42812867 default:
42822868 goto out;
42832869 }
42842870
4285
- ibev.device = &ibdev->ib_dev;
4286
- ibev.element.port_num = port;
2871
+ ibev.device = &ibdev->ib_dev;
42872872
4288
- if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
4289
- mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
2873
+ if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
2874
+ mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num);
42902875 goto out;
42912876 }
42922877
....@@ -4299,22 +2884,43 @@
42992884 kfree(work);
43002885 }
43012886
4302
-static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
4303
- enum mlx5_dev_event event, unsigned long param)
2887
+static int mlx5_ib_event(struct notifier_block *nb,
2888
+ unsigned long event, void *param)
43042889 {
43052890 struct mlx5_ib_event_work *work;
43062891
43072892 work = kmalloc(sizeof(*work), GFP_ATOMIC);
43082893 if (!work)
4309
- return;
2894
+ return NOTIFY_DONE;
43102895
43112896 INIT_WORK(&work->work, mlx5_ib_handle_event);
4312
- work->dev = dev;
2897
+ work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events);
2898
+ work->is_slave = false;
43132899 work->param = param;
4314
- work->context = context;
43152900 work->event = event;
43162901
43172902 queue_work(mlx5_ib_event_wq, &work->work);
2903
+
2904
+ return NOTIFY_OK;
2905
+}
2906
+
2907
+static int mlx5_ib_event_slave_port(struct notifier_block *nb,
2908
+ unsigned long event, void *param)
2909
+{
2910
+ struct mlx5_ib_event_work *work;
2911
+
2912
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
2913
+ if (!work)
2914
+ return NOTIFY_DONE;
2915
+
2916
+ INIT_WORK(&work->work, mlx5_ib_handle_event);
2917
+ work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events);
2918
+ work->is_slave = true;
2919
+ work->param = param;
2920
+ work->event = event;
2921
+ queue_work(mlx5_ib_event_wq, &work->work);
2922
+
2923
+ return NOTIFY_OK;
43182924 }
43192925
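For context, a minimal sketch of how a handler like the one above is wired up; the helper names are illustrative, while mlx5_notifier_register()/mlx5_notifier_unregister() are the same calls this patch uses for the slave-port notifier further down.

static void example_register_dev_events(struct mlx5_ib_dev *dev)
{
        /* Point the embedded notifier_block at the handler above ... */
        dev->mdev_events.notifier_call = mlx5_ib_event;
        /* ... and subscribe it to mlx5_core events for this device. */
        mlx5_notifier_register(dev->mdev, &dev->mdev_events);
}

static void example_unregister_dev_events(struct mlx5_ib_dev *dev)
{
        mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
}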
43202926 static int set_has_smi_cap(struct mlx5_ib_dev *dev)
....@@ -4323,7 +2929,7 @@
43232929 int err;
43242930 int port;
43252931
4326
- for (port = 1; port <= dev->num_ports; port++) {
2932
+ for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) {
43272933 dev->mdev->port_caps[port - 1].has_smi = false;
43282934 if (MLX5_CAP_GEN(dev->mdev, port_type) ==
43292935 MLX5_CAP_PORT_TYPE_IB) {
....@@ -4354,14 +2960,13 @@
43542960 mlx5_query_ext_port_caps(dev, port);
43552961 }
43562962
4357
-static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
2963
+static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
43582964 {
43592965 struct ib_device_attr *dprops = NULL;
43602966 struct ib_port_attr *pprops = NULL;
43612967 int err = -ENOMEM;
4362
- struct ib_udata uhw = {.inlen = 0, .outlen = 0};
43632968
4364
- pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
2969
+ pprops = kzalloc(sizeof(*pprops), GFP_KERNEL);
43652970 if (!pprops)
43662971 goto out;
43672972
....@@ -4369,17 +2974,12 @@
43692974 if (!dprops)
43702975 goto out;
43712976
4372
- err = set_has_smi_cap(dev);
4373
- if (err)
4374
- goto out;
4375
-
4376
- err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
2977
+ err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL);
43772978 if (err) {
43782979 mlx5_ib_warn(dev, "query_device failed %d\n", err);
43792980 goto out;
43802981 }
43812982
4382
- memset(pprops, 0, sizeof(*pprops));
43832983 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
43842984 if (err) {
43852985 mlx5_ib_warn(dev, "query_port %d failed %d\n",
....@@ -4401,7 +3001,1387 @@
44013001 return err;
44023002 }
44033003
4404
-static void destroy_umrc_res(struct mlx5_ib_dev *dev)
3004
+static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
3005
+{
3006
+ /* For representors use port 1, as this is the only native
3007
+ * port
3008
+ */
3009
+ if (dev->is_rep)
3010
+ return __get_port_caps(dev, 1);
3011
+ return __get_port_caps(dev, port);
3012
+}
3013
+
3014
+static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
3015
+{
3016
+ switch (umr_fence_cap) {
3017
+ case MLX5_CAP_UMR_FENCE_NONE:
3018
+ return MLX5_FENCE_MODE_NONE;
3019
+ case MLX5_CAP_UMR_FENCE_SMALL:
3020
+ return MLX5_FENCE_MODE_INITIATOR_SMALL;
3021
+ default:
3022
+ return MLX5_FENCE_MODE_STRONG_ORDERING;
3023
+ }
3024
+}
3025
+
3026
+static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
3027
+{
3028
+ struct mlx5_ib_resources *devr = &dev->devr;
3029
+ struct ib_srq_init_attr attr;
3030
+ struct ib_device *ibdev;
3031
+ struct ib_cq_init_attr cq_attr = {.cqe = 1};
3032
+ int port;
3033
+ int ret = 0;
3034
+
3035
+ ibdev = &dev->ib_dev;
3036
+
3037
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
3038
+ return -EOPNOTSUPP;
3039
+
3040
+ mutex_init(&devr->mutex);
3041
+
3042
+ devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
3043
+ if (!devr->p0)
3044
+ return -ENOMEM;
3045
+
3046
+ devr->p0->device = ibdev;
3047
+ devr->p0->uobject = NULL;
3048
+ atomic_set(&devr->p0->usecnt, 0);
3049
+
3050
+ ret = mlx5_ib_alloc_pd(devr->p0, NULL);
3051
+ if (ret)
3052
+ goto error0;
3053
+
3054
+ devr->c0 = rdma_zalloc_drv_obj(ibdev, ib_cq);
3055
+ if (!devr->c0) {
3056
+ ret = -ENOMEM;
3057
+ goto error1;
3058
+ }
3059
+
3060
+ devr->c0->device = &dev->ib_dev;
3061
+ atomic_set(&devr->c0->usecnt, 0);
3062
+
3063
+ ret = mlx5_ib_create_cq(devr->c0, &cq_attr, NULL);
3064
+ if (ret)
3065
+ goto err_create_cq;
3066
+
3067
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
3068
+ if (ret)
3069
+ goto error2;
3070
+
3071
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
3072
+ if (ret)
3073
+ goto error3;
3074
+
3075
+ memset(&attr, 0, sizeof(attr));
3076
+ attr.attr.max_sge = 1;
3077
+ attr.attr.max_wr = 1;
3078
+ attr.srq_type = IB_SRQT_XRC;
3079
+ attr.ext.cq = devr->c0;
3080
+
3081
+ devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
3082
+ if (!devr->s0) {
3083
+ ret = -ENOMEM;
3084
+ goto error4;
3085
+ }
3086
+
3087
+ devr->s0->device = &dev->ib_dev;
3088
+ devr->s0->pd = devr->p0;
3089
+ devr->s0->srq_type = IB_SRQT_XRC;
3090
+ devr->s0->ext.cq = devr->c0;
3091
+ ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
3092
+ if (ret)
3093
+ goto err_create;
3094
+
3095
+ atomic_inc(&devr->s0->ext.cq->usecnt);
3096
+ atomic_inc(&devr->p0->usecnt);
3097
+ atomic_set(&devr->s0->usecnt, 0);
3098
+
3099
+ memset(&attr, 0, sizeof(attr));
3100
+ attr.attr.max_sge = 1;
3101
+ attr.attr.max_wr = 1;
3102
+ attr.srq_type = IB_SRQT_BASIC;
3103
+ devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq);
3104
+ if (!devr->s1) {
3105
+ ret = -ENOMEM;
3106
+ goto error5;
3107
+ }
3108
+
3109
+ devr->s1->device = &dev->ib_dev;
3110
+ devr->s1->pd = devr->p0;
3111
+ devr->s1->srq_type = IB_SRQT_BASIC;
3112
+ devr->s1->ext.cq = devr->c0;
3113
+
3114
+ ret = mlx5_ib_create_srq(devr->s1, &attr, NULL);
3115
+ if (ret)
3116
+ goto error6;
3117
+
3118
+ atomic_inc(&devr->p0->usecnt);
3119
+ atomic_set(&devr->s1->usecnt, 0);
3120
+
3121
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
3122
+ INIT_WORK(&devr->ports[port].pkey_change_work,
3123
+ pkey_change_handler);
3124
+
3125
+ return 0;
3126
+
3127
+error6:
3128
+ kfree(devr->s1);
3129
+error5:
3130
+ mlx5_ib_destroy_srq(devr->s0, NULL);
3131
+err_create:
3132
+ kfree(devr->s0);
3133
+error4:
3134
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
3135
+error3:
3136
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
3137
+error2:
3138
+ mlx5_ib_destroy_cq(devr->c0, NULL);
3139
+err_create_cq:
3140
+ kfree(devr->c0);
3141
+error1:
3142
+ mlx5_ib_dealloc_pd(devr->p0, NULL);
3143
+error0:
3144
+ kfree(devr->p0);
3145
+ return ret;
3146
+}
3147
+
3148
+static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
3149
+{
3150
+ struct mlx5_ib_resources *devr = &dev->devr;
3151
+ int port;
3152
+
3153
+ mlx5_ib_destroy_srq(devr->s1, NULL);
3154
+ kfree(devr->s1);
3155
+ mlx5_ib_destroy_srq(devr->s0, NULL);
3156
+ kfree(devr->s0);
3157
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
3158
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
3159
+ mlx5_ib_destroy_cq(devr->c0, NULL);
3160
+ kfree(devr->c0);
3161
+ mlx5_ib_dealloc_pd(devr->p0, NULL);
3162
+ kfree(devr->p0);
3163
+
3164
+ /* Make sure no P_Key change work items are still executing */
3165
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
3166
+ cancel_work_sync(&devr->ports[port].pkey_change_work);
3167
+}
3168
+
3169
+static u32 get_core_cap_flags(struct ib_device *ibdev,
3170
+ struct mlx5_hca_vport_context *rep)
3171
+{
3172
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
3173
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
3174
+ u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
3175
+ u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
3176
+ bool raw_support = !mlx5_core_mp_enabled(dev->mdev);
3177
+ u32 ret = 0;
3178
+
3179
+ if (rep->grh_required)
3180
+ ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED;
3181
+
3182
+ if (ll == IB_LINK_LAYER_INFINIBAND)
3183
+ return ret | RDMA_CORE_PORT_IBA_IB;
3184
+
3185
+ if (raw_support)
3186
+ ret |= RDMA_CORE_PORT_RAW_PACKET;
3187
+
3188
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
3189
+ return ret;
3190
+
3191
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
3192
+ return ret;
3193
+
3194
+ if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
3195
+ ret |= RDMA_CORE_PORT_IBA_ROCE;
3196
+
3197
+ if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
3198
+ ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
3199
+
3200
+ return ret;
3201
+}
3202
+
3203
+static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
3204
+ struct ib_port_immutable *immutable)
3205
+{
3206
+ struct ib_port_attr attr;
3207
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
3208
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
3209
+ struct mlx5_hca_vport_context rep = {0};
3210
+ int err;
3211
+
3212
+ err = ib_query_port(ibdev, port_num, &attr);
3213
+ if (err)
3214
+ return err;
3215
+
3216
+ if (ll == IB_LINK_LAYER_INFINIBAND) {
3217
+ err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
3218
+ &rep);
3219
+ if (err)
3220
+ return err;
3221
+ }
3222
+
3223
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
3224
+ immutable->gid_tbl_len = attr.gid_tbl_len;
3225
+ immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
3226
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
3227
+
3228
+ return 0;
3229
+}
3230
+
3231
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
3232
+ struct ib_port_immutable *immutable)
3233
+{
3234
+ struct ib_port_attr attr;
3235
+ int err;
3236
+
3237
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
3238
+
3239
+ err = ib_query_port(ibdev, port_num, &attr);
3240
+ if (err)
3241
+ return err;
3242
+
3243
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
3244
+ immutable->gid_tbl_len = attr.gid_tbl_len;
3245
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
3246
+
3247
+ return 0;
3248
+}
3249
+
3250
+static void get_dev_fw_str(struct ib_device *ibdev, char *str)
3251
+{
3252
+ struct mlx5_ib_dev *dev =
3253
+ container_of(ibdev, struct mlx5_ib_dev, ib_dev);
3254
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d",
3255
+ fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev),
3256
+ fw_rev_sub(dev->mdev));
3257
+}
3258
+
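A quick worked example of the firmware-version format string above, with assumed revision values.

/*
 * With fw_rev_maj() == 16, fw_rev_min() == 28 and fw_rev_sub() == 1002 the
 * "%d.%d.%04d" format yields "16.28.1002"; a sub-minor of 8 would be
 * zero-padded to "16.28.0008".
 */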
3259
+static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
3260
+{
3261
+ struct mlx5_core_dev *mdev = dev->mdev;
3262
+ struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
3263
+ MLX5_FLOW_NAMESPACE_LAG);
3264
+ struct mlx5_flow_table *ft;
3265
+ int err;
3266
+
3267
+ if (!ns || !mlx5_lag_is_roce(mdev))
3268
+ return 0;
3269
+
3270
+ err = mlx5_cmd_create_vport_lag(mdev);
3271
+ if (err)
3272
+ return err;
3273
+
3274
+ ft = mlx5_create_lag_demux_flow_table(ns, 0, 0);
3275
+ if (IS_ERR(ft)) {
3276
+ err = PTR_ERR(ft);
3277
+ goto err_destroy_vport_lag;
3278
+ }
3279
+
3280
+ dev->flow_db->lag_demux_ft = ft;
3281
+ dev->lag_active = true;
3282
+ return 0;
3283
+
3284
+err_destroy_vport_lag:
3285
+ mlx5_cmd_destroy_vport_lag(mdev);
3286
+ return err;
3287
+}
3288
+
3289
+static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
3290
+{
3291
+ struct mlx5_core_dev *mdev = dev->mdev;
3292
+
3293
+ if (dev->lag_active) {
3294
+ dev->lag_active = false;
3295
+
3296
+ mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
3297
+ dev->flow_db->lag_demux_ft = NULL;
3298
+
3299
+ mlx5_cmd_destroy_vport_lag(mdev);
3300
+ }
3301
+}
3302
+
3303
+static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
3304
+{
3305
+ int err;
3306
+
3307
+ dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event;
3308
+ err = register_netdevice_notifier(&dev->port[port_num].roce.nb);
3309
+ if (err) {
3310
+ dev->port[port_num].roce.nb.notifier_call = NULL;
3311
+ return err;
3312
+ }
3313
+
3314
+ return 0;
3315
+}
3316
+
3317
+static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
3318
+{
3319
+ if (dev->port[port_num].roce.nb.notifier_call) {
3320
+ unregister_netdevice_notifier(&dev->port[port_num].roce.nb);
3321
+ dev->port[port_num].roce.nb.notifier_call = NULL;
3322
+ }
3323
+}
3324
+
3325
+static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
3326
+{
3327
+ int err;
3328
+
3329
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
3330
+ if (err)
3331
+ return err;
3332
+
3333
+ err = mlx5_eth_lag_init(dev);
3334
+ if (err)
3335
+ goto err_disable_roce;
3336
+
3337
+ return 0;
3338
+
3339
+err_disable_roce:
3340
+ mlx5_nic_vport_disable_roce(dev->mdev);
3341
+
3342
+ return err;
3343
+}
3344
+
3345
+static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
3346
+{
3347
+ mlx5_eth_lag_cleanup(dev);
3348
+ mlx5_nic_vport_disable_roce(dev->mdev);
3349
+}
3350
+
3351
+static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
3352
+ enum rdma_netdev_t type,
3353
+ struct rdma_netdev_alloc_params *params)
3354
+{
3355
+ if (type != RDMA_NETDEV_IPOIB)
3356
+ return -EOPNOTSUPP;
3357
+
3358
+ return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params);
3359
+}
3360
+
3361
+static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
3362
+ size_t count, loff_t *pos)
3363
+{
3364
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
3365
+ char lbuf[20];
3366
+ int len;
3367
+
3368
+ len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout);
3369
+ return simple_read_from_buffer(buf, count, pos, lbuf, len);
3370
+}
3371
+
3372
+static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf,
3373
+ size_t count, loff_t *pos)
3374
+{
3375
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
3376
+ u32 timeout;
3377
+ u32 var;
3378
+
3379
+ if (kstrtouint_from_user(buf, count, 0, &var))
3380
+ return -EFAULT;
3381
+
3382
+ timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS *
3383
+ 1000);
3384
+ if (timeout != var)
3385
+ mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n",
3386
+ timeout);
3387
+
3388
+ delay_drop->timeout = timeout;
3389
+
3390
+ return count;
3391
+}
3392
+
3393
+static const struct file_operations fops_delay_drop_timeout = {
3394
+ .owner = THIS_MODULE,
3395
+ .open = simple_open,
3396
+ .write = delay_drop_timeout_write,
3397
+ .read = delay_drop_timeout_read,
3398
+};
3399
+
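A sketch of how such a file_operations table is typically exposed through debugfs; the file name, mode and parent dentry below are assumptions, not taken from this hunk.

#include <linux/debugfs.h>

static void example_delay_drop_debugfs(struct mlx5_ib_delay_drop *delay_drop,
                                       struct dentry *parent)
{
        /* The data pointer becomes filp->private_data in the read/write
         * hooks above; writing e.g. "250" is rounded up to 300 usec there.
         */
        debugfs_create_file("timeout", 0600, parent, delay_drop,
                            &fops_delay_drop_timeout);
}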
3400
+static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
3401
+ struct mlx5_ib_multiport_info *mpi)
3402
+{
3403
+ u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
3404
+ struct mlx5_ib_port *port = &ibdev->port[port_num];
3405
+ int comps;
3406
+ int err;
3407
+ int i;
3408
+
3409
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
3410
+
3411
+ mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
3412
+
3413
+ spin_lock(&port->mp.mpi_lock);
3414
+ if (!mpi->ibdev) {
3415
+ spin_unlock(&port->mp.mpi_lock);
3416
+ return;
3417
+ }
3418
+
3419
+ mpi->ibdev = NULL;
3420
+
3421
+ spin_unlock(&port->mp.mpi_lock);
3422
+ if (mpi->mdev_events.notifier_call)
3423
+ mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
3424
+ mpi->mdev_events.notifier_call = NULL;
3425
+ mlx5_remove_netdev_notifier(ibdev, port_num);
3426
+ spin_lock(&port->mp.mpi_lock);
3427
+
3428
+ comps = mpi->mdev_refcnt;
3429
+ if (comps) {
3430
+ mpi->unaffiliate = true;
3431
+ init_completion(&mpi->unref_comp);
3432
+ spin_unlock(&port->mp.mpi_lock);
3433
+
3434
+ for (i = 0; i < comps; i++)
3435
+ wait_for_completion(&mpi->unref_comp);
3436
+
3437
+ spin_lock(&port->mp.mpi_lock);
3438
+ mpi->unaffiliate = false;
3439
+ }
3440
+
3441
+ port->mp.mpi = NULL;
3442
+
3443
+ spin_unlock(&port->mp.mpi_lock);
3444
+
3445
+ err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev);
3446
+
3447
+ mlx5_ib_dbg(ibdev, "unaffiliated port %d\n", port_num + 1);
3448
+ /* Log an error; we still need to clean up the pointers and add
3449
+ * it back to the list.
3450
+ */
3451
+ if (err)
3452
+ mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n",
3453
+ port_num + 1);
3454
+
3455
+ ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN;
3456
+}
3457
+
3458
+static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
3459
+ struct mlx5_ib_multiport_info *mpi)
3460
+{
3461
+ u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
3462
+ int err;
3463
+
3464
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
3465
+
3466
+ spin_lock(&ibdev->port[port_num].mp.mpi_lock);
3467
+ if (ibdev->port[port_num].mp.mpi) {
3468
+ mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
3469
+ port_num + 1);
3470
+ spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
3471
+ return false;
3472
+ }
3473
+
3474
+ ibdev->port[port_num].mp.mpi = mpi;
3475
+ mpi->ibdev = ibdev;
3476
+ mpi->mdev_events.notifier_call = NULL;
3477
+ spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
3478
+
3479
+ err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
3480
+ if (err)
3481
+ goto unbind;
3482
+
3483
+ err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev));
3484
+ if (err)
3485
+ goto unbind;
3486
+
3487
+ err = mlx5_add_netdev_notifier(ibdev, port_num);
3488
+ if (err) {
3489
+ mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
3490
+ port_num + 1);
3491
+ goto unbind;
3492
+ }
3493
+
3494
+ mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
3495
+ mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
3496
+
3497
+ mlx5_ib_init_cong_debugfs(ibdev, port_num);
3498
+
3499
+ return true;
3500
+
3501
+unbind:
3502
+ mlx5_ib_unbind_slave_port(ibdev, mpi);
3503
+ return false;
3504
+}
3505
+
3506
+static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
3507
+{
3508
+ int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
3509
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
3510
+ port_num + 1);
3511
+ struct mlx5_ib_multiport_info *mpi;
3512
+ int err;
3513
+ int i;
3514
+
3515
+ if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
3516
+ return 0;
3517
+
3518
+ err = mlx5_query_nic_vport_system_image_guid(dev->mdev,
3519
+ &dev->sys_image_guid);
3520
+ if (err)
3521
+ return err;
3522
+
3523
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
3524
+ if (err)
3525
+ return err;
3526
+
3527
+ mutex_lock(&mlx5_ib_multiport_mutex);
3528
+ for (i = 0; i < dev->num_ports; i++) {
3529
+ bool bound = false;
3530
+
3531
+ /* build a stub multiport info struct for the native port. */
3532
+ if (i == port_num) {
3533
+ mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
3534
+ if (!mpi) {
3535
+ mutex_unlock(&mlx5_ib_multiport_mutex);
3536
+ mlx5_nic_vport_disable_roce(dev->mdev);
3537
+ return -ENOMEM;
3538
+ }
3539
+
3540
+ mpi->is_master = true;
3541
+ mpi->mdev = dev->mdev;
3542
+ mpi->sys_image_guid = dev->sys_image_guid;
3543
+ dev->port[i].mp.mpi = mpi;
3544
+ mpi->ibdev = dev;
3545
+ mpi = NULL;
3546
+ continue;
3547
+ }
3548
+
3549
+ list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list,
3550
+ list) {
3551
+ if (dev->sys_image_guid == mpi->sys_image_guid &&
3552
+ (mlx5_core_native_port_num(mpi->mdev) - 1) == i) {
3553
+ bound = mlx5_ib_bind_slave_port(dev, mpi);
3554
+ }
3555
+
3556
+ if (bound) {
3557
+ dev_dbg(mpi->mdev->device,
3558
+ "removing port from unaffiliated list.\n");
3559
+ mlx5_ib_dbg(dev, "port %d bound\n", i + 1);
3560
+ list_del(&mpi->list);
3561
+ break;
3562
+ }
3563
+ }
3564
+ if (!bound) {
3565
+ get_port_caps(dev, i + 1);
3566
+ mlx5_ib_dbg(dev, "no free port found for port %d\n",
3567
+ i + 1);
3568
+ }
3569
+ }
3570
+
3571
+ list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list);
3572
+ mutex_unlock(&mlx5_ib_multiport_mutex);
3573
+ return err;
3574
+}
3575
+
3576
+static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
3577
+{
3578
+ int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
3579
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
3580
+ port_num + 1);
3581
+ int i;
3582
+
3583
+ if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
3584
+ return;
3585
+
3586
+ mutex_lock(&mlx5_ib_multiport_mutex);
3587
+ for (i = 0; i < dev->num_ports; i++) {
3588
+ if (dev->port[i].mp.mpi) {
3589
+ /* Destroy the native port stub */
3590
+ if (i == port_num) {
3591
+ kfree(dev->port[i].mp.mpi);
3592
+ dev->port[i].mp.mpi = NULL;
3593
+ } else {
3594
+ mlx5_ib_dbg(dev, "unbinding port_num: %d\n", i + 1);
3595
+ list_add_tail(&dev->port[i].mp.mpi->list,
3596
+ &mlx5_ib_unaffiliated_port_list);
3597
+ mlx5_ib_unbind_slave_port(dev, dev->port[i].mp.mpi);
3598
+ }
3599
+ }
3600
+ }
3601
+
3602
+ mlx5_ib_dbg(dev, "removing from devlist\n");
3603
+ list_del(&dev->ib_dev_list);
3604
+ mutex_unlock(&mlx5_ib_multiport_mutex);
3605
+
3606
+ mlx5_nic_vport_disable_roce(dev->mdev);
3607
+}
3608
+
3609
+static int mmap_obj_cleanup(struct ib_uobject *uobject,
3610
+ enum rdma_remove_reason why,
3611
+ struct uverbs_attr_bundle *attrs)
3612
+{
3613
+ struct mlx5_user_mmap_entry *obj = uobject->object;
3614
+
3615
+ rdma_user_mmap_entry_remove(&obj->rdma_entry);
3616
+ return 0;
3617
+}
3618
+
3619
+static int mlx5_rdma_user_mmap_entry_insert(struct mlx5_ib_ucontext *c,
3620
+ struct mlx5_user_mmap_entry *entry,
3621
+ size_t length)
3622
+{
3623
+ return rdma_user_mmap_entry_insert_range(
3624
+ &c->ibucontext, &entry->rdma_entry, length,
3625
+ (MLX5_IB_MMAP_OFFSET_START << 16),
3626
+ ((MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1));
3627
+}
3628
+
3629
+static struct mlx5_user_mmap_entry *
3630
+alloc_var_entry(struct mlx5_ib_ucontext *c)
3631
+{
3632
+ struct mlx5_user_mmap_entry *entry;
3633
+ struct mlx5_var_table *var_table;
3634
+ u32 page_idx;
3635
+ int err;
3636
+
3637
+ var_table = &to_mdev(c->ibucontext.device)->var_table;
3638
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
3639
+ if (!entry)
3640
+ return ERR_PTR(-ENOMEM);
3641
+
3642
+ mutex_lock(&var_table->bitmap_lock);
3643
+ page_idx = find_first_zero_bit(var_table->bitmap,
3644
+ var_table->num_var_hw_entries);
3645
+ if (page_idx >= var_table->num_var_hw_entries) {
3646
+ err = -ENOSPC;
3647
+ mutex_unlock(&var_table->bitmap_lock);
3648
+ goto end;
3649
+ }
3650
+
3651
+ set_bit(page_idx, var_table->bitmap);
3652
+ mutex_unlock(&var_table->bitmap_lock);
3653
+
3654
+ entry->address = var_table->hw_start_addr +
3655
+ (page_idx * var_table->stride_size);
3656
+ entry->page_idx = page_idx;
3657
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR;
3658
+
3659
+ err = mlx5_rdma_user_mmap_entry_insert(c, entry,
3660
+ var_table->stride_size);
3661
+ if (err)
3662
+ goto err_insert;
3663
+
3664
+ return entry;
3665
+
3666
+err_insert:
3667
+ mutex_lock(&var_table->bitmap_lock);
3668
+ clear_bit(page_idx, var_table->bitmap);
3669
+ mutex_unlock(&var_table->bitmap_lock);
3670
+end:
3671
+ kfree(entry);
3672
+ return ERR_PTR(err);
3673
+}
3674
+
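The allocation above is a plain bitmap index allocator; a stripped-down sketch of the same pattern follows, with an illustrative function name and parameters.

static int example_alloc_index(unsigned long *bitmap, unsigned int nbits,
                               struct mutex *lock)
{
        unsigned int idx;

        mutex_lock(lock);
        idx = find_first_zero_bit(bitmap, nbits);
        if (idx >= nbits) {
                mutex_unlock(lock);
                return -ENOSPC;         /* every hardware entry is in use */
        }
        set_bit(idx, bitmap);           /* reserved until clear_bit() on free */
        mutex_unlock(lock);

        return idx;
}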
3675
+static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)(
3676
+ struct uverbs_attr_bundle *attrs)
3677
+{
3678
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
3679
+ attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE);
3680
+ struct mlx5_ib_ucontext *c;
3681
+ struct mlx5_user_mmap_entry *entry;
3682
+ u64 mmap_offset;
3683
+ u32 length;
3684
+ int err;
3685
+
3686
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
3687
+ if (IS_ERR(c))
3688
+ return PTR_ERR(c);
3689
+
3690
+ entry = alloc_var_entry(c);
3691
+ if (IS_ERR(entry))
3692
+ return PTR_ERR(entry);
3693
+
3694
+ mmap_offset = mlx5_entry_to_mmap_offset(entry);
3695
+ length = entry->rdma_entry.npages * PAGE_SIZE;
3696
+ uobj->object = entry;
3697
+ uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE);
3698
+
3699
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
3700
+ &mmap_offset, sizeof(mmap_offset));
3701
+ if (err)
3702
+ return err;
3703
+
3704
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
3705
+ &entry->page_idx, sizeof(entry->page_idx));
3706
+ if (err)
3707
+ return err;
3708
+
3709
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
3710
+ &length, sizeof(length));
3711
+ return err;
3712
+}
3713
+
3714
+DECLARE_UVERBS_NAMED_METHOD(
3715
+ MLX5_IB_METHOD_VAR_OBJ_ALLOC,
3716
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE,
3717
+ MLX5_IB_OBJECT_VAR,
3718
+ UVERBS_ACCESS_NEW,
3719
+ UA_MANDATORY),
3720
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
3721
+ UVERBS_ATTR_TYPE(u32),
3722
+ UA_MANDATORY),
3723
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
3724
+ UVERBS_ATTR_TYPE(u32),
3725
+ UA_MANDATORY),
3726
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
3727
+ UVERBS_ATTR_TYPE(u64),
3728
+ UA_MANDATORY));
3729
+
3730
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
3731
+ MLX5_IB_METHOD_VAR_OBJ_DESTROY,
3732
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE,
3733
+ MLX5_IB_OBJECT_VAR,
3734
+ UVERBS_ACCESS_DESTROY,
3735
+ UA_MANDATORY));
3736
+
3737
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR,
3738
+ UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup),
3739
+ &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC),
3740
+ &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY));
3741
+
3742
+static bool var_is_supported(struct ib_device *device)
3743
+{
3744
+ struct mlx5_ib_dev *dev = to_mdev(device);
3745
+
3746
+ return (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
3747
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q);
3748
+}
3749
+
3750
+static struct mlx5_user_mmap_entry *
3751
+alloc_uar_entry(struct mlx5_ib_ucontext *c,
3752
+ enum mlx5_ib_uapi_uar_alloc_type alloc_type)
3753
+{
3754
+ struct mlx5_user_mmap_entry *entry;
3755
+ struct mlx5_ib_dev *dev;
3756
+ u32 uar_index;
3757
+ int err;
3758
+
3759
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
3760
+ if (!entry)
3761
+ return ERR_PTR(-ENOMEM);
3762
+
3763
+ dev = to_mdev(c->ibucontext.device);
3764
+ err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
3765
+ if (err)
3766
+ goto end;
3767
+
3768
+ entry->page_idx = uar_index;
3769
+ entry->address = uar_index2paddress(dev, uar_index);
3770
+ if (alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
3771
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_WC;
3772
+ else
3773
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_NC;
3774
+
3775
+ err = mlx5_rdma_user_mmap_entry_insert(c, entry, PAGE_SIZE);
3776
+ if (err)
3777
+ goto err_insert;
3778
+
3779
+ return entry;
3780
+
3781
+err_insert:
3782
+ mlx5_cmd_free_uar(dev->mdev, uar_index);
3783
+end:
3784
+ kfree(entry);
3785
+ return ERR_PTR(err);
3786
+}
3787
+
3788
+static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
3789
+ struct uverbs_attr_bundle *attrs)
3790
+{
3791
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
3792
+ attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE);
3793
+ enum mlx5_ib_uapi_uar_alloc_type alloc_type;
3794
+ struct mlx5_ib_ucontext *c;
3795
+ struct mlx5_user_mmap_entry *entry;
3796
+ u64 mmap_offset;
3797
+ u32 length;
3798
+ int err;
3799
+
3800
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
3801
+ if (IS_ERR(c))
3802
+ return PTR_ERR(c);
3803
+
3804
+ err = uverbs_get_const(&alloc_type, attrs,
3805
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE);
3806
+ if (err)
3807
+ return err;
3808
+
3809
+ if (alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF &&
3810
+ alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
3811
+ return -EOPNOTSUPP;
3812
+
3813
+ if (!to_mdev(c->ibucontext.device)->wc_support &&
3814
+ alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
3815
+ return -EOPNOTSUPP;
3816
+
3817
+ entry = alloc_uar_entry(c, alloc_type);
3818
+ if (IS_ERR(entry))
3819
+ return PTR_ERR(entry);
3820
+
3821
+ mmap_offset = mlx5_entry_to_mmap_offset(entry);
3822
+ length = entry->rdma_entry.npages * PAGE_SIZE;
3823
+ uobj->object = entry;
3824
+ uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE);
3825
+
3826
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
3827
+ &mmap_offset, sizeof(mmap_offset));
3828
+ if (err)
3829
+ return err;
3830
+
3831
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
3832
+ &entry->page_idx, sizeof(entry->page_idx));
3833
+ if (err)
3834
+ return err;
3835
+
3836
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
3837
+ &length, sizeof(length));
3838
+ return err;
3839
+}
3840
+
3841
+DECLARE_UVERBS_NAMED_METHOD(
3842
+ MLX5_IB_METHOD_UAR_OBJ_ALLOC,
3843
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE,
3844
+ MLX5_IB_OBJECT_UAR,
3845
+ UVERBS_ACCESS_NEW,
3846
+ UA_MANDATORY),
3847
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE,
3848
+ enum mlx5_ib_uapi_uar_alloc_type,
3849
+ UA_MANDATORY),
3850
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
3851
+ UVERBS_ATTR_TYPE(u32),
3852
+ UA_MANDATORY),
3853
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
3854
+ UVERBS_ATTR_TYPE(u32),
3855
+ UA_MANDATORY),
3856
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
3857
+ UVERBS_ATTR_TYPE(u64),
3858
+ UA_MANDATORY));
3859
+
3860
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
3861
+ MLX5_IB_METHOD_UAR_OBJ_DESTROY,
3862
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE,
3863
+ MLX5_IB_OBJECT_UAR,
3864
+ UVERBS_ACCESS_DESTROY,
3865
+ UA_MANDATORY));
3866
+
3867
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR,
3868
+ UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup),
3869
+ &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_ALLOC),
3870
+ &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY));
3871
+
3872
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
3873
+ mlx5_ib_dm,
3874
+ UVERBS_OBJECT_DM,
3875
+ UVERBS_METHOD_DM_ALLOC,
3876
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
3877
+ UVERBS_ATTR_TYPE(u64),
3878
+ UA_MANDATORY),
3879
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
3880
+ UVERBS_ATTR_TYPE(u16),
3881
+ UA_OPTIONAL),
3882
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
3883
+ enum mlx5_ib_uapi_dm_type,
3884
+ UA_OPTIONAL));
3885
+
3886
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
3887
+ mlx5_ib_flow_action,
3888
+ UVERBS_OBJECT_FLOW_ACTION,
3889
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
3890
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
3891
+ enum mlx5_ib_uapi_flow_action_flags));
3892
+
3893
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
3894
+ mlx5_ib_query_context,
3895
+ UVERBS_OBJECT_DEVICE,
3896
+ UVERBS_METHOD_QUERY_CONTEXT,
3897
+ UVERBS_ATTR_PTR_OUT(
3898
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
3899
+ UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp,
3900
+ dump_fill_mkey),
3901
+ UA_MANDATORY));
3902
+
3903
+static const struct uapi_definition mlx5_ib_defs[] = {
3904
+ UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
3905
+ UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
3906
+ UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
3907
+ UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
3908
+
3909
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
3910
+ &mlx5_ib_flow_action),
3911
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
3912
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
3913
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
3914
+ UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
3915
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
3916
+ {}
3917
+};
3918
+
3919
+static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
3920
+{
3921
+ mlx5_ib_cleanup_multiport_master(dev);
3922
+ WARN_ON(!xa_empty(&dev->odp_mkeys));
3923
+ cleanup_srcu_struct(&dev->odp_srcu);
3924
+ mutex_destroy(&dev->cap_mask_mutex);
3925
+ WARN_ON(!xa_empty(&dev->sig_mrs));
3926
+ WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
3927
+}
3928
+
3929
+static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
3930
+{
3931
+ struct mlx5_core_dev *mdev = dev->mdev;
3932
+ int err;
3933
+ int i;
3934
+
3935
+ for (i = 0; i < dev->num_ports; i++) {
3936
+ spin_lock_init(&dev->port[i].mp.mpi_lock);
3937
+ rwlock_init(&dev->port[i].roce.netdev_lock);
3938
+ dev->port[i].roce.dev = dev;
3939
+ dev->port[i].roce.native_port_num = i + 1;
3940
+ dev->port[i].roce.last_port_state = IB_PORT_DOWN;
3941
+ }
3942
+
3943
+ mlx5_ib_internal_fill_odp_caps(dev);
3944
+
3945
+ err = mlx5_ib_init_multiport_master(dev);
3946
+ if (err)
3947
+ return err;
3948
+
3949
+ err = set_has_smi_cap(dev);
3950
+ if (err)
3951
+ goto err_mp;
3952
+
3953
+ if (!mlx5_core_mp_enabled(mdev)) {
3954
+ for (i = 1; i <= dev->num_ports; i++) {
3955
+ err = get_port_caps(dev, i);
3956
+ if (err)
3957
+ break;
3958
+ }
3959
+ } else {
3960
+ err = get_port_caps(dev, mlx5_core_native_port_num(mdev));
3961
+ }
3962
+ if (err)
3963
+ goto err_mp;
3964
+
3965
+ if (mlx5_use_mad_ifc(dev))
3966
+ get_ext_port_caps(dev);
3967
+
3968
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
3969
+ dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
3970
+ dev->ib_dev.phys_port_cnt = dev->num_ports;
3971
+ dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
3972
+ dev->ib_dev.dev.parent = mdev->device;
3973
+ dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES;
3974
+
3975
+ err = init_srcu_struct(&dev->odp_srcu);
3976
+ if (err)
3977
+ goto err_mp;
3978
+
3979
+ mutex_init(&dev->cap_mask_mutex);
3980
+ INIT_LIST_HEAD(&dev->qp_list);
3981
+ spin_lock_init(&dev->reset_flow_resource_lock);
3982
+ xa_init(&dev->odp_mkeys);
3983
+ xa_init(&dev->sig_mrs);
3984
+ atomic_set(&dev->mkey_var, 0);
3985
+
3986
+ spin_lock_init(&dev->dm.lock);
3987
+ dev->dm.dev = mdev;
3988
+ return 0;
3989
+
3990
+err_mp:
3991
+ mlx5_ib_cleanup_multiport_master(dev);
3992
+ return err;
3993
+}
3994
+
3995
+static int mlx5_ib_enable_driver(struct ib_device *dev)
3996
+{
3997
+ struct mlx5_ib_dev *mdev = to_mdev(dev);
3998
+ int ret;
3999
+
4000
+ ret = mlx5_ib_test_wc(mdev);
4001
+ mlx5_ib_dbg(mdev, "Write-Combining %s",
4002
+ mdev->wc_support ? "supported" : "not supported");
4003
+
4004
+ return ret;
4005
+}
4006
+
4007
+static const struct ib_device_ops mlx5_ib_dev_ops = {
4008
+ .owner = THIS_MODULE,
4009
+ .driver_id = RDMA_DRIVER_MLX5,
4010
+ .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION,
4011
+
4012
+ .add_gid = mlx5_ib_add_gid,
4013
+ .alloc_mr = mlx5_ib_alloc_mr,
4014
+ .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
4015
+ .alloc_pd = mlx5_ib_alloc_pd,
4016
+ .alloc_ucontext = mlx5_ib_alloc_ucontext,
4017
+ .attach_mcast = mlx5_ib_mcg_attach,
4018
+ .check_mr_status = mlx5_ib_check_mr_status,
4019
+ .create_ah = mlx5_ib_create_ah,
4020
+ .create_cq = mlx5_ib_create_cq,
4021
+ .create_qp = mlx5_ib_create_qp,
4022
+ .create_srq = mlx5_ib_create_srq,
4023
+ .dealloc_pd = mlx5_ib_dealloc_pd,
4024
+ .dealloc_ucontext = mlx5_ib_dealloc_ucontext,
4025
+ .del_gid = mlx5_ib_del_gid,
4026
+ .dereg_mr = mlx5_ib_dereg_mr,
4027
+ .destroy_ah = mlx5_ib_destroy_ah,
4028
+ .destroy_cq = mlx5_ib_destroy_cq,
4029
+ .destroy_qp = mlx5_ib_destroy_qp,
4030
+ .destroy_srq = mlx5_ib_destroy_srq,
4031
+ .detach_mcast = mlx5_ib_mcg_detach,
4032
+ .disassociate_ucontext = mlx5_ib_disassociate_ucontext,
4033
+ .drain_rq = mlx5_ib_drain_rq,
4034
+ .drain_sq = mlx5_ib_drain_sq,
4035
+ .enable_driver = mlx5_ib_enable_driver,
4036
+ .get_dev_fw_str = get_dev_fw_str,
4037
+ .get_dma_mr = mlx5_ib_get_dma_mr,
4038
+ .get_link_layer = mlx5_ib_port_link_layer,
4039
+ .map_mr_sg = mlx5_ib_map_mr_sg,
4040
+ .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
4041
+ .mmap = mlx5_ib_mmap,
4042
+ .mmap_free = mlx5_ib_mmap_free,
4043
+ .modify_cq = mlx5_ib_modify_cq,
4044
+ .modify_device = mlx5_ib_modify_device,
4045
+ .modify_port = mlx5_ib_modify_port,
4046
+ .modify_qp = mlx5_ib_modify_qp,
4047
+ .modify_srq = mlx5_ib_modify_srq,
4048
+ .poll_cq = mlx5_ib_poll_cq,
4049
+ .post_recv = mlx5_ib_post_recv_nodrain,
4050
+ .post_send = mlx5_ib_post_send_nodrain,
4051
+ .post_srq_recv = mlx5_ib_post_srq_recv,
4052
+ .process_mad = mlx5_ib_process_mad,
4053
+ .query_ah = mlx5_ib_query_ah,
4054
+ .query_device = mlx5_ib_query_device,
4055
+ .query_gid = mlx5_ib_query_gid,
4056
+ .query_pkey = mlx5_ib_query_pkey,
4057
+ .query_qp = mlx5_ib_query_qp,
4058
+ .query_srq = mlx5_ib_query_srq,
4059
+ .query_ucontext = mlx5_ib_query_ucontext,
4060
+ .reg_user_mr = mlx5_ib_reg_user_mr,
4061
+ .req_notify_cq = mlx5_ib_arm_cq,
4062
+ .rereg_user_mr = mlx5_ib_rereg_user_mr,
4063
+ .resize_cq = mlx5_ib_resize_cq,
4064
+
4065
+ INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
4066
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
4067
+ INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
4068
+ INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
4069
+ INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
4070
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
4071
+};
4072
+
4073
+static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
4074
+ .rdma_netdev_get_params = mlx5_ib_rn_get_params,
4075
+};
4076
+
4077
+static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
4078
+ .get_vf_config = mlx5_ib_get_vf_config,
4079
+ .get_vf_guid = mlx5_ib_get_vf_guid,
4080
+ .get_vf_stats = mlx5_ib_get_vf_stats,
4081
+ .set_vf_guid = mlx5_ib_set_vf_guid,
4082
+ .set_vf_link_state = mlx5_ib_set_vf_link_state,
4083
+};
4084
+
4085
+static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
4086
+ .alloc_mw = mlx5_ib_alloc_mw,
4087
+ .dealloc_mw = mlx5_ib_dealloc_mw,
4088
+
4089
+ INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw),
4090
+};
4091
+
4092
+static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
4093
+ .alloc_xrcd = mlx5_ib_alloc_xrcd,
4094
+ .dealloc_xrcd = mlx5_ib_dealloc_xrcd,
4095
+
4096
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd),
4097
+};
4098
+
4099
+static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
4100
+ .alloc_dm = mlx5_ib_alloc_dm,
4101
+ .dealloc_dm = mlx5_ib_dealloc_dm,
4102
+ .reg_dm_mr = mlx5_ib_reg_dm_mr,
4103
+};
4104
+
4105
+static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
4106
+{
4107
+ struct mlx5_core_dev *mdev = dev->mdev;
4108
+ struct mlx5_var_table *var_table = &dev->var_table;
4109
+ u8 log_doorbell_bar_size;
4110
+ u8 log_doorbell_stride;
4111
+ u64 bar_size;
4112
+
4113
+ log_doorbell_bar_size = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
4114
+ log_doorbell_bar_size);
4115
+ log_doorbell_stride = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
4116
+ log_doorbell_stride);
4117
+ var_table->hw_start_addr = dev->mdev->bar_addr +
4118
+ MLX5_CAP64_DEV_VDPA_EMULATION(mdev,
4119
+ doorbell_bar_offset);
4120
+ bar_size = (1ULL << log_doorbell_bar_size) * 4096;
4121
+ var_table->stride_size = 1ULL << log_doorbell_stride;
4122
+ var_table->num_var_hw_entries = div_u64(bar_size,
4123
+ var_table->stride_size);
4124
+ mutex_init(&var_table->bitmap_lock);
4125
+ var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries,
4126
+ GFP_KERNEL);
4127
+ return (var_table->bitmap) ? 0 : -ENOMEM;
4128
+}
4129
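+ /*
+ * Worked example of the sizing math above, with assumed (not device-read)
+ * values: log_doorbell_bar_size = 2 and log_doorbell_stride = 6 give
+ * bar_size = (1ULL << 2) * 4096 = 16384 bytes and stride_size = 64 bytes,
+ * so num_var_hw_entries = 16384 / 64 = 256 and the bitmap tracks 256
+ * doorbell slots.
+ */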
+
4130
+static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev)
4131
+{
4132
+ bitmap_free(dev->var_table.bitmap);
4133
+}
4134
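+ /*
+ * Only the VAR-table bitmap needs freeing here; when the VIRTIO_NET_Q
+ * capability was absent the bitmap was never allocated and bitmap_free(NULL)
+ * is a harmless no-op (it reduces to kfree(NULL)).
+ */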
+
4135
+static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
4136
+{
4137
+ struct mlx5_core_dev *mdev = dev->mdev;
4138
+ int err;
4139
+
4140
+ dev->ib_dev.uverbs_cmd_mask =
4141
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
4142
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
4143
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
4144
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
4145
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
4146
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
4147
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
4148
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
4149
+ (1ull << IB_USER_VERBS_CMD_REREG_MR) |
4150
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
4151
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
4152
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
4153
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
4154
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
4155
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
4156
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
4157
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
4158
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
4159
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
4160
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
4161
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
4162
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
4163
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
4164
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
4165
+ (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
4166
+ (1ull << IB_USER_VERBS_CMD_OPEN_QP);
4167
+ dev->ib_dev.uverbs_ex_cmd_mask =
4168
+ (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
4169
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
4170
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
4171
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) |
4172
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
4173
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
4174
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
4175
+
4176
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
4177
+ IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
4178
+ ib_set_device_ops(&dev->ib_dev,
4179
+ &mlx5_ib_dev_ipoib_enhanced_ops);
4180
+
4181
+ if (mlx5_core_is_pf(mdev))
4182
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_sriov_ops);
4183
+
4184
+ dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
4185
+
4186
+ if (MLX5_CAP_GEN(mdev, imaicl)) {
4187
+ dev->ib_dev.uverbs_cmd_mask |=
4188
+ (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
4189
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
4190
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops);
4191
+ }
4192
+
4193
+ if (MLX5_CAP_GEN(mdev, xrc)) {
4194
+ dev->ib_dev.uverbs_cmd_mask |=
4195
+ (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
4196
+ (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
4197
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
4198
+ }
4199
+
4200
+ if (MLX5_CAP_DEV_MEM(mdev, memic) ||
4201
+ MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
4202
+ MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
4203
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
4204
+
4205
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
4206
+
4207
+ if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
4208
+ dev->ib_dev.driver_def = mlx5_ib_defs;
4209
+
4210
+ err = init_node_data(dev);
4211
+ if (err)
4212
+ return err;
4213
+
4214
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
4215
+ (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
4216
+ MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
4217
+ mutex_init(&dev->lb.mutex);
4218
+
4219
+ if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
4220
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) {
4221
+ err = mlx5_ib_init_var_table(dev);
4222
+ if (err)
4223
+ return err;
4224
+ }
4225
+
4226
+ dev->ib_dev.use_cq_dim = true;
4227
+
4228
+ return 0;
4229
+}
4230
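+ /*
+ * Each bit set in uverbs_cmd_mask / uverbs_ex_cmd_mask above enables one
+ * uverbs command for this device; e.g. when MLX5_CAP_GEN(mdev, xrc) is clear
+ * the OPEN_XRCD/CLOSE_XRCD bits stay zero and the RDMA core rejects those
+ * commands before they reach a driver callback.  The optional ib_device_ops
+ * tables (enhanced IPoIB, SR-IOV, MW, XRC, DM) are merged in the same
+ * capability-gated way through ib_set_device_ops().
+ */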
+
4231
+static const struct ib_device_ops mlx5_ib_dev_port_ops = {
4232
+ .get_port_immutable = mlx5_port_immutable,
4233
+ .query_port = mlx5_ib_query_port,
4234
+};
4235
+
4236
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
4237
+{
4238
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_ops);
4239
+ return 0;
4240
+}
4241
+
4242
+static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = {
4243
+ .get_port_immutable = mlx5_port_rep_immutable,
4244
+ .query_port = mlx5_ib_rep_query_port,
4245
+};
4246
+
4247
+static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev)
4248
+{
4249
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops);
4250
+ return 0;
4251
+}
4252
+
4253
+static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
4254
+ .create_rwq_ind_table = mlx5_ib_create_rwq_ind_table,
4255
+ .create_wq = mlx5_ib_create_wq,
4256
+ .destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table,
4257
+ .destroy_wq = mlx5_ib_destroy_wq,
4258
+ .get_netdev = mlx5_ib_get_netdev,
4259
+ .modify_wq = mlx5_ib_modify_wq,
4260
+
4261
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table,
4262
+ ib_rwq_ind_tbl),
4263
+};
4264
+
4265
+static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
4266
+{
4267
+ struct mlx5_core_dev *mdev = dev->mdev;
4268
+ enum rdma_link_layer ll;
4269
+ int port_type_cap;
4270
+ u8 port_num = 0;
4271
+ int err;
4272
+
4273
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
4274
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
4275
+
4276
+ if (ll == IB_LINK_LAYER_ETHERNET) {
4277
+ dev->ib_dev.uverbs_ex_cmd_mask |=
4278
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
4279
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
4280
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
4281
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
4282
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
4283
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
4284
+
4285
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
4286
+
4287
+ /* Register only for native ports */
4288
+ err = mlx5_add_netdev_notifier(dev, port_num);
4289
+ if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev))
4290
+ /*
4291
+ * We don't enable the ETH interface for:
4292
+ * 1. IB representors
4293
+ * 2. Devices where the user disabled RoCE through the devlink interface
4294
+ */
4295
+ return err;
4296
+
4297
+ err = mlx5_enable_eth(dev);
4298
+ if (err)
4299
+ goto cleanup;
4300
+ }
4301
+
4302
+ return 0;
4303
+cleanup:
4304
+ mlx5_remove_netdev_notifier(dev, port_num);
4305
+ return err;
4306
+}
4307
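+ /*
+ * Note on the early return above: when mlx5_add_netdev_notifier() succeeds
+ * but the device is an IB representor or RoCE was disabled through devlink,
+ * the function returns 0 with the notifier left registered and
+ * mlx5_enable_eth() skipped, so the stage still completes successfully.
+ */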
+
4308
+static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
4309
+{
4310
+ struct mlx5_core_dev *mdev = dev->mdev;
4311
+ enum rdma_link_layer ll;
4312
+ int port_type_cap;
4313
+ u8 port_num;
4314
+
4315
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
4316
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
4317
+
4318
+ if (ll == IB_LINK_LAYER_ETHERNET) {
4319
+ if (!dev->is_rep)
4320
+ mlx5_disable_eth(dev);
4321
+
4322
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
4323
+ mlx5_remove_netdev_notifier(dev, port_num);
4324
+ }
4325
+}
4326
+
4327
+static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
4328
+{
4329
+ mlx5_ib_init_cong_debugfs(dev,
4330
+ mlx5_core_native_port_num(dev->mdev) - 1);
4331
+ return 0;
4332
+}
4333
+
4334
+static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
4335
+{
4336
+ mlx5_ib_cleanup_cong_debugfs(dev,
4337
+ mlx5_core_native_port_num(dev->mdev) - 1);
4338
+}
4339
+
4340
+static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
4341
+{
4342
+ dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
4343
+ return PTR_ERR_OR_ZERO(dev->mdev->priv.uar);
4344
+}
4345
+
4346
+static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
4347
+{
4348
+ mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
4349
+}
4350
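+ /*
+ * mlx5_get_uars_page() returns an ERR_PTR() on failure, so PTR_ERR_OR_ZERO()
+ * above maps success to 0 and failure to the negative errno without a
+ * separate IS_ERR() branch.
+ */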
+
4351
+static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
4352
+{
4353
+ int err;
4354
+
4355
+ err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
4356
+ if (err)
4357
+ return err;
4358
+
4359
+ err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
4360
+ if (err)
4361
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
4362
+
4363
+ return err;
4364
+}
4365
+
4366
+static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
4367
+{
4368
+ mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
4369
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
4370
+}
4371
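+ /*
+ * The two blue-flame registers allocated above differ only in the last
+ * argument to mlx5_alloc_bfreg(): dev->bfreg is the regular doorbell
+ * register, while dev->fp_bfreg (the call passing true) is read here as the
+ * fast-path variant, matching its "fp_" name.
+ */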
+
4372
+static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
4373
+{
4374
+ const char *name;
4375
+
4376
+ rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
4377
+ if (!mlx5_lag_is_roce(dev->mdev))
4378
+ name = "mlx5_%d";
4379
+ else
4380
+ name = "mlx5_bond_%d";
4381
+ return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev);
4382
+}
4383
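+ /*
+ * Device naming example: with RoCE LAG inactive the devices register as
+ * mlx5_0, mlx5_1, ..., while a bonded (LAG) device registers as
+ * mlx5_bond_0, matching the "mlx5_%d" / "mlx5_bond_%d" templates above.
+ */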
+
4384
+static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
 {
 int err;

@@ -4410,18 +4390,23 @@
 mlx5_ib_warn(dev, "mr cache cleanup failed\n");

 if (dev->umrc.qp)
- mlx5_ib_destroy_qp(dev->umrc.qp);
+ mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
 if (dev->umrc.cq)
 ib_free_cq(dev->umrc.cq);
 if (dev->umrc.pd)
 ib_dealloc_pd(dev->umrc.pd);
 }

4400
+static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
4401
+{
4402
+ ib_unregister_device(&dev->ib_dev);
4403
+}
4404
+
 enum {
 MAX_UMR_WR = 128,
 };

4424
-static int create_umr_res(struct mlx5_ib_dev *dev)
4409
+static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
 {
 struct ib_qp_init_attr *init_attr = NULL;
 struct ib_qp_attr *attr = NULL;
@@ -4515,7 +4500,7 @@
 return 0;

 error_4:
- mlx5_ib_destroy_qp(qp);
+ mlx5_ib_destroy_qp(qp, NULL);
 dev->umrc.qp = NULL;

 error_3:
@@ -4532,789 +4517,12 @@
 return ret;
 }
45344519
4535
-static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
4520
+static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
 {
4537
- switch (umr_fence_cap) {
4538
- case MLX5_CAP_UMR_FENCE_NONE:
4539
- return MLX5_FENCE_MODE_NONE;
4540
- case MLX5_CAP_UMR_FENCE_SMALL:
4541
- return MLX5_FENCE_MODE_INITIATOR_SMALL;
4542
- default:
4543
- return MLX5_FENCE_MODE_STRONG_ORDERING;
4544
- }
4545
-}
4546
-
4547
-static int create_dev_resources(struct mlx5_ib_resources *devr)
4548
-{
4549
- struct ib_srq_init_attr attr;
4550
- struct mlx5_ib_dev *dev;
4551
- struct ib_cq_init_attr cq_attr = {.cqe = 1};
4552
- int port;
4553
- int ret = 0;
4554
-
4555
- dev = container_of(devr, struct mlx5_ib_dev, devr);
4556
-
4557
- mutex_init(&devr->mutex);
4558
-
4559
- devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
4560
- if (IS_ERR(devr->p0)) {
4561
- ret = PTR_ERR(devr->p0);
4562
- goto error0;
4563
- }
4564
- devr->p0->device = &dev->ib_dev;
4565
- devr->p0->uobject = NULL;
4566
- atomic_set(&devr->p0->usecnt, 0);
4567
-
4568
- devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
4569
- if (IS_ERR(devr->c0)) {
4570
- ret = PTR_ERR(devr->c0);
4571
- goto error1;
4572
- }
4573
- devr->c0->device = &dev->ib_dev;
4574
- devr->c0->uobject = NULL;
4575
- devr->c0->comp_handler = NULL;
4576
- devr->c0->event_handler = NULL;
4577
- devr->c0->cq_context = NULL;
4578
- atomic_set(&devr->c0->usecnt, 0);
4579
-
4580
- devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
4581
- if (IS_ERR(devr->x0)) {
4582
- ret = PTR_ERR(devr->x0);
4583
- goto error2;
4584
- }
4585
- devr->x0->device = &dev->ib_dev;
4586
- devr->x0->inode = NULL;
4587
- atomic_set(&devr->x0->usecnt, 0);
4588
- mutex_init(&devr->x0->tgt_qp_mutex);
4589
- INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
4590
-
4591
- devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
4592
- if (IS_ERR(devr->x1)) {
4593
- ret = PTR_ERR(devr->x1);
4594
- goto error3;
4595
- }
4596
- devr->x1->device = &dev->ib_dev;
4597
- devr->x1->inode = NULL;
4598
- atomic_set(&devr->x1->usecnt, 0);
4599
- mutex_init(&devr->x1->tgt_qp_mutex);
4600
- INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
4601
-
4602
- memset(&attr, 0, sizeof(attr));
4603
- attr.attr.max_sge = 1;
4604
- attr.attr.max_wr = 1;
4605
- attr.srq_type = IB_SRQT_XRC;
4606
- attr.ext.cq = devr->c0;
4607
- attr.ext.xrc.xrcd = devr->x0;
4608
-
4609
- devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
4610
- if (IS_ERR(devr->s0)) {
4611
- ret = PTR_ERR(devr->s0);
4612
- goto error4;
4613
- }
4614
- devr->s0->device = &dev->ib_dev;
4615
- devr->s0->pd = devr->p0;
4616
- devr->s0->uobject = NULL;
4617
- devr->s0->event_handler = NULL;
4618
- devr->s0->srq_context = NULL;
4619
- devr->s0->srq_type = IB_SRQT_XRC;
4620
- devr->s0->ext.xrc.xrcd = devr->x0;
4621
- devr->s0->ext.cq = devr->c0;
4622
- atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
4623
- atomic_inc(&devr->s0->ext.cq->usecnt);
4624
- atomic_inc(&devr->p0->usecnt);
4625
- atomic_set(&devr->s0->usecnt, 0);
4626
-
4627
- memset(&attr, 0, sizeof(attr));
4628
- attr.attr.max_sge = 1;
4629
- attr.attr.max_wr = 1;
4630
- attr.srq_type = IB_SRQT_BASIC;
4631
- devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
4632
- if (IS_ERR(devr->s1)) {
4633
- ret = PTR_ERR(devr->s1);
4634
- goto error5;
4635
- }
4636
- devr->s1->device = &dev->ib_dev;
4637
- devr->s1->pd = devr->p0;
4638
- devr->s1->uobject = NULL;
4639
- devr->s1->event_handler = NULL;
4640
- devr->s1->srq_context = NULL;
4641
- devr->s1->srq_type = IB_SRQT_BASIC;
4642
- devr->s1->ext.cq = devr->c0;
4643
- atomic_inc(&devr->p0->usecnt);
4644
- atomic_set(&devr->s1->usecnt, 0);
4645
-
4646
- for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
4647
- INIT_WORK(&devr->ports[port].pkey_change_work,
4648
- pkey_change_handler);
4649
- devr->ports[port].devr = devr;
4650
- }
4651
-
4652
- return 0;
4653
-
4654
-error5:
4655
- mlx5_ib_destroy_srq(devr->s0);
4656
-error4:
4657
- mlx5_ib_dealloc_xrcd(devr->x1);
4658
-error3:
4659
- mlx5_ib_dealloc_xrcd(devr->x0);
4660
-error2:
4661
- mlx5_ib_destroy_cq(devr->c0);
4662
-error1:
4663
- mlx5_ib_dealloc_pd(devr->p0);
4664
-error0:
4665
- return ret;
4666
-}
4667
-
4668
-static void destroy_dev_resources(struct mlx5_ib_resources *devr)
4669
-{
4670
- struct mlx5_ib_dev *dev =
4671
- container_of(devr, struct mlx5_ib_dev, devr);
4672
- int port;
4673
-
4674
- mlx5_ib_destroy_srq(devr->s1);
4675
- mlx5_ib_destroy_srq(devr->s0);
4676
- mlx5_ib_dealloc_xrcd(devr->x0);
4677
- mlx5_ib_dealloc_xrcd(devr->x1);
4678
- mlx5_ib_destroy_cq(devr->c0);
4679
- mlx5_ib_dealloc_pd(devr->p0);
4680
-
4681
- /* Make sure no change P_Key work items are still executing */
4682
- for (port = 0; port < dev->num_ports; ++port)
4683
- cancel_work_sync(&devr->ports[port].pkey_change_work);
4684
-}
4685
-
4686
-static u32 get_core_cap_flags(struct ib_device *ibdev,
4687
- struct mlx5_hca_vport_context *rep)
4688
-{
4689
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
4690
- enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
4691
- u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
4692
- u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
4693
- bool raw_support = !mlx5_core_mp_enabled(dev->mdev);
4694
- u32 ret = 0;
4695
-
4696
- if (rep->grh_required)
4697
- ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED;
4698
-
4699
- if (ll == IB_LINK_LAYER_INFINIBAND)
4700
- return ret | RDMA_CORE_PORT_IBA_IB;
4701
-
4702
- if (raw_support)
4703
- ret |= RDMA_CORE_PORT_RAW_PACKET;
4704
-
4705
- if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
4706
- return ret;
4707
-
4708
- if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
4709
- return ret;
4710
-
4711
- if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
4712
- ret |= RDMA_CORE_PORT_IBA_ROCE;
4713
-
4714
- if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
4715
- ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
4716
-
4717
- return ret;
4718
-}
4719
-
4720
-static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
4721
- struct ib_port_immutable *immutable)
4722
-{
4723
- struct ib_port_attr attr;
4724
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
4725
- enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
4726
- struct mlx5_hca_vport_context rep = {0};
4727
- int err;
4728
-
4729
- err = ib_query_port(ibdev, port_num, &attr);
4730
- if (err)
4731
- return err;
4732
-
4733
- if (ll == IB_LINK_LAYER_INFINIBAND) {
4734
- err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
4735
- &rep);
4736
- if (err)
4737
- return err;
4738
- }
4739
-
4740
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
4741
- immutable->gid_tbl_len = attr.gid_tbl_len;
4742
- immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
4743
- if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
4744
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
4745
-
4746
- return 0;
4747
-}
4748
-
4749
-static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
4750
- struct ib_port_immutable *immutable)
4751
-{
4752
- struct ib_port_attr attr;
4753
- int err;
4754
-
4755
- immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
4756
-
4757
- err = ib_query_port(ibdev, port_num, &attr);
4758
- if (err)
4759
- return err;
4760
-
4761
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
4762
- immutable->gid_tbl_len = attr.gid_tbl_len;
4763
- immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
4764
-
4765
- return 0;
4766
-}
4767
-
4768
-static void get_dev_fw_str(struct ib_device *ibdev, char *str)
4769
-{
4770
- struct mlx5_ib_dev *dev =
4771
- container_of(ibdev, struct mlx5_ib_dev, ib_dev);
4772
- snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d",
4773
- fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev),
4774
- fw_rev_sub(dev->mdev));
4775
-}
4776
-
4777
-static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
4778
-{
4779
- struct mlx5_core_dev *mdev = dev->mdev;
4780
- struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
4781
- MLX5_FLOW_NAMESPACE_LAG);
4782
- struct mlx5_flow_table *ft;
4783
- int err;
4784
-
4785
- if (!ns || !mlx5_lag_is_active(mdev))
4786
- return 0;
4787
-
4788
- err = mlx5_cmd_create_vport_lag(mdev);
4789
- if (err)
4790
- return err;
4791
-
4792
- ft = mlx5_create_lag_demux_flow_table(ns, 0, 0);
4793
- if (IS_ERR(ft)) {
4794
- err = PTR_ERR(ft);
4795
- goto err_destroy_vport_lag;
4796
- }
4797
-
4798
- dev->flow_db->lag_demux_ft = ft;
4799
- return 0;
4800
-
4801
-err_destroy_vport_lag:
4802
- mlx5_cmd_destroy_vport_lag(mdev);
4803
- return err;
4804
-}
4805
-
4806
-static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
4807
-{
4808
- struct mlx5_core_dev *mdev = dev->mdev;
4809
-
4810
- if (dev->flow_db->lag_demux_ft) {
4811
- mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
4812
- dev->flow_db->lag_demux_ft = NULL;
4813
-
4814
- mlx5_cmd_destroy_vport_lag(mdev);
4815
- }
4816
-}
4817
-
4818
-static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
4819
-{
4820
- int err;
4821
-
4822
- dev->roce[port_num].nb.notifier_call = mlx5_netdev_event;
4823
- err = register_netdevice_notifier(&dev->roce[port_num].nb);
4824
- if (err) {
4825
- dev->roce[port_num].nb.notifier_call = NULL;
4826
- return err;
4827
- }
4828
-
4829
- return 0;
4830
-}
4831
-
4832
-static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
4833
-{
4834
- if (dev->roce[port_num].nb.notifier_call) {
4835
- unregister_netdevice_notifier(&dev->roce[port_num].nb);
4836
- dev->roce[port_num].nb.notifier_call = NULL;
4837
- }
4838
-}
4839
-
4840
-static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
4841
-{
4842
- int err;
4843
-
4844
- if (MLX5_CAP_GEN(dev->mdev, roce)) {
4845
- err = mlx5_nic_vport_enable_roce(dev->mdev);
4846
- if (err)
4847
- return err;
4848
- }
4849
-
4850
- err = mlx5_eth_lag_init(dev);
4851
- if (err)
4852
- goto err_disable_roce;
4853
-
4854
- return 0;
4855
-
4856
-err_disable_roce:
4857
- if (MLX5_CAP_GEN(dev->mdev, roce))
4858
- mlx5_nic_vport_disable_roce(dev->mdev);
4859
-
4860
- return err;
4861
-}
4862
-
4863
-static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
4864
-{
4865
- mlx5_eth_lag_cleanup(dev);
4866
- if (MLX5_CAP_GEN(dev->mdev, roce))
4867
- mlx5_nic_vport_disable_roce(dev->mdev);
4868
-}
4869
-
4870
-struct mlx5_ib_counter {
4871
- const char *name;
4872
- size_t offset;
4873
-};
4874
-
4875
-#define INIT_Q_COUNTER(_name) \
4876
- { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
4877
-
4878
-static const struct mlx5_ib_counter basic_q_cnts[] = {
4879
- INIT_Q_COUNTER(rx_write_requests),
4880
- INIT_Q_COUNTER(rx_read_requests),
4881
- INIT_Q_COUNTER(rx_atomic_requests),
4882
- INIT_Q_COUNTER(out_of_buffer),
4883
-};
4884
-
4885
-static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
4886
- INIT_Q_COUNTER(out_of_sequence),
4887
-};
4888
-
4889
-static const struct mlx5_ib_counter retrans_q_cnts[] = {
4890
- INIT_Q_COUNTER(duplicate_request),
4891
- INIT_Q_COUNTER(rnr_nak_retry_err),
4892
- INIT_Q_COUNTER(packet_seq_err),
4893
- INIT_Q_COUNTER(implied_nak_seq_err),
4894
- INIT_Q_COUNTER(local_ack_timeout_err),
4895
-};
4896
-
4897
-#define INIT_CONG_COUNTER(_name) \
4898
- { .name = #_name, .offset = \
4899
- MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
4900
-
4901
-static const struct mlx5_ib_counter cong_cnts[] = {
4902
- INIT_CONG_COUNTER(rp_cnp_ignored),
4903
- INIT_CONG_COUNTER(rp_cnp_handled),
4904
- INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
4905
- INIT_CONG_COUNTER(np_cnp_sent),
4906
-};
4907
-
4908
-static const struct mlx5_ib_counter extended_err_cnts[] = {
4909
- INIT_Q_COUNTER(resp_local_length_error),
4910
- INIT_Q_COUNTER(resp_cqe_error),
4911
- INIT_Q_COUNTER(req_cqe_error),
4912
- INIT_Q_COUNTER(req_remote_invalid_request),
4913
- INIT_Q_COUNTER(req_remote_access_errors),
4914
- INIT_Q_COUNTER(resp_remote_access_errors),
4915
- INIT_Q_COUNTER(resp_cqe_flush_error),
4916
- INIT_Q_COUNTER(req_cqe_flush_error),
4917
-};
4918
-
4919
-#define INIT_EXT_PPCNT_COUNTER(_name) \
4920
- { .name = #_name, .offset = \
4921
- MLX5_BYTE_OFF(ppcnt_reg, \
4922
- counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
4923
-
4924
-static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
4925
- INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
4926
-};
4927
-
4928
-static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
4929
-{
4930
- int i;
4931
-
4932
- for (i = 0; i < dev->num_ports; i++) {
4933
- if (dev->port[i].cnts.set_id_valid)
4934
- mlx5_core_dealloc_q_counter(dev->mdev,
4935
- dev->port[i].cnts.set_id);
4936
- kfree(dev->port[i].cnts.names);
4937
- kfree(dev->port[i].cnts.offsets);
4938
- }
4939
-}
4940
-
4941
-static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
4942
- struct mlx5_ib_counters *cnts)
4943
-{
4944
- u32 num_counters;
4945
-
4946
- num_counters = ARRAY_SIZE(basic_q_cnts);
4947
-
4948
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
4949
- num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
4950
-
4951
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
4952
- num_counters += ARRAY_SIZE(retrans_q_cnts);
4953
-
4954
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
4955
- num_counters += ARRAY_SIZE(extended_err_cnts);
4956
-
4957
- cnts->num_q_counters = num_counters;
4958
-
4959
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
4960
- cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
4961
- num_counters += ARRAY_SIZE(cong_cnts);
4962
- }
4963
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
4964
- cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
4965
- num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
4966
- }
4967
- cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
4968
- if (!cnts->names)
4969
- return -ENOMEM;
4970
-
4971
- cnts->offsets = kcalloc(num_counters,
4972
- sizeof(cnts->offsets), GFP_KERNEL);
4973
- if (!cnts->offsets)
4974
- goto err_names;
4975
-
4976
- return 0;
4977
-
4978
-err_names:
4979
- kfree(cnts->names);
4980
- cnts->names = NULL;
4981
- return -ENOMEM;
4982
-}
4983
-
4984
-static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
4985
- const char **names,
4986
- size_t *offsets)
4987
-{
4988
- int i;
4989
- int j = 0;
4990
-
4991
- for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
4992
- names[j] = basic_q_cnts[i].name;
4993
- offsets[j] = basic_q_cnts[i].offset;
4994
- }
4995
-
4996
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
4997
- for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
4998
- names[j] = out_of_seq_q_cnts[i].name;
4999
- offsets[j] = out_of_seq_q_cnts[i].offset;
5000
- }
5001
- }
5002
-
5003
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
5004
- for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
5005
- names[j] = retrans_q_cnts[i].name;
5006
- offsets[j] = retrans_q_cnts[i].offset;
5007
- }
5008
- }
5009
-
5010
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
5011
- for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
5012
- names[j] = extended_err_cnts[i].name;
5013
- offsets[j] = extended_err_cnts[i].offset;
5014
- }
5015
- }
5016
-
5017
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
5018
- for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
5019
- names[j] = cong_cnts[i].name;
5020
- offsets[j] = cong_cnts[i].offset;
5021
- }
5022
- }
5023
-
5024
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
5025
- for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
5026
- names[j] = ext_ppcnt_cnts[i].name;
5027
- offsets[j] = ext_ppcnt_cnts[i].offset;
5028
- }
5029
- }
5030
-}
5031
-
5032
-static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
5033
-{
5034
- int err = 0;
5035
- int i;
5036
-
5037
- for (i = 0; i < dev->num_ports; i++) {
5038
- err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
5039
- if (err)
5040
- goto err_alloc;
5041
-
5042
- mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
5043
- dev->port[i].cnts.offsets);
5044
-
5045
- err = mlx5_core_alloc_q_counter(dev->mdev,
5046
- &dev->port[i].cnts.set_id);
5047
- if (err) {
5048
- mlx5_ib_warn(dev,
5049
- "couldn't allocate queue counter for port %d, err %d\n",
5050
- i + 1, err);
5051
- goto err_alloc;
5052
- }
5053
- dev->port[i].cnts.set_id_valid = true;
5054
- }
5055
-
5056
- return 0;
5057
-
5058
-err_alloc:
5059
- mlx5_ib_dealloc_counters(dev);
5060
- return err;
5061
-}
5062
-
5063
-static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
5064
- u8 port_num)
5065
-{
5066
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
5067
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
5068
-
5069
- /* We support only per port stats */
5070
- if (port_num == 0)
5071
- return NULL;
5072
-
5073
- return rdma_alloc_hw_stats_struct(port->cnts.names,
5074
- port->cnts.num_q_counters +
5075
- port->cnts.num_cong_counters +
5076
- port->cnts.num_ext_ppcnt_counters,
5077
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
5078
-}
5079
-
5080
-static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
5081
- struct mlx5_ib_port *port,
5082
- struct rdma_hw_stats *stats)
5083
-{
5084
- int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
5085
- void *out;
5086
- __be32 val;
5087
- int ret, i;
5088
-
5089
- out = kvzalloc(outlen, GFP_KERNEL);
5090
- if (!out)
5091
- return -ENOMEM;
5092
-
5093
- ret = mlx5_core_query_q_counter(mdev,
5094
- port->cnts.set_id, 0,
5095
- out, outlen);
5096
- if (ret)
5097
- goto free;
5098
-
5099
- for (i = 0; i < port->cnts.num_q_counters; i++) {
5100
- val = *(__be32 *)(out + port->cnts.offsets[i]);
5101
- stats->value[i] = (u64)be32_to_cpu(val);
5102
- }
5103
-
5104
-free:
5105
- kvfree(out);
5106
- return ret;
5107
-}
5108
-
5109
-static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
5110
- struct mlx5_ib_port *port,
5111
- struct rdma_hw_stats *stats)
5112
-{
5113
- int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
5114
- int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
5115
- int ret, i;
5116
- void *out;
5117
-
5118
- out = kvzalloc(sz, GFP_KERNEL);
5119
- if (!out)
5120
- return -ENOMEM;
5121
-
5122
- ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
5123
- if (ret)
5124
- goto free;
5125
-
5126
- for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
5127
- stats->value[i + offset] =
5128
- be64_to_cpup((__be64 *)(out +
5129
- port->cnts.offsets[i + offset]));
5130
- }
5131
-
5132
-free:
5133
- kvfree(out);
5134
- return ret;
5135
-}
5136
-
5137
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
5138
- struct rdma_hw_stats *stats,
5139
- u8 port_num, int index)
5140
-{
5141
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
5142
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
5143
- struct mlx5_core_dev *mdev;
5144
- int ret, num_counters;
5145
- u8 mdev_port_num;
5146
-
5147
- if (!stats)
5148
- return -EINVAL;
5149
-
5150
- num_counters = port->cnts.num_q_counters +
5151
- port->cnts.num_cong_counters +
5152
- port->cnts.num_ext_ppcnt_counters;
5153
-
5154
- /* q_counters are per IB device, query the master mdev */
5155
- ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
5156
- if (ret)
5157
- return ret;
5158
-
5159
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
5160
- ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
5161
- if (ret)
5162
- return ret;
5163
- }
5164
-
5165
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
5166
- mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
5167
- &mdev_port_num);
5168
- if (!mdev) {
5169
- /* If port is not affiliated yet, it's in down state
5170
- * which doesn't have any counters yet, so it would be
5171
- * zero. So no need to read from the HCA.
5172
- */
5173
- goto done;
5174
- }
5175
- ret = mlx5_lag_query_cong_counters(dev->mdev,
5176
- stats->value +
5177
- port->cnts.num_q_counters,
5178
- port->cnts.num_cong_counters,
5179
- port->cnts.offsets +
5180
- port->cnts.num_q_counters);
5181
-
5182
- mlx5_ib_put_native_port_mdev(dev, port_num);
5183
- if (ret)
5184
- return ret;
5185
- }
5186
-
5187
-done:
5188
- return num_counters;
5189
-}
5190
-
5191
-static struct net_device*
5192
-mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
5193
- u8 port_num,
5194
- enum rdma_netdev_t type,
5195
- const char *name,
5196
- unsigned char name_assign_type,
5197
- void (*setup)(struct net_device *))
5198
-{
5199
- struct net_device *netdev;
5200
-
5201
- if (type != RDMA_NETDEV_IPOIB)
5202
- return ERR_PTR(-EOPNOTSUPP);
5203
-
5204
- netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
5205
- name, setup);
5206
- return netdev;
5207
-}
5208
-
5209
-static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
5210
-{
5211
- if (!dev->delay_drop.dbg)
5212
- return;
5213
- debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs);
5214
- kfree(dev->delay_drop.dbg);
5215
- dev->delay_drop.dbg = NULL;
5216
-}
+ struct dentry *root;

-static void cancel_delay_drop(struct mlx5_ib_dev *dev)
-{
 if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
5221
- return;
5222
-
5223
- cancel_work_sync(&dev->delay_drop.delay_drop_work);
5224
- delay_drop_debugfs_cleanup(dev);
5225
-}
5226
-
5227
-static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
5228
- size_t count, loff_t *pos)
5229
-{
5230
- struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
5231
- char lbuf[20];
5232
- int len;
5233
-
5234
- len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout);
5235
- return simple_read_from_buffer(buf, count, pos, lbuf, len);
5236
-}
5237
-
5238
-static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf,
5239
- size_t count, loff_t *pos)
5240
-{
5241
- struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
5242
- u32 timeout;
5243
- u32 var;
5244
-
5245
- if (kstrtouint_from_user(buf, count, 0, &var))
5246
- return -EFAULT;
5247
-
5248
- timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS *
5249
- 1000);
5250
- if (timeout != var)
5251
- mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n",
5252
- timeout);
5253
-
5254
- delay_drop->timeout = timeout;
5255
-
5256
- return count;
5257
-}
5258
-
5259
-static const struct file_operations fops_delay_drop_timeout = {
5260
- .owner = THIS_MODULE,
5261
- .open = simple_open,
5262
- .write = delay_drop_timeout_write,
5263
- .read = delay_drop_timeout_read,
5264
-};
5265
-
5266
-static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
5267
-{
5268
- struct mlx5_ib_dbg_delay_drop *dbg;
5269
-
5270
- if (!mlx5_debugfs_root)
 return 0;
5272
-
5273
- dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
5274
- if (!dbg)
5275
- return -ENOMEM;
5276
-
5277
- dev->delay_drop.dbg = dbg;
5278
-
5279
- dbg->dir_debugfs =
5280
- debugfs_create_dir("delay_drop",
5281
- dev->mdev->priv.dbg_root);
5282
- if (!dbg->dir_debugfs)
5283
- goto out_debugfs;
5284
-
5285
- dbg->events_cnt_debugfs =
5286
- debugfs_create_atomic_t("num_timeout_events", 0400,
5287
- dbg->dir_debugfs,
5288
- &dev->delay_drop.events_cnt);
5289
- if (!dbg->events_cnt_debugfs)
5290
- goto out_debugfs;
5291
-
5292
- dbg->rqs_cnt_debugfs =
5293
- debugfs_create_atomic_t("num_rqs", 0400,
5294
- dbg->dir_debugfs,
5295
- &dev->delay_drop.rqs_cnt);
5296
- if (!dbg->rqs_cnt_debugfs)
5297
- goto out_debugfs;
5298
-
5299
- dbg->timeout_debugfs =
5300
- debugfs_create_file("timeout", 0600,
5301
- dbg->dir_debugfs,
5302
- &dev->delay_drop,
5303
- &fops_delay_drop_timeout);
5304
- if (!dbg->timeout_debugfs)
5305
- goto out_debugfs;
5306
-
5307
- return 0;
5308
-
5309
-out_debugfs:
5310
- delay_drop_debugfs_cleanup(dev);
5311
- return -ENOMEM;
5312
-}
5313
-
5314
-static void init_delay_drop(struct mlx5_ib_dev *dev)
5315
-{
5316
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
5317
- return;

 mutex_init(&dev->delay_drop.lock);
 dev->delay_drop.dev = dev;
@@ -5324,855 +4532,52 @@
 atomic_set(&dev->delay_drop.rqs_cnt, 0);
 atomic_set(&dev->delay_drop.events_cnt, 0);

5327
- if (delay_drop_debugfs_init(dev))
5328
- mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
5329
-}
5330
-
5331
-static const struct cpumask *
5332
-mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
5333
-{
5334
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
5335
-
5336
- return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
5337
-}
5338
-
5339
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
5340
-static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
5341
- struct mlx5_ib_multiport_info *mpi)
5342
-{
5343
- u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
5344
- struct mlx5_ib_port *port = &ibdev->port[port_num];
5345
- int comps;
5346
- int err;
5347
- int i;
5348
-
5349
- mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
5350
-
5351
- spin_lock(&port->mp.mpi_lock);
5352
- if (!mpi->ibdev) {
5353
- spin_unlock(&port->mp.mpi_lock);
5354
- return;
5355
- }
5356
- mpi->ibdev = NULL;
5357
-
5358
- spin_unlock(&port->mp.mpi_lock);
5359
- mlx5_remove_netdev_notifier(ibdev, port_num);
5360
- spin_lock(&port->mp.mpi_lock);
5361
-
5362
- comps = mpi->mdev_refcnt;
5363
- if (comps) {
5364
- mpi->unaffiliate = true;
5365
- init_completion(&mpi->unref_comp);
5366
- spin_unlock(&port->mp.mpi_lock);
5367
-
5368
- for (i = 0; i < comps; i++)
5369
- wait_for_completion(&mpi->unref_comp);
5370
-
5371
- spin_lock(&port->mp.mpi_lock);
5372
- mpi->unaffiliate = false;
5373
- }
5374
-
5375
- port->mp.mpi = NULL;
5376
-
5377
- spin_unlock(&port->mp.mpi_lock);
5378
-
5379
- err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev);
5380
-
5381
- mlx5_ib_dbg(ibdev, "unaffiliated port %d\n", port_num + 1);
5382
- /* Log an error, still needed to cleanup the pointers and add
5383
- * it back to the list.
5384
- */
5385
- if (err)
5386
- mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n",
5387
- port_num + 1);
5388
-
5389
- ibdev->roce[port_num].last_port_state = IB_PORT_DOWN;
5390
-}
5391
-
5392
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
5393
-static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
5394
- struct mlx5_ib_multiport_info *mpi)
5395
-{
5396
- u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
5397
- int err;
5398
-
5399
- spin_lock(&ibdev->port[port_num].mp.mpi_lock);
5400
- if (ibdev->port[port_num].mp.mpi) {
5401
- mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
5402
- port_num + 1);
5403
- spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
5404
- return false;
5405
- }
5406
-
5407
- ibdev->port[port_num].mp.mpi = mpi;
5408
- mpi->ibdev = ibdev;
5409
- spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
5410
-
5411
- err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
5412
- if (err)
5413
- goto unbind;
5414
-
5415
- err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev));
5416
- if (err)
5417
- goto unbind;
5418
-
5419
- err = mlx5_add_netdev_notifier(ibdev, port_num);
5420
- if (err) {
5421
- mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
5422
- port_num + 1);
5423
- goto unbind;
5424
- }
5425
-
5426
- err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
5427
- if (err)
5428
- goto unbind;
5429
-
5430
- return true;
5431
-
5432
-unbind:
5433
- mlx5_ib_unbind_slave_port(ibdev, mpi);
5434
- return false;
5435
-}
5436
-
5437
-static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
5438
-{
5439
- int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
5440
- enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
5441
- port_num + 1);
5442
- struct mlx5_ib_multiport_info *mpi;
5443
- int err;
5444
- int i;
5445
-
5446
- if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
4535
+ if (!mlx5_debugfs_root)
 return 0;

5449
- err = mlx5_query_nic_vport_system_image_guid(dev->mdev,
5450
- &dev->sys_image_guid);
5451
- if (err)
5452
- return err;
4538
+ root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
4539
+ dev->delay_drop.dir_debugfs = root;
5454
- err = mlx5_nic_vport_enable_roce(dev->mdev);
5455
- if (err)
5456
- return err;
5457
-
5458
- mutex_lock(&mlx5_ib_multiport_mutex);
5459
- for (i = 0; i < dev->num_ports; i++) {
5460
- bool bound = false;
5461
-
5462
- /* build a stub multiport info struct for the native port. */
5463
- if (i == port_num) {
5464
- mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
5465
- if (!mpi) {
5466
- mutex_unlock(&mlx5_ib_multiport_mutex);
5467
- mlx5_nic_vport_disable_roce(dev->mdev);
5468
- return -ENOMEM;
5469
- }
5470
-
5471
- mpi->is_master = true;
5472
- mpi->mdev = dev->mdev;
5473
- mpi->sys_image_guid = dev->sys_image_guid;
5474
- dev->port[i].mp.mpi = mpi;
5475
- mpi->ibdev = dev;
5476
- mpi = NULL;
5477
- continue;
5478
- }
5479
-
5480
- list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list,
5481
- list) {
5482
- if (dev->sys_image_guid == mpi->sys_image_guid &&
5483
- (mlx5_core_native_port_num(mpi->mdev) - 1) == i) {
5484
- bound = mlx5_ib_bind_slave_port(dev, mpi);
5485
- }
5486
-
5487
- if (bound) {
5488
- dev_dbg(&mpi->mdev->pdev->dev, "removing port from unaffiliated list.\n");
5489
- mlx5_ib_dbg(dev, "port %d bound\n", i + 1);
5490
- list_del(&mpi->list);
5491
- break;
5492
- }
5493
- }
5494
- if (!bound) {
5495
- get_port_caps(dev, i + 1);
5496
- mlx5_ib_dbg(dev, "no free port found for port %d\n",
5497
- i + 1);
5498
- }
5499
- }
5500
-
5501
- list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list);
5502
- mutex_unlock(&mlx5_ib_multiport_mutex);
5503
- return err;
5504
-}
5505
-
5506
-static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
5507
-{
5508
- int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
5509
- enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
5510
- port_num + 1);
5511
- int i;
5512
-
5513
- if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
5514
- return;
5515
-
5516
- mutex_lock(&mlx5_ib_multiport_mutex);
5517
- for (i = 0; i < dev->num_ports; i++) {
5518
- if (dev->port[i].mp.mpi) {
5519
- /* Destroy the native port stub */
5520
- if (i == port_num) {
5521
- kfree(dev->port[i].mp.mpi);
5522
- dev->port[i].mp.mpi = NULL;
5523
- } else {
5524
- mlx5_ib_dbg(dev, "unbinding port_num: %d\n", i + 1);
5525
- list_add_tail(&dev->port[i].mp.mpi->list,
5526
- &mlx5_ib_unaffiliated_port_list);
5527
- mlx5_ib_unbind_slave_port(dev, dev->port[i].mp.mpi);
5528
- }
5529
- }
5530
- }
5531
-
5532
- mlx5_ib_dbg(dev, "removing from devlist\n");
5533
- list_del(&dev->ib_dev_list);
5534
- mutex_unlock(&mlx5_ib_multiport_mutex);
5535
-
5536
- mlx5_nic_vport_disable_roce(dev->mdev);
5537
-}
5538
-
5539
-ADD_UVERBS_ATTRIBUTES_SIMPLE(
5540
- mlx5_ib_dm,
5541
- UVERBS_OBJECT_DM,
5542
- UVERBS_METHOD_DM_ALLOC,
5543
- UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
5544
- UVERBS_ATTR_TYPE(u64),
5545
- UA_MANDATORY),
5546
- UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
5547
- UVERBS_ATTR_TYPE(u16),
5548
- UA_MANDATORY));
5549
-
5550
-ADD_UVERBS_ATTRIBUTES_SIMPLE(
5551
- mlx5_ib_flow_action,
5552
- UVERBS_OBJECT_FLOW_ACTION,
5553
- UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
5554
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
5555
- enum mlx5_ib_uapi_flow_action_flags));
5556
-
5557
-static int populate_specs_root(struct mlx5_ib_dev *dev)
5558
-{
5559
- const struct uverbs_object_tree_def **trees = dev->driver_trees;
5560
- size_t num_trees = 0;
5561
-
5562
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
5563
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
5564
- trees[num_trees++] = &mlx5_ib_flow_action;
5565
-
5566
- if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
5567
- trees[num_trees++] = &mlx5_ib_dm;
5568
-
5569
- if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
5570
- MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
5571
- trees[num_trees++] = mlx5_ib_get_devx_tree();
5572
-
5573
- num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
5574
-
5575
- WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
5576
- trees[num_trees] = NULL;
5577
- dev->ib_dev.driver_specs = trees;
5578
-
5579
- return 0;
5580
-}
5581
-
5582
-static int mlx5_ib_read_counters(struct ib_counters *counters,
5583
- struct ib_counters_read_attr *read_attr,
5584
- struct uverbs_attr_bundle *attrs)
5585
-{
5586
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
5587
- struct mlx5_read_counters_attr mread_attr = {};
5588
- struct mlx5_ib_flow_counters_desc *desc;
5589
- int ret, i;
5590
-
5591
- mutex_lock(&mcounters->mcntrs_mutex);
5592
- if (mcounters->cntrs_max_index > read_attr->ncounters) {
5593
- ret = -EINVAL;
5594
- goto err_bound;
5595
- }
5596
-
5597
- mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
5598
- GFP_KERNEL);
5599
- if (!mread_attr.out) {
5600
- ret = -ENOMEM;
5601
- goto err_bound;
5602
- }
5603
-
5604
- mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
5605
- mread_attr.flags = read_attr->flags;
5606
- ret = mcounters->read_counters(counters->device, &mread_attr);
5607
- if (ret)
5608
- goto err_read;
5609
-
5610
- /* do the pass over the counters data array to assign according to the
5611
- * descriptions and indexing pairs
5612
- */
5613
- desc = mcounters->counters_data;
5614
- for (i = 0; i < mcounters->ncounters; i++)
5615
- read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
5616
-
5617
-err_read:
5618
- kfree(mread_attr.out);
5619
-err_bound:
5620
- mutex_unlock(&mcounters->mcntrs_mutex);
5621
- return ret;
5622
-}
5623
-
5624
-static int mlx5_ib_destroy_counters(struct ib_counters *counters)
5625
-{
5626
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
5627
-
5628
- counters_clear_description(counters);
5629
- if (mcounters->hw_cntrs_hndl)
5630
- mlx5_fc_destroy(to_mdev(counters->device)->mdev,
5631
- mcounters->hw_cntrs_hndl);
5632
-
5633
- kfree(mcounters);
5634
-
5635
- return 0;
5636
-}
5637
-
5638
-static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
5639
- struct uverbs_attr_bundle *attrs)
5640
-{
5641
- struct mlx5_ib_mcounters *mcounters;
5642
-
5643
- mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
5644
- if (!mcounters)
5645
- return ERR_PTR(-ENOMEM);
5646
-
5647
- mutex_init(&mcounters->mcntrs_mutex);
5648
-
5649
- return &mcounters->ibcntrs;
5650
-}
5651
-
5652
-void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
5653
-{
5654
- mlx5_ib_cleanup_multiport_master(dev);
5655
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
5656
- cleanup_srcu_struct(&dev->mr_srcu);
5657
-#endif
5658
- kfree(dev->port);
5659
-}
5660
-
5661
-int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
5662
-{
5663
- struct mlx5_core_dev *mdev = dev->mdev;
5664
- const char *name;
5665
- int err;
5666
- int i;
5667
-
5668
- dev->port = kcalloc(dev->num_ports, sizeof(*dev->port),
5669
- GFP_KERNEL);
5670
- if (!dev->port)
5671
- return -ENOMEM;
5672
-
5673
- for (i = 0; i < dev->num_ports; i++) {
5674
- spin_lock_init(&dev->port[i].mp.mpi_lock);
5675
- rwlock_init(&dev->roce[i].netdev_lock);
5676
- }
5677
-
5678
- err = mlx5_ib_init_multiport_master(dev);
5679
- if (err)
5680
- goto err_free_port;
5681
-
5682
- if (!mlx5_core_mp_enabled(mdev)) {
5683
- for (i = 1; i <= dev->num_ports; i++) {
5684
- err = get_port_caps(dev, i);
5685
- if (err)
5686
- break;
5687
- }
5688
- } else {
5689
- err = get_port_caps(dev, mlx5_core_native_port_num(mdev));
5690
- }
5691
- if (err)
5692
- goto err_mp;
5693
-
5694
- if (mlx5_use_mad_ifc(dev))
5695
- get_ext_port_caps(dev);
5696
-
5697
- if (!mlx5_lag_is_active(mdev))
5698
- name = "mlx5_%d";
5699
- else
5700
- name = "mlx5_bond_%d";
5701
-
5702
- strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
5703
- dev->ib_dev.owner = THIS_MODULE;
5704
- dev->ib_dev.node_type = RDMA_NODE_IB_CA;
5705
- dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
5706
- dev->ib_dev.phys_port_cnt = dev->num_ports;
5707
- dev->ib_dev.num_comp_vectors =
5708
- dev->mdev->priv.eq_table.num_comp_vectors;
5709
- dev->ib_dev.dev.parent = &mdev->pdev->dev;
5710
-
5711
- mutex_init(&dev->cap_mask_mutex);
5712
- INIT_LIST_HEAD(&dev->qp_list);
5713
- spin_lock_init(&dev->reset_flow_resource_lock);
5714
-
5715
- spin_lock_init(&dev->memic.memic_lock);
5716
- dev->memic.dev = mdev;
5717
-
5718
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
5719
- err = init_srcu_struct(&dev->mr_srcu);
5720
- if (err)
5721
- goto err_free_port;
5722
-#endif
5723
-
5724
- return 0;
5725
-err_mp:
5726
- mlx5_ib_cleanup_multiport_master(dev);
5727
-
5728
-err_free_port:
5729
- kfree(dev->port);
5730
-
5731
- return -ENOMEM;
5732
-}
5733
-
5734
-static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
5735
-{
5736
- dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
5737
-
5738
- if (!dev->flow_db)
5739
- return -ENOMEM;
5740
-
5741
- mutex_init(&dev->flow_db->lock);
5742
-
5743
- return 0;
5744
-}
5745
-
5746
-int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
5747
-{
5748
- struct mlx5_ib_dev *nic_dev;
5749
-
5750
- nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
5751
-
5752
- if (!nic_dev)
5753
- return -EINVAL;
5754
-
5755
- dev->flow_db = nic_dev->flow_db;
5756
-
5757
- return 0;
5758
-}
5759
-
5760
-static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
5761
-{
5762
- kfree(dev->flow_db);
5763
-}
5764
-
5765
-int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
5766
-{
5767
- struct mlx5_core_dev *mdev = dev->mdev;
5768
- int err;
5769
-
5770
- dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
5771
- dev->ib_dev.uverbs_cmd_mask =
5772
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
5773
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
5774
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
5775
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
5776
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
5777
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
5778
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
5779
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
5780
- (1ull << IB_USER_VERBS_CMD_REREG_MR) |
5781
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
5782
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
5783
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
5784
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
5785
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
5786
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
5787
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
5788
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
5789
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
5790
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
5791
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
5792
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
5793
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
5794
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
5795
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
5796
- (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
5797
- (1ull << IB_USER_VERBS_CMD_OPEN_QP);
5798
- dev->ib_dev.uverbs_ex_cmd_mask =
5799
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
5800
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
5801
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
5802
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) |
5803
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
5804
-
5805
- dev->ib_dev.query_device = mlx5_ib_query_device;
5806
- dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
5807
- dev->ib_dev.query_gid = mlx5_ib_query_gid;
5808
- dev->ib_dev.add_gid = mlx5_ib_add_gid;
5809
- dev->ib_dev.del_gid = mlx5_ib_del_gid;
5810
- dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
5811
- dev->ib_dev.modify_device = mlx5_ib_modify_device;
5812
- dev->ib_dev.modify_port = mlx5_ib_modify_port;
5813
- dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
5814
- dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
5815
- dev->ib_dev.mmap = mlx5_ib_mmap;
5816
- dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
5817
- dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
5818
- dev->ib_dev.create_ah = mlx5_ib_create_ah;
5819
- dev->ib_dev.query_ah = mlx5_ib_query_ah;
5820
- dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
5821
- dev->ib_dev.create_srq = mlx5_ib_create_srq;
5822
- dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
5823
- dev->ib_dev.query_srq = mlx5_ib_query_srq;
5824
- dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
5825
- dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
5826
- dev->ib_dev.create_qp = mlx5_ib_create_qp;
5827
- dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
5828
- dev->ib_dev.query_qp = mlx5_ib_query_qp;
5829
- dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
5830
- dev->ib_dev.drain_sq = mlx5_ib_drain_sq;
5831
- dev->ib_dev.drain_rq = mlx5_ib_drain_rq;
5832
- dev->ib_dev.post_send = mlx5_ib_post_send;
5833
- dev->ib_dev.post_recv = mlx5_ib_post_recv;
5834
- dev->ib_dev.create_cq = mlx5_ib_create_cq;
5835
- dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
5836
- dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
5837
- dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
5838
- dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
5839
- dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
5840
- dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
5841
- dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
5842
- dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
5843
- dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
5844
- dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
5845
- dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
5846
- dev->ib_dev.process_mad = mlx5_ib_process_mad;
5847
- dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
5848
- dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
5849
- dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
5850
- dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
5851
- dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
5852
- if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
5853
- dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev;
5854
-
5855
- if (mlx5_core_is_pf(mdev)) {
5856
- dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
5857
- dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
5858
- dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
5859
- dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
5860
- }
5861
-
5862
- dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
5863
-
5864
- dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
5865
-
5866
- if (MLX5_CAP_GEN(mdev, imaicl)) {
5867
- dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
5868
- dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
5869
- dev->ib_dev.uverbs_cmd_mask |=
5870
- (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
5871
- (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
5872
- }
5873
-
5874
- if (MLX5_CAP_GEN(mdev, xrc)) {
5875
- dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
5876
- dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
5877
- dev->ib_dev.uverbs_cmd_mask |=
5878
- (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
5879
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
5880
- }
5881
-
5882
- if (MLX5_CAP_DEV_MEM(mdev, memic)) {
- dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm;
- dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm;
- dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr;
- }
-
- dev->ib_dev.create_flow = mlx5_ib_create_flow;
- dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
- dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp;
- dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
- dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
- dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
- dev->ib_dev.create_counters = mlx5_ib_create_counters;
- dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
- dev->ib_dev.read_counters = mlx5_ib_read_counters;
-
- err = init_node_data(dev);
- if (err)
- return err;
-
- if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
- (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
- MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
- mutex_init(&dev->lb_mutex);
-
- return 0;
-}
-
-static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
-{
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
- dev->ib_dev.query_port = mlx5_ib_query_port;
-
- return 0;
-}
-
-int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
-{
- dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
- dev->ib_dev.query_port = mlx5_ib_rep_query_port;
-
- return 0;
-}
-
-static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
-{
- u8 port_num;
- int i;
-
- for (i = 0; i < dev->num_ports; i++) {
- dev->roce[i].dev = dev;
- dev->roce[i].native_port_num = i + 1;
- dev->roce[i].last_port_state = IB_PORT_DOWN;
- }
-
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
-
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
-
- port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- return mlx5_add_netdev_notifier(dev, port_num);
-}
-
-static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- mlx5_remove_netdev_notifier(dev, port_num);
-}
-
-int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
-{
- struct mlx5_core_dev *mdev = dev->mdev;
- enum rdma_link_layer ll;
- int port_type_cap;
- int err = 0;
-
- port_type_cap = MLX5_CAP_GEN(mdev, port_type);
- ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
-
- if (ll == IB_LINK_LAYER_ETHERNET)
- err = mlx5_ib_stage_common_roce_init(dev);
-
- return err;
-}
-
-void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_stage_common_roce_cleanup(dev);
-}
-
-static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
-{
- struct mlx5_core_dev *mdev = dev->mdev;
- enum rdma_link_layer ll;
- int port_type_cap;
- int err;
-
- port_type_cap = MLX5_CAP_GEN(mdev, port_type);
- ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
-
- if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_ib_stage_common_roce_init(dev);
- if (err)
- return err;
-
- err = mlx5_enable_eth(dev);
- if (err)
- goto cleanup;
- }
-
- return 0;
-cleanup:
- mlx5_ib_stage_common_roce_cleanup(dev);
-
- return err;
-}
-
-static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- struct mlx5_core_dev *mdev = dev->mdev;
- enum rdma_link_layer ll;
- int port_type_cap;
-
- port_type_cap = MLX5_CAP_GEN(mdev, port_type);
- ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
-
- if (ll == IB_LINK_LAYER_ETHERNET) {
- mlx5_disable_eth(dev);
- mlx5_ib_stage_common_roce_cleanup(dev);
- }
-}
-
-int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
-{
- return create_dev_resources(&dev->devr);
-}
-
-void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
-{
- destroy_dev_resources(&dev->devr);
-}
-
-static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_internal_fill_odp_caps(dev);
-
- return mlx5_ib_odp_init_one(dev);
-}
-
-int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
- dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
-
- return mlx5_ib_alloc_counters(dev);
- }
-
- return 0;
-}
-
-void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_counters(dev);
-}
-
-static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
-{
- return mlx5_ib_init_cong_debugfs(dev,
- mlx5_core_native_port_num(dev->mdev) - 1);
-}
-
-static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_cleanup_cong_debugfs(dev,
- mlx5_core_native_port_num(dev->mdev) - 1);
-}
-
-static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
-{
- dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
- return PTR_ERR_OR_ZERO(dev->mdev->priv.uar);
-}
-
-static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
-}
-
-int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
-{
- int err;
-
- err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
- if (err)
- return err;
-
- err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
- if (err)
- mlx5_free_bfreg(dev->mdev, &dev->bfreg);
-
- return err;
-}
-
-void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
- mlx5_free_bfreg(dev->mdev, &dev->bfreg);
-}
-
-static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
-{
- return populate_specs_root(dev);
-}
-
-int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
-{
- return ib_register_device(&dev->ib_dev, NULL);
-}
-
-void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
-{
- destroy_umrc_res(dev);
-}
-
-void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
-{
- ib_unregister_device(&dev->ib_dev);
-}
-
-int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
-{
- return create_umr_res(dev);
-}
-
-static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
-{
- init_delay_drop(dev);
-
+ debugfs_create_atomic_t("num_timeout_events", 0400, root,
+ &dev->delay_drop.events_cnt);
+ debugfs_create_atomic_t("num_rqs", 0400, root,
+ &dev->delay_drop.rqs_cnt);
+ debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
+ &fops_delay_drop_timeout);
 return 0;
 }

 static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
 {
- cancel_delay_drop(dev);
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ if (!dev->delay_drop.dir_debugfs)
+ return;
+
+ debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
+ dev->delay_drop.dir_debugfs = NULL;
 }

-int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
 {
- int err;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
- err = device_create_file(&dev->ib_dev.dev,
- mlx5_class_attributes[i]);
- if (err)
- return err;
- }
-
+ dev->mdev_events.notifier_call = mlx5_ib_event;
+ mlx5_notifier_register(dev->mdev, &dev->mdev_events);
 return 0;
 }

-static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
 {
- mlx5_ib_register_vport_reps(dev);
-
- return 0;
-}
-
-static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_unregister_vport_reps(dev);
+ mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
 }

 void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 const struct mlx5_ib_profile *profile,
 int stage)
 {
+ dev->ib_active = false;
+
 /* Number of stages to cleanup */
 while (stage) {
 stage--;
@@ -6180,7 +4585,8 @@
 profile->stage[stage].cleanup(dev);
 }

- ib_dealloc_device((struct ib_device *)dev);
+ kfree(dev->port);
+ ib_dealloc_device(&dev->ib_dev);
 }

 void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
@@ -6189,7 +4595,7 @@
 int err;
 int i;

- printk_once(KERN_INFO "%s", mlx5_version);
+ dev->profile = profile;

 for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
 if (profile->stage[i].init) {
@@ -6199,7 +4605,6 @@
 }
 }

- dev->profile = profile;
 dev->ib_active = true;

 return dev;
@@ -6214,27 +4619,36 @@
 STAGE_CREATE(MLX5_IB_STAGE_INIT,
 mlx5_ib_stage_init_init,
 mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_CAPS,
 mlx5_ib_stage_caps_init,
- NULL),
+ mlx5_ib_stage_caps_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
 mlx5_ib_stage_non_default_cb,
 NULL),
 STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_roce_init,
- mlx5_ib_stage_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_QP,
+ mlx5_init_qp_table,
+ mlx5_cleanup_qp_table),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_ODP,
- mlx5_ib_stage_odp_init,
- NULL),
+ mlx5_ib_odp_init_one,
+ mlx5_ib_odp_cleanup_one),
 STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
 mlx5_ib_stage_cong_debugfs_init,
 mlx5_ib_stage_cong_debugfs_cleanup),
@@ -6247,9 +4661,9 @@
 STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
 NULL,
 mlx5_ib_stage_pre_ib_reg_umr_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_SPECS,
- mlx5_ib_stage_populate_specs,
- NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
 mlx5_ib_stage_ib_reg_init,
 mlx5_ib_stage_ib_reg_cleanup),
@@ -6259,33 +4673,45 @@
 STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
 mlx5_ib_stage_delay_drop_init,
 mlx5_ib_stage_delay_drop_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
- mlx5_ib_stage_class_attr_init,
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
 NULL),
 };

-static const struct mlx5_ib_profile nic_rep_profile = {
+const struct mlx5_ib_profile raw_eth_profile = {
 STAGE_CREATE(MLX5_IB_STAGE_INIT,
 mlx5_ib_stage_init_init,
 mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_CAPS,
 mlx5_ib_stage_caps_init,
- NULL),
+ mlx5_ib_stage_caps_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
- mlx5_ib_stage_rep_non_default_cb,
+ mlx5_ib_stage_raw_eth_non_default_cb,
 NULL),
 STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_rep_roce_init,
- mlx5_ib_stage_rep_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_QP,
+ mlx5_init_qp_table,
+ mlx5_cleanup_qp_table),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
+ mlx5_ib_stage_cong_debugfs_init,
+ mlx5_ib_stage_cong_debugfs_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_UAR,
 mlx5_ib_stage_uar_init,
 mlx5_ib_stage_uar_cleanup),
@@ -6295,21 +4721,18 @@
 STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
 NULL,
 mlx5_ib_stage_pre_ib_reg_umr_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_SPECS,
- mlx5_ib_stage_populate_specs,
- NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
 mlx5_ib_stage_ib_reg_init,
 mlx5_ib_stage_ib_reg_cleanup),
 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
 mlx5_ib_stage_post_ib_reg_umr_init,
 NULL),
- STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
- mlx5_ib_stage_class_attr_init,
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
 NULL),
- STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
- mlx5_ib_stage_rep_reg_init,
- mlx5_ib_stage_rep_reg_cleanup),
 };

 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
@@ -6346,7 +4769,8 @@

 if (!bound) {
 list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
- dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n");
+ dev_dbg(mdev->device,
+ "no suitable IB device found to bind to, added to unaffiliated list.\n");
 }
 mutex_unlock(&mlx5_ib_multiport_mutex);

@@ -6355,11 +4779,18 @@

 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 {
+ const struct mlx5_ib_profile *profile;
 enum rdma_link_layer ll;
 struct mlx5_ib_dev *dev;
 int port_type_cap;
+ int num_ports;

- printk_once(KERN_INFO "%s", mlx5_version);
+ if (MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) {
+ if (!mlx5_core_mp_enabled(mdev))
+ mlx5_ib_register_vport_reps(mdev);
+ return mdev;
+ }

 port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
@@ -6367,28 +4798,38 @@
 if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
 return mlx5_ib_add_slave_port(mdev);

- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+ MLX5_CAP_GEN(mdev, num_vhca_ports));
+ dev = ib_alloc_device(mlx5_ib_dev, ib_dev);
 if (!dev)
 return NULL;
-
- dev->mdev = mdev;
- dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
- MLX5_CAP_GEN(mdev, num_vhca_ports));
-
- if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
- dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
-
- return __mlx5_ib_add(dev, &nic_rep_profile);
+ dev->port = kcalloc(num_ports, sizeof(*dev->port),
+ GFP_KERNEL);
+ if (!dev->port) {
+ ib_dealloc_device(&dev->ib_dev);
+ return NULL;
 }

- return __mlx5_ib_add(dev, &pf_profile);
+ dev->mdev = mdev;
+ dev->num_ports = num_ports;
+
+ if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_enabled(mdev))
+ profile = &raw_eth_profile;
+ else
+ profile = &pf_profile;
+
+ return __mlx5_ib_add(dev, profile);
 }

 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 {
 struct mlx5_ib_multiport_info *mpi;
 struct mlx5_ib_dev *dev;
+
+ if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) {
+ mlx5_ib_unregister_vport_reps(mdev);
+ return;
+ }

 if (mlx5_core_is_mp_slave(mdev)) {
 mpi = context;
@@ -6408,10 +4849,6 @@
 static struct mlx5_interface mlx5_ib_interface = {
 .add = mlx5_ib_add,
 .remove = mlx5_ib_remove,
- .event = mlx5_ib_event,
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- .pfault = mlx5_ib_pfault,
-#endif
 .protocol = MLX5_INTERFACE_PROTOCOL_IB,
 };
