hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/net/rds/ib.c
....@@ -1,5 +1,5 @@
11 /*
2
- * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
2
+ * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
33 *
44 * This software is available to you under a choice of one of two
55 * licenses. You may choose to be licensed under the terms of the GNU
....@@ -87,7 +87,7 @@
8787
8888 spin_lock_irqsave(&rds_ibdev->spinlock, flags);
8989 list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node)
90
- rds_conn_drop(ic->conn);
90
+ rds_conn_path_drop(&ic->conn->c_path[0], true);
9191 spin_unlock_irqrestore(&rds_ibdev->spinlock, flags);
9292 }
9393
....@@ -125,19 +125,23 @@
125125 queue_work(rds_wq, &rds_ibdev->free_work);
126126 }
127127
128
-static void rds_ib_add_one(struct ib_device *device)
128
+static int rds_ib_add_one(struct ib_device *device)
129129 {
130130 struct rds_ib_device *rds_ibdev;
131
- bool has_fr, has_fmr;
131
+ int ret;
132132
133133 /* Only handle IB (no iWARP) devices */
134134 if (device->node_type != RDMA_NODE_IB_CA)
135
- return;
135
+ return -EOPNOTSUPP;
136
+
137
+ /* Device must support FRWR */
138
+ if (!(device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
139
+ return -EOPNOTSUPP;
136140
137141 rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
138142 ibdev_to_node(device));
139143 if (!rds_ibdev)
140
- return;
144
+ return -ENOMEM;
141145
142146 spin_lock_init(&rds_ibdev->spinlock);
143147 refcount_set(&rds_ibdev->refcount, 1);
....@@ -149,13 +153,14 @@
149153 rds_ibdev->max_wrs = device->attrs.max_qp_wr;
150154 rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);
151155
152
- has_fr = (device->attrs.device_cap_flags &
153
- IB_DEVICE_MEM_MGT_EXTENSIONS);
154
- has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
155
- device->map_phys_fmr && device->unmap_fmr);
156
- rds_ibdev->use_fastreg = (has_fr && !has_fmr);
156
+ rds_ibdev->odp_capable =
157
+ !!(device->attrs.device_cap_flags &
158
+ IB_DEVICE_ON_DEMAND_PAGING) &&
159
+ !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
160
+ IB_ODP_SUPPORT_WRITE) &&
161
+ !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
162
+ IB_ODP_SUPPORT_READ);
157163
158
- rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
159164 rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
160165 min_t(unsigned int, (device->attrs.max_mr / 2),
161166 rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size;
....@@ -173,12 +178,14 @@
173178 if (!rds_ibdev->vector_load) {
174179 pr_err("RDS/IB: %s failed to allocate vector memory\n",
175180 __func__);
181
+ ret = -ENOMEM;
176182 goto put_dev;
177183 }
178184
179185 rds_ibdev->dev = device;
180186 rds_ibdev->pd = ib_alloc_pd(device, 0);
181187 if (IS_ERR(rds_ibdev->pd)) {
188
+ ret = PTR_ERR(rds_ibdev->pd);
182189 rds_ibdev->pd = NULL;
183190 goto put_dev;
184191 }
....@@ -186,6 +193,7 @@
186193 rds_ibdev->mr_1m_pool =
187194 rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
188195 if (IS_ERR(rds_ibdev->mr_1m_pool)) {
196
+ ret = PTR_ERR(rds_ibdev->mr_1m_pool);
189197 rds_ibdev->mr_1m_pool = NULL;
190198 goto put_dev;
191199 }
....@@ -193,18 +201,16 @@
193201 rds_ibdev->mr_8k_pool =
194202 rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL);
195203 if (IS_ERR(rds_ibdev->mr_8k_pool)) {
204
+ ret = PTR_ERR(rds_ibdev->mr_8k_pool);
196205 rds_ibdev->mr_8k_pool = NULL;
197206 goto put_dev;
198207 }
199208
200
- rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_mrs = %d, max_8k_mrs = %d\n",
201
- device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
202
- rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_mrs,
203
- rds_ibdev->max_8k_mrs);
209
+ rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n",
210
+ device->attrs.max_mr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
211
+ rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs);
204212
205
- pr_info("RDS/IB: %s: %s supported and preferred\n",
206
- device->name,
207
- rds_ibdev->use_fastreg ? "FRMR" : "FMR");
213
+ pr_info("RDS/IB: %s: added\n", device->name);
208214
209215 down_write(&rds_ib_devices_lock);
210216 list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
....@@ -212,12 +218,13 @@
212218 refcount_inc(&rds_ibdev->refcount);
213219
214220 ib_set_client_data(device, &rds_ib_client, rds_ibdev);
215
- refcount_inc(&rds_ibdev->refcount);
216221
217222 rds_ib_nodev_connect();
223
+ return 0;
218224
219225 put_dev:
220226 rds_ib_dev_put(rds_ibdev);
227
+ return ret;
221228 }
222229
223230 /*
....@@ -259,9 +266,6 @@
259266 {
260267 struct rds_ib_device *rds_ibdev = client_data;
261268
262
- if (!rds_ibdev)
263
- return;
264
-
265269 rds_ib_dev_shutdown(rds_ibdev);
266270
267271 /* stop connection attempts from getting a reference to this device. */
....@@ -291,7 +295,7 @@
291295 void *buffer)
292296 {
293297 struct rds_info_rdma_connection *iinfo = buffer;
294
- struct rds_ib_connection *ic;
298
+ struct rds_ib_connection *ic = conn->c_transport_data;
295299
296300 /* We will only ever look at IB transports */
297301 if (conn->c_trans != &rds_ib_transport)
....@@ -301,13 +305,15 @@
301305
302306 iinfo->src_addr = conn->c_laddr.s6_addr32[3];
303307 iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
308
+ if (ic) {
309
+ iinfo->tos = conn->c_tos;
310
+ iinfo->sl = ic->i_sl;
311
+ }
304312
305313 memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
306314 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
307315 if (rds_conn_state(conn) == RDS_CONN_UP) {
308316 struct rds_ib_device *rds_ibdev;
309
-
310
- ic = conn->c_transport_data;
311317
312318 rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
313319 (union ib_gid *)&iinfo->dst_gid);
....@@ -317,6 +323,7 @@
317323 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
318324 iinfo->max_send_sge = rds_ibdev->max_sge;
319325 rds_ib_get_mr_info(rds_ibdev, iinfo);
326
+ iinfo->cache_allocs = atomic_read(&ic->i_cache_allocs);
320327 }
321328 return 1;
322329 }
....@@ -327,7 +334,7 @@
327334 void *buffer)
328335 {
329336 struct rds6_info_rdma_connection *iinfo6 = buffer;
330
- struct rds_ib_connection *ic;
337
+ struct rds_ib_connection *ic = conn->c_transport_data;
331338
332339 /* We will only ever look at IB transports */
333340 if (conn->c_trans != &rds_ib_transport)
....@@ -335,6 +342,10 @@
335342
336343 iinfo6->src_addr = conn->c_laddr;
337344 iinfo6->dst_addr = conn->c_faddr;
345
+ if (ic) {
346
+ iinfo6->tos = conn->c_tos;
347
+ iinfo6->sl = ic->i_sl;
348
+ }
338349
339350 memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid));
340351 memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid));
....@@ -342,7 +353,6 @@
342353 if (rds_conn_state(conn) == RDS_CONN_UP) {
343354 struct rds_ib_device *rds_ibdev;
344355
345
- ic = conn->c_transport_data;
346356 rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid,
347357 (union ib_gid *)&iinfo6->dst_gid);
348358 rds_ibdev = ic->rds_ibdev;
....@@ -350,6 +360,7 @@
350360 iinfo6->max_recv_wr = ic->i_recv_ring.w_nr;
351361 iinfo6->max_send_sge = rds_ibdev->max_sge;
352362 rds6_ib_get_mr_info(rds_ibdev, iinfo6);
363
+ iinfo6->cache_allocs = atomic_read(&ic->i_cache_allocs);
353364 }
354365 return 1;
355366 }
....@@ -514,6 +525,15 @@
514525 rds_ib_mr_exit();
515526 }
516527
528
+static u8 rds_ib_get_tos_map(u8 tos)
529
+{
530
+ /* 1:1 user to transport map for RDMA transport.
531
+ * In future, if custom map is desired, hook can export
532
+ * user configurable map.
533
+ */
534
+ return tos;
535
+}
536
+
517537 struct rds_transport rds_ib_transport = {
518538 .laddr_check = rds_ib_laddr_check,
519539 .xmit_path_complete = rds_ib_xmit_path_complete,
....@@ -536,6 +556,7 @@
536556 .sync_mr = rds_ib_sync_mr,
537557 .free_mr = rds_ib_free_mr,
538558 .flush_mrs = rds_ib_flush_mrs,
559
+ .get_tos_map = rds_ib_get_tos_map,
539560 .t_owner = THIS_MODULE,
540561 .t_name = "infiniband",
541562 .t_unloading = rds_ib_is_unloading,