hc
2024-05-10 10ebd8556b7990499c896a550e3d416b444211e6
kernel/net/smc/smc_pnet.c
....@@ -12,6 +12,7 @@
1212 #include <linux/module.h>
1313 #include <linux/list.h>
1414 #include <linux/ctype.h>
15
+#include <linux/mutex.h>
1516 #include <net/netlink.h>
1617 #include <net/genetlink.h>
1718
....@@ -20,14 +21,21 @@
2021
2122 #include <rdma/ib_verbs.h>
2223
24
+#include <net/netns/generic.h>
25
+#include "smc_netns.h"
26
+
2327 #include "smc_pnet.h"
2428 #include "smc_ib.h"
2529 #include "smc_ism.h"
30
+#include "smc_core.h"
2631
27
-static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
32
+static struct net_device *__pnet_find_base_ndev(struct net_device *ndev);
33
+static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
34
+
35
+static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
2836 [SMC_PNETID_NAME] = {
2937 .type = NLA_NUL_STRING,
30
- .len = SMC_MAX_PNETID_LEN - 1
38
+ .len = SMC_MAX_PNETID_LEN
3139 },
3240 [SMC_PNETID_ETHNAME] = {
3341 .type = NLA_NUL_STRING,
....@@ -42,155 +50,228 @@
4250
4351 static struct genl_family smc_pnet_nl_family;
4452
45
-/**
46
- * struct smc_pnettable - SMC PNET table anchor
47
- * @lock: Lock for list action
48
- * @pnetlist: List of PNETIDs
49
- */
50
-static struct smc_pnettable {
51
- rwlock_t lock;
52
- struct list_head pnetlist;
53
-} smc_pnettable = {
54
- .pnetlist = LIST_HEAD_INIT(smc_pnettable.pnetlist),
55
- .lock = __RW_LOCK_UNLOCKED(smc_pnettable.lock)
53
+enum smc_pnet_nametype {
54
+ SMC_PNET_ETH = 1,
55
+ SMC_PNET_IB = 2,
5656 };
5757
58
-/**
59
- * struct smc_pnetentry - pnet identifier name entry
60
- * @list: List node.
61
- * @pnet_name: Pnet identifier name
62
- * @ndev: pointer to network device.
63
- * @smcibdev: Pointer to IB device.
64
- */
58
+/* pnet entry stored in pnet table */
6559 struct smc_pnetentry {
6660 struct list_head list;
6761 char pnet_name[SMC_MAX_PNETID_LEN + 1];
68
- struct net_device *ndev;
69
- struct smc_ib_device *smcibdev;
70
- u8 ib_port;
62
+ enum smc_pnet_nametype type;
63
+ union {
64
+ struct {
65
+ char eth_name[IFNAMSIZ + 1];
66
+ struct net_device *ndev;
67
+ };
68
+ struct {
69
+ char ib_name[IB_DEVICE_NAME_MAX + 1];
70
+ u8 ib_port;
71
+ };
72
+ };
7173 };
7274
73
-/* Check if two RDMA device entries are identical. Use device name and port
74
- * number for comparison.
75
- */
76
-static bool smc_pnet_same_ibname(struct smc_pnetentry *pnetelem, char *ibname,
77
- u8 ibport)
75
+/* Check if the pnetid is set */
76
+bool smc_pnet_is_pnetid_set(u8 *pnetid)
7877 {
79
- return pnetelem->ib_port == ibport &&
80
- !strncmp(pnetelem->smcibdev->ibdev->name, ibname,
81
- sizeof(pnetelem->smcibdev->ibdev->name));
78
+ if (pnetid[0] == 0 || pnetid[0] == _S)
79
+ return false;
80
+ return true;
8281 }
8382
84
-/* Find a pnetid in the pnet table.
85
- */
86
-static struct smc_pnetentry *smc_pnet_find_pnetid(char *pnet_name)
83
+/* Check if two given pnetids match */
84
+static bool smc_pnet_match(u8 *pnetid1, u8 *pnetid2)
8785 {
88
- struct smc_pnetentry *pnetelem, *found_pnetelem = NULL;
86
+ int i;
8987
90
- read_lock(&smc_pnettable.lock);
91
- list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
92
- if (!strncmp(pnetelem->pnet_name, pnet_name,
93
- sizeof(pnetelem->pnet_name))) {
94
- found_pnetelem = pnetelem;
88
+ for (i = 0; i < SMC_MAX_PNETID_LEN; i++) {
89
+ if ((pnetid1[i] == 0 || pnetid1[i] == _S) &&
90
+ (pnetid2[i] == 0 || pnetid2[i] == _S))
9591 break;
96
- }
92
+ if (pnetid1[i] != pnetid2[i])
93
+ return false;
9794 }
98
- read_unlock(&smc_pnettable.lock);
99
- return found_pnetelem;
95
+ return true;
10096 }
10197
10298 /* Remove a pnetid from the pnet table.
10399 */
104
-static int smc_pnet_remove_by_pnetid(char *pnet_name)
100
+static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
105101 {
106102 struct smc_pnetentry *pnetelem, *tmp_pe;
103
+ struct smc_pnettable *pnettable;
104
+ struct smc_ib_device *ibdev;
105
+ struct smcd_dev *smcd_dev;
106
+ struct smc_net *sn;
107107 int rc = -ENOENT;
108
+ int ibport;
108109
109
- write_lock(&smc_pnettable.lock);
110
- list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
110
+ /* get pnettable for namespace */
111
+ sn = net_generic(net, smc_net_id);
112
+ pnettable = &sn->pnettable;
113
+
114
+ /* remove table entry */
115
+ mutex_lock(&pnettable->lock);
116
+ list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
111117 list) {
112
- if (!strncmp(pnetelem->pnet_name, pnet_name,
113
- sizeof(pnetelem->pnet_name))) {
118
+ if (!pnet_name ||
119
+ smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
114120 list_del(&pnetelem->list);
115
- dev_put(pnetelem->ndev);
121
+ if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) {
122
+ dev_put(pnetelem->ndev);
123
+ pr_warn_ratelimited("smc: net device %s "
124
+ "erased user defined "
125
+ "pnetid %.16s\n",
126
+ pnetelem->eth_name,
127
+ pnetelem->pnet_name);
128
+ }
116129 kfree(pnetelem);
117130 rc = 0;
118
- break;
119131 }
120132 }
121
- write_unlock(&smc_pnettable.lock);
133
+ mutex_unlock(&pnettable->lock);
134
+
135
+ /* if this is not the initial namespace, stop here */
136
+ if (net != &init_net)
137
+ return rc;
138
+
139
+ /* remove ib devices */
140
+ mutex_lock(&smc_ib_devices.mutex);
141
+ list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
142
+ for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
143
+ if (ibdev->pnetid_by_user[ibport] &&
144
+ (!pnet_name ||
145
+ smc_pnet_match(pnet_name,
146
+ ibdev->pnetid[ibport]))) {
147
+ pr_warn_ratelimited("smc: ib device %s ibport "
148
+ "%d erased user defined "
149
+ "pnetid %.16s\n",
150
+ ibdev->ibdev->name,
151
+ ibport + 1,
152
+ ibdev->pnetid[ibport]);
153
+ memset(ibdev->pnetid[ibport], 0,
154
+ SMC_MAX_PNETID_LEN);
155
+ ibdev->pnetid_by_user[ibport] = false;
156
+ rc = 0;
157
+ }
158
+ }
159
+ }
160
+ mutex_unlock(&smc_ib_devices.mutex);
161
+ /* remove smcd devices */
162
+ mutex_lock(&smcd_dev_list.mutex);
163
+ list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
164
+ if (smcd_dev->pnetid_by_user &&
165
+ (!pnet_name ||
166
+ smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
167
+ pr_warn_ratelimited("smc: smcd device %s "
168
+ "erased user defined pnetid "
169
+ "%.16s\n", dev_name(&smcd_dev->dev),
170
+ smcd_dev->pnetid);
171
+ memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
172
+ smcd_dev->pnetid_by_user = false;
173
+ rc = 0;
174
+ }
175
+ }
176
+ mutex_unlock(&smcd_dev_list.mutex);
122177 return rc;
123178 }
124179
125
-/* Remove a pnet entry mentioning a given network device from the pnet table.
180
+/* Add the reference to a given network device to the pnet table.
181
+ */
182
+static int smc_pnet_add_by_ndev(struct net_device *ndev)
183
+{
184
+ struct smc_pnetentry *pnetelem, *tmp_pe;
185
+ struct smc_pnettable *pnettable;
186
+ struct net *net = dev_net(ndev);
187
+ struct smc_net *sn;
188
+ int rc = -ENOENT;
189
+
190
+ /* get pnettable for namespace */
191
+ sn = net_generic(net, smc_net_id);
192
+ pnettable = &sn->pnettable;
193
+
194
+ mutex_lock(&pnettable->lock);
195
+ list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
196
+ if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
197
+ !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
198
+ dev_hold(ndev);
199
+ pnetelem->ndev = ndev;
200
+ rc = 0;
201
+ pr_warn_ratelimited("smc: adding net device %s with "
202
+ "user defined pnetid %.16s\n",
203
+ pnetelem->eth_name,
204
+ pnetelem->pnet_name);
205
+ break;
206
+ }
207
+ }
208
+ mutex_unlock(&pnettable->lock);
209
+ return rc;
210
+}
211
+
212
+/* Remove the reference to a given network device from the pnet table.
126213 */
127214 static int smc_pnet_remove_by_ndev(struct net_device *ndev)
128215 {
129216 struct smc_pnetentry *pnetelem, *tmp_pe;
217
+ struct smc_pnettable *pnettable;
218
+ struct net *net = dev_net(ndev);
219
+ struct smc_net *sn;
130220 int rc = -ENOENT;
131221
132
- write_lock(&smc_pnettable.lock);
133
- list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
134
- list) {
135
- if (pnetelem->ndev == ndev) {
136
- list_del(&pnetelem->list);
222
+ /* get pnettable for namespace */
223
+ sn = net_generic(net, smc_net_id);
224
+ pnettable = &sn->pnettable;
225
+
226
+ mutex_lock(&pnettable->lock);
227
+ list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
228
+ if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
137229 dev_put(pnetelem->ndev);
138
- kfree(pnetelem);
230
+ pnetelem->ndev = NULL;
139231 rc = 0;
232
+ pr_warn_ratelimited("smc: removing net device %s with "
233
+ "user defined pnetid %.16s\n",
234
+ pnetelem->eth_name,
235
+ pnetelem->pnet_name);
140236 break;
141237 }
142238 }
143
- write_unlock(&smc_pnettable.lock);
239
+ mutex_unlock(&pnettable->lock);
144240 return rc;
145241 }
146242
147
-/* Remove a pnet entry mentioning a given ib device from the pnet table.
243
+/* Apply pnetid to ib device when no pnetid is set.
148244 */
149
-int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev)
245
+static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
246
+ char *pnet_name)
150247 {
151
- struct smc_pnetentry *pnetelem, *tmp_pe;
152
- int rc = -ENOENT;
248
+ bool applied = false;
153249
154
- write_lock(&smc_pnettable.lock);
155
- list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
156
- list) {
157
- if (pnetelem->smcibdev == ibdev) {
158
- list_del(&pnetelem->list);
159
- dev_put(pnetelem->ndev);
160
- kfree(pnetelem);
161
- rc = 0;
162
- break;
163
- }
250
+ mutex_lock(&smc_ib_devices.mutex);
251
+ if (!smc_pnet_is_pnetid_set(ib_dev->pnetid[ib_port - 1])) {
252
+ memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
253
+ SMC_MAX_PNETID_LEN);
254
+ ib_dev->pnetid_by_user[ib_port - 1] = true;
255
+ applied = true;
164256 }
165
- write_unlock(&smc_pnettable.lock);
166
- return rc;
257
+ mutex_unlock(&smc_ib_devices.mutex);
258
+ return applied;
167259 }
168260
169
-/* Append a pnetid to the end of the pnet table if not already on this list.
261
+/* Apply pnetid to smcd device when no pnetid is set.
170262 */
171
-static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
263
+static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name)
172264 {
173
- struct smc_pnetentry *pnetelem;
174
- int rc = -EEXIST;
265
+ bool applied = false;
175266
176
- write_lock(&smc_pnettable.lock);
177
- list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
178
- if (!strncmp(pnetelem->pnet_name, new_pnetelem->pnet_name,
179
- sizeof(new_pnetelem->pnet_name)) ||
180
- !strncmp(pnetelem->ndev->name, new_pnetelem->ndev->name,
181
- sizeof(new_pnetelem->ndev->name)) ||
182
- smc_pnet_same_ibname(pnetelem,
183
- new_pnetelem->smcibdev->ibdev->name,
184
- new_pnetelem->ib_port)) {
185
- dev_put(pnetelem->ndev);
186
- goto found;
187
- }
267
+ mutex_lock(&smcd_dev_list.mutex);
268
+ if (!smc_pnet_is_pnetid_set(smcd_dev->pnetid)) {
269
+ memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN);
270
+ smcd_dev->pnetid_by_user = true;
271
+ applied = true;
188272 }
189
- list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
190
- rc = 0;
191
-found:
192
- write_unlock(&smc_pnettable.lock);
193
- return rc;
273
+ mutex_unlock(&smcd_dev_list.mutex);
274
+ return applied;
194275 }
195276
196277 /* The limit for pnetid is 16 characters.
....@@ -225,146 +306,263 @@
225306 {
226307 struct smc_ib_device *ibdev;
227308
228
- spin_lock(&smc_ib_devices.lock);
309
+ mutex_lock(&smc_ib_devices.mutex);
229310 list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
230311 if (!strncmp(ibdev->ibdev->name, ib_name,
231
- sizeof(ibdev->ibdev->name))) {
312
+ sizeof(ibdev->ibdev->name)) ||
313
+ (ibdev->ibdev->dev.parent &&
314
+ !strncmp(dev_name(ibdev->ibdev->dev.parent), ib_name,
315
+ IB_DEVICE_NAME_MAX - 1))) {
232316 goto out;
233317 }
234318 }
235319 ibdev = NULL;
236320 out:
237
- spin_unlock(&smc_ib_devices.lock);
321
+ mutex_unlock(&smc_ib_devices.mutex);
238322 return ibdev;
239323 }
240324
241
-/* Parse the supplied netlink attributes and fill a pnetentry structure.
242
- * For ethernet and infiniband device names verify that the devices exist.
243
- */
244
-static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem,
245
- struct nlattr *tb[])
325
+/* Find an smcd device by a given name. The device might not exist. */
326
+static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
246327 {
247
- char *string, *ibname;
328
+ struct smcd_dev *smcd_dev;
329
+
330
+ mutex_lock(&smcd_dev_list.mutex);
331
+ list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
332
+ if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
333
+ IB_DEVICE_NAME_MAX - 1))
334
+ goto out;
335
+ }
336
+ smcd_dev = NULL;
337
+out:
338
+ mutex_unlock(&smcd_dev_list.mutex);
339
+ return smcd_dev;
340
+}
341
+
342
+static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
343
+ char *eth_name, char *pnet_name)
344
+{
345
+ struct smc_pnetentry *tmp_pe, *new_pe;
346
+ struct net_device *ndev, *base_ndev;
347
+ u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
348
+ bool new_netdev;
248349 int rc;
249350
250
- memset(pnetelem, 0, sizeof(*pnetelem));
251
- INIT_LIST_HEAD(&pnetelem->list);
351
+ /* check if (base) netdev already has a pnetid. If there is one, we do
352
+ * not want to add a pnet table entry
353
+ */
354
+ rc = -EEXIST;
355
+ ndev = dev_get_by_name(net, eth_name); /* dev_hold() */
356
+ if (ndev) {
357
+ base_ndev = pnet_find_base_ndev(ndev);
358
+ if (!smc_pnetid_by_dev_port(base_ndev->dev.parent,
359
+ base_ndev->dev_port, ndev_pnetid))
360
+ goto out_put;
361
+ }
362
+
363
+ /* add a new netdev entry to the pnet table if there isn't one */
364
+ rc = -ENOMEM;
365
+ new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
366
+ if (!new_pe)
367
+ goto out_put;
368
+ new_pe->type = SMC_PNET_ETH;
369
+ memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
370
+ strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
371
+ new_pe->ndev = ndev;
372
+
373
+ rc = -EEXIST;
374
+ new_netdev = true;
375
+ mutex_lock(&pnettable->lock);
376
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
377
+ if (tmp_pe->type == SMC_PNET_ETH &&
378
+ !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
379
+ new_netdev = false;
380
+ break;
381
+ }
382
+ }
383
+ if (new_netdev) {
384
+ list_add_tail(&new_pe->list, &pnettable->pnetlist);
385
+ mutex_unlock(&pnettable->lock);
386
+ } else {
387
+ mutex_unlock(&pnettable->lock);
388
+ kfree(new_pe);
389
+ goto out_put;
390
+ }
391
+ if (ndev)
392
+ pr_warn_ratelimited("smc: net device %s "
393
+ "applied user defined pnetid %.16s\n",
394
+ new_pe->eth_name, new_pe->pnet_name);
395
+ return 0;
396
+
397
+out_put:
398
+ if (ndev)
399
+ dev_put(ndev);
400
+ return rc;
401
+}
402
+
403
+static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
404
+ u8 ib_port, char *pnet_name)
405
+{
406
+ struct smc_pnetentry *tmp_pe, *new_pe;
407
+ struct smc_ib_device *ib_dev;
408
+ bool smcddev_applied = true;
409
+ bool ibdev_applied = true;
410
+ struct smcd_dev *smcd_dev;
411
+ bool new_ibdev;
412
+
413
+ /* try to apply the pnetid to active devices */
414
+ ib_dev = smc_pnet_find_ib(ib_name);
415
+ if (ib_dev) {
416
+ ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name);
417
+ if (ibdev_applied)
418
+ pr_warn_ratelimited("smc: ib device %s ibport %d "
419
+ "applied user defined pnetid "
420
+ "%.16s\n", ib_dev->ibdev->name,
421
+ ib_port,
422
+ ib_dev->pnetid[ib_port - 1]);
423
+ }
424
+ smcd_dev = smc_pnet_find_smcd(ib_name);
425
+ if (smcd_dev) {
426
+ smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name);
427
+ if (smcddev_applied)
428
+ pr_warn_ratelimited("smc: smcd device %s "
429
+ "applied user defined pnetid "
430
+ "%.16s\n", dev_name(&smcd_dev->dev),
431
+ smcd_dev->pnetid);
432
+ }
433
+ /* Apply fails when a device has a hardware-defined pnetid set, do not
434
+ * add a pnet table entry in that case.
435
+ */
436
+ if (!ibdev_applied || !smcddev_applied)
437
+ return -EEXIST;
438
+
439
+ /* add a new ib entry to the pnet table if there isn't one */
440
+ new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
441
+ if (!new_pe)
442
+ return -ENOMEM;
443
+ new_pe->type = SMC_PNET_IB;
444
+ memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
445
+ strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX);
446
+ new_pe->ib_port = ib_port;
447
+
448
+ new_ibdev = true;
449
+ mutex_lock(&pnettable->lock);
450
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
451
+ if (tmp_pe->type == SMC_PNET_IB &&
452
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
453
+ new_ibdev = false;
454
+ break;
455
+ }
456
+ }
457
+ if (new_ibdev) {
458
+ list_add_tail(&new_pe->list, &pnettable->pnetlist);
459
+ mutex_unlock(&pnettable->lock);
460
+ } else {
461
+ mutex_unlock(&pnettable->lock);
462
+ kfree(new_pe);
463
+ }
464
+ return (new_ibdev) ? 0 : -EEXIST;
465
+}
466
+
467
+/* Append a pnetid to the end of the pnet table if not already on this list.
468
+ */
469
+static int smc_pnet_enter(struct net *net, struct nlattr *tb[])
470
+{
471
+ char pnet_name[SMC_MAX_PNETID_LEN + 1];
472
+ struct smc_pnettable *pnettable;
473
+ bool new_netdev = false;
474
+ bool new_ibdev = false;
475
+ struct smc_net *sn;
476
+ u8 ibport = 1;
477
+ char *string;
478
+ int rc;
479
+
480
+ /* get pnettable for namespace */
481
+ sn = net_generic(net, smc_net_id);
482
+ pnettable = &sn->pnettable;
252483
253484 rc = -EINVAL;
254485 if (!tb[SMC_PNETID_NAME])
255486 goto error;
256487 string = (char *)nla_data(tb[SMC_PNETID_NAME]);
257
- if (!smc_pnetid_valid(string, pnetelem->pnet_name))
488
+ if (!smc_pnetid_valid(string, pnet_name))
258489 goto error;
490
+
491
+ if (tb[SMC_PNETID_ETHNAME]) {
492
+ string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
493
+ rc = smc_pnet_add_eth(pnettable, net, string, pnet_name);
494
+ if (!rc)
495
+ new_netdev = true;
496
+ else if (rc != -EEXIST)
497
+ goto error;
498
+ }
499
+
500
+ /* if this is not the initial namespace, stop here */
501
+ if (net != &init_net)
502
+ return new_netdev ? 0 : -EEXIST;
259503
260504 rc = -EINVAL;
261
- if (!tb[SMC_PNETID_ETHNAME])
262
- goto error;
263
- rc = -ENOENT;
264
- string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
265
- pnetelem->ndev = dev_get_by_name(net, string);
266
- if (!pnetelem->ndev)
267
- goto error;
268
-
269
- rc = -EINVAL;
270
- if (!tb[SMC_PNETID_IBNAME])
271
- goto error;
272
- rc = -ENOENT;
273
- ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
274
- ibname = strim(ibname);
275
- pnetelem->smcibdev = smc_pnet_find_ib(ibname);
276
- if (!pnetelem->smcibdev)
277
- goto error;
278
-
279
- rc = -EINVAL;
280
- if (!tb[SMC_PNETID_IBPORT])
281
- goto error;
282
- pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
283
- if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS)
284
- goto error;
285
-
286
- return 0;
505
+ if (tb[SMC_PNETID_IBNAME]) {
506
+ string = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
507
+ string = strim(string);
508
+ if (tb[SMC_PNETID_IBPORT]) {
509
+ ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]);
510
+ if (ibport < 1 || ibport > SMC_MAX_PORTS)
511
+ goto error;
512
+ }
513
+ rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name);
514
+ if (!rc)
515
+ new_ibdev = true;
516
+ else if (rc != -EEXIST)
517
+ goto error;
518
+ }
519
+ return (new_netdev || new_ibdev) ? 0 : -EEXIST;
287520
288521 error:
289
- if (pnetelem->ndev)
290
- dev_put(pnetelem->ndev);
291522 return rc;
292523 }
293524
294525 /* Convert an smc_pnetentry to a netlink attribute sequence */
295
-static int smc_pnet_set_nla(struct sk_buff *msg, struct smc_pnetentry *pnetelem)
526
+static int smc_pnet_set_nla(struct sk_buff *msg,
527
+ struct smc_pnetentry *pnetelem)
296528 {
297
- if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name) ||
298
- nla_put_string(msg, SMC_PNETID_ETHNAME, pnetelem->ndev->name) ||
299
- nla_put_string(msg, SMC_PNETID_IBNAME,
300
- pnetelem->smcibdev->ibdev->name) ||
301
- nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
529
+ if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
302530 return -1;
531
+ if (pnetelem->type == SMC_PNET_ETH) {
532
+ if (nla_put_string(msg, SMC_PNETID_ETHNAME,
533
+ pnetelem->eth_name))
534
+ return -1;
535
+ } else {
536
+ if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
537
+ return -1;
538
+ }
539
+ if (pnetelem->type == SMC_PNET_IB) {
540
+ if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) ||
541
+ nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
542
+ return -1;
543
+ } else {
544
+ if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
545
+ nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
546
+ return -1;
547
+ }
548
+
303549 return 0;
304
-}
305
-
306
-/* Retrieve one PNETID entry */
307
-static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
308
-{
309
- struct smc_pnetentry *pnetelem;
310
- struct sk_buff *msg;
311
- void *hdr;
312
- int rc;
313
-
314
- if (!info->attrs[SMC_PNETID_NAME])
315
- return -EINVAL;
316
- pnetelem = smc_pnet_find_pnetid(
317
- (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
318
- if (!pnetelem)
319
- return -ENOENT;
320
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
321
- if (!msg)
322
- return -ENOMEM;
323
-
324
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
325
- &smc_pnet_nl_family, 0, SMC_PNETID_GET);
326
- if (!hdr) {
327
- rc = -EMSGSIZE;
328
- goto err_out;
329
- }
330
-
331
- if (smc_pnet_set_nla(msg, pnetelem)) {
332
- rc = -ENOBUFS;
333
- goto err_out;
334
- }
335
-
336
- genlmsg_end(msg, hdr);
337
- return genlmsg_reply(msg, info);
338
-
339
-err_out:
340
- nlmsg_free(msg);
341
- return rc;
342550 }
343551
344552 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
345553 {
346554 struct net *net = genl_info_net(info);
347
- struct smc_pnetentry *pnetelem;
348
- int rc;
349555
350
- pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
351
- if (!pnetelem)
352
- return -ENOMEM;
353
- rc = smc_pnet_fill_entry(net, pnetelem, info->attrs);
354
- if (!rc)
355
- rc = smc_pnet_enter(pnetelem);
356
- if (rc) {
357
- kfree(pnetelem);
358
- return rc;
359
- }
360
- return rc;
556
+ return smc_pnet_enter(net, info->attrs);
361557 }
362558
363559 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
364560 {
561
+ struct net *net = genl_info_net(info);
562
+
365563 if (!info->attrs[SMC_PNETID_NAME])
366564 return -EINVAL;
367
- return smc_pnet_remove_by_pnetid(
565
+ return smc_pnet_remove_by_pnetid(net,
368566 (char *)nla_data(info->attrs[SMC_PNETID_NAME]));
369567 }
370568
....@@ -392,41 +590,84 @@
392590 return 0;
393591 }
394592
395
-static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
593
+static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
594
+ u32 seq, u8 *pnetid, int start_idx)
396595 {
596
+ struct smc_pnettable *pnettable;
397597 struct smc_pnetentry *pnetelem;
598
+ struct smc_net *sn;
398599 int idx = 0;
399600
400
- read_lock(&smc_pnettable.lock);
401
- list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
402
- if (idx++ < cb->args[0])
601
+ /* get pnettable for namespace */
602
+ sn = net_generic(net, smc_net_id);
603
+ pnettable = &sn->pnettable;
604
+
605
+ /* dump pnettable entries */
606
+ mutex_lock(&pnettable->lock);
607
+ list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
608
+ if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
403609 continue;
404
- if (smc_pnet_dumpinfo(skb, NETLINK_CB(cb->skb).portid,
405
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
610
+ if (idx++ < start_idx)
611
+ continue;
612
+ /* if this is not the initial namespace, dump only netdev */
613
+ if (net != &init_net && pnetelem->type != SMC_PNET_ETH)
614
+ continue;
615
+ if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
406616 pnetelem)) {
407617 --idx;
408618 break;
409619 }
410620 }
621
+ mutex_unlock(&pnettable->lock);
622
+ return idx;
623
+}
624
+
625
+static int smc_pnet_dump(struct sk_buff *skb, struct netlink_callback *cb)
626
+{
627
+ struct net *net = sock_net(skb->sk);
628
+ int idx;
629
+
630
+ idx = _smc_pnet_dump(net, skb, NETLINK_CB(cb->skb).portid,
631
+ cb->nlh->nlmsg_seq, NULL, cb->args[0]);
632
+
411633 cb->args[0] = idx;
412
- read_unlock(&smc_pnettable.lock);
413634 return skb->len;
635
+}
636
+
637
+/* Retrieve one PNETID entry */
638
+static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info)
639
+{
640
+ struct net *net = genl_info_net(info);
641
+ struct sk_buff *msg;
642
+ void *hdr;
643
+
644
+ if (!info->attrs[SMC_PNETID_NAME])
645
+ return -EINVAL;
646
+
647
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
648
+ if (!msg)
649
+ return -ENOMEM;
650
+
651
+ _smc_pnet_dump(net, msg, info->snd_portid, info->snd_seq,
652
+ nla_data(info->attrs[SMC_PNETID_NAME]), 0);
653
+
654
+ /* finish multi part message and send it */
655
+ hdr = nlmsg_put(msg, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
656
+ NLM_F_MULTI);
657
+ if (!hdr) {
658
+ nlmsg_free(msg);
659
+ return -EMSGSIZE;
660
+ }
661
+ return genlmsg_reply(msg, info);
414662 }
415663
416664 /* Remove and delete all pnetids from pnet table.
417665 */
418666 static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
419667 {
420
- struct smc_pnetentry *pnetelem, *tmp_pe;
668
+ struct net *net = genl_info_net(info);
421669
422
- write_lock(&smc_pnettable.lock);
423
- list_for_each_entry_safe(pnetelem, tmp_pe, &smc_pnettable.pnetlist,
424
- list) {
425
- list_del(&pnetelem->list);
426
- dev_put(pnetelem->ndev);
427
- kfree(pnetelem);
428
- }
429
- write_unlock(&smc_pnettable.lock);
670
+ smc_pnet_remove_by_pnetid(net, NULL);
430671 return 0;
431672 }
432673
....@@ -434,28 +675,28 @@
434675 static const struct genl_ops smc_pnet_ops[] = {
435676 {
436677 .cmd = SMC_PNETID_GET,
437
- .flags = GENL_ADMIN_PERM,
438
- .policy = smc_pnet_policy,
678
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
679
+ /* can be retrieved by unprivileged users */
439680 .doit = smc_pnet_get,
440681 .dumpit = smc_pnet_dump,
441682 .start = smc_pnet_dump_start
442683 },
443684 {
444685 .cmd = SMC_PNETID_ADD,
686
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
445687 .flags = GENL_ADMIN_PERM,
446
- .policy = smc_pnet_policy,
447688 .doit = smc_pnet_add
448689 },
449690 {
450691 .cmd = SMC_PNETID_DEL,
692
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
451693 .flags = GENL_ADMIN_PERM,
452
- .policy = smc_pnet_policy,
453694 .doit = smc_pnet_del
454695 },
455696 {
456697 .cmd = SMC_PNETID_FLUSH,
698
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
457699 .flags = GENL_ADMIN_PERM,
458
- .policy = smc_pnet_policy,
459700 .doit = smc_pnet_flush
460701 }
461702 };
....@@ -466,21 +707,141 @@
466707 .name = SMCR_GENL_FAMILY_NAME,
467708 .version = SMCR_GENL_FAMILY_VERSION,
468709 .maxattr = SMC_PNETID_MAX,
710
+ .policy = smc_pnet_policy,
469711 .netnsok = true,
470712 .module = THIS_MODULE,
471713 .ops = smc_pnet_ops,
472714 .n_ops = ARRAY_SIZE(smc_pnet_ops)
473715 };
474716
717
+bool smc_pnet_is_ndev_pnetid(struct net *net, u8 *pnetid)
718
+{
719
+ struct smc_net *sn = net_generic(net, smc_net_id);
720
+ struct smc_pnetids_ndev_entry *pe;
721
+ bool rc = false;
722
+
723
+ read_lock(&sn->pnetids_ndev.lock);
724
+ list_for_each_entry(pe, &sn->pnetids_ndev.list, list) {
725
+ if (smc_pnet_match(pnetid, pe->pnetid)) {
726
+ rc = true;
727
+ goto unlock;
728
+ }
729
+ }
730
+
731
+unlock:
732
+ read_unlock(&sn->pnetids_ndev.lock);
733
+ return rc;
734
+}
735
+
736
+static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid)
737
+{
738
+ struct smc_net *sn = net_generic(net, smc_net_id);
739
+ struct smc_pnetids_ndev_entry *pe, *pi;
740
+
741
+ pe = kzalloc(sizeof(*pe), GFP_KERNEL);
742
+ if (!pe)
743
+ return -ENOMEM;
744
+
745
+ write_lock(&sn->pnetids_ndev.lock);
746
+ list_for_each_entry(pi, &sn->pnetids_ndev.list, list) {
747
+ if (smc_pnet_match(pnetid, pe->pnetid)) {
748
+ refcount_inc(&pi->refcnt);
749
+ kfree(pe);
750
+ goto unlock;
751
+ }
752
+ }
753
+ refcount_set(&pe->refcnt, 1);
754
+ memcpy(pe->pnetid, pnetid, SMC_MAX_PNETID_LEN);
755
+ list_add_tail(&pe->list, &sn->pnetids_ndev.list);
756
+
757
+unlock:
758
+ write_unlock(&sn->pnetids_ndev.lock);
759
+ return 0;
760
+}
761
+
762
+static void smc_pnet_remove_pnetid(struct net *net, u8 *pnetid)
763
+{
764
+ struct smc_net *sn = net_generic(net, smc_net_id);
765
+ struct smc_pnetids_ndev_entry *pe, *pe2;
766
+
767
+ write_lock(&sn->pnetids_ndev.lock);
768
+ list_for_each_entry_safe(pe, pe2, &sn->pnetids_ndev.list, list) {
769
+ if (smc_pnet_match(pnetid, pe->pnetid)) {
770
+ if (refcount_dec_and_test(&pe->refcnt)) {
771
+ list_del(&pe->list);
772
+ kfree(pe);
773
+ }
774
+ break;
775
+ }
776
+ }
777
+ write_unlock(&sn->pnetids_ndev.lock);
778
+}
779
+
780
+static void smc_pnet_add_base_pnetid(struct net *net, struct net_device *dev,
781
+ u8 *ndev_pnetid)
782
+{
783
+ struct net_device *base_dev;
784
+
785
+ base_dev = __pnet_find_base_ndev(dev);
786
+ if (base_dev->flags & IFF_UP &&
787
+ !smc_pnetid_by_dev_port(base_dev->dev.parent, base_dev->dev_port,
788
+ ndev_pnetid)) {
789
+ /* add to PNETIDs list */
790
+ smc_pnet_add_pnetid(net, ndev_pnetid);
791
+ }
792
+}
793
+
794
+/* create initial list of netdevice pnetids */
795
+static void smc_pnet_create_pnetids_list(struct net *net)
796
+{
797
+ u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
798
+ struct net_device *dev;
799
+
800
+ rtnl_lock();
801
+ for_each_netdev(net, dev)
802
+ smc_pnet_add_base_pnetid(net, dev, ndev_pnetid);
803
+ rtnl_unlock();
804
+}
805
+
806
+/* clean up list of netdevice pnetids */
807
+static void smc_pnet_destroy_pnetids_list(struct net *net)
808
+{
809
+ struct smc_net *sn = net_generic(net, smc_net_id);
810
+ struct smc_pnetids_ndev_entry *pe, *temp_pe;
811
+
812
+ write_lock(&sn->pnetids_ndev.lock);
813
+ list_for_each_entry_safe(pe, temp_pe, &sn->pnetids_ndev.list, list) {
814
+ list_del(&pe->list);
815
+ kfree(pe);
816
+ }
817
+ write_unlock(&sn->pnetids_ndev.lock);
818
+}
819
+
475820 static int smc_pnet_netdev_event(struct notifier_block *this,
476821 unsigned long event, void *ptr)
477822 {
478823 struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
824
+ struct net *net = dev_net(event_dev);
825
+ u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
479826
480827 switch (event) {
481828 case NETDEV_REBOOT:
482829 case NETDEV_UNREGISTER:
483830 smc_pnet_remove_by_ndev(event_dev);
831
+ return NOTIFY_OK;
832
+ case NETDEV_REGISTER:
833
+ smc_pnet_add_by_ndev(event_dev);
834
+ return NOTIFY_OK;
835
+ case NETDEV_UP:
836
+ smc_pnet_add_base_pnetid(net, event_dev, ndev_pnetid);
837
+ return NOTIFY_OK;
838
+ case NETDEV_DOWN:
839
+ event_dev = __pnet_find_base_ndev(event_dev);
840
+ if (!smc_pnetid_by_dev_port(event_dev->dev.parent,
841
+ event_dev->dev_port, ndev_pnetid)) {
842
+ /* remove from PNETIDs list */
843
+ smc_pnet_remove_pnetid(net, ndev_pnetid);
844
+ }
484845 return NOTIFY_OK;
485846 default:
486847 return NOTIFY_DONE;
....@@ -490,6 +851,23 @@
490851 static struct notifier_block smc_netdev_notifier = {
491852 .notifier_call = smc_pnet_netdev_event
492853 };
854
+
855
+/* init network namespace */
856
+int smc_pnet_net_init(struct net *net)
857
+{
858
+ struct smc_net *sn = net_generic(net, smc_net_id);
859
+ struct smc_pnettable *pnettable = &sn->pnettable;
860
+ struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev;
861
+
862
+ INIT_LIST_HEAD(&pnettable->pnetlist);
863
+ mutex_init(&pnettable->lock);
864
+ INIT_LIST_HEAD(&pnetids_ndev->list);
865
+ rwlock_init(&pnetids_ndev->lock);
866
+
867
+ smc_pnet_create_pnetids_list(net);
868
+
869
+ return 0;
870
+}
493871
494872 int __init smc_pnet_init(void)
495873 {
....@@ -501,14 +879,39 @@
501879 rc = register_netdevice_notifier(&smc_netdev_notifier);
502880 if (rc)
503881 genl_unregister_family(&smc_pnet_nl_family);
882
+
504883 return rc;
884
+}
885
+
886
+/* exit network namespace */
887
+void smc_pnet_net_exit(struct net *net)
888
+{
889
+ /* flush pnet table */
890
+ smc_pnet_remove_by_pnetid(net, NULL);
891
+ smc_pnet_destroy_pnetids_list(net);
505892 }
506893
507894 void smc_pnet_exit(void)
508895 {
509
- smc_pnet_flush(NULL, NULL);
510896 unregister_netdevice_notifier(&smc_netdev_notifier);
511897 genl_unregister_family(&smc_pnet_nl_family);
898
+}
899
+
900
+static struct net_device *__pnet_find_base_ndev(struct net_device *ndev)
901
+{
902
+ int i, nest_lvl;
903
+
904
+ ASSERT_RTNL();
905
+ nest_lvl = ndev->lower_level;
906
+ for (i = 0; i < nest_lvl; i++) {
907
+ struct list_head *lower = &ndev->adj_list.lower;
908
+
909
+ if (list_empty(lower))
910
+ break;
911
+ lower = lower->next;
912
+ ndev = netdev_lower_get_next(ndev, &lower);
913
+ }
914
+ return ndev;
512915 }
513916
514917 /* Determine one base device for stacked net devices.
....@@ -518,131 +921,176 @@
518921 */
static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
{
	struct net_device *base;

	/* the lookup helper requires the RTNL lock, take it around the call */
	rtnl_lock();
	base = __pnet_find_base_ndev(ndev);
	rtnl_unlock();

	return base;
}
536929
537
-/* Determine the corresponding IB device port based on the hardware PNETID.
538
- * Searching stops at the first matching active IB device port with vlan_id
539
- * configured.
540
- */
541
-static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
542
- struct smc_ib_device **smcibdev,
543
- u8 *ibport, unsigned short vlan_id,
544
- u8 gid[])
930
+static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
931
+ u8 *pnetid)
545932 {
546
- u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
933
+ struct smc_pnettable *pnettable;
934
+ struct net *net = dev_net(ndev);
935
+ struct smc_pnetentry *pnetelem;
936
+ struct smc_net *sn;
937
+ int rc = -ENOENT;
938
+
939
+ /* get pnettable for namespace */
940
+ sn = net_generic(net, smc_net_id);
941
+ pnettable = &sn->pnettable;
942
+
943
+ mutex_lock(&pnettable->lock);
944
+ list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
945
+ if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
946
+ /* get pnetid of netdev device */
947
+ memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
948
+ rc = 0;
949
+ break;
950
+ }
951
+ }
952
+ mutex_unlock(&pnettable->lock);
953
+ return rc;
954
+}
955
+
956
+/* find a roce device for the given pnetid */
957
+static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
958
+ struct smc_init_info *ini,
959
+ struct smc_ib_device *known_dev)
960
+{
547961 struct smc_ib_device *ibdev;
548962 int i;
549963
550
- ndev = pnet_find_base_ndev(ndev);
551
- if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
552
- ndev_pnetid))
553
- return; /* pnetid could not be determined */
554
-
555
- spin_lock(&smc_ib_devices.lock);
964
+ ini->ib_dev = NULL;
965
+ mutex_lock(&smc_ib_devices.mutex);
556966 list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
967
+ if (ibdev == known_dev)
968
+ continue;
557969 for (i = 1; i <= SMC_MAX_PORTS; i++) {
558970 if (!rdma_is_port_valid(ibdev->ibdev, i))
559971 continue;
560
- if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
561
- SMC_MAX_PNETID_LEN) &&
972
+ if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
562973 smc_ib_port_active(ibdev, i) &&
563
- !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
564
- NULL)) {
565
- *smcibdev = ibdev;
566
- *ibport = i;
974
+ !test_bit(i - 1, ibdev->ports_going_away) &&
975
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
976
+ ini->ib_gid, NULL)) {
977
+ ini->ib_dev = ibdev;
978
+ ini->ib_port = i;
567979 goto out;
568980 }
569981 }
570982 }
571983 out:
572
- spin_unlock(&smc_ib_devices.lock);
984
+ mutex_unlock(&smc_ib_devices.mutex);
985
+}
986
+
987
+/* find alternate roce device with same pnet_id and vlan_id */
988
+void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
989
+ struct smc_init_info *ini,
990
+ struct smc_ib_device *known_dev)
991
+{
992
+ _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev);
993
+}
994
+
995
+/* if handshake network device belongs to a roce device, return its
996
+ * IB device and port
997
+ */
998
+static void smc_pnet_find_rdma_dev(struct net_device *netdev,
999
+ struct smc_init_info *ini)
1000
+{
1001
+ struct smc_ib_device *ibdev;
1002
+
1003
+ mutex_lock(&smc_ib_devices.mutex);
1004
+ list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
1005
+ struct net_device *ndev;
1006
+ int i;
1007
+
1008
+ for (i = 1; i <= SMC_MAX_PORTS; i++) {
1009
+ if (!rdma_is_port_valid(ibdev->ibdev, i))
1010
+ continue;
1011
+ if (!ibdev->ibdev->ops.get_netdev)
1012
+ continue;
1013
+ ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i);
1014
+ if (!ndev)
1015
+ continue;
1016
+ dev_put(ndev);
1017
+ if (netdev == ndev &&
1018
+ smc_ib_port_active(ibdev, i) &&
1019
+ !test_bit(i - 1, ibdev->ports_going_away) &&
1020
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
1021
+ ini->ib_gid, NULL)) {
1022
+ ini->ib_dev = ibdev;
1023
+ ini->ib_port = i;
1024
+ break;
1025
+ }
1026
+ }
1027
+ }
1028
+ mutex_unlock(&smc_ib_devices.mutex);
1029
+}
1030
+
1031
+/* Determine the corresponding IB device port based on the hardware PNETID.
1032
+ * Searching stops at the first matching active IB device port with vlan_id
1033
+ * configured.
1034
+ * If nothing found, check pnetid table.
1035
+ * If nothing found, try to use handshake device
1036
+ */
1037
+static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
1038
+ struct smc_init_info *ini)
1039
+{
1040
+ u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
1041
+
1042
+ ndev = pnet_find_base_ndev(ndev);
1043
+ if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
1044
+ ndev_pnetid) &&
1045
+ smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
1046
+ smc_pnet_find_rdma_dev(ndev, ini);
1047
+ return; /* pnetid could not be determined */
1048
+ }
1049
+ _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL);
5731050 }
5741051
5751052 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
576
- struct smcd_dev **smcismdev)
1053
+ struct smc_init_info *ini)
5771054 {
5781055 u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
5791056 struct smcd_dev *ismdev;
5801057
5811058 ndev = pnet_find_base_ndev(ndev);
5821059 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
583
- ndev_pnetid))
1060
+ ndev_pnetid) &&
1061
+ smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
5841062 return; /* pnetid could not be determined */
5851063
586
- spin_lock(&smcd_dev_list.lock);
1064
+ mutex_lock(&smcd_dev_list.mutex);
5871065 list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
588
- if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
589
- *smcismdev = ismdev;
1066
+ if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
1067
+ !ismdev->going_away &&
1068
+ (!ini->ism_peer_gid[0] ||
1069
+ !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id,
1070
+ ismdev))) {
1071
+ ini->ism_dev[0] = ismdev;
5901072 break;
5911073 }
5921074 }
593
- spin_unlock(&smcd_dev_list.lock);
594
-}
595
-
596
-/* Lookup of coupled ib_device via SMC pnet table */
597
-static void smc_pnet_find_roce_by_table(struct net_device *netdev,
598
- struct smc_ib_device **smcibdev,
599
- u8 *ibport, unsigned short vlan_id,
600
- u8 gid[])
601
-{
602
- struct smc_pnetentry *pnetelem;
603
-
604
- read_lock(&smc_pnettable.lock);
605
- list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
606
- if (netdev == pnetelem->ndev) {
607
- if (smc_ib_port_active(pnetelem->smcibdev,
608
- pnetelem->ib_port) &&
609
- !smc_ib_determine_gid(pnetelem->smcibdev,
610
- pnetelem->ib_port, vlan_id,
611
- gid, NULL)) {
612
- *smcibdev = pnetelem->smcibdev;
613
- *ibport = pnetelem->ib_port;
614
- }
615
- break;
616
- }
617
- }
618
- read_unlock(&smc_pnettable.lock);
1075
+ mutex_unlock(&smcd_dev_list.mutex);
6191076 }
6201077
6211078 /* PNET table analysis for a given sock:
6221079 * determine ib_device and port belonging to used internal TCP socket
6231080 * ethernet interface.
6241081 */
625
-void smc_pnet_find_roce_resource(struct sock *sk,
626
- struct smc_ib_device **smcibdev, u8 *ibport,
627
- unsigned short vlan_id, u8 gid[])
1082
+void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini)
6281083 {
6291084 struct dst_entry *dst = sk_dst_get(sk);
6301085
631
- *smcibdev = NULL;
632
- *ibport = 0;
633
-
1086
+ ini->ib_dev = NULL;
1087
+ ini->ib_port = 0;
6341088 if (!dst)
6351089 goto out;
6361090 if (!dst->dev)
6371091 goto out_rel;
6381092
639
- /* if possible, lookup via hardware-defined pnetid */
640
- smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid);
641
- if (*smcibdev)
642
- goto out_rel;
643
-
644
- /* lookup via SMC PNET table */
645
- smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid);
1093
+ smc_pnet_find_roce_by_pnetid(dst->dev, ini);
6461094
6471095 out_rel:
6481096 dst_release(dst);
....@@ -650,21 +1098,77 @@
6501098 return;
6511099 }
6521100
653
-void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
1101
+void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini)
6541102 {
6551103 struct dst_entry *dst = sk_dst_get(sk);
6561104
657
- *smcismdev = NULL;
1105
+ ini->ism_dev[0] = NULL;
6581106 if (!dst)
6591107 goto out;
6601108 if (!dst->dev)
6611109 goto out_rel;
6621110
663
- /* if possible, lookup via hardware-defined pnetid */
664
- smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
1111
+ smc_pnet_find_ism_by_pnetid(dst->dev, ini);
6651112
6661113 out_rel:
6671114 dst_release(dst);
6681115 out:
6691116 return;
6701117 }
1118
+
1119
+/* Lookup and apply a pnet table entry to the given ib device.
1120
+ */
1121
+int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
1122
+{
1123
+ char *ib_name = smcibdev->ibdev->name;
1124
+ struct smc_pnettable *pnettable;
1125
+ struct smc_pnetentry *tmp_pe;
1126
+ struct smc_net *sn;
1127
+ int rc = -ENOENT;
1128
+
1129
+ /* get pnettable for init namespace */
1130
+ sn = net_generic(&init_net, smc_net_id);
1131
+ pnettable = &sn->pnettable;
1132
+
1133
+ mutex_lock(&pnettable->lock);
1134
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
1135
+ if (tmp_pe->type == SMC_PNET_IB &&
1136
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) &&
1137
+ tmp_pe->ib_port == ib_port) {
1138
+ smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name);
1139
+ rc = 0;
1140
+ break;
1141
+ }
1142
+ }
1143
+ mutex_unlock(&pnettable->lock);
1144
+
1145
+ return rc;
1146
+}
1147
+
1148
+/* Lookup and apply a pnet table entry to the given smcd device.
1149
+ */
1150
+int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
1151
+{
1152
+ const char *ib_name = dev_name(&smcddev->dev);
1153
+ struct smc_pnettable *pnettable;
1154
+ struct smc_pnetentry *tmp_pe;
1155
+ struct smc_net *sn;
1156
+ int rc = -ENOENT;
1157
+
1158
+ /* get pnettable for init namespace */
1159
+ sn = net_generic(&init_net, smc_net_id);
1160
+ pnettable = &sn->pnettable;
1161
+
1162
+ mutex_lock(&pnettable->lock);
1163
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
1164
+ if (tmp_pe->type == SMC_PNET_IB &&
1165
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
1166
+ smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name);
1167
+ rc = 0;
1168
+ break;
1169
+ }
1170
+ }
1171
+ mutex_unlock(&pnettable->lock);
1172
+
1173
+ return rc;
1174
+}