2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/infiniband/core/cache.c
@@ -46,14 +46,13 @@
 
 struct ib_pkey_cache {
         int table_len;
-        u16 table[0];
+        u16 table[];
 };
 
 struct ib_update_work {
         struct work_struct work;
-        struct ib_device *device;
-        u8 port_num;
-        bool enforce_security;
+        struct ib_event event;
+        bool enforce_security;
 };
 
 union ib_gid zgid;
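The hunk above converts the pkey table to a C99 flexible array member; the matching allocation later in this patch sizes it with struct_size(). A minimal, self-contained sketch of that allocation pattern (illustrative names, not the driver code):

#include <linux/overflow.h>
#include <linux/slab.h>

struct pkey_cache_example {
        int table_len;
        u16 table[];            /* flexible array member, must be last */
};

/* Allocate the header plus n entries in one allocation; struct_size()
 * guards the size computation against integer overflow.
 */
static struct pkey_cache_example *alloc_pkey_cache(int n)
{
        struct pkey_cache_example *cache;

        cache = kzalloc(struct_size(cache, table, n), GFP_KERNEL);
        if (cache)
                cache->table_len = n;
        return cache;
}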
@@ -78,11 +77,22 @@
         GID_TABLE_ENTRY_PENDING_DEL = 3,
 };
 
+struct roce_gid_ndev_storage {
+        struct rcu_head rcu_head;
+        struct net_device *ndev;
+};
+
 struct ib_gid_table_entry {
         struct kref kref;
         struct work_struct del_work;
         struct ib_gid_attr attr;
         void *context;
+        /* Store the ndev pointer to release reference later on in
+         * call_rcu context because by that time gid_table_entry
+         * and attr might be already freed. So keep a copy of it.
+         * ndev_storage is freed by rcu callback.
+         */
+        struct roce_gid_ndev_storage *ndev_storage;
         enum gid_table_entry_state state;
 };
 
@@ -119,11 +129,15 @@
         event.element.port_num = port;
         event.event = IB_EVENT_GID_CHANGE;
 
-        ib_dispatch_event(&event);
+        ib_dispatch_event_clients(&event);
 }
 
 static const char * const gid_type_str[] = {
+        /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
+         * user space compatibility reasons.
+         */
         [IB_GID_TYPE_IB] = "IB/RoCE v1",
+        [IB_GID_TYPE_ROCE] = "IB/RoCE v1",
         [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
 };
 
@@ -185,7 +199,7 @@
 
 static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
 {
-        return device->cache.ports[port - rdma_start_port(device)].gid;
+        return device->port_data[port].cache.gid;
 }
 
 static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
@@ -206,19 +220,28 @@
         queue_work(ib_wq, &entry->del_work);
 }
 
+static void put_gid_ndev(struct rcu_head *head)
+{
+        struct roce_gid_ndev_storage *storage =
+                container_of(head, struct roce_gid_ndev_storage, rcu_head);
+
+        WARN_ON(!storage->ndev);
+        /* At this point its safe to release netdev reference,
+         * as all callers working on gid_attr->ndev are done
+         * using this netdev.
+         */
+        dev_put(storage->ndev);
+        kfree(storage);
+}
+
 static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
 {
         struct ib_device *device = entry->attr.device;
         u8 port_num = entry->attr.port_num;
         struct ib_gid_table *table = rdma_gid_table(device, port_num);
 
-        pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-                 device->name, port_num, entry->attr.index,
-                 entry->attr.gid.raw);
-
-        if (rdma_cap_roce_gid_table(device, port_num) &&
-            entry->state != GID_TABLE_ENTRY_INVALID)
-                device->del_gid(&entry->attr, &entry->context);
+        dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
+                port_num, entry->attr.index, entry->attr.gid.raw);
 
         write_lock_irq(&table->rwlock);
 
@@ -233,8 +256,8 @@
         /* Now this index is ready to be allocated */
         write_unlock_irq(&table->rwlock);
 
-        if (entry->attr.ndev)
-                dev_put(entry->attr.ndev);
+        if (entry->ndev_storage)
+                call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev);
         kfree(entry);
 }
 
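free_gid_entry_locked() now hands the netdev reference to an RCU callback instead of dropping it inline, so readers that found gid_attr->ndev under rcu_read_lock() can never see the device released underneath them. A minimal sketch of that deferred-put pattern, with hypothetical names (not the driver code):

#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct ndev_holder {
        struct rcu_head rcu_head;
        struct net_device *ndev;
};

/* Runs after a grace period: every reader that could still have seen
 * the published pointer under rcu_read_lock() has finished by now.
 */
static void ndev_holder_free(struct rcu_head *head)
{
        struct ndev_holder *h = container_of(head, struct ndev_holder, rcu_head);

        dev_put(h->ndev);       /* drop the reference taken at publish time */
        kfree(h);
}

static void ndev_holder_retire(struct ndev_holder *h)
{
        call_rcu(&h->rcu_head, ndev_holder_free);
}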
@@ -271,14 +294,25 @@
 alloc_gid_entry(const struct ib_gid_attr *attr)
 {
         struct ib_gid_table_entry *entry;
+        struct net_device *ndev;
 
         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
         if (!entry)
                 return NULL;
+
+        ndev = rcu_dereference_protected(attr->ndev, 1);
+        if (ndev) {
+                entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage),
+                                              GFP_KERNEL);
+                if (!entry->ndev_storage) {
+                        kfree(entry);
+                        return NULL;
+                }
+                dev_hold(ndev);
+                entry->ndev_storage->ndev = ndev;
+        }
         kref_init(&entry->kref);
         memcpy(&entry->attr, attr, sizeof(*attr));
-        if (entry->attr.ndev)
-                dev_hold(entry->attr.ndev);
         INIT_WORK(&entry->del_work, free_gid_work);
         entry->state = GID_TABLE_ENTRY_INVALID;
         return entry;
@@ -289,9 +323,9 @@
 {
         entry->state = GID_TABLE_ENTRY_VALID;
 
-        pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-                 entry->attr.device->name, entry->attr.port_num,
-                 entry->attr.index, entry->attr.gid.raw);
+        dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
+                __func__, entry->attr.port_num, entry->attr.index,
+                entry->attr.gid.raw);
 
         lockdep_assert_held(&table->lock);
         write_lock_irq(&table->rwlock);
@@ -320,17 +354,16 @@
         int ret;
 
         if (!attr->ndev) {
-                pr_err("%s NULL netdev device=%s port=%d index=%d\n",
-                       __func__, attr->device->name, attr->port_num,
-                       attr->index);
+                dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
+                        __func__, attr->port_num, attr->index);
                 return -EINVAL;
         }
         if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
-                ret = attr->device->add_gid(attr, &entry->context);
+                ret = attr->device->ops.add_gid(attr, &entry->context);
                 if (ret) {
-                        pr_err("%s GID add failed device=%s port=%d index=%d\n",
-                               __func__, attr->device->name, attr->port_num,
-                               attr->index);
+                        dev_err(&attr->device->dev,
+                                "%s GID add failed port=%d index=%d\n",
+                                __func__, attr->port_num, attr->index);
                         return ret;
                 }
         }
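The add_gid callback is now reached through the device's consolidated ops table instead of a per-device function pointer. The sketch below shows the general shape of that pattern; the names are illustrative and not the real ib_device_ops layout:

#include <rdma/ib_verbs.h>

struct example_ops {
        int (*add_gid)(const struct ib_gid_attr *attr, void **context);
        void (*del_gid)(const struct ib_gid_attr *attr, void **context);
};

struct example_device {
        struct example_ops ops;
};

/* Callers test the pointer first: not every driver implements every
 * callback in the ops table.
 */
static int example_add(struct example_device *dev,
                       const struct ib_gid_attr *attr, void **context)
{
        if (!dev->ops.add_gid)
                return 0;
        return dev->ops.add_gid(attr, context);
}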
@@ -349,13 +382,13 @@
 static void del_gid(struct ib_device *ib_dev, u8 port,
                     struct ib_gid_table *table, int ix)
 {
+        struct roce_gid_ndev_storage *ndev_storage;
         struct ib_gid_table_entry *entry;
 
         lockdep_assert_held(&table->lock);
 
-        pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-                 ib_dev->name, port, ix,
-                 table->data_vec[ix]->attr.gid.raw);
+        dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
+                ix, table->data_vec[ix]->attr.gid.raw);
 
         write_lock_irq(&table->rwlock);
         entry = table->data_vec[ix];
@@ -366,6 +399,16 @@
         if (!rdma_protocol_roce(ib_dev, port))
                 table->data_vec[ix] = NULL;
         write_unlock_irq(&table->rwlock);
+
+        ndev_storage = entry->ndev_storage;
+        if (ndev_storage) {
+                entry->ndev_storage = NULL;
+                rcu_assign_pointer(entry->attr.ndev, NULL);
+                call_rcu(&ndev_storage->rcu_head, put_gid_ndev);
+        }
+
+        if (rdma_cap_roce_gid_table(ib_dev, port))
+                ib_dev->ops.del_gid(&entry->attr, &entry->context);
 
         put_gid_entry_locked(entry);
 }
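del_gid() unpublishes attr->ndev with rcu_assign_pointer(..., NULL) and only then retires the reference through call_rcu(), which is what lets readers use plain RCU accessors. An illustrative reader (hypothetical helper, not from the patch):

#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include <rdma/ib_verbs.h>

/* Return the ifindex behind a GID attribute, or -1 if it has no netdev.
 * rcu_read_lock() keeps any netdev we observe alive until the matching
 * rcu_read_unlock(), because the writer defers dev_put() via call_rcu().
 */
static int example_gid_ifindex(const struct ib_gid_attr *attr)
{
        struct net_device *ndev;
        int ifindex = -1;

        rcu_read_lock();
        ndev = rcu_dereference(attr->ndev);
        if (ndev)
                ifindex = ndev->ifindex;
        rcu_read_unlock();

        return ifindex;
}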
@@ -547,32 +590,11 @@
 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
                      union ib_gid *gid, struct ib_gid_attr *attr)
 {
-        struct net_device *idev;
-        unsigned long mask;
-        int ret;
+        unsigned long mask = GID_ATTR_FIND_MASK_GID |
+                             GID_ATTR_FIND_MASK_GID_TYPE |
+                             GID_ATTR_FIND_MASK_NETDEV;
 
-        if (ib_dev->get_netdev) {
-                idev = ib_dev->get_netdev(ib_dev, port);
-                if (idev && attr->ndev != idev) {
-                        union ib_gid default_gid;
-
-                        /* Adding default GIDs in not permitted */
-                        make_default_gid(idev, &default_gid);
-                        if (!memcmp(gid, &default_gid, sizeof(*gid))) {
-                                dev_put(idev);
-                                return -EPERM;
-                        }
-                }
-                if (idev)
-                        dev_put(idev);
-        }
-
-        mask = GID_ATTR_FIND_MASK_GID |
-               GID_ATTR_FIND_MASK_GID_TYPE |
-               GID_ATTR_FIND_MASK_NETDEV;
-
-        ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
-        return ret;
+        return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
 }
 
 static int
@@ -769,7 +791,7 @@
         return NULL;
 }
 
-static void release_gid_table(struct ib_device *device, u8 port,
+static void release_gid_table(struct ib_device *device,
                               struct ib_gid_table *table)
 {
         bool leak = false;
@@ -782,15 +804,16 @@
                 if (is_gid_entry_free(table->data_vec[i]))
                         continue;
                 if (kref_read(&table->data_vec[i]->kref) > 1) {
-                        pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
-                               device->name, i,
-                               kref_read(&table->data_vec[i]->kref));
+                        dev_err(&device->dev,
+                                "GID entry ref leak for index %d ref=%d\n", i,
+                                kref_read(&table->data_vec[i]->kref));
                         leak = true;
                 }
         }
         if (leak)
                 return;
 
+        mutex_destroy(&table->lock);
         kfree(table->data_vec);
         kfree(table);
 }
@@ -799,22 +822,16 @@
                                     struct ib_gid_table *table)
 {
         int i;
-        bool deleted = false;
 
         if (!table)
                 return;
 
         mutex_lock(&table->lock);
         for (i = 0; i < table->sz; ++i) {
-                if (is_gid_entry_valid(table->data_vec[i])) {
+                if (is_gid_entry_valid(table->data_vec[i]))
                         del_gid(ib_dev, port, table, i);
-                        deleted = true;
-                }
         }
         mutex_unlock(&table->lock);
-
-        if (deleted)
-                dispatch_gid_change_event(ib_dev, port);
 }
 
 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
@@ -867,31 +884,27 @@
 
 static void gid_table_release_one(struct ib_device *ib_dev)
 {
-        struct ib_gid_table *table;
-        u8 port;
+        unsigned int p;
 
-        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
-                table = ib_dev->cache.ports[port].gid;
-                release_gid_table(ib_dev, port, table);
-                ib_dev->cache.ports[port].gid = NULL;
+        rdma_for_each_port (ib_dev, p) {
+                release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
+                ib_dev->port_data[p].cache.gid = NULL;
         }
 }
 
 static int _gid_table_setup_one(struct ib_device *ib_dev)
 {
-        u8 port;
         struct ib_gid_table *table;
+        unsigned int rdma_port;
 
-        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
-                u8 rdma_port = port + rdma_start_port(ib_dev);
-
-                table = alloc_gid_table(
-                        ib_dev->port_immutable[rdma_port].gid_tbl_len);
+        rdma_for_each_port (ib_dev, rdma_port) {
+                table = alloc_gid_table(
+                        ib_dev->port_data[rdma_port].immutable.gid_tbl_len);
                 if (!table)
                         goto rollback_table_setup;
 
                 gid_table_reserve_default(ib_dev, rdma_port, table);
-                ib_dev->cache.ports[port].gid = table;
+                ib_dev->port_data[rdma_port].cache.gid = table;
         }
         return 0;
 
@@ -902,14 +915,11 @@
 
 static void gid_table_cleanup_one(struct ib_device *ib_dev)
 {
-        struct ib_gid_table *table;
-        u8 port;
+        unsigned int p;
 
-        for (port = 0; port < ib_dev->phys_port_cnt; port++) {
-                table = ib_dev->cache.ports[port].gid;
-                cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
-                                       table);
-        }
+        rdma_for_each_port (ib_dev, p)
+                cleanup_gid_table_port(ib_dev, p,
+                                       ib_dev->port_data[p].cache.gid);
 }
 
 static int gid_table_setup_one(struct ib_device *ib_dev)
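These hunks replace the hand-rolled `for (port = 0; port < phys_port_cnt; ...)` loops with rdma_for_each_port(), which walks the device's valid port numbers directly, so callers no longer add rdma_start_port() by hand. A hedged sketch of a consumer using the same iterator together with the cached port-state API touched later in this patch (example_count_active_ports is a hypothetical helper):

#include <rdma/ib_cache.h>
#include <rdma/ib_verbs.h>

/* Count the ports that are currently ACTIVE according to the cache
 * maintained by this file.
 */
static unsigned int example_count_active_ports(struct ib_device *device)
{
        enum ib_port_state state;
        unsigned int p, active = 0;

        rdma_for_each_port(device, p) {
                if (!ib_get_cached_port_state(device, p, &state) &&
                    state == IB_PORT_ACTIVE)
                        active++;
        }
        return active;
}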
@@ -967,6 +977,23 @@
 EXPORT_SYMBOL(rdma_query_gid);
 
 /**
+ * rdma_read_gid_hw_context - Read the HW GID context from GID attribute
+ * @attr: Pointer to the GID attribute
+ *
+ * rdma_read_gid_hw_context() reads the driver's GID HW context corresponding
+ * to the SGID attr. Callers are required to already be holding the reference
+ * to an existing GID entry.
+ *
+ * Returns the HW GID context
+ *
+ */
+void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr)
+{
+        return container_of(attr, struct ib_gid_table_entry, attr)->context;
+}
+EXPORT_SYMBOL(rdma_read_gid_hw_context);
+
+/**
  * rdma_find_gid - Returns SGID attributes if the matching GID is found.
  * @device: The device to query.
  * @gid: The GID value to search for.
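rdma_read_gid_hw_context() simply recovers, via container_of(), the context pointer the driver stored from its add_gid() callback. A hypothetical driver-side use (example_gid_ctx and example_build_av are made-up names):

#include <rdma/ib_cache.h>
#include <rdma/ib_verbs.h>

struct example_gid_ctx {
        u32 hw_gid_index;       /* what the driver stashed in add_gid() */
};

static int example_build_av(const struct ib_gid_attr *sgid_attr, u32 *hw_index)
{
        struct example_gid_ctx *ctx = rdma_read_gid_hw_context(sgid_attr);

        if (!ctx)
                return -EINVAL;
        *hw_index = ctx->hw_gid_index;
        return 0;
}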
@@ -987,17 +1014,17 @@
         unsigned long mask = GID_ATTR_FIND_MASK_GID |
                              GID_ATTR_FIND_MASK_GID_TYPE;
         struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
-        u8 p;
+        unsigned int p;
 
         if (ndev)
                 mask |= GID_ATTR_FIND_MASK_NETDEV;
 
-        for (p = 0; p < device->phys_port_cnt; p++) {
+        rdma_for_each_port(device, p) {
                 struct ib_gid_table *table;
                 unsigned long flags;
                 int index;
 
-                table = device->cache.ports[p].gid;
+                table = device->port_data[p].cache.gid;
                 read_lock_irqsave(&table->rwlock, flags);
                 index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
                 if (index >= 0) {
@@ -1027,16 +1054,16 @@
         if (!rdma_is_port_valid(device, port_num))
                 return -EINVAL;
 
-        read_lock_irqsave(&device->cache.lock, flags);
+        read_lock_irqsave(&device->cache_lock, flags);
 
-        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
+        cache = device->port_data[port_num].cache.pkey;
 
-        if (index < 0 || index >= cache->table_len)
+        if (!cache || index < 0 || index >= cache->table_len)
                 ret = -EINVAL;
         else
                 *pkey = cache->table[index];
 
-        read_unlock_irqrestore(&device->cache.lock, flags);
+        read_unlock_irqrestore(&device->cache_lock, flags);
 
         return ret;
 }
@@ -1047,15 +1074,13 @@
                                u64 *sn_pfx)
 {
         unsigned long flags;
-        int p;
 
         if (!rdma_is_port_valid(device, port_num))
                 return -EINVAL;
 
-        p = port_num - rdma_start_port(device);
-        read_lock_irqsave(&device->cache.lock, flags);
-        *sn_pfx = device->cache.ports[p].subnet_prefix;
-        read_unlock_irqrestore(&device->cache.lock, flags);
+        read_lock_irqsave(&device->cache_lock, flags);
+        *sn_pfx = device->port_data[port_num].cache.subnet_prefix;
+        read_unlock_irqrestore(&device->cache_lock, flags);
 
         return 0;
 }
@@ -1075,9 +1100,13 @@
         if (!rdma_is_port_valid(device, port_num))
                 return -EINVAL;
 
-        read_lock_irqsave(&device->cache.lock, flags);
+        read_lock_irqsave(&device->cache_lock, flags);
 
-        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
+        cache = device->port_data[port_num].cache.pkey;
+        if (!cache) {
+                ret = -EINVAL;
+                goto err;
+        }
 
         *index = -1;
 
@@ -1096,7 +1125,8 @@
                 ret = 0;
         }
 
-        read_unlock_irqrestore(&device->cache.lock, flags);
+err:
+        read_unlock_irqrestore(&device->cache_lock, flags);
 
         return ret;
 }
@@ -1115,9 +1145,13 @@
         if (!rdma_is_port_valid(device, port_num))
                 return -EINVAL;
 
-        read_lock_irqsave(&device->cache.lock, flags);
+        read_lock_irqsave(&device->cache_lock, flags);
 
-        cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
+        cache = device->port_data[port_num].cache.pkey;
+        if (!cache) {
+                ret = -EINVAL;
+                goto err;
+        }
 
         *index = -1;
 
@@ -1128,7 +1162,8 @@
                         break;
         }
 
-        read_unlock_irqrestore(&device->cache.lock, flags);
+err:
+        read_unlock_irqrestore(&device->cache_lock, flags);
 
         return ret;
 }
@@ -1144,9 +1179,9 @@
         if (!rdma_is_port_valid(device, port_num))
                 return -EINVAL;
 
-        read_lock_irqsave(&device->cache.lock, flags);
-        *lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
-        read_unlock_irqrestore(&device->cache.lock, flags);
+        read_lock_irqsave(&device->cache_lock, flags);
+        *lmc = device->port_data[port_num].cache.lmc;
+        read_unlock_irqrestore(&device->cache_lock, flags);
 
         return ret;
 }
@@ -1162,10 +1197,9 @@
         if (!rdma_is_port_valid(device, port_num))
                 return -EINVAL;
 
-        read_lock_irqsave(&device->cache.lock, flags);
-        *port_state = device->cache.ports[port_num
-                - rdma_start_port(device)].port_state;
-        read_unlock_irqrestore(&device->cache.lock, flags);
+        read_lock_irqsave(&device->cache_lock, flags);
+        *port_state = device->port_data[port_num].cache.port_state;
+        read_unlock_irqrestore(&device->cache_lock, flags);
 
         return ret;
 }
@@ -1190,7 +1224,7 @@
 const struct ib_gid_attr *
 rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
 {
-        const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+        const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
         struct ib_gid_table *table;
         unsigned long flags;
 
@@ -1212,6 +1246,64 @@
         return attr;
 }
 EXPORT_SYMBOL(rdma_get_gid_attr);
+
+/**
+ * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
+ * @device: The device to query.
+ * @entries: Entries where GID entries are returned.
+ * @max_entries: Maximum number of entries that can be returned.
+ * Entries array must be allocated to hold max_entries number of entries.
+ * @num_entries: Updated to the number of entries that were successfully read.
+ *
+ * Returns number of entries on success or appropriate error code.
+ */
+ssize_t rdma_query_gid_table(struct ib_device *device,
+                             struct ib_uverbs_gid_entry *entries,
+                             size_t max_entries)
+{
+        const struct ib_gid_attr *gid_attr;
+        ssize_t num_entries = 0, ret;
+        struct ib_gid_table *table;
+        unsigned int port_num, i;
+        struct net_device *ndev;
+        unsigned long flags;
+
+        rdma_for_each_port(device, port_num) {
+                table = rdma_gid_table(device, port_num);
+                read_lock_irqsave(&table->rwlock, flags);
+                for (i = 0; i < table->sz; i++) {
+                        if (!is_gid_entry_valid(table->data_vec[i]))
+                                continue;
+                        if (num_entries >= max_entries) {
+                                ret = -EINVAL;
+                                goto err;
+                        }
+
+                        gid_attr = &table->data_vec[i]->attr;
+
+                        memcpy(&entries->gid, &gid_attr->gid,
+                               sizeof(gid_attr->gid));
+                        entries->gid_index = gid_attr->index;
+                        entries->port_num = gid_attr->port_num;
+                        entries->gid_type = gid_attr->gid_type;
+                        ndev = rcu_dereference_protected(
+                                gid_attr->ndev,
+                                lockdep_is_held(&table->rwlock));
+                        if (ndev)
+                                entries->netdev_ifindex = ndev->ifindex;
+
+                        num_entries++;
+                        entries++;
+                }
+                read_unlock_irqrestore(&table->rwlock, flags);
+        }
+
+        return num_entries;
+err:
+        read_unlock_irqrestore(&table->rwlock, flags);
+        return ret;
+}
+EXPORT_SYMBOL(rdma_query_gid_table);
 
 /**
  * rdma_put_gid_attr - Release reference to the GID attribute
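rdma_query_gid_table() copies every valid entry across all ports into a caller-supplied array and returns how many it wrote, or -EINVAL if the array is too small. A hedged sketch of an in-kernel caller (the array size of 32 is an arbitrary bound chosen for the example):

#include <linux/slab.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_verbs.h>

static void example_dump_gids(struct ib_device *device)
{
        struct ib_uverbs_gid_entry *entries;
        ssize_t n, i;

        entries = kcalloc(32, sizeof(*entries), GFP_KERNEL);
        if (!entries)
                return;

        n = rdma_query_gid_table(device, entries, 32);
        for (i = 0; i < n; i++)
                dev_info(&device->dev, "port %u gid index %u ifindex %u\n",
                         entries[i].port_num, entries[i].gid_index,
                         entries[i].netdev_ifindex);

        kfree(entries);
}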
@@ -1252,6 +1344,103 @@
 }
 EXPORT_SYMBOL(rdma_hold_gid_attr);
 
+/**
+ * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
+ * which must be in UP state.
+ *
+ * @attr:Pointer to the GID attribute
+ *
+ * Returns pointer to netdevice if the netdevice was attached to GID and
+ * netdevice is in UP state. Caller must hold RCU lock as this API
+ * reads the netdev flags which can change while netdevice migrates to
+ * different net namespace. Returns ERR_PTR with error code otherwise.
+ *
+ */
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
+{
+        struct ib_gid_table_entry *entry =
+                container_of(attr, struct ib_gid_table_entry, attr);
+        struct ib_device *device = entry->attr.device;
+        struct net_device *ndev = ERR_PTR(-EINVAL);
+        u8 port_num = entry->attr.port_num;
+        struct ib_gid_table *table;
+        unsigned long flags;
+        bool valid;
+
+        table = rdma_gid_table(device, port_num);
+
+        read_lock_irqsave(&table->rwlock, flags);
+        valid = is_gid_entry_valid(table->data_vec[attr->index]);
+        if (valid) {
+                ndev = rcu_dereference(attr->ndev);
+                if (!ndev)
+                        ndev = ERR_PTR(-ENODEV);
+        }
+        read_unlock_irqrestore(&table->rwlock, flags);
+        return ndev;
+}
+EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
+
+static int get_lower_dev_vlan(struct net_device *lower_dev,
+                              struct netdev_nested_priv *priv)
+{
+        u16 *vlan_id = (u16 *)priv->data;
+
+        if (is_vlan_dev(lower_dev))
+                *vlan_id = vlan_dev_vlan_id(lower_dev);
+
+        /* We are interested only in first level vlan device, so
+         * always return 1 to stop iterating over next level devices.
+         */
+        return 1;
+}
+
+/**
+ * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
+ *                           of a GID entry.
+ *
+ * @attr:       GID attribute pointer whose L2 fields to be read
+ * @vlan_id:    Pointer to vlan id to fill up if the GID entry has
+ *              vlan id. It is optional.
+ * @smac:       Pointer to smac to fill up for a GID entry. It is optional.
+ *
+ * rdma_read_gid_l2_fields() returns 0 on success and returns vlan id
+ * (if gid entry has vlan) and source MAC, or returns error.
+ */
+int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
+                            u16 *vlan_id, u8 *smac)
+{
+        struct netdev_nested_priv priv = {
+                .data = (void *)vlan_id,
+        };
+        struct net_device *ndev;
+
+        rcu_read_lock();
+        ndev = rcu_dereference(attr->ndev);
+        if (!ndev) {
+                rcu_read_unlock();
+                return -ENODEV;
+        }
+        if (smac)
+                ether_addr_copy(smac, ndev->dev_addr);
+        if (vlan_id) {
+                *vlan_id = 0xffff;
+                if (is_vlan_dev(ndev)) {
+                        *vlan_id = vlan_dev_vlan_id(ndev);
+                } else {
+                        /* If the netdev is an upper device and its lower
+                         * device is a vlan device, consider the vlan id of
+                         * the lower vlan device for this gid entry.
+                         */
+                        netdev_walk_all_lower_dev_rcu(attr->ndev,
+                                        get_lower_dev_vlan, &priv);
+                }
+        }
+        rcu_read_unlock();
+        return 0;
+}
+EXPORT_SYMBOL(rdma_read_gid_l2_fields);
+
 static int config_non_roce_gid_cache(struct ib_device *device,
                                      u8 port, int gid_tbl_len)
 {
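rdma_read_gid_l2_fields() gives RoCE drivers the vlan id and source MAC of the netdev backing a GID entry, with 0xffff acting as the "no vlan" marker. An illustrative caller (example_report_l2 is a hypothetical helper):

#include <linux/if_ether.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_verbs.h>

static void example_report_l2(const struct ib_gid_attr *attr)
{
        u8 smac[ETH_ALEN];
        u16 vlan_id;

        if (rdma_read_gid_l2_fields(attr, &vlan_id, smac))
                return;

        if (vlan_id != 0xffff)
                pr_info("gid index %u: vlan %u, smac %pM\n",
                        attr->index, vlan_id, smac);
}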
@@ -1266,12 +1455,13 @@
 
         mutex_lock(&table->lock);
         for (i = 0; i < gid_tbl_len; ++i) {
-                if (!device->query_gid)
+                if (!device->ops.query_gid)
                         continue;
-                ret = device->query_gid(device, port, i, &gid_attr.gid);
+                ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
                 if (ret) {
-                        pr_warn("query_gid failed (%d) for %s (index %d)\n",
-                                ret, device->name, i);
+                        dev_warn(&device->dev,
+                                 "query_gid failed (%d) for index %d\n", ret,
+                                 i);
                         goto err;
                 }
                 gid_attr.index = i;
@@ -1282,9 +1472,8 @@
         return ret;
 }
 
-static void ib_cache_update(struct ib_device *device,
-                            u8 port,
-                            bool enforce_security)
+static int
+ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
 {
         struct ib_port_attr *tprops = NULL;
         struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
@@ -1292,16 +1481,15 @@
         int ret;
 
         if (!rdma_is_port_valid(device, port))
-                return;
+                return -EINVAL;
 
         tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
         if (!tprops)
-                return;
+                return -ENOMEM;
 
         ret = ib_query_port(device, port, tprops);
         if (ret) {
-                pr_warn("ib_query_port failed (%d) for %s\n",
-                        ret, device->name);
+                dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
                 goto err;
         }
 
@@ -1312,36 +1500,39 @@
                 goto err;
         }
 
-        pkey_cache = kmalloc(struct_size(pkey_cache, table,
-                                         tprops->pkey_tbl_len),
-                             GFP_KERNEL);
-        if (!pkey_cache)
-                goto err;
-
-        pkey_cache->table_len = tprops->pkey_tbl_len;
-
-        for (i = 0; i < pkey_cache->table_len; ++i) {
-                ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
-                if (ret) {
-                        pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
-                                ret, device->name, i);
+        if (tprops->pkey_tbl_len) {
+                pkey_cache = kmalloc(struct_size(pkey_cache, table,
+                                                 tprops->pkey_tbl_len),
+                                     GFP_KERNEL);
+                if (!pkey_cache) {
+                        ret = -ENOMEM;
                         goto err;
+                }
+
+                pkey_cache->table_len = tprops->pkey_tbl_len;
+
+                for (i = 0; i < pkey_cache->table_len; ++i) {
+                        ret = ib_query_pkey(device, port, i,
+                                            pkey_cache->table + i);
+                        if (ret) {
+                                dev_warn(&device->dev,
+                                         "ib_query_pkey failed (%d) for index %d\n",
+                                         ret, i);
+                                goto err;
+                        }
                 }
         }
 
-        write_lock_irq(&device->cache.lock);
+        write_lock_irq(&device->cache_lock);
 
-        old_pkey_cache = device->cache.ports[port -
-                rdma_start_port(device)].pkey;
+        old_pkey_cache = device->port_data[port].cache.pkey;
 
-        device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
-        device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
-        device->cache.ports[port - rdma_start_port(device)].port_state =
-                tprops->state;
+        device->port_data[port].cache.pkey = pkey_cache;
+        device->port_data[port].cache.lmc = tprops->lmc;
+        device->port_data[port].cache.port_state = tprops->state;
 
-        device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
-                tprops->subnet_prefix;
-        write_unlock_irq(&device->cache.lock);
+        device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
+        write_unlock_irq(&device->cache_lock);
 
         if (enforce_security)
                 ib_security_cache_change(device,
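The per-device cache_lock keeps the same discipline as the cache.lock it replaces: the updater publishes a freshly built pkey table under write_lock_irq(), while query paths copy values out under read_lock_irqsave(). A toy version of that publication pattern, not tied to the RDMA structures:

#include <linux/spinlock.h>

static DEFINE_RWLOCK(example_cache_lock);
static int example_cached_value;

/* Updater: runs in process context, swaps in the new value atomically
 * with respect to readers.
 */
static void example_cache_publish(int new_value)
{
        write_lock_irq(&example_cache_lock);
        example_cached_value = new_value;
        write_unlock_irq(&example_cache_lock);
}

/* Reader: IRQ-safe, mirroring ib_get_cached_pkey() and friends. */
static int example_cache_read(void)
{
        unsigned long flags;
        int value;

        read_lock_irqsave(&example_cache_lock, flags);
        value = example_cached_value;
        read_unlock_irqrestore(&example_cache_lock, flags);
        return value;
}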
@@ -1350,85 +1541,108 @@
 
         kfree(old_pkey_cache);
         kfree(tprops);
-        return;
+        return 0;
 
 err:
         kfree(pkey_cache);
         kfree(tprops);
+        return ret;
 }
 
-static void ib_cache_task(struct work_struct *_work)
+static void ib_cache_event_task(struct work_struct *_work)
+{
+        struct ib_update_work *work =
+                container_of(_work, struct ib_update_work, work);
+        int ret;
+
+        /* Before distributing the cache update event, first sync
+         * the cache.
+         */
+        ret = ib_cache_update(work->event.device, work->event.element.port_num,
+                              work->enforce_security);
+
+        /* GID event is notified already for individual GID entries by
+         * dispatch_gid_change_event(). Hence, notify for the rest of the
+         * events.
+         */
+        if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
+                ib_dispatch_event_clients(&work->event);
+
+        kfree(work);
+}
+
+static void ib_generic_event_task(struct work_struct *_work)
 {
         struct ib_update_work *work =
                 container_of(_work, struct ib_update_work, work);
 
-        ib_cache_update(work->device,
-                        work->port_num,
-                        work->enforce_security);
+        ib_dispatch_event_clients(&work->event);
         kfree(work);
 }
 
-static void ib_cache_event(struct ib_event_handler *handler,
-                           struct ib_event *event)
+static bool is_cache_update_event(const struct ib_event *event)
+{
+        return (event->event == IB_EVENT_PORT_ERR ||
+                event->event == IB_EVENT_PORT_ACTIVE ||
+                event->event == IB_EVENT_LID_CHANGE ||
+                event->event == IB_EVENT_PKEY_CHANGE ||
+                event->event == IB_EVENT_CLIENT_REREGISTER ||
+                event->event == IB_EVENT_GID_CHANGE);
+}
+
+/**
+ * ib_dispatch_event - Dispatch an asynchronous event
+ * @event:Event to dispatch
+ *
+ * Low-level drivers must call ib_dispatch_event() to dispatch the
+ * event to all registered event handlers when an asynchronous event
+ * occurs.
+ */
+void ib_dispatch_event(const struct ib_event *event)
 {
         struct ib_update_work *work;
 
-        if (event->event == IB_EVENT_PORT_ERR ||
-            event->event == IB_EVENT_PORT_ACTIVE ||
-            event->event == IB_EVENT_LID_CHANGE ||
-            event->event == IB_EVENT_PKEY_CHANGE ||
-            event->event == IB_EVENT_SM_CHANGE ||
-            event->event == IB_EVENT_CLIENT_REREGISTER ||
-            event->event == IB_EVENT_GID_CHANGE) {
-                work = kmalloc(sizeof *work, GFP_ATOMIC);
-                if (work) {
-                        INIT_WORK(&work->work, ib_cache_task);
-                        work->device = event->device;
-                        work->port_num = event->element.port_num;
-                        if (event->event == IB_EVENT_PKEY_CHANGE ||
-                            event->event == IB_EVENT_GID_CHANGE)
-                                work->enforce_security = true;
-                        else
-                                work->enforce_security = false;
+        work = kzalloc(sizeof(*work), GFP_ATOMIC);
+        if (!work)
+                return;
 
-                        queue_work(ib_wq, &work->work);
-                }
-        }
+        if (is_cache_update_event(event))
+                INIT_WORK(&work->work, ib_cache_event_task);
+        else
+                INIT_WORK(&work->work, ib_generic_event_task);
+
+        work->event = *event;
+        if (event->event == IB_EVENT_PKEY_CHANGE ||
+            event->event == IB_EVENT_GID_CHANGE)
+                work->enforce_security = true;
+
+        queue_work(ib_wq, &work->work);
 }
+EXPORT_SYMBOL(ib_dispatch_event);
 
 int ib_cache_setup_one(struct ib_device *device)
 {
-        int p;
+        unsigned int p;
         int err;
 
-        rwlock_init(&device->cache.lock);
-
-        device->cache.ports =
-                kcalloc(rdma_end_port(device) - rdma_start_port(device) + 1,
-                        sizeof(*device->cache.ports),
-                        GFP_KERNEL);
-        if (!device->cache.ports)
-                return -ENOMEM;
+        rwlock_init(&device->cache_lock);
 
         err = gid_table_setup_one(device);
-        if (err) {
-                kfree(device->cache.ports);
-                device->cache.ports = NULL;
+        if (err)
                 return err;
+
+        rdma_for_each_port (device, p) {
+                err = ib_cache_update(device, p, true);
+                if (err)
+                        return err;
         }
 
-        for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
-                ib_cache_update(device, p + rdma_start_port(device), true);
-
-        INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
-                              device, ib_cache_event);
-        ib_register_event_handler(&device->cache.event_handler);
         return 0;
 }
 
 void ib_cache_release_one(struct ib_device *device)
 {
-        int p;
+        unsigned int p;
 
         /*
          * The release function frees all the cache elements.
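With this rework the cache no longer registers its own ib_event_handler; ib_dispatch_event() itself queues either ib_cache_event_task() or ib_generic_event_task(), and registered clients are then notified from the work item via ib_dispatch_event_clients(). Clients still consume events the usual way; a minimal, hedged sketch of such a consumer (the example_* names are hypothetical):

#include <rdma/ib_verbs.h>

static void example_handle_event(struct ib_event_handler *handler,
                                 struct ib_event *event)
{
        if (event->event == IB_EVENT_PORT_ACTIVE)
                pr_info("%s: port %u became active\n",
                        dev_name(&event->device->dev),
                        event->element.port_num);
}

static struct ib_event_handler example_handler;

static void example_register(struct ib_device *device)
{
        INIT_IB_EVENT_HANDLER(&example_handler, device, example_handle_event);
        ib_register_event_handler(&example_handler);
}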
@@ -1436,23 +1650,20 @@
          * all the device's resources when the cache could no
          * longer be accessed.
          */
-        for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
-                kfree(device->cache.ports[p].pkey);
+        rdma_for_each_port (device, p)
+                kfree(device->port_data[p].cache.pkey);
 
         gid_table_release_one(device);
-        kfree(device->cache.ports);
 }
 
 void ib_cache_cleanup_one(struct ib_device *device)
 {
-        /* The cleanup function unregisters the event handler,
-         * waits for all in-progress workqueue elements and cleans
-         * up the GID cache. This function should be called after
-         * the device was removed from the devices list and all
-         * clients were removed, so the cache exists but is
+        /* The cleanup function waits for all in-progress workqueue
+         * elements and cleans up the GID cache. This function should be
+         * called after the device was removed from the devices list and
+         * all clients were removed, so the cache exists but is
          * non-functional and shouldn't be updated anymore.
          */
-        ib_unregister_event_handler(&device->cache.event_handler);
         flush_workqueue(ib_wq);
         gid_table_cleanup_one(device);
 