hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/net/ipv4/fib_trie.c
....@@ -1,8 +1,5 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
2
- * This program is free software; you can redistribute it and/or
3
- * modify it under the terms of the GNU General Public License
4
- * as published by the Free Software Foundation; either version
5
- * 2 of the License, or (at your option) any later version.
63 *
74 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
85 * & Swedish University of Agricultural Sciences.
....@@ -16,15 +13,12 @@
1613 *
1714 * An experimental study of compression methods for dynamic tries
1815 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19
- * http://www.csc.kth.se/~snilsson/software/dyntrie2/
20
- *
16
+ * https://www.csc.kth.se/~snilsson/software/dyntrie2/
2117 *
2218 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
2319 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
2420 *
25
- *
2621 * Code from fib_hash has been reused which includes the following header:
27
- *
2822 *
2923 * INET An implementation of the TCP/IP protocol suite for the LINUX
3024 * operating system. INET is implemented using the BSD Socket
....@@ -32,13 +26,7 @@
3226 *
3327 * IPv4 FIB: lookup engine and maintenance routines.
3428 *
35
- *
3629 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
37
- *
38
- * This program is free software; you can redistribute it and/or
39
- * modify it under the terms of the GNU General Public License
40
- * as published by the Free Software Foundation; either version
41
- * 2 of the License, or (at your option) any later version.
4230 *
4331 * Substantial contributions to this work come from:
4432 *
....@@ -47,9 +35,6 @@
4735 * Paul E. McKenney <paulmck@us.ibm.com>
4836 * Patrick McHardy <kaber@trash.net>
4937 */
50
-
51
-#define VERSION "0.409"
52
-
5338 #include <linux/cache.h>
5439 #include <linux/uaccess.h>
5540 #include <linux/bitops.h>
....@@ -86,11 +71,13 @@
8671 #include <trace/events/fib.h>
8772 #include "fib_lookup.h"
8873
89
-static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
74
+static int call_fib_entry_notifier(struct notifier_block *nb,
9075 enum fib_event_type event_type, u32 dst,
91
- int dst_len, struct fib_alias *fa)
76
+ int dst_len, struct fib_alias *fa,
77
+ struct netlink_ext_ack *extack)
9278 {
9379 struct fib_entry_notifier_info info = {
80
+ .info.extack = extack,
9481 .dst = dst,
9582 .dst_len = dst_len,
9683 .fi = fa->fa_info,
....@@ -98,7 +85,7 @@
9885 .type = fa->fa_type,
9986 .tb_id = fa->tb_id,
10087 };
101
- return call_fib4_notifier(nb, net, event_type, &info.info);
88
+ return call_fib4_notifier(nb, event_type, &info.info);
10289 }
10390
10491 static int call_fib_entry_notifiers(struct net *net,
....@@ -183,14 +170,16 @@
183170 };
184171
185172 static struct key_vector *resize(struct trie *t, struct key_vector *tn);
186
-static size_t tnode_free_size;
173
+static unsigned int tnode_free_size;
187174
188175 /*
189
- * synchronize_rcu after call_rcu for that many pages; it should be especially
190
- * useful before resizing the root node with PREEMPT_NONE configs; the value was
191
- * obtained experimentally, aiming to avoid visible slowdown.
176
+ * synchronize_rcu after call_rcu for outstanding dirty memory; it should be
177
+ * especially useful before resizing the root node with PREEMPT_NONE configs;
178
+ * the value was obtained experimentally, aiming to avoid visible slowdown.
192179 */
193
-static const int sync_pages = 128;
180
+unsigned int sysctl_fib_sync_mem = 512 * 1024;
181
+unsigned int sysctl_fib_sync_mem_min = 64 * 1024;
182
+unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024;
194183
195184 static struct kmem_cache *fn_alias_kmem __ro_after_init;
196185 static struct kmem_cache *trie_leaf_kmem __ro_after_init;
....@@ -312,8 +301,6 @@
312301 call_rcu(&fa->rcu, __alias_free_mem);
313302 }
314303
315
-#define TNODE_KMALLOC_MAX \
316
- ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *))
317304 #define TNODE_VMALLOC_MAX \
318305 ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
319306
....@@ -348,12 +335,18 @@
348335
349336 static inline void empty_child_inc(struct key_vector *n)
350337 {
351
- ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children;
338
+ tn_info(n)->empty_children++;
339
+
340
+ if (!tn_info(n)->empty_children)
341
+ tn_info(n)->full_children++;
352342 }
353343
354344 static inline void empty_child_dec(struct key_vector *n)
355345 {
356
- tn_info(n)->empty_children-- ? : tn_info(n)->full_children--;
346
+ if (!tn_info(n)->empty_children)
347
+ tn_info(n)->full_children--;
348
+
349
+ tn_info(n)->empty_children--;
357350 }
358351
359352 static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
....@@ -504,7 +497,7 @@
504497 tn = container_of(head, struct tnode, rcu)->kv;
505498 }
506499
507
- if (tnode_free_size >= PAGE_SIZE * sync_pages) {
500
+ if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
508501 tnode_free_size = 0;
509502 synchronize_rcu();
510503 }
....@@ -982,9 +975,12 @@
982975
983976 /* Return the first fib alias matching TOS with
984977 * priority less than or equal to PRIO.
978
+ * If 'find_first' is set, return the first matching
979
+ * fib alias, regardless of TOS and priority.
985980 */
986981 static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
987
- u8 tos, u32 prio, u32 tb_id)
982
+ u8 tos, u32 prio, u32 tb_id,
983
+ bool find_first)
988984 {
989985 struct fib_alias *fa;
990986
....@@ -1000,6 +996,8 @@
1000996 continue;
1001997 if (fa->tb_id != tb_id)
1002998 break;
999
+ if (find_first)
1000
+ return fa;
10031001 if (fa->fa_tos > tos)
10041002 continue;
10051003 if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
....@@ -1008,6 +1006,52 @@
10081006
10091007 return NULL;
10101008 }
1009
+
1010
+static struct fib_alias *
1011
+fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
1012
+{
1013
+ u8 slen = KEYLENGTH - fri->dst_len;
1014
+ struct key_vector *l, *tp;
1015
+ struct fib_table *tb;
1016
+ struct fib_alias *fa;
1017
+ struct trie *t;
1018
+
1019
+ tb = fib_get_table(net, fri->tb_id);
1020
+ if (!tb)
1021
+ return NULL;
1022
+
1023
+ t = (struct trie *)tb->tb_data;
1024
+ l = fib_find_node(t, &tp, be32_to_cpu(fri->dst));
1025
+ if (!l)
1026
+ return NULL;
1027
+
1028
+ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1029
+ if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
1030
+ fa->fa_tos == fri->tos && fa->fa_info == fri->fi &&
1031
+ fa->fa_type == fri->type)
1032
+ return fa;
1033
+ }
1034
+
1035
+ return NULL;
1036
+}
1037
+
1038
+void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
1039
+{
1040
+ struct fib_alias *fa_match;
1041
+
1042
+ rcu_read_lock();
1043
+
1044
+ fa_match = fib_find_matching_alias(net, fri);
1045
+ if (!fa_match)
1046
+ goto out;
1047
+
1048
+ fa_match->offload = fri->offload;
1049
+ fa_match->trap = fri->trap;
1050
+
1051
+out:
1052
+ rcu_read_unlock();
1053
+}
1054
+EXPORT_SYMBOL_GPL(fib_alias_hw_flags_set);
10111055
10121056 static void trie_rebalance(struct trie *t, struct key_vector *tn)
10131057 {
....@@ -1065,9 +1109,6 @@
10651109 return -ENOMEM;
10661110 }
10671111
1068
-/* fib notifier for ADD is sent before calling fib_insert_alias with
1069
- * the expectation that the only possible failure ENOMEM
1070
- */
10711112 static int fib_insert_alias(struct trie *t, struct key_vector *tp,
10721113 struct key_vector *l, struct fib_alias *new,
10731114 struct fib_alias *fa, t_key key)
....@@ -1120,11 +1161,13 @@
11201161 return true;
11211162 }
11221163
1164
+static void fib_remove_alias(struct trie *t, struct key_vector *tp,
1165
+ struct key_vector *l, struct fib_alias *old);
1166
+
11231167 /* Caller must hold RTNL. */
11241168 int fib_table_insert(struct net *net, struct fib_table *tb,
11251169 struct fib_config *cfg, struct netlink_ext_ack *extack)
11261170 {
1127
- enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
11281171 struct trie *t = (struct trie *)tb->tb_data;
11291172 struct fib_alias *fa, *new_fa;
11301173 struct key_vector *l, *tp;
....@@ -1151,7 +1194,7 @@
11511194
11521195 l = fib_find_node(t, &tp, key);
11531196 fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
1154
- tb->tb_id) : NULL;
1197
+ tb->tb_id, false) : NULL;
11551198
11561199 /* Now fa, if non-NULL, points to the first fib alias
11571200 * with the same keys [prefix,tos,priority], if such key already
....@@ -1218,18 +1261,28 @@
12181261 new_fa->fa_slen = fa->fa_slen;
12191262 new_fa->tb_id = tb->tb_id;
12201263 new_fa->fa_default = -1;
1264
+ new_fa->offload = 0;
1265
+ new_fa->trap = 0;
12211266
1222
- err = call_fib_entry_notifiers(net,
1223
- FIB_EVENT_ENTRY_REPLACE,
1224
- key, plen, new_fa,
1225
- extack);
1226
- if (err)
1227
- goto out_free_new_fa;
1267
+ hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1268
+
1269
+ if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0,
1270
+ tb->tb_id, true) == new_fa) {
1271
+ enum fib_event_type fib_event;
1272
+
1273
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
1274
+ err = call_fib_entry_notifiers(net, fib_event,
1275
+ key, plen,
1276
+ new_fa, extack);
1277
+ if (err) {
1278
+ hlist_replace_rcu(&new_fa->fa_list,
1279
+ &fa->fa_list);
1280
+ goto out_free_new_fa;
1281
+ }
1282
+ }
12281283
12291284 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
12301285 tb->tb_id, &cfg->fc_nlinfo, nlflags);
1231
-
1232
- hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
12331286
12341287 alias_free_mem_rcu(fa);
12351288
....@@ -1246,12 +1299,10 @@
12461299 if (fa_match)
12471300 goto out;
12481301
1249
- if (cfg->fc_nlflags & NLM_F_APPEND) {
1250
- event = FIB_EVENT_ENTRY_APPEND;
1302
+ if (cfg->fc_nlflags & NLM_F_APPEND)
12511303 nlflags |= NLM_F_APPEND;
1252
- } else {
1304
+ else
12531305 fa = fa_first;
1254
- }
12551306 }
12561307 err = -ENOENT;
12571308 if (!(cfg->fc_nlflags & NLM_F_CREATE))
....@@ -1270,15 +1321,31 @@
12701321 new_fa->fa_slen = slen;
12711322 new_fa->tb_id = tb->tb_id;
12721323 new_fa->fa_default = -1;
1273
-
1274
- err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
1275
- if (err)
1276
- goto out_free_new_fa;
1324
+ new_fa->offload = 0;
1325
+ new_fa->trap = 0;
12771326
12781327 /* Insert new entry to the list. */
12791328 err = fib_insert_alias(t, tp, l, new_fa, fa, key);
12801329 if (err)
1281
- goto out_fib_notif;
1330
+ goto out_free_new_fa;
1331
+
1332
+ /* The alias was already inserted, so the node must exist. */
1333
+ l = l ? l : fib_find_node(t, &tp, key);
1334
+ if (WARN_ON_ONCE(!l)) {
1335
+ err = -ENOENT;
1336
+ goto out_free_new_fa;
1337
+ }
1338
+
1339
+ if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
1340
+ new_fa) {
1341
+ enum fib_event_type fib_event;
1342
+
1343
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
1344
+ err = call_fib_entry_notifiers(net, fib_event, key, plen,
1345
+ new_fa, extack);
1346
+ if (err)
1347
+ goto out_remove_new_fa;
1348
+ }
12821349
12831350 if (!plen)
12841351 tb->tb_num_default++;
....@@ -1289,14 +1356,8 @@
12891356 succeeded:
12901357 return 0;
12911358
1292
-out_fib_notif:
1293
- /* notifier was sent that entry would be added to trie, but
1294
- * the add failed and need to recover. Only failure for
1295
- * fib_insert_alias is ENOMEM.
1296
- */
1297
- NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
1298
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
1299
- plen, new_fa, NULL);
1359
+out_remove_new_fa:
1360
+ fib_remove_alias(t, tp, l, new_fa);
13001361 out_free_new_fa:
13011362 kmem_cache_free(fn_alias_kmem, new_fa);
13021363 out:
....@@ -1310,6 +1371,26 @@
13101371 t_key prefix = n->key;
13111372
13121373 return (key ^ prefix) & (prefix | -prefix);
1374
+}
1375
+
1376
+bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
1377
+ const struct flowi4 *flp)
1378
+{
1379
+ if (nhc->nhc_flags & RTNH_F_DEAD)
1380
+ return false;
1381
+
1382
+ if (ip_ignore_linkdown(nhc->nhc_dev) &&
1383
+ nhc->nhc_flags & RTNH_F_LINKDOWN &&
1384
+ !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1385
+ return false;
1386
+
1387
+ if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
1388
+ if (flp->flowi4_oif &&
1389
+ flp->flowi4_oif != nhc->nhc_oif)
1390
+ return false;
1391
+ }
1392
+
1393
+ return true;
13131394 }
13141395
13151396 /* should be called with rcu_read_lock */
....@@ -1444,6 +1525,7 @@
14441525 /* Step 3: Process the leaf, if that fails fall back to backtracing */
14451526 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
14461527 struct fib_info *fi = fa->fa_info;
1528
+ struct fib_nh_common *nhc;
14471529 int nhsel, err;
14481530
14491531 if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
....@@ -1452,13 +1534,15 @@
14521534 }
14531535 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
14541536 continue;
1455
- if (fi->fib_dead)
1537
+ /* Paired with WRITE_ONCE() in fib_release_info() */
1538
+ if (READ_ONCE(fi->fib_dead))
14561539 continue;
14571540 if (fa->fa_info->fib_scope < flp->flowi4_scope)
14581541 continue;
14591542 fib_alias_accessed(fa);
14601543 err = fib_props[fa->fa_type].error;
14611544 if (unlikely(err < 0)) {
1545
+out_reject:
14621546 #ifdef CONFIG_IP_FIB_TRIE_STATS
14631547 this_cpu_inc(stats->semantic_match_passed);
14641548 #endif
....@@ -1467,29 +1551,33 @@
14671551 }
14681552 if (fi->fib_flags & RTNH_F_DEAD)
14691553 continue;
1470
- for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1471
- const struct fib_nh *nh = &fi->fib_nh[nhsel];
1472
- struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
14731554
1474
- if (nh->nh_flags & RTNH_F_DEAD)
1475
- continue;
1476
- if (in_dev &&
1477
- IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1478
- nh->nh_flags & RTNH_F_LINKDOWN &&
1479
- !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1480
- continue;
1481
- if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
1482
- if (flp->flowi4_oif &&
1483
- flp->flowi4_oif != nh->nh_oif)
1484
- continue;
1555
+ if (unlikely(fi->nh)) {
1556
+ if (nexthop_is_blackhole(fi->nh)) {
1557
+ err = fib_props[RTN_BLACKHOLE].error;
1558
+ goto out_reject;
14851559 }
14861560
1561
+ nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp,
1562
+ &nhsel);
1563
+ if (nhc)
1564
+ goto set_result;
1565
+ goto miss;
1566
+ }
1567
+
1568
+ for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
1569
+ nhc = fib_info_nhc(fi, nhsel);
1570
+
1571
+ if (!fib_lookup_good_nhc(nhc, fib_flags, flp))
1572
+ continue;
1573
+set_result:
14871574 if (!(fib_flags & FIB_LOOKUP_NOREF))
14881575 refcount_inc(&fi->fib_clntref);
14891576
14901577 res->prefix = htonl(n->key);
14911578 res->prefixlen = KEYLENGTH - fa->fa_slen;
14921579 res->nh_sel = nhsel;
1580
+ res->nhc = nhc;
14931581 res->type = fa->fa_type;
14941582 res->scope = fi->fib_scope;
14951583 res->fi = fi;
....@@ -1498,11 +1586,12 @@
14981586 #ifdef CONFIG_IP_FIB_TRIE_STATS
14991587 this_cpu_inc(stats->semantic_match_passed);
15001588 #endif
1501
- trace_fib_table_lookup(tb->tb_id, flp, nh, err);
1589
+ trace_fib_table_lookup(tb->tb_id, flp, nhc, err);
15021590
15031591 return err;
15041592 }
15051593 }
1594
+miss:
15061595 #ifdef CONFIG_IP_FIB_TRIE_STATS
15071596 this_cpu_inc(stats->semantic_match_miss);
15081597 #endif
....@@ -1541,6 +1630,36 @@
15411630 node_pull_suffix(tp, fa->fa_slen);
15421631 }
15431632
1633
+static void fib_notify_alias_delete(struct net *net, u32 key,
1634
+ struct hlist_head *fah,
1635
+ struct fib_alias *fa_to_delete,
1636
+ struct netlink_ext_ack *extack)
1637
+{
1638
+ struct fib_alias *fa_next, *fa_to_notify;
1639
+ u32 tb_id = fa_to_delete->tb_id;
1640
+ u8 slen = fa_to_delete->fa_slen;
1641
+ enum fib_event_type fib_event;
1642
+
1643
+ /* Do not notify if we do not care about the route. */
1644
+ if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete)
1645
+ return;
1646
+
1647
+ /* Determine if the route should be replaced by the next route in the
1648
+ * list.
1649
+ */
1650
+ fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,
1651
+ struct fib_alias, fa_list);
1652
+ if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {
1653
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
1654
+ fa_to_notify = fa_next;
1655
+ } else {
1656
+ fib_event = FIB_EVENT_ENTRY_DEL;
1657
+ fa_to_notify = fa_to_delete;
1658
+ }
1659
+ call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen,
1660
+ fa_to_notify, extack);
1661
+}
1662
+
15441663 /* Caller must hold RTNL. */
15451664 int fib_table_delete(struct net *net, struct fib_table *tb,
15461665 struct fib_config *cfg, struct netlink_ext_ack *extack)
....@@ -1562,7 +1681,7 @@
15621681 if (!l)
15631682 return -ESRCH;
15641683
1565
- fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id);
1684
+ fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false);
15661685 if (!fa)
15671686 return -ESRCH;
15681687
....@@ -1584,7 +1703,7 @@
15841703 fi->fib_prefsrc == cfg->fc_prefsrc) &&
15851704 (!cfg->fc_protocol ||
15861705 fi->fib_protocol == cfg->fc_protocol) &&
1587
- fib_nh_match(cfg, fi, extack) == 0 &&
1706
+ fib_nh_match(net, cfg, fi, extack) == 0 &&
15881707 fib_metrics_match(cfg, fi)) {
15891708 fa_to_delete = fa;
15901709 break;
....@@ -1594,8 +1713,7 @@
15941713 if (!fa_to_delete)
15951714 return -ESRCH;
15961715
1597
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
1598
- fa_to_delete, extack);
1716
+ fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);
15991717 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
16001718 &cfg->fc_nlinfo, 0);
16011719
....@@ -1919,10 +2037,8 @@
19192037 continue;
19202038 }
19212039
1922
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
1923
- n->key,
1924
- KEYLENGTH - fa->fa_slen, fa,
1925
- NULL);
2040
+ fib_notify_alias_delete(net, n->key, &n->leaf, fa,
2041
+ NULL);
19262042 hlist_del_rcu(&fa->fa_list);
19272043 fib_release_info(fa->fa_info);
19282044 alias_free_mem_rcu(fa);
....@@ -1942,10 +2058,85 @@
19422058 return found;
19432059 }
19442060
1945
-static void fib_leaf_notify(struct net *net, struct key_vector *l,
1946
- struct fib_table *tb, struct notifier_block *nb)
2061
+/* derived from fib_trie_free */
2062
+static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
2063
+ struct nl_info *info)
2064
+{
2065
+ struct trie *t = (struct trie *)tb->tb_data;
2066
+ struct key_vector *pn = t->kv;
2067
+ unsigned long cindex = 1;
2068
+ struct fib_alias *fa;
2069
+
2070
+ for (;;) {
2071
+ struct key_vector *n;
2072
+
2073
+ if (!(cindex--)) {
2074
+ t_key pkey = pn->key;
2075
+
2076
+ if (IS_TRIE(pn))
2077
+ break;
2078
+
2079
+ pn = node_parent(pn);
2080
+ cindex = get_index(pkey, pn);
2081
+ continue;
2082
+ }
2083
+
2084
+ /* grab the next available node */
2085
+ n = get_child(pn, cindex);
2086
+ if (!n)
2087
+ continue;
2088
+
2089
+ if (IS_TNODE(n)) {
2090
+ /* record pn and cindex for leaf walking */
2091
+ pn = n;
2092
+ cindex = 1ul << n->bits;
2093
+
2094
+ continue;
2095
+ }
2096
+
2097
+ hlist_for_each_entry(fa, &n->leaf, fa_list) {
2098
+ struct fib_info *fi = fa->fa_info;
2099
+
2100
+ if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
2101
+ continue;
2102
+
2103
+ rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
2104
+ KEYLENGTH - fa->fa_slen, tb->tb_id,
2105
+ info, NLM_F_REPLACE);
2106
+
2107
+ /* call_fib_entry_notifiers will be removed when
2108
+ * in-kernel notifier is implemented and supported
2109
+ * for nexthop objects
2110
+ */
2111
+ call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
2112
+ n->key,
2113
+ KEYLENGTH - fa->fa_slen, fa,
2114
+ NULL);
2115
+ }
2116
+ }
2117
+}
2118
+
2119
+void fib_info_notify_update(struct net *net, struct nl_info *info)
2120
+{
2121
+ unsigned int h;
2122
+
2123
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2124
+ struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2125
+ struct fib_table *tb;
2126
+
2127
+ hlist_for_each_entry_rcu(tb, head, tb_hlist,
2128
+ lockdep_rtnl_is_held())
2129
+ __fib_info_notify_update(net, tb, info);
2130
+ }
2131
+}
2132
+
2133
+static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
2134
+ struct notifier_block *nb,
2135
+ struct netlink_ext_ack *extack)
19472136 {
19482137 struct fib_alias *fa;
2138
+ int last_slen = -1;
2139
+ int err;
19492140
19502141 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
19512142 struct fib_info *fi = fa->fa_info;
....@@ -1959,39 +2150,57 @@
19592150 if (tb->tb_id != fa->tb_id)
19602151 continue;
19612152
1962
- call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
1963
- KEYLENGTH - fa->fa_slen, fa);
2153
+ if (fa->fa_slen == last_slen)
2154
+ continue;
2155
+
2156
+ last_slen = fa->fa_slen;
2157
+ err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,
2158
+ l->key, KEYLENGTH - fa->fa_slen,
2159
+ fa, extack);
2160
+ if (err)
2161
+ return err;
19642162 }
2163
+ return 0;
19652164 }
19662165
1967
-static void fib_table_notify(struct net *net, struct fib_table *tb,
1968
- struct notifier_block *nb)
2166
+static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
2167
+ struct netlink_ext_ack *extack)
19692168 {
19702169 struct trie *t = (struct trie *)tb->tb_data;
19712170 struct key_vector *l, *tp = t->kv;
19722171 t_key key = 0;
2172
+ int err;
19732173
19742174 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
1975
- fib_leaf_notify(net, l, tb, nb);
2175
+ err = fib_leaf_notify(l, tb, nb, extack);
2176
+ if (err)
2177
+ return err;
19762178
19772179 key = l->key + 1;
19782180 /* stop in case of wrap around */
19792181 if (key < l->key)
19802182 break;
19812183 }
2184
+ return 0;
19822185 }
19832186
1984
-void fib_notify(struct net *net, struct notifier_block *nb)
2187
+int fib_notify(struct net *net, struct notifier_block *nb,
2188
+ struct netlink_ext_ack *extack)
19852189 {
19862190 unsigned int h;
2191
+ int err;
19872192
19882193 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
19892194 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
19902195 struct fib_table *tb;
19912196
1992
- hlist_for_each_entry_rcu(tb, head, tb_hlist)
1993
- fib_table_notify(net, tb, nb);
2197
+ hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2198
+ err = fib_table_notify(tb, nb, extack);
2199
+ if (err)
2200
+ return err;
2201
+ }
19942202 }
2203
+ return 0;
19952204 }
19962205
19972206 static void __trie_free_rcu(struct rcu_head *head)
....@@ -2012,48 +2221,93 @@
20122221 }
20132222
20142223 static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
2015
- struct sk_buff *skb, struct netlink_callback *cb)
2224
+ struct sk_buff *skb, struct netlink_callback *cb,
2225
+ struct fib_dump_filter *filter)
20162226 {
2227
+ unsigned int flags = NLM_F_MULTI;
20172228 __be32 xkey = htonl(l->key);
2229
+ int i, s_i, i_fa, s_fa, err;
20182230 struct fib_alias *fa;
2019
- int i, s_i;
2231
+
2232
+ if (filter->filter_set ||
2233
+ !filter->dump_exceptions || !filter->dump_routes)
2234
+ flags |= NLM_F_DUMP_FILTERED;
20202235
20212236 s_i = cb->args[4];
2237
+ s_fa = cb->args[5];
20222238 i = 0;
20232239
20242241 /* rcu_read_lock is held by caller */
20252241 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2026
- int err;
2242
+ struct fib_info *fi = fa->fa_info;
20272243
2028
- if (i < s_i) {
2029
- i++;
2030
- continue;
2244
+ if (i < s_i)
2245
+ goto next;
2246
+
2247
+ i_fa = 0;
2248
+
2249
+ if (tb->tb_id != fa->tb_id)
2250
+ goto next;
2251
+
2252
+ if (filter->filter_set) {
2253
+ if (filter->rt_type && fa->fa_type != filter->rt_type)
2254
+ goto next;
2255
+
2256
+ if ((filter->protocol &&
2257
+ fi->fib_protocol != filter->protocol))
2258
+ goto next;
2259
+
2260
+ if (filter->dev &&
2261
+ !fib_info_nh_uses_dev(fi, filter->dev))
2262
+ goto next;
20312263 }
20322264
2033
- if (tb->tb_id != fa->tb_id) {
2034
- i++;
2035
- continue;
2265
+ if (filter->dump_routes) {
2266
+ if (!s_fa) {
2267
+ struct fib_rt_info fri;
2268
+
2269
+ fri.fi = fi;
2270
+ fri.tb_id = tb->tb_id;
2271
+ fri.dst = xkey;
2272
+ fri.dst_len = KEYLENGTH - fa->fa_slen;
2273
+ fri.tos = fa->fa_tos;
2274
+ fri.type = fa->fa_type;
2275
+ fri.offload = fa->offload;
2276
+ fri.trap = fa->trap;
2277
+ err = fib_dump_info(skb,
2278
+ NETLINK_CB(cb->skb).portid,
2279
+ cb->nlh->nlmsg_seq,
2280
+ RTM_NEWROUTE, &fri, flags);
2281
+ if (err < 0)
2282
+ goto stop;
2283
+ }
2284
+
2285
+ i_fa++;
20362286 }
20372287
2038
- err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
2039
- cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2040
- tb->tb_id, fa->fa_type,
2041
- xkey, KEYLENGTH - fa->fa_slen,
2042
- fa->fa_tos, fa->fa_info, NLM_F_MULTI);
2043
- if (err < 0) {
2044
- cb->args[4] = i;
2045
- return err;
2288
+ if (filter->dump_exceptions) {
2289
+ err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
2290
+ &i_fa, s_fa, flags);
2291
+ if (err < 0)
2292
+ goto stop;
20462293 }
2294
+
2295
+next:
20472296 i++;
20482297 }
20492298
20502299 cb->args[4] = i;
20512300 return skb->len;
2301
+
2302
+stop:
2303
+ cb->args[4] = i;
2304
+ cb->args[5] = i_fa;
2305
+ return err;
20522306 }
20532307
20542308 /* rcu_read_lock needs to be held by the caller on the read side */
20552309 int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
2056
- struct netlink_callback *cb)
2310
+ struct netlink_callback *cb, struct fib_dump_filter *filter)
20572311 {
20582312 struct trie *t = (struct trie *)tb->tb_data;
20592313 struct key_vector *l, *tp = t->kv;
....@@ -2063,10 +2317,16 @@
20632317 int count = cb->args[2];
20642318 t_key key = cb->args[3];
20652319
2320
+ /* First time here, count and key are both always 0. Count > 0
2321
+ * and key == 0 means the dump has wrapped around and we are done.
2322
+ */
2323
+ if (count && !key)
2324
+ return skb->len;
2325
+
20662326 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
20672327 int err;
20682328
2069
- err = fn_trie_dump_leaf(l, tb, skb, cb);
2329
+ err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
20702330 if (err < 0) {
20712331 cb->args[3] = key;
20722332 cb->args[2] = count;
....@@ -2633,14 +2893,18 @@
26332893 rcu_read_unlock();
26342894 }
26352895
2636
-static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2896
+static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
26372897 {
26382898 unsigned int flags = 0;
26392899
26402900 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
26412901 flags = RTF_REJECT;
2642
- if (fi && fi->fib_nh->nh_gw)
2643
- flags |= RTF_GATEWAY;
2902
+ if (fi) {
2903
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
2904
+
2905
+ if (nhc->nhc_gw.ipv4)
2906
+ flags |= RTF_GATEWAY;
2907
+ }
26442908 if (mask == htonl(0xFFFFFFFF))
26452909 flags |= RTF_HOST;
26462910 flags |= RTF_UP;
....@@ -2671,7 +2935,7 @@
26712935 prefix = htonl(l->key);
26722936
26732937 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2674
- const struct fib_info *fi = fa->fa_info;
2938
+ struct fib_info *fi = fa->fa_info;
26752939 __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
26762940 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
26772941
....@@ -2684,26 +2948,31 @@
26842948
26852949 seq_setwidth(seq, 127);
26862950
2687
- if (fi)
2951
+ if (fi) {
2952
+ struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
2953
+ __be32 gw = 0;
2954
+
2955
+ if (nhc->nhc_gw_family == AF_INET)
2956
+ gw = nhc->nhc_gw.ipv4;
2957
+
26882958 seq_printf(seq,
26892959 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
26902960 "%d\t%08X\t%d\t%u\t%u",
2691
- fi->fib_dev ? fi->fib_dev->name : "*",
2692
- prefix,
2693
- fi->fib_nh->nh_gw, flags, 0, 0,
2961
+ nhc->nhc_dev ? nhc->nhc_dev->name : "*",
2962
+ prefix, gw, flags, 0, 0,
26942963 fi->fib_priority,
26952964 mask,
26962965 (fi->fib_advmss ?
26972966 fi->fib_advmss + 40 : 0),
26982967 fi->fib_window,
26992968 fi->fib_rtt >> 3);
2700
- else
2969
+ } else {
27012970 seq_printf(seq,
27022971 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
27032972 "%d\t%08X\t%d\t%u\t%u",
27042973 prefix, 0, flags, 0, 0, 0,
27052974 mask, 0, 0, 0);
2706
-
2975
+ }
27072976 seq_pad(seq, '\n');
27082977 }
27092978