forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 95099d4622f8cb224d94e314c7a8e0df60b13f87
kernel/drivers/net/vxlan.c
....@@ -1,11 +1,8 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * VXLAN: Virtual eXtensible Local Area Network
34 *
45 * Copyright (c) 2012-2013 Vyatta Inc.
5
- *
6
- * This program is free software; you can redistribute it and/or modify
7
- * it under the terms of the GNU General Public License version 2 as
8
- * published by the Free Software Foundation.
96 */
107
118 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
....@@ -20,6 +17,7 @@
2017 #include <linux/ethtool.h>
2118 #include <net/arp.h>
2219 #include <net/ndisc.h>
20
+#include <net/ipv6_stubs.h>
2321 #include <net/ip.h>
2422 #include <net/icmp.h>
2523 #include <net/rtnetlink.h>
....@@ -28,6 +26,7 @@
2826 #include <net/netns/generic.h>
2927 #include <net/tun_proto.h>
3028 #include <net/vxlan.h>
29
+#include <net/nexthop.h>
3130
3231 #if IS_ENABLED(CONFIG_IPV6)
3332 #include <net/ip6_tunnel.h>
....@@ -79,8 +78,13 @@
7978 u8 eth_addr[ETH_ALEN];
8079 u16 state; /* see ndm_state */
8180 __be32 vni;
82
- u8 flags; /* see ndm_flags */
81
+ u16 flags; /* see ndm_flags and below */
82
+ struct list_head nh_list;
83
+ struct nexthop __rcu *nh;
84
+ struct vxlan_dev __rcu *vdev;
8385 };
86
+
87
+#define NTF_VXLAN_ADDED_BY_USER 0x100
8488
8589 /* salt for hash table */
8690 static u32 vxlan_salt __read_mostly;
....@@ -101,22 +105,6 @@
101105 return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
102106 else
103107 return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
104
-}
105
-
106
-static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
107
-{
108
- if (ipa->sa.sa_family == AF_INET6)
109
- return ipv6_addr_any(&ipa->sin6.sin6_addr);
110
- else
111
- return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
112
-}
113
-
114
-static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
115
-{
116
- if (ipa->sa.sa_family == AF_INET6)
117
- return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
118
- else
119
- return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
120108 }
121109
122110 static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
....@@ -149,16 +137,6 @@
149137 bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
150138 {
151139 return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
152
-}
153
-
154
-static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
155
-{
156
- return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
157
-}
158
-
159
-static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
160
-{
161
- return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
162140 }
163141
164142 static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
....@@ -200,19 +178,24 @@
200178 */
201179 static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
202180 {
181
+ if (rcu_access_pointer(fdb->nh))
182
+ return NULL;
203183 return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
204184 }
205185
206186 static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
207187 {
188
+ if (rcu_access_pointer(fdb->nh))
189
+ return NULL;
208190 return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
209191 }
210192
211
-/* Find VXLAN socket based on network namespace, address family and UDP port
212
- * and enabled unshareable flags.
193
+/* Find VXLAN socket based on network namespace, address family, UDP port,
194
+ * enabled unshareable flags and socket device binding (see l3mdev with
195
+ * non-default VRF).
213196 */
214197 static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
215
- __be16 port, u32 flags)
198
+ __be16 port, u32 flags, int ifindex)
216199 {
217200 struct vxlan_sock *vs;
218201
....@@ -221,7 +204,8 @@
221204 hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
222205 if (inet_sk(vs->sock->sk)->inet_sport == port &&
223206 vxlan_get_sk_family(vs) == family &&
224
- vs->flags == flags)
207
+ vs->flags == flags &&
208
+ vs->sock->sk->sk_bound_dev_if == ifindex)
225209 return vs;
226210 }
227211 return NULL;
....@@ -261,7 +245,7 @@
261245 {
262246 struct vxlan_sock *vs;
263247
264
- vs = vxlan_find_sock(net, family, port, flags);
248
+ vs = vxlan_find_sock(net, family, port, flags, ifindex);
265249 if (!vs)
266250 return NULL;
267251
....@@ -276,9 +260,12 @@
276260 {
277261 unsigned long now = jiffies;
278262 struct nda_cacheinfo ci;
279
- struct nlmsghdr *nlh;
280
- struct ndmsg *ndm;
281263 bool send_ip, send_eth;
264
+ struct nlmsghdr *nlh;
265
+ struct nexthop *nh;
266
+ struct ndmsg *ndm;
267
+ int nh_family;
268
+ u32 nh_id;
282269
283270 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
284271 if (nlh == NULL)
....@@ -289,15 +276,29 @@
289276
290277 send_eth = send_ip = true;
291278
279
+ rcu_read_lock();
280
+ nh = rcu_dereference(fdb->nh);
281
+ if (nh) {
282
+ nh_family = nexthop_get_family(nh);
283
+ nh_id = nh->id;
284
+ }
285
+ rcu_read_unlock();
286
+
292287 if (type == RTM_GETNEIGH) {
293
- send_ip = !vxlan_addr_any(&rdst->remote_ip);
288
+ if (rdst) {
289
+ send_ip = !vxlan_addr_any(&rdst->remote_ip);
290
+ ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
291
+ } else if (nh) {
292
+ ndm->ndm_family = nh_family;
293
+ }
294294 send_eth = !is_zero_ether_addr(fdb->eth_addr);
295
- ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
296295 } else
297296 ndm->ndm_family = AF_BRIDGE;
298297 ndm->ndm_state = fdb->state;
299298 ndm->ndm_ifindex = vxlan->dev->ifindex;
300299 ndm->ndm_flags = fdb->flags;
300
+ if (rdst && rdst->offloaded)
301
+ ndm->ndm_flags |= NTF_OFFLOADED;
301302 ndm->ndm_type = RTN_UNICAST;
302303
303304 if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
....@@ -307,22 +308,29 @@
307308
308309 if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
309310 goto nla_put_failure;
311
+ if (nh) {
312
+ if (nla_put_u32(skb, NDA_NH_ID, nh_id))
313
+ goto nla_put_failure;
314
+ } else if (rdst) {
315
+ if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
316
+ &rdst->remote_ip))
317
+ goto nla_put_failure;
310318
311
- if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip))
312
- goto nla_put_failure;
319
+ if (rdst->remote_port &&
320
+ rdst->remote_port != vxlan->cfg.dst_port &&
321
+ nla_put_be16(skb, NDA_PORT, rdst->remote_port))
322
+ goto nla_put_failure;
323
+ if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
324
+ nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
325
+ goto nla_put_failure;
326
+ if (rdst->remote_ifindex &&
327
+ nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
328
+ goto nla_put_failure;
329
+ }
313330
314
- if (rdst->remote_port && rdst->remote_port != vxlan->cfg.dst_port &&
315
- nla_put_be16(skb, NDA_PORT, rdst->remote_port))
316
- goto nla_put_failure;
317
- if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
318
- nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
319
- goto nla_put_failure;
320331 if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
321332 nla_put_u32(skb, NDA_SRC_VNI,
322333 be32_to_cpu(fdb->vni)))
323
- goto nla_put_failure;
324
- if (rdst->remote_ifindex &&
325
- nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
326334 goto nla_put_failure;
327335
328336 ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
....@@ -353,8 +361,8 @@
353361 + nla_total_size(sizeof(struct nda_cacheinfo));
354362 }
355363
356
-static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
357
- struct vxlan_rdst *rd, int type)
364
+static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
365
+ struct vxlan_rdst *rd, int type)
358366 {
359367 struct net *net = dev_net(vxlan->dev);
360368 struct sk_buff *skb;
....@@ -379,6 +387,70 @@
379387 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
380388 }
381389
390
+static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
391
+ const struct vxlan_fdb *fdb,
392
+ const struct vxlan_rdst *rd,
393
+ struct netlink_ext_ack *extack,
394
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
395
+{
396
+ fdb_info->info.dev = vxlan->dev;
397
+ fdb_info->info.extack = extack;
398
+ fdb_info->remote_ip = rd->remote_ip;
399
+ fdb_info->remote_port = rd->remote_port;
400
+ fdb_info->remote_vni = rd->remote_vni;
401
+ fdb_info->remote_ifindex = rd->remote_ifindex;
402
+ memcpy(fdb_info->eth_addr, fdb->eth_addr, ETH_ALEN);
403
+ fdb_info->vni = fdb->vni;
404
+ fdb_info->offloaded = rd->offloaded;
405
+ fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER;
406
+}
407
+
408
+static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
409
+ struct vxlan_fdb *fdb,
410
+ struct vxlan_rdst *rd,
411
+ bool adding,
412
+ struct netlink_ext_ack *extack)
413
+{
414
+ struct switchdev_notifier_vxlan_fdb_info info;
415
+ enum switchdev_notifier_type notifier_type;
416
+ int ret;
417
+
418
+ if (WARN_ON(!rd))
419
+ return 0;
420
+
421
+ notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
422
+ : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
423
+ vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info);
424
+ ret = call_switchdev_notifiers(notifier_type, vxlan->dev,
425
+ &info.info, extack);
426
+ return notifier_to_errno(ret);
427
+}
428
+
429
+static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
430
+ struct vxlan_rdst *rd, int type, bool swdev_notify,
431
+ struct netlink_ext_ack *extack)
432
+{
433
+ int err;
434
+
435
+ if (swdev_notify && rd) {
436
+ switch (type) {
437
+ case RTM_NEWNEIGH:
438
+ err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
439
+ true, extack);
440
+ if (err)
441
+ return err;
442
+ break;
443
+ case RTM_DELNEIGH:
444
+ vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
445
+ false, extack);
446
+ break;
447
+ }
448
+ }
449
+
450
+ __vxlan_fdb_notify(vxlan, fdb, rd, type);
451
+ return 0;
452
+}
453
+
382454 static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
383455 {
384456 struct vxlan_dev *vxlan = netdev_priv(dev);
....@@ -390,7 +462,7 @@
390462 .remote_vni = cpu_to_be32(VXLAN_N_VID),
391463 };
392464
393
- vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
465
+ vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
394466 }
395467
396468 static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
....@@ -402,7 +474,7 @@
402474
403475 memcpy(f.eth_addr, eth_addr, ETH_ALEN);
404476
405
- vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
477
+ vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
406478 }
407479
408480 /* Hash Ethernet address */
....@@ -427,14 +499,19 @@
427499 return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
428500 }
429501
502
+static u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
503
+{
504
+ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
505
+ return eth_vni_hash(mac, vni);
506
+ else
507
+ return eth_hash(mac);
508
+}
509
+
430510 /* Hash chain to use given mac address */
431511 static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
432512 const u8 *mac, __be32 vni)
433513 {
434
- if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
435
- return &vxlan->fdb_head[eth_vni_hash(mac, vni)];
436
- else
437
- return &vxlan->fdb_head[eth_hash(mac)];
514
+ return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)];
438515 }
439516
440517 /* Look up Ethernet address in forwarding table */
....@@ -464,7 +541,7 @@
464541 struct vxlan_fdb *f;
465542
466543 f = __vxlan_find_mac(vxlan, mac, vni);
467
- if (f)
544
+ if (f && f->used != jiffies)
468545 f->used = jiffies;
469546
470547 return f;
....@@ -488,10 +565,117 @@
488565 return NULL;
489566 }
490567
568
+int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
569
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
570
+{
571
+ struct vxlan_dev *vxlan = netdev_priv(dev);
572
+ u8 eth_addr[ETH_ALEN + 2] = { 0 };
573
+ struct vxlan_rdst *rdst;
574
+ struct vxlan_fdb *f;
575
+ int rc = 0;
576
+
577
+ if (is_multicast_ether_addr(mac) ||
578
+ is_zero_ether_addr(mac))
579
+ return -EINVAL;
580
+
581
+ ether_addr_copy(eth_addr, mac);
582
+
583
+ rcu_read_lock();
584
+
585
+ f = __vxlan_find_mac(vxlan, eth_addr, vni);
586
+ if (!f) {
587
+ rc = -ENOENT;
588
+ goto out;
589
+ }
590
+
591
+ rdst = first_remote_rcu(f);
592
+ vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, NULL, fdb_info);
593
+
594
+out:
595
+ rcu_read_unlock();
596
+ return rc;
597
+}
598
+EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
599
+
600
+static int vxlan_fdb_notify_one(struct notifier_block *nb,
601
+ const struct vxlan_dev *vxlan,
602
+ const struct vxlan_fdb *f,
603
+ const struct vxlan_rdst *rdst,
604
+ struct netlink_ext_ack *extack)
605
+{
606
+ struct switchdev_notifier_vxlan_fdb_info fdb_info;
607
+ int rc;
608
+
609
+ vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, extack, &fdb_info);
610
+ rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
611
+ &fdb_info);
612
+ return notifier_to_errno(rc);
613
+}
614
+
615
+int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
616
+ struct notifier_block *nb,
617
+ struct netlink_ext_ack *extack)
618
+{
619
+ struct vxlan_dev *vxlan;
620
+ struct vxlan_rdst *rdst;
621
+ struct vxlan_fdb *f;
622
+ unsigned int h;
623
+ int rc = 0;
624
+
625
+ if (!netif_is_vxlan(dev))
626
+ return -EINVAL;
627
+ vxlan = netdev_priv(dev);
628
+
629
+ for (h = 0; h < FDB_HASH_SIZE; ++h) {
630
+ spin_lock_bh(&vxlan->hash_lock[h]);
631
+ hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) {
632
+ if (f->vni == vni) {
633
+ list_for_each_entry(rdst, &f->remotes, list) {
634
+ rc = vxlan_fdb_notify_one(nb, vxlan,
635
+ f, rdst,
636
+ extack);
637
+ if (rc)
638
+ goto unlock;
639
+ }
640
+ }
641
+ }
642
+ spin_unlock_bh(&vxlan->hash_lock[h]);
643
+ }
644
+ return 0;
645
+
646
+unlock:
647
+ spin_unlock_bh(&vxlan->hash_lock[h]);
648
+ return rc;
649
+}
650
+EXPORT_SYMBOL_GPL(vxlan_fdb_replay);
651
+
652
+void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni)
653
+{
654
+ struct vxlan_dev *vxlan;
655
+ struct vxlan_rdst *rdst;
656
+ struct vxlan_fdb *f;
657
+ unsigned int h;
658
+
659
+ if (!netif_is_vxlan(dev))
660
+ return;
661
+ vxlan = netdev_priv(dev);
662
+
663
+ for (h = 0; h < FDB_HASH_SIZE; ++h) {
664
+ spin_lock_bh(&vxlan->hash_lock[h]);
665
+ hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist)
666
+ if (f->vni == vni)
667
+ list_for_each_entry(rdst, &f->remotes, list)
668
+ rdst->offloaded = false;
669
+ spin_unlock_bh(&vxlan->hash_lock[h]);
670
+ }
671
+
672
+}
673
+EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload);
674
+
491675 /* Replace destination of unicast mac */
492676 static int vxlan_fdb_replace(struct vxlan_fdb *f,
493677 union vxlan_addr *ip, __be16 port, __be32 vni,
494
- __u32 ifindex)
678
+ __u32 ifindex, struct vxlan_rdst *oldrd)
495679 {
496680 struct vxlan_rdst *rd;
497681
....@@ -503,11 +687,13 @@
503687 if (!rd)
504688 return 0;
505689
690
+ *oldrd = *rd;
506691 dst_cache_reset(&rd->dst_cache);
507692 rd->remote_ip = *ip;
508693 rd->remote_port = port;
509694 rd->remote_vni = vni;
510695 rd->remote_ifindex = ifindex;
696
+ rd->offloaded = false;
511697 return 1;
512698 }
513699
....@@ -524,15 +710,16 @@
524710
525711 rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
526712 if (rd == NULL)
527
- return -ENOBUFS;
713
+ return -ENOMEM;
528714
529715 if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
530716 kfree(rd);
531
- return -ENOBUFS;
717
+ return -ENOMEM;
532718 }
533719
534720 rd->remote_ip = *ip;
535721 rd->remote_port = port;
722
+ rd->offloaded = false;
536723 rd->remote_vni = vni;
537724 rd->remote_ifindex = ifindex;
538725
....@@ -637,9 +824,9 @@
637824 return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
638825 }
639826
640
-static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
641
- const u8 *mac, __u16 state,
642
- __be32 src_vni, __u8 ndm_flags)
827
+static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
828
+ __u16 state, __be32 src_vni,
829
+ __u16 ndm_flags)
643830 {
644831 struct vxlan_fdb *f;
645832
....@@ -650,17 +837,93 @@
650837 f->flags = ndm_flags;
651838 f->updated = f->used = jiffies;
652839 f->vni = src_vni;
840
+ f->nh = NULL;
841
+ RCU_INIT_POINTER(f->vdev, vxlan);
842
+ INIT_LIST_HEAD(&f->nh_list);
653843 INIT_LIST_HEAD(&f->remotes);
654844 memcpy(f->eth_addr, mac, ETH_ALEN);
655845
656846 return f;
657847 }
658848
849
+static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac,
850
+ __be32 src_vni, struct vxlan_fdb *f)
851
+{
852
+ ++vxlan->addrcnt;
853
+ hlist_add_head_rcu(&f->hlist,
854
+ vxlan_fdb_head(vxlan, mac, src_vni));
855
+}
856
+
857
+static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
858
+ u32 nhid, struct netlink_ext_ack *extack)
859
+{
860
+ struct nexthop *old_nh = rtnl_dereference(fdb->nh);
861
+ struct nexthop *nh;
862
+ int err = -EINVAL;
863
+
864
+ if (old_nh && old_nh->id == nhid)
865
+ return 0;
866
+
867
+ nh = nexthop_find_by_id(vxlan->net, nhid);
868
+ if (!nh) {
869
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
870
+ goto err_inval;
871
+ }
872
+
873
+ if (nh) {
874
+ if (!nexthop_get(nh)) {
875
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
876
+ nh = NULL;
877
+ goto err_inval;
878
+ }
879
+ if (!nexthop_is_fdb(nh)) {
880
+ NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop");
881
+ goto err_inval;
882
+ }
883
+
884
+ if (!nexthop_is_multipath(nh)) {
885
+ NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
886
+ goto err_inval;
887
+ }
888
+
889
+ /* check nexthop group family */
890
+ switch (vxlan->default_dst.remote_ip.sa.sa_family) {
891
+ case AF_INET:
892
+ if (!nexthop_has_v4(nh)) {
893
+ err = -EAFNOSUPPORT;
894
+ NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
895
+ goto err_inval;
896
+ }
897
+ break;
898
+ case AF_INET6:
899
+ if (nexthop_has_v4(nh)) {
900
+ err = -EAFNOSUPPORT;
901
+ NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
902
+ goto err_inval;
903
+ }
904
+ }
905
+ }
906
+
907
+ if (old_nh) {
908
+ list_del_rcu(&fdb->nh_list);
909
+ nexthop_put(old_nh);
910
+ }
911
+ rcu_assign_pointer(fdb->nh, nh);
912
+ list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list);
913
+ return 1;
914
+
915
+err_inval:
916
+ if (nh)
917
+ nexthop_put(nh);
918
+ return err;
919
+}
920
+
659921 static int vxlan_fdb_create(struct vxlan_dev *vxlan,
660922 const u8 *mac, union vxlan_addr *ip,
661923 __u16 state, __be16 port, __be32 src_vni,
662
- __be32 vni, __u32 ifindex, __u8 ndm_flags,
663
- struct vxlan_fdb **fdb)
924
+ __be32 vni, __u32 ifindex, __u16 ndm_flags,
925
+ u32 nhid, struct vxlan_fdb **fdb,
926
+ struct netlink_ext_ack *extack)
664927 {
665928 struct vxlan_rdst *rd = NULL;
666929 struct vxlan_fdb *f;
....@@ -675,98 +938,33 @@
675938 if (!f)
676939 return -ENOMEM;
677940
678
- rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
679
- if (rc < 0) {
680
- kfree(f);
681
- return rc;
682
- }
683
-
684
- ++vxlan->addrcnt;
685
- hlist_add_head_rcu(&f->hlist,
686
- vxlan_fdb_head(vxlan, mac, src_vni));
941
+ if (nhid)
942
+ rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
943
+ else
944
+ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
945
+ if (rc < 0)
946
+ goto errout;
687947
688948 *fdb = f;
689949
690950 return 0;
951
+
952
+errout:
953
+ kfree(f);
954
+ return rc;
691955 }
692956
693
-/* Add new entry to forwarding table -- assumes lock held */
694
-static int vxlan_fdb_update(struct vxlan_dev *vxlan,
695
- const u8 *mac, union vxlan_addr *ip,
696
- __u16 state, __u16 flags,
697
- __be16 port, __be32 src_vni, __be32 vni,
698
- __u32 ifindex, __u8 ndm_flags)
957
+static void __vxlan_fdb_free(struct vxlan_fdb *f)
699958 {
700
- struct vxlan_rdst *rd = NULL;
701
- struct vxlan_fdb *f;
702
- int notify = 0;
703
- int rc;
704
-
705
- f = __vxlan_find_mac(vxlan, mac, src_vni);
706
- if (f) {
707
- if (flags & NLM_F_EXCL) {
708
- netdev_dbg(vxlan->dev,
709
- "lost race to create %pM\n", mac);
710
- return -EEXIST;
711
- }
712
- if (f->state != state) {
713
- f->state = state;
714
- f->updated = jiffies;
715
- notify = 1;
716
- }
717
- if (f->flags != ndm_flags) {
718
- f->flags = ndm_flags;
719
- f->updated = jiffies;
720
- notify = 1;
721
- }
722
- if ((flags & NLM_F_REPLACE)) {
723
- /* Only change unicasts */
724
- if (!(is_multicast_ether_addr(f->eth_addr) ||
725
- is_zero_ether_addr(f->eth_addr))) {
726
- notify |= vxlan_fdb_replace(f, ip, port, vni,
727
- ifindex);
728
- } else
729
- return -EOPNOTSUPP;
730
- }
731
- if ((flags & NLM_F_APPEND) &&
732
- (is_multicast_ether_addr(f->eth_addr) ||
733
- is_zero_ether_addr(f->eth_addr))) {
734
- rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
735
-
736
- if (rc < 0)
737
- return rc;
738
- notify |= rc;
739
- }
740
- } else {
741
- if (!(flags & NLM_F_CREATE))
742
- return -ENOENT;
743
-
744
- /* Disallow replace to add a multicast entry */
745
- if ((flags & NLM_F_REPLACE) &&
746
- (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
747
- return -EOPNOTSUPP;
748
-
749
- netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
750
- rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
751
- vni, ifindex, ndm_flags, &f);
752
- if (rc < 0)
753
- return rc;
754
- notify = 1;
755
- }
756
-
757
- if (notify) {
758
- if (rd == NULL)
759
- rd = first_remote_rtnl(f);
760
- vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH);
761
- }
762
-
763
- return 0;
764
-}
765
-
766
-static void vxlan_fdb_free(struct rcu_head *head)
767
-{
768
- struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
769959 struct vxlan_rdst *rd, *nd;
960
+ struct nexthop *nh;
961
+
962
+ nh = rcu_dereference_raw(f->nh);
963
+ if (nh) {
964
+ rcu_assign_pointer(f->nh, NULL);
965
+ rcu_assign_pointer(f->vdev, NULL);
966
+ nexthop_put(nh);
967
+ }
770968
771969 list_for_each_entry_safe(rd, nd, &f->remotes, list) {
772970 dst_cache_destroy(&rd->dst_cache);
....@@ -775,17 +973,33 @@
775973 kfree(f);
776974 }
777975
778
-static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
779
- bool do_notify)
976
+static void vxlan_fdb_free(struct rcu_head *head)
780977 {
781
- netdev_dbg(vxlan->dev,
782
- "delete %pM\n", f->eth_addr);
978
+ struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
979
+
980
+ __vxlan_fdb_free(f);
981
+}
982
+
983
+static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
984
+ bool do_notify, bool swdev_notify)
985
+{
986
+ struct vxlan_rdst *rd;
987
+
988
+ netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);
783989
784990 --vxlan->addrcnt;
785
- if (do_notify)
786
- vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
991
+ if (do_notify) {
992
+ if (rcu_access_pointer(f->nh))
993
+ vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH,
994
+ swdev_notify, NULL);
995
+ else
996
+ list_for_each_entry(rd, &f->remotes, list)
997
+ vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
998
+ swdev_notify, NULL);
999
+ }
7871000
7881001 hlist_del_rcu(&f->hlist);
1002
+ list_del_rcu(&f->nh_list);
7891003 call_rcu(&f->rcu, vxlan_fdb_free);
7901004 }
7911005
....@@ -797,20 +1011,193 @@
7971011 kfree(rd);
7981012 }
7991013
1014
+static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
1015
+ union vxlan_addr *ip,
1016
+ __u16 state, __u16 flags,
1017
+ __be16 port, __be32 vni,
1018
+ __u32 ifindex, __u16 ndm_flags,
1019
+ struct vxlan_fdb *f, u32 nhid,
1020
+ bool swdev_notify,
1021
+ struct netlink_ext_ack *extack)
1022
+{
1023
+ __u16 fdb_flags = (ndm_flags & ~NTF_USE);
1024
+ struct vxlan_rdst *rd = NULL;
1025
+ struct vxlan_rdst oldrd;
1026
+ int notify = 0;
1027
+ int rc = 0;
1028
+ int err;
1029
+
1030
+ if (nhid && !rcu_access_pointer(f->nh)) {
1031
+ NL_SET_ERR_MSG(extack,
1032
+ "Cannot replace an existing non nexthop fdb with a nexthop");
1033
+ return -EOPNOTSUPP;
1034
+ }
1035
+
1036
+ if (nhid && (flags & NLM_F_APPEND)) {
1037
+ NL_SET_ERR_MSG(extack,
1038
+ "Cannot append to a nexthop fdb");
1039
+ return -EOPNOTSUPP;
1040
+ }
1041
+
1042
+ /* Do not allow an externally learned entry to take over an entry added
1043
+ * by the user.
1044
+ */
1045
+ if (!(fdb_flags & NTF_EXT_LEARNED) ||
1046
+ !(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
1047
+ if (f->state != state) {
1048
+ f->state = state;
1049
+ f->updated = jiffies;
1050
+ notify = 1;
1051
+ }
1052
+ if (f->flags != fdb_flags) {
1053
+ f->flags = fdb_flags;
1054
+ f->updated = jiffies;
1055
+ notify = 1;
1056
+ }
1057
+ }
1058
+
1059
+ if ((flags & NLM_F_REPLACE)) {
1060
+ /* Only change unicasts */
1061
+ if (!(is_multicast_ether_addr(f->eth_addr) ||
1062
+ is_zero_ether_addr(f->eth_addr))) {
1063
+ if (nhid) {
1064
+ rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
1065
+ if (rc < 0)
1066
+ return rc;
1067
+ } else {
1068
+ rc = vxlan_fdb_replace(f, ip, port, vni,
1069
+ ifindex, &oldrd);
1070
+ }
1071
+ notify |= rc;
1072
+ } else {
1073
+ NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries");
1074
+ return -EOPNOTSUPP;
1075
+ }
1076
+ }
1077
+ if ((flags & NLM_F_APPEND) &&
1078
+ (is_multicast_ether_addr(f->eth_addr) ||
1079
+ is_zero_ether_addr(f->eth_addr))) {
1080
+ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
1081
+
1082
+ if (rc < 0)
1083
+ return rc;
1084
+ notify |= rc;
1085
+ }
1086
+
1087
+ if (ndm_flags & NTF_USE)
1088
+ f->used = jiffies;
1089
+
1090
+ if (notify) {
1091
+ if (rd == NULL)
1092
+ rd = first_remote_rtnl(f);
1093
+
1094
+ err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
1095
+ swdev_notify, extack);
1096
+ if (err)
1097
+ goto err_notify;
1098
+ }
1099
+
1100
+ return 0;
1101
+
1102
+err_notify:
1103
+ if (nhid)
1104
+ return err;
1105
+ if ((flags & NLM_F_REPLACE) && rc)
1106
+ *rd = oldrd;
1107
+ else if ((flags & NLM_F_APPEND) && rc) {
1108
+ list_del_rcu(&rd->list);
1109
+ call_rcu(&rd->rcu, vxlan_dst_free);
1110
+ }
1111
+ return err;
1112
+}
1113
+
1114
+static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
1115
+ const u8 *mac, union vxlan_addr *ip,
1116
+ __u16 state, __u16 flags,
1117
+ __be16 port, __be32 src_vni, __be32 vni,
1118
+ __u32 ifindex, __u16 ndm_flags, u32 nhid,
1119
+ bool swdev_notify,
1120
+ struct netlink_ext_ack *extack)
1121
+{
1122
+ __u16 fdb_flags = (ndm_flags & ~NTF_USE);
1123
+ struct vxlan_fdb *f;
1124
+ int rc;
1125
+
1126
+ /* Disallow replace to add a multicast entry */
1127
+ if ((flags & NLM_F_REPLACE) &&
1128
+ (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
1129
+ return -EOPNOTSUPP;
1130
+
1131
+ netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
1132
+ rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
1133
+ vni, ifindex, fdb_flags, nhid, &f, extack);
1134
+ if (rc < 0)
1135
+ return rc;
1136
+
1137
+ vxlan_fdb_insert(vxlan, mac, src_vni, f);
1138
+ rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH,
1139
+ swdev_notify, extack);
1140
+ if (rc)
1141
+ goto err_notify;
1142
+
1143
+ return 0;
1144
+
1145
+err_notify:
1146
+ vxlan_fdb_destroy(vxlan, f, false, false);
1147
+ return rc;
1148
+}
1149
+
1150
+/* Add new entry to forwarding table -- assumes lock held */
1151
+static int vxlan_fdb_update(struct vxlan_dev *vxlan,
1152
+ const u8 *mac, union vxlan_addr *ip,
1153
+ __u16 state, __u16 flags,
1154
+ __be16 port, __be32 src_vni, __be32 vni,
1155
+ __u32 ifindex, __u16 ndm_flags, u32 nhid,
1156
+ bool swdev_notify,
1157
+ struct netlink_ext_ack *extack)
1158
+{
1159
+ struct vxlan_fdb *f;
1160
+
1161
+ f = __vxlan_find_mac(vxlan, mac, src_vni);
1162
+ if (f) {
1163
+ if (flags & NLM_F_EXCL) {
1164
+ netdev_dbg(vxlan->dev,
1165
+ "lost race to create %pM\n", mac);
1166
+ return -EEXIST;
1167
+ }
1168
+
1169
+ return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
1170
+ vni, ifindex, ndm_flags, f,
1171
+ nhid, swdev_notify, extack);
1172
+ } else {
1173
+ if (!(flags & NLM_F_CREATE))
1174
+ return -ENOENT;
1175
+
1176
+ return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
1177
+ port, src_vni, vni, ifindex,
1178
+ ndm_flags, nhid, swdev_notify,
1179
+ extack);
1180
+ }
1181
+}
1182
+
8001183 static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
801
- struct vxlan_rdst *rd)
1184
+ struct vxlan_rdst *rd, bool swdev_notify)
8021185 {
8031186 list_del_rcu(&rd->list);
804
- vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
1187
+ vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify, NULL);
8051188 call_rcu(&rd->rcu, vxlan_dst_free);
8061189 }
8071190
8081191 static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
8091192 union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
810
- __be32 *vni, u32 *ifindex)
1193
+ __be32 *vni, u32 *ifindex, u32 *nhid)
8111194 {
8121195 struct net *net = dev_net(vxlan->dev);
8131196 int err;
1197
+
1198
+ if (tb[NDA_NH_ID] && (tb[NDA_DST] || tb[NDA_VNI] || tb[NDA_IFINDEX] ||
1199
+ tb[NDA_PORT]))
1200
+ return -EINVAL;
8141201
8151202 if (tb[NDA_DST]) {
8161203 err = vxlan_nla_get_addr(ip, tb[NDA_DST]);
....@@ -818,6 +1205,7 @@
8181205 return err;
8191206 } else {
8201207 union vxlan_addr *remote = &vxlan->default_dst.remote_ip;
1208
+
8211209 if (remote->sa.sa_family == AF_INET) {
8221210 ip->sin.sin_addr.s_addr = htonl(INADDR_ANY);
8231211 ip->sa.sa_family = AF_INET;
....@@ -866,20 +1254,27 @@
8661254 *ifindex = 0;
8671255 }
8681256
1257
+ if (tb[NDA_NH_ID])
1258
+ *nhid = nla_get_u32(tb[NDA_NH_ID]);
1259
+ else
1260
+ *nhid = 0;
1261
+
8691262 return 0;
8701263 }
8711264
8721265 /* Add static entry (via netlink) */
8731266 static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
8741267 struct net_device *dev,
875
- const unsigned char *addr, u16 vid, u16 flags)
1268
+ const unsigned char *addr, u16 vid, u16 flags,
1269
+ struct netlink_ext_ack *extack)
8761270 {
8771271 struct vxlan_dev *vxlan = netdev_priv(dev);
8781272 /* struct net *net = dev_net(vxlan->dev); */
8791273 union vxlan_addr ip;
8801274 __be16 port;
8811275 __be32 src_vni, vni;
882
- u32 ifindex;
1276
+ u32 ifindex, nhid;
1277
+ u32 hash_index;
8831278 int err;
8841279
8851280 if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
....@@ -888,20 +1283,24 @@
8881283 return -EINVAL;
8891284 }
8901285
891
- if (tb[NDA_DST] == NULL)
1286
+ if (!tb || (!tb[NDA_DST] && !tb[NDA_NH_ID]))
8921287 return -EINVAL;
8931288
894
- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
1289
+ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
1290
+ &nhid);
8951291 if (err)
8961292 return err;
8971293
8981294 if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
8991295 return -EAFNOSUPPORT;
9001296
901
- spin_lock_bh(&vxlan->hash_lock);
1297
+ hash_index = fdb_head_index(vxlan, addr, src_vni);
1298
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
9021299 err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
903
- port, src_vni, vni, ifindex, ndm->ndm_flags);
904
- spin_unlock_bh(&vxlan->hash_lock);
1300
+ port, src_vni, vni, ifindex,
1301
+ ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
1302
+ nhid, true, extack);
1303
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
9051304
9061305 return err;
9071306 }
....@@ -909,10 +1308,10 @@
9091308 static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
9101309 const unsigned char *addr, union vxlan_addr ip,
9111310 __be16 port, __be32 src_vni, __be32 vni,
912
- u32 ifindex, u16 vid)
1311
+ u32 ifindex, bool swdev_notify)
9131312 {
914
- struct vxlan_fdb *f;
9151313 struct vxlan_rdst *rd = NULL;
1314
+ struct vxlan_fdb *f;
9161315 int err = -ENOENT;
9171316
9181317 f = vxlan_find_mac(vxlan, addr, src_vni);
....@@ -929,11 +1328,11 @@
9291328 * otherwise destroy the fdb entry
9301329 */
9311330 if (rd && !list_is_singular(&f->remotes)) {
932
- vxlan_fdb_dst_destroy(vxlan, f, rd);
1331
+ vxlan_fdb_dst_destroy(vxlan, f, rd, swdev_notify);
9331332 goto out;
9341333 }
9351334
936
- vxlan_fdb_destroy(vxlan, f, true);
1335
+ vxlan_fdb_destroy(vxlan, f, true, swdev_notify);
9371336
9381337 out:
9391338 return 0;
....@@ -947,18 +1346,21 @@
9471346 struct vxlan_dev *vxlan = netdev_priv(dev);
9481347 union vxlan_addr ip;
9491348 __be32 src_vni, vni;
1349
+ u32 ifindex, nhid;
1350
+ u32 hash_index;
9501351 __be16 port;
951
- u32 ifindex;
9521352 int err;
9531353
954
- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
1354
+ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
1355
+ &nhid);
9551356 if (err)
9561357 return err;
9571358
958
- spin_lock_bh(&vxlan->hash_lock);
1359
+ hash_index = fdb_head_index(vxlan, addr, src_vni);
1360
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
9591361 err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
960
- vid);
961
- spin_unlock_bh(&vxlan->hash_lock);
1362
+ true);
1363
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
9621364
9631365 return err;
9641366 }
....@@ -978,6 +1380,23 @@
9781380 rcu_read_lock();
9791381 hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
9801382 struct vxlan_rdst *rd;
1383
+
1384
+ if (rcu_access_pointer(f->nh)) {
1385
+ if (*idx < cb->args[2])
1386
+ goto skip_nh;
1387
+ err = vxlan_fdb_info(skb, vxlan, f,
1388
+ NETLINK_CB(cb->skb).portid,
1389
+ cb->nlh->nlmsg_seq,
1390
+ RTM_NEWNEIGH,
1391
+ NLM_F_MULTI, NULL);
1392
+ if (err < 0) {
1393
+ rcu_read_unlock();
1394
+ goto out;
1395
+ }
1396
+skip_nh:
1397
+ *idx += 1;
1398
+ continue;
1399
+ }
9811400
9821401 list_for_each_entry_rcu(rd, &f->remotes, list) {
9831402 if (*idx < cb->args[2])
....@@ -999,6 +1418,39 @@
9991418 rcu_read_unlock();
10001419 }
10011420 out:
1421
+ return err;
1422
+}
1423
+
1424
+static int vxlan_fdb_get(struct sk_buff *skb,
1425
+ struct nlattr *tb[],
1426
+ struct net_device *dev,
1427
+ const unsigned char *addr,
1428
+ u16 vid, u32 portid, u32 seq,
1429
+ struct netlink_ext_ack *extack)
1430
+{
1431
+ struct vxlan_dev *vxlan = netdev_priv(dev);
1432
+ struct vxlan_fdb *f;
1433
+ __be32 vni;
1434
+ int err;
1435
+
1436
+ if (tb[NDA_VNI])
1437
+ vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
1438
+ else
1439
+ vni = vxlan->default_dst.remote_vni;
1440
+
1441
+ rcu_read_lock();
1442
+
1443
+ f = __vxlan_find_mac(vxlan, addr, vni);
1444
+ if (!f) {
1445
+ NL_SET_ERR_MSG(extack, "Fdb entry not found");
1446
+ err = -ENOENT;
1447
+ goto errout;
1448
+ }
1449
+
1450
+ err = vxlan_fdb_info(skb, vxlan, f, portid, seq,
1451
+ RTM_NEWNEIGH, 0, first_remote_rcu(f));
1452
+errout:
1453
+ rcu_read_unlock();
10021454 return err;
10031455 }
10041456
....@@ -1032,6 +1484,10 @@
10321484 if (f->state & (NUD_PERMANENT | NUD_NOARP))
10331485 return true;
10341486
1487
+ /* Don't override an fdb with nexthop with a learnt entry */
1488
+ if (rcu_access_pointer(f->nh))
1489
+ return true;
1490
+
10351491 if (net_ratelimit())
10361492 netdev_info(dev,
10371493 "%pM migrated from %pIS to %pIS\n",
....@@ -1039,10 +1495,12 @@
10391495
10401496 rdst->remote_ip = *src_ip;
10411497 f->updated = jiffies;
1042
- vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH);
1498
+ vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
10431499 } else {
1500
+ u32 hash_index = fdb_head_index(vxlan, src_mac, vni);
1501
+
10441502 /* learned new entry */
1045
- spin_lock(&vxlan->hash_lock);
1503
+ spin_lock(&vxlan->hash_lock[hash_index]);
10461504
10471505 /* close off race between vxlan_flush and incoming packets */
10481506 if (netif_running(dev))
....@@ -1052,8 +1510,8 @@
10521510 vxlan->cfg.dst_port,
10531511 vni,
10541512 vxlan->default_dst.remote_vni,
1055
- ifindex, NTF_SELF);
1056
- spin_unlock(&vxlan->hash_lock);
1513
+ ifindex, NTF_SELF, 0, true, NULL);
1514
+ spin_unlock(&vxlan->hash_lock[hash_index]);
10571515 }
10581516
10591517 return false;
....@@ -1368,7 +1826,6 @@
13681826 /* Callback from net/ipv4/udp.c to receive packets */
13691827 static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
13701828 {
1371
- struct pcpu_sw_netstats *stats;
13721829 struct vxlan_dev *vxlan;
13731830 struct vxlan_sock *vs;
13741831 struct vxlanhdr unparsed;
....@@ -1416,6 +1873,10 @@
14161873
14171874 if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
14181875 !net_eq(vxlan->net, dev_net(vxlan->dev))))
1876
+ goto drop;
1877
+
1878
+ if (vs->flags & VXLAN_F_REMCSUM_RX)
1879
+ if (unlikely(!vxlan_remcsum(&unparsed, skb, vs->flags)))
14191880 goto drop;
14201881
14211882 if (vxlan_collect_metadata(vs)) {
....@@ -1434,9 +1895,6 @@
14341895 memset(md, 0, sizeof(*md));
14351896 }
14361897
1437
- if (vs->flags & VXLAN_F_REMCSUM_RX)
1438
- if (!vxlan_remcsum(&unparsed, skb, vs->flags))
1439
- goto drop;
14401898 if (vs->flags & VXLAN_F_GBP)
14411899 vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
14421900 /* Note that GBP and GPE can never be active together. This is
....@@ -1481,12 +1939,7 @@
14811939 goto drop;
14821940 }
14831941
1484
- stats = this_cpu_ptr(vxlan->dev->tstats);
1485
- u64_stats_update_begin(&stats->syncp);
1486
- stats->rx_packets++;
1487
- stats->rx_bytes += skb->len;
1488
- u64_stats_update_end(&stats->syncp);
1489
-
1942
+ dev_sw_netstats_rx_add(vxlan->dev, skb->len);
14901943 gro_cells_receive(&vxlan->gro_cells, skb);
14911944
14921945 rcu_read_unlock();
....@@ -1496,6 +1949,34 @@
14961949 drop:
14971950 /* Consume bad packet */
14981951 kfree_skb(skb);
1952
+ return 0;
1953
+}
1954
+
1955
+/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors */
1956
+static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
1957
+{
1958
+ struct vxlan_dev *vxlan;
1959
+ struct vxlan_sock *vs;
1960
+ struct vxlanhdr *hdr;
1961
+ __be32 vni;
1962
+
1963
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + VXLAN_HLEN))
1964
+ return -EINVAL;
1965
+
1966
+ hdr = vxlan_hdr(skb);
1967
+
1968
+ if (!(hdr->vx_flags & VXLAN_HF_VNI))
1969
+ return -EINVAL;
1970
+
1971
+ vs = rcu_dereference_sk_user_data(sk);
1972
+ if (!vs)
1973
+ return -ENOENT;
1974
+
1975
+ vni = vxlan_vni(hdr->vx_vni);
1976
+ vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
1977
+ if (!vxlan)
1978
+ return -ENOENT;
1979
+
14991980 return 0;
15001981 }
15011982
....@@ -1941,7 +2422,7 @@
19412422 fl4.fl4_sport = sport;
19422423
19432424 rt = ip_route_output_key(vxlan->net, &fl4);
1944
- if (likely(!IS_ERR(rt))) {
2425
+ if (!IS_ERR(rt)) {
19452426 if (rt->dst.dev == dev) {
19462427 netdev_dbg(dev, "circular route to %pI4\n", &daddr);
19472428 ip_rt_put(rt);
....@@ -2017,7 +2498,8 @@
20172498
20182499 /* Bypass encapsulation if the destination is local */
20192500 static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
2020
- struct vxlan_dev *dst_vxlan, __be32 vni)
2501
+ struct vxlan_dev *dst_vxlan, __be32 vni,
2502
+ bool snoop)
20212503 {
20222504 struct pcpu_sw_netstats *tx_stats, *rx_stats;
20232505 union vxlan_addr loopback;
....@@ -2049,7 +2531,7 @@
20492531 goto drop;
20502532 }
20512533
2052
- if (dst_vxlan->cfg.flags & VXLAN_F_LEARN)
2534
+ if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
20532535 vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
20542536
20552537 u64_stats_update_begin(&tx_stats->syncp);
....@@ -2098,7 +2580,7 @@
20982580
20992581 return -ENOENT;
21002582 }
2101
- vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni);
2583
+ vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
21022584 return 1;
21032585 }
21042586
....@@ -2134,7 +2616,8 @@
21342616 if (vxlan_addr_any(dst)) {
21352617 if (did_rsc) {
21362618 /* short-circuited back to local bridge */
2137
- vxlan_encap_bypass(skb, vxlan, vxlan, default_vni);
2619
+ vxlan_encap_bypass(skb, vxlan, vxlan,
2620
+ default_vni, true);
21382621 return;
21392622 }
21402623 goto drop;
....@@ -2201,6 +2684,9 @@
22012684 struct rtable *rt;
22022685 __be16 df = 0;
22032686
2687
+ if (!ifindex)
2688
+ ifindex = sock4->sock->sk->sk_bound_dev_if;
2689
+
22042690 rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
22052691 dst->sin.sin_addr.s_addr,
22062692 &local_ip.sin.sin_addr.s_addr,
....@@ -2211,19 +2697,51 @@
22112697 goto tx_error;
22122698 }
22132699
2214
- /* Bypass encapsulation if the destination is local */
22152700 if (!info) {
2701
+ /* Bypass encapsulation if the destination is local */
22162702 err = encap_bypass_if_local(skb, dev, vxlan, dst,
22172703 dst_port, ifindex, vni,
22182704 &rt->dst, rt->rt_flags);
22192705 if (err)
22202706 goto out_unlock;
2707
+
2708
+ if (vxlan->cfg.df == VXLAN_DF_SET) {
2709
+ df = htons(IP_DF);
2710
+ } else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
2711
+ struct ethhdr *eth = eth_hdr(skb);
2712
+
2713
+ if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
2714
+ (ntohs(eth->h_proto) == ETH_P_IP &&
2715
+ old_iph->frag_off & htons(IP_DF)))
2716
+ df = htons(IP_DF);
2717
+ }
22212718 } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
22222719 df = htons(IP_DF);
22232720 }
22242721
22252722 ndst = &rt->dst;
2226
- skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
2723
+ err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
2724
+ netif_is_any_bridge_port(dev));
2725
+ if (err < 0) {
2726
+ goto tx_error;
2727
+ } else if (err) {
2728
+ if (info) {
2729
+ struct ip_tunnel_info *unclone;
2730
+ struct in_addr src, dst;
2731
+
2732
+ unclone = skb_tunnel_info_unclone(skb);
2733
+ if (unlikely(!unclone))
2734
+ goto tx_error;
2735
+
2736
+ src = remote_ip.sin.sin_addr;
2737
+ dst = local_ip.sin.sin_addr;
2738
+ unclone->key.u.ipv4.src = src.s_addr;
2739
+ unclone->key.u.ipv4.dst = dst.s_addr;
2740
+ }
2741
+ vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
2742
+ dst_release(ndst);
2743
+ goto out_unlock;
2744
+ }
22272745
22282746 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
22292747 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
....@@ -2238,6 +2756,9 @@
22382756 #if IS_ENABLED(CONFIG_IPV6)
22392757 } else {
22402758 struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
2759
+
2760
+ if (!ifindex)
2761
+ ifindex = sock6->sock->sk->sk_bound_dev_if;
22412762
22422763 ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
22432764 label, &dst->sin6.sin6_addr,
....@@ -2260,7 +2781,29 @@
22602781 goto out_unlock;
22612782 }
22622783
2263
- skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
2784
+ err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
2785
+ netif_is_any_bridge_port(dev));
2786
+ if (err < 0) {
2787
+ goto tx_error;
2788
+ } else if (err) {
2789
+ if (info) {
2790
+ struct ip_tunnel_info *unclone;
2791
+ struct in6_addr src, dst;
2792
+
2793
+ unclone = skb_tunnel_info_unclone(skb);
2794
+ if (unlikely(!unclone))
2795
+ goto tx_error;
2796
+
2797
+ src = remote_ip.sin6.sin6_addr;
2798
+ dst = local_ip.sin6.sin6_addr;
2799
+ unclone->key.u.ipv6.src = src;
2800
+ unclone->key.u.ipv6.dst = dst;
2801
+ }
2802
+
2803
+ vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
2804
+ dst_release(ndst);
2805
+ goto out_unlock;
2806
+ }
22642807
22652808 tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
22662809 ttl = ttl ? : ip6_dst_hoplimit(ndst);
....@@ -2294,6 +2837,38 @@
22942837 dst_release(ndst);
22952838 dev->stats.tx_errors++;
22962839 kfree_skb(skb);
2840
+}
2841
+
2842
+static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
2843
+ struct vxlan_fdb *f, __be32 vni, bool did_rsc)
2844
+{
2845
+ struct vxlan_rdst nh_rdst;
2846
+ struct nexthop *nh;
2847
+ bool do_xmit;
2848
+ u32 hash;
2849
+
2850
+ memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
2851
+ hash = skb_get_hash(skb);
2852
+
2853
+ rcu_read_lock();
2854
+ nh = rcu_dereference(f->nh);
2855
+ if (!nh) {
2856
+ rcu_read_unlock();
2857
+ goto drop;
2858
+ }
2859
+ do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
2860
+ rcu_read_unlock();
2861
+
2862
+ if (likely(do_xmit))
2863
+ vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc);
2864
+ else
2865
+ goto drop;
2866
+
2867
+ return;
2868
+
2869
+drop:
2870
+ dev->stats.tx_dropped++;
2871
+ dev_kfree_skb(skb);
22972872 }
22982873
22992874 /* Transmit local packets over Vxlan
....@@ -2372,22 +2947,27 @@
23722947 }
23732948 }
23742949
2375
- list_for_each_entry_rcu(rdst, &f->remotes, list) {
2376
- struct sk_buff *skb1;
2950
+ if (rcu_access_pointer(f->nh)) {
2951
+ vxlan_xmit_nh(skb, dev, f,
2952
+ (vni ? : vxlan->default_dst.remote_vni), did_rsc);
2953
+ } else {
2954
+ list_for_each_entry_rcu(rdst, &f->remotes, list) {
2955
+ struct sk_buff *skb1;
23772956
2378
- if (!fdst) {
2379
- fdst = rdst;
2380
- continue;
2957
+ if (!fdst) {
2958
+ fdst = rdst;
2959
+ continue;
2960
+ }
2961
+ skb1 = skb_clone(skb, GFP_ATOMIC);
2962
+ if (skb1)
2963
+ vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
23812964 }
2382
- skb1 = skb_clone(skb, GFP_ATOMIC);
2383
- if (skb1)
2384
- vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
2965
+ if (fdst)
2966
+ vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
2967
+ else
2968
+ kfree_skb(skb);
23852969 }
23862970
2387
- if (fdst)
2388
- vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
2389
- else
2390
- kfree_skb(skb);
23912971 return NETDEV_TX_OK;
23922972 }
23932973
....@@ -2404,7 +2984,7 @@
24042984 for (h = 0; h < FDB_HASH_SIZE; ++h) {
24052985 struct hlist_node *p, *n;
24062986
2407
- spin_lock_bh(&vxlan->hash_lock);
2987
+ spin_lock(&vxlan->hash_lock[h]);
24082988 hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
24092989 struct vxlan_fdb *f
24102990 = container_of(p, struct vxlan_fdb, hlist);
....@@ -2422,11 +3002,11 @@
24223002 "garbage collect %pM\n",
24233003 f->eth_addr);
24243004 f->state = NUD_STALE;
2425
- vxlan_fdb_destroy(vxlan, f, true);
3005
+ vxlan_fdb_destroy(vxlan, f, true, true);
24263006 } else if (time_before(timeout, next_timer))
24273007 next_timer = timeout;
24283008 }
2429
- spin_unlock_bh(&vxlan->hash_lock);
3009
+ spin_unlock(&vxlan->hash_lock[h]);
24303010 }
24313011
24323012 mod_timer(&vxlan->age_timer, next_timer);
....@@ -2478,12 +3058,13 @@
24783058 static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
24793059 {
24803060 struct vxlan_fdb *f;
3061
+ u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
24813062
2482
- spin_lock_bh(&vxlan->hash_lock);
3063
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
24833064 f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
24843065 if (f)
2485
- vxlan_fdb_destroy(vxlan, f, true);
2486
- spin_unlock_bh(&vxlan->hash_lock);
3066
+ vxlan_fdb_destroy(vxlan, f, true, true);
3067
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
24873068 }
24883069
24893070 static void vxlan_uninit(struct net_device *dev)
....@@ -2528,20 +3109,23 @@
25283109 {
25293110 unsigned int h;
25303111
2531
- spin_lock_bh(&vxlan->hash_lock);
25323112 for (h = 0; h < FDB_HASH_SIZE; ++h) {
25333113 struct hlist_node *p, *n;
3114
+
3115
+ spin_lock_bh(&vxlan->hash_lock[h]);
25343116 hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
25353117 struct vxlan_fdb *f
25363118 = container_of(p, struct vxlan_fdb, hlist);
25373119 if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP)))
25383120 continue;
25393121 /* the all_zeros_mac entry is deleted at vxlan_uninit */
2540
- if (!is_zero_ether_addr(f->eth_addr))
2541
- vxlan_fdb_destroy(vxlan, f, true);
3122
+ if (is_zero_ether_addr(f->eth_addr) &&
3123
+ f->vni == vxlan->cfg.vni)
3124
+ continue;
3125
+ vxlan_fdb_destroy(vxlan, f, true, true);
25423126 }
3127
+ spin_unlock_bh(&vxlan->hash_lock[h]);
25433128 }
2544
- spin_unlock_bh(&vxlan->hash_lock);
25453129 }
25463130
25473131 /* Cleanup timer and forwarding table on shutdown */
....@@ -2646,7 +3230,9 @@
26463230 .ndo_fdb_add = vxlan_fdb_add,
26473231 .ndo_fdb_del = vxlan_fdb_delete,
26483232 .ndo_fdb_dump = vxlan_fdb_dump,
3233
+ .ndo_fdb_get = vxlan_fdb_get,
26493234 .ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
3235
+ .ndo_change_proto_down = dev_change_proto_down_generic,
26503236 };
26513237
26523238 static const struct net_device_ops vxlan_netdev_raw_ops = {
....@@ -2723,14 +3309,15 @@
27233309 dev->max_mtu = ETH_MAX_MTU;
27243310
27253311 INIT_LIST_HEAD(&vxlan->next);
2726
- spin_lock_init(&vxlan->hash_lock);
27273312
27283313 timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
27293314
27303315 vxlan->dev = dev;
27313316
2732
- for (h = 0; h < FDB_HASH_SIZE; ++h)
3317
+ for (h = 0; h < FDB_HASH_SIZE; ++h) {
3318
+ spin_lock_init(&vxlan->hash_lock[h]);
27333319 INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
3320
+ }
27343321 }
27353322
27363323 static void vxlan_ether_setup(struct net_device *dev)
....@@ -2752,10 +3339,10 @@
27523339
27533340 static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
27543341 [IFLA_VXLAN_ID] = { .type = NLA_U32 },
2755
- [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
3342
+ [IFLA_VXLAN_GROUP] = { .len = sizeof_field(struct iphdr, daddr) },
27563343 [IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) },
27573344 [IFLA_VXLAN_LINK] = { .type = NLA_U32 },
2758
- [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
3345
+ [IFLA_VXLAN_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
27593346 [IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
27603347 [IFLA_VXLAN_TOS] = { .type = NLA_U8 },
27613348 [IFLA_VXLAN_TTL] = { .type = NLA_U8 },
....@@ -2779,6 +3366,7 @@
27793366 [IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
27803367 [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
27813368 [IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
3369
+ [IFLA_VXLAN_DF] = { .type = NLA_U8 },
27823370 };
27833371
27843372 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
....@@ -2818,7 +3406,7 @@
28183406 u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
28193407
28203408 if (id >= VXLAN_N_VID) {
2821
- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_ID],
3409
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_ID],
28223410 "VXLAN ID must be lower than 16777216");
28233411 return -ERANGE;
28243412 }
....@@ -2829,8 +3417,18 @@
28293417 = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
28303418
28313419 if (ntohs(p->high) < ntohs(p->low)) {
2832
- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE],
3420
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_PORT_RANGE],
28333421 "Invalid source port range");
3422
+ return -EINVAL;
3423
+ }
3424
+ }
3425
+
3426
+ if (data[IFLA_VXLAN_DF]) {
3427
+ enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);
3428
+
3429
+ if (df < 0 || df > VXLAN_DF_MAX) {
3430
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_DF],
3431
+ "Invalid DF attribute");
28343432 return -EINVAL;
28353433 }
28363434 }
....@@ -2845,13 +3443,33 @@
28453443 strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver));
28463444 }
28473445
3446
+static int vxlan_get_link_ksettings(struct net_device *dev,
3447
+ struct ethtool_link_ksettings *cmd)
3448
+{
3449
+ struct vxlan_dev *vxlan = netdev_priv(dev);
3450
+ struct vxlan_rdst *dst = &vxlan->default_dst;
3451
+ struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
3452
+ dst->remote_ifindex);
3453
+
3454
+ if (!lowerdev) {
3455
+ cmd->base.duplex = DUPLEX_UNKNOWN;
3456
+ cmd->base.port = PORT_OTHER;
3457
+ cmd->base.speed = SPEED_UNKNOWN;
3458
+
3459
+ return 0;
3460
+ }
3461
+
3462
+ return __ethtool_get_link_ksettings(lowerdev, cmd);
3463
+}
3464
+
28483465 static const struct ethtool_ops vxlan_ethtool_ops = {
2849
- .get_drvinfo = vxlan_get_drvinfo,
2850
- .get_link = ethtool_op_get_link,
3466
+ .get_drvinfo = vxlan_get_drvinfo,
3467
+ .get_link = ethtool_op_get_link,
3468
+ .get_link_ksettings = vxlan_get_link_ksettings,
28513469 };
28523470
28533471 static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
2854
- __be16 port, u32 flags)
3472
+ __be16 port, u32 flags, int ifindex)
28553473 {
28563474 struct socket *sock;
28573475 struct udp_port_cfg udp_conf;
....@@ -2869,6 +3487,7 @@
28693487 }
28703488
28713489 udp_conf.local_udp_port = port;
3490
+ udp_conf.bind_ifindex = ifindex;
28723491
28733492 /* Open UDP socket */
28743493 err = udp_sock_create(net, &udp_conf, &sock);
....@@ -2880,7 +3499,8 @@
28803499
28813500 /* Create new listen socket if needed */
28823501 static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
2883
- __be16 port, u32 flags)
3502
+ __be16 port, u32 flags,
3503
+ int ifindex)
28843504 {
28853505 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
28863506 struct vxlan_sock *vs;
....@@ -2895,7 +3515,7 @@
28953515 for (h = 0; h < VNI_HASH_SIZE; ++h)
28963516 INIT_HLIST_HEAD(&vs->vni_list[h]);
28973517
2898
- sock = vxlan_create_sock(net, ipv6, port, flags);
3518
+ sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
28993519 if (IS_ERR(sock)) {
29003520 kfree(vs);
29013521 return ERR_CAST(sock);
....@@ -2918,6 +3538,7 @@
29183538 tunnel_cfg.sk_user_data = vs;
29193539 tunnel_cfg.encap_type = 1;
29203540 tunnel_cfg.encap_rcv = vxlan_rcv;
3541
+ tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
29213542 tunnel_cfg.encap_destroy = NULL;
29223543 tunnel_cfg.gro_receive = vxlan_gro_receive;
29233544 tunnel_cfg.gro_complete = vxlan_gro_complete;
....@@ -2932,11 +3553,17 @@
29323553 struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
29333554 struct vxlan_sock *vs = NULL;
29343555 struct vxlan_dev_node *node;
3556
+ int l3mdev_index = 0;
3557
+
3558
+ if (vxlan->cfg.remote_ifindex)
3559
+ l3mdev_index = l3mdev_master_upper_ifindex_by_index(
3560
+ vxlan->net, vxlan->cfg.remote_ifindex);
29353561
29363562 if (!vxlan->cfg.no_share) {
29373563 spin_lock(&vn->sock_lock);
29383564 vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
2939
- vxlan->cfg.dst_port, vxlan->cfg.flags);
3565
+ vxlan->cfg.dst_port, vxlan->cfg.flags,
3566
+ l3mdev_index);
29403567 if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
29413568 spin_unlock(&vn->sock_lock);
29423569 return -EBUSY;
....@@ -2945,7 +3572,8 @@
29453572 }
29463573 if (!vs)
29473574 vs = vxlan_socket_create(vxlan->net, ipv6,
2948
- vxlan->cfg.dst_port, vxlan->cfg.flags);
3575
+ vxlan->cfg.dst_port, vxlan->cfg.flags,
3576
+ l3mdev_index);
29493577 if (IS_ERR(vs))
29503578 return PTR_ERR(vs);
29513579 #if IS_ENABLED(CONFIG_IPV6)
....@@ -3230,10 +3858,13 @@
32303858 {
32313859 struct vxlan_net *vn = net_generic(net, vxlan_net_id);
32323860 struct vxlan_dev *vxlan = netdev_priv(dev);
3861
+ struct net_device *remote_dev = NULL;
32333862 struct vxlan_fdb *f = NULL;
32343863 bool unregister = false;
3864
+ struct vxlan_rdst *dst;
32353865 int err;
32363866
3867
+ dst = &vxlan->default_dst;
32373868 err = vxlan_dev_configure(net, dev, conf, false, extack);
32383869 if (err)
32393870 return err;
....@@ -3241,15 +3872,15 @@
32413872 dev->ethtool_ops = &vxlan_ethtool_ops;
32423873
32433874 /* create an fdb entry for a valid default destination */
3244
- if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
3875
+ if (!vxlan_addr_any(&dst->remote_ip)) {
32453876 err = vxlan_fdb_create(vxlan, all_zeros_mac,
3246
- &vxlan->default_dst.remote_ip,
3877
+ &dst->remote_ip,
32473878 NUD_REACHABLE | NUD_PERMANENT,
32483879 vxlan->cfg.dst_port,
3249
- vxlan->default_dst.remote_vni,
3250
- vxlan->default_dst.remote_vni,
3251
- vxlan->default_dst.remote_ifindex,
3252
- NTF_SELF, &f);
3880
+ dst->remote_vni,
3881
+ dst->remote_vni,
3882
+ dst->remote_ifindex,
3883
+ NTF_SELF, 0, &f, extack);
32533884 if (err)
32543885 return err;
32553886 }
....@@ -3259,34 +3890,90 @@
32593890 goto errout;
32603891 unregister = true;
32613892
3262
- err = rtnl_configure_link(dev, NULL);
3263
- if (err)
3264
- goto errout;
3893
+ if (dst->remote_ifindex) {
3894
+ remote_dev = __dev_get_by_index(net, dst->remote_ifindex);
3895
+ if (!remote_dev) {
3896
+ err = -ENODEV;
3897
+ goto errout;
3898
+ }
32653899
3266
- /* notify default fdb entry */
3267
- if (f)
3268
- vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
3900
+ err = netdev_upper_dev_link(remote_dev, dev, extack);
3901
+ if (err)
3902
+ goto errout;
3903
+ }
3904
+
3905
+ err = rtnl_configure_link(dev, NULL);
3906
+ if (err < 0)
3907
+ goto unlink;
3908
+
3909
+ if (f) {
3910
+ vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f);
3911
+
3912
+ /* notify default fdb entry */
3913
+ err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
3914
+ RTM_NEWNEIGH, true, extack);
3915
+ if (err) {
3916
+ vxlan_fdb_destroy(vxlan, f, false, false);
3917
+ if (remote_dev)
3918
+ netdev_upper_dev_unlink(remote_dev, dev);
3919
+ goto unregister;
3920
+ }
3921
+ }
32693922
32703923 list_add(&vxlan->next, &vn->vxlan_list);
3924
+ if (remote_dev)
3925
+ dst->remote_dev = remote_dev;
32713926 return 0;
3272
-
3927
+unlink:
3928
+ if (remote_dev)
3929
+ netdev_upper_dev_unlink(remote_dev, dev);
32733930 errout:
32743931 /* unregister_netdevice() destroys the default FDB entry with deletion
32753932 * notification. But the addition notification was not sent yet, so
32763933 * destroy the entry by hand here.
32773934 */
32783935 if (f)
3279
- vxlan_fdb_destroy(vxlan, f, false);
3936
+ __vxlan_fdb_free(f);
3937
+unregister:
32803938 if (unregister)
32813939 unregister_netdevice(dev);
32823940 return err;
32833941 }
32843942
3943
+/* Set/clear flags based on attribute */
3944
+static int vxlan_nl2flag(struct vxlan_config *conf, struct nlattr *tb[],
3945
+ int attrtype, unsigned long mask, bool changelink,
3946
+ bool changelink_supported,
3947
+ struct netlink_ext_ack *extack)
3948
+{
3949
+ unsigned long flags;
3950
+
3951
+ if (!tb[attrtype])
3952
+ return 0;
3953
+
3954
+ if (changelink && !changelink_supported) {
3955
+ vxlan_flag_attr_error(attrtype, extack);
3956
+ return -EOPNOTSUPP;
3957
+ }
3958
+
3959
+ if (vxlan_policy[attrtype].type == NLA_FLAG)
3960
+ flags = conf->flags | mask;
3961
+ else if (nla_get_u8(tb[attrtype]))
3962
+ flags = conf->flags | mask;
3963
+ else
3964
+ flags = conf->flags & ~mask;
3965
+
3966
+ conf->flags = flags;
3967
+
3968
+ return 0;
3969
+}
3970
+
32853971 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
32863972 struct net_device *dev, struct vxlan_config *conf,
3287
- bool changelink)
3973
+ bool changelink, struct netlink_ext_ack *extack)
32883974 {
32893975 struct vxlan_dev *vxlan = netdev_priv(dev);
3976
+ int err = 0;
32903977
32913978 memset(conf, 0, sizeof(*conf));
32923979
....@@ -3297,40 +3984,54 @@
32973984 if (data[IFLA_VXLAN_ID]) {
32983985 __be32 vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
32993986
3300
- if (changelink && (vni != conf->vni))
3987
+ if (changelink && (vni != conf->vni)) {
3988
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_ID], "Cannot change VNI");
33013989 return -EOPNOTSUPP;
3990
+ }
33023991 conf->vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
33033992 }
33043993
33053994 if (data[IFLA_VXLAN_GROUP]) {
3306
- if (changelink && (conf->remote_ip.sa.sa_family != AF_INET))
3995
+ if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
3996
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
33073997 return -EOPNOTSUPP;
3998
+ }
33083999
33094000 conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
33104001 conf->remote_ip.sa.sa_family = AF_INET;
33114002 } else if (data[IFLA_VXLAN_GROUP6]) {
3312
- if (!IS_ENABLED(CONFIG_IPV6))
4003
+ if (!IS_ENABLED(CONFIG_IPV6)) {
4004
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");
33134005 return -EPFNOSUPPORT;
4006
+ }
33144007
3315
- if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6))
4008
+ if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6)) {
4009
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "New group address family does not match old group");
33164010 return -EOPNOTSUPP;
4011
+ }
33174012
33184013 conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
33194014 conf->remote_ip.sa.sa_family = AF_INET6;
33204015 }
33214016
33224017 if (data[IFLA_VXLAN_LOCAL]) {
3323
- if (changelink && (conf->saddr.sa.sa_family != AF_INET))
4018
+ if (changelink && (conf->saddr.sa.sa_family != AF_INET)) {
4019
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL], "New local address family does not match old");
33244020 return -EOPNOTSUPP;
4021
+ }
33254022
33264023 conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
33274024 conf->saddr.sa.sa_family = AF_INET;
33284025 } else if (data[IFLA_VXLAN_LOCAL6]) {
3329
- if (!IS_ENABLED(CONFIG_IPV6))
4026
+ if (!IS_ENABLED(CONFIG_IPV6)) {
4027
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "IPv6 support not enabled in the kernel");
33304028 return -EPFNOSUPPORT;
4029
+ }
33314030
3332
- if (changelink && (conf->saddr.sa.sa_family != AF_INET6))
4031
+ if (changelink && (conf->saddr.sa.sa_family != AF_INET6)) {
4032
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "New local address family does not match old");
33334033 return -EOPNOTSUPP;
4034
+ }
33344035
33354036 /* TODO: respect scope id */
33364037 conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
....@@ -3347,9 +4048,12 @@
33474048 conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
33484049
33494050 if (data[IFLA_VXLAN_TTL_INHERIT]) {
3350
- if (changelink)
3351
- return -EOPNOTSUPP;
3352
- conf->flags |= VXLAN_F_TTL_INHERIT;
4051
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_TTL_INHERIT,
4052
+ VXLAN_F_TTL_INHERIT, changelink, false,
4053
+ extack);
4054
+ if (err)
4055
+ return err;
4056
+
33534057 }
33544058
33554059 if (data[IFLA_VXLAN_LABEL])
....@@ -3357,60 +4061,66 @@
33574061 IPV6_FLOWLABEL_MASK;
33584062
33594063 if (data[IFLA_VXLAN_LEARNING]) {
3360
- if (nla_get_u8(data[IFLA_VXLAN_LEARNING]))
3361
- conf->flags |= VXLAN_F_LEARN;
3362
- else
3363
- conf->flags &= ~VXLAN_F_LEARN;
4064
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
4065
+ VXLAN_F_LEARN, changelink, true,
4066
+ extack);
4067
+ if (err)
4068
+ return err;
33644069 } else if (!changelink) {
33654070 /* default to learn on a new device */
33664071 conf->flags |= VXLAN_F_LEARN;
33674072 }
33684073
3369
- if (data[IFLA_VXLAN_AGEING]) {
3370
- if (changelink)
3371
- return -EOPNOTSUPP;
4074
+ if (data[IFLA_VXLAN_AGEING])
33724075 conf->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
3373
- }
33744076
33754077 if (data[IFLA_VXLAN_PROXY]) {
3376
- if (changelink)
3377
- return -EOPNOTSUPP;
3378
- if (nla_get_u8(data[IFLA_VXLAN_PROXY]))
3379
- conf->flags |= VXLAN_F_PROXY;
4078
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_PROXY,
4079
+ VXLAN_F_PROXY, changelink, false,
4080
+ extack);
4081
+ if (err)
4082
+ return err;
33804083 }
33814084
33824085 if (data[IFLA_VXLAN_RSC]) {
3383
- if (changelink)
3384
- return -EOPNOTSUPP;
3385
- if (nla_get_u8(data[IFLA_VXLAN_RSC]))
3386
- conf->flags |= VXLAN_F_RSC;
4086
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_RSC,
4087
+ VXLAN_F_RSC, changelink, false,
4088
+ extack);
4089
+ if (err)
4090
+ return err;
33874091 }
33884092
33894093 if (data[IFLA_VXLAN_L2MISS]) {
3390
- if (changelink)
3391
- return -EOPNOTSUPP;
3392
- if (nla_get_u8(data[IFLA_VXLAN_L2MISS]))
3393
- conf->flags |= VXLAN_F_L2MISS;
4094
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L2MISS,
4095
+ VXLAN_F_L2MISS, changelink, false,
4096
+ extack);
4097
+ if (err)
4098
+ return err;
33944099 }
33954100
33964101 if (data[IFLA_VXLAN_L3MISS]) {
3397
- if (changelink)
3398
- return -EOPNOTSUPP;
3399
- if (nla_get_u8(data[IFLA_VXLAN_L3MISS]))
3400
- conf->flags |= VXLAN_F_L3MISS;
4102
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L3MISS,
4103
+ VXLAN_F_L3MISS, changelink, false,
4104
+ extack);
4105
+ if (err)
4106
+ return err;
34014107 }
34024108
34034109 if (data[IFLA_VXLAN_LIMIT]) {
3404
- if (changelink)
4110
+ if (changelink) {
4111
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LIMIT],
4112
+ "Cannot change limit");
34054113 return -EOPNOTSUPP;
4114
+ }
34064115 conf->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
34074116 }
34084117
34094118 if (data[IFLA_VXLAN_COLLECT_METADATA]) {
3410
- if (changelink)
3411
- return -EOPNOTSUPP;
3412
- if (nla_get_u8(data[IFLA_VXLAN_COLLECT_METADATA]))
3413
- conf->flags |= VXLAN_F_COLLECT_METADATA;
4119
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_COLLECT_METADATA,
4120
+ VXLAN_F_COLLECT_METADATA, changelink, false,
4121
+ extack);
4122
+ if (err)
4123
+ return err;
34144124 }
34154125
34164126 if (data[IFLA_VXLAN_PORT_RANGE]) {
....@@ -3420,74 +4130,97 @@
34204130 conf->port_min = ntohs(p->low);
34214131 conf->port_max = ntohs(p->high);
34224132 } else {
4133
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE],
4134
+ "Cannot change port range");
34234135 return -EOPNOTSUPP;
34244136 }
34254137 }
34264138
34274139 if (data[IFLA_VXLAN_PORT]) {
3428
- if (changelink)
4140
+ if (changelink) {
4141
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT],
4142
+ "Cannot change port");
34294143 return -EOPNOTSUPP;
4144
+ }
34304145 conf->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
34314146 }
34324147
34334148 if (data[IFLA_VXLAN_UDP_CSUM]) {
3434
- if (changelink)
4149
+ if (changelink) {
4150
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_UDP_CSUM],
4151
+ "Cannot change UDP_CSUM flag");
34354152 return -EOPNOTSUPP;
4153
+ }
34364154 if (!nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
34374155 conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
34384156 }
34394157
34404158 if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
3441
- if (changelink)
3442
- return -EOPNOTSUPP;
3443
- if (nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
3444
- conf->flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
4159
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
4160
+ VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
4161
+ false, extack);
4162
+ if (err)
4163
+ return err;
34454164 }
34464165
34474166 if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]) {
3448
- if (changelink)
3449
- return -EOPNOTSUPP;
3450
- if (nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
3451
- conf->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
4167
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
4168
+ VXLAN_F_UDP_ZERO_CSUM6_RX, changelink,
4169
+ false, extack);
4170
+ if (err)
4171
+ return err;
34524172 }
34534173
34544174 if (data[IFLA_VXLAN_REMCSUM_TX]) {
3455
- if (changelink)
3456
- return -EOPNOTSUPP;
3457
- if (nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
3458
- conf->flags |= VXLAN_F_REMCSUM_TX;
4175
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_TX,
4176
+ VXLAN_F_REMCSUM_TX, changelink, false,
4177
+ extack);
4178
+ if (err)
4179
+ return err;
34594180 }
34604181
34614182 if (data[IFLA_VXLAN_REMCSUM_RX]) {
3462
- if (changelink)
3463
- return -EOPNOTSUPP;
3464
- if (nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
3465
- conf->flags |= VXLAN_F_REMCSUM_RX;
4183
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_RX,
4184
+ VXLAN_F_REMCSUM_RX, changelink, false,
4185
+ extack);
4186
+ if (err)
4187
+ return err;
34664188 }
34674189
34684190 if (data[IFLA_VXLAN_GBP]) {
3469
- if (changelink)
3470
- return -EOPNOTSUPP;
3471
- conf->flags |= VXLAN_F_GBP;
4191
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GBP,
4192
+ VXLAN_F_GBP, changelink, false, extack);
4193
+ if (err)
4194
+ return err;
34724195 }
34734196
34744197 if (data[IFLA_VXLAN_GPE]) {
3475
- if (changelink)
3476
- return -EOPNOTSUPP;
3477
- conf->flags |= VXLAN_F_GPE;
4198
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GPE,
4199
+ VXLAN_F_GPE, changelink, false,
4200
+ extack);
4201
+ if (err)
4202
+ return err;
34784203 }
34794204
34804205 if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) {
3481
- if (changelink)
3482
- return -EOPNOTSUPP;
3483
- conf->flags |= VXLAN_F_REMCSUM_NOPARTIAL;
4206
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL,
4207
+ VXLAN_F_REMCSUM_NOPARTIAL, changelink,
4208
+ false, extack);
4209
+ if (err)
4210
+ return err;
34844211 }
34854212
34864213 if (tb[IFLA_MTU]) {
3487
- if (changelink)
4214
+ if (changelink) {
4215
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
4216
+ "Cannot change mtu");
34884217 return -EOPNOTSUPP;
4218
+ }
34894219 conf->mtu = nla_get_u32(tb[IFLA_MTU]);
34904220 }
4221
+
4222
+ if (data[IFLA_VXLAN_DF])
4223
+ conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
34914224
34924225 return 0;
34934226 }
....@@ -3499,7 +4232,7 @@
34994232 struct vxlan_config conf;
35004233 int err;
35014234
3502
- err = vxlan_nl2conf(tb, data, dev, &conf, false);
4235
+ err = vxlan_nl2conf(tb, data, dev, &conf, false, extack);
35034236 if (err)
35044237 return err;
35054238
....@@ -3511,51 +4244,68 @@
35114244 struct netlink_ext_ack *extack)
35124245 {
35134246 struct vxlan_dev *vxlan = netdev_priv(dev);
3514
- struct vxlan_rdst *dst = &vxlan->default_dst;
3515
- struct vxlan_rdst old_dst;
4247
+ struct net_device *lowerdev;
35164248 struct vxlan_config conf;
4249
+ struct vxlan_rdst *dst;
35174250 int err;
35184251
3519
- err = vxlan_nl2conf(tb, data,
3520
- dev, &conf, true);
4252
+ dst = &vxlan->default_dst;
4253
+ err = vxlan_nl2conf(tb, data, dev, &conf, true, extack);
35214254 if (err)
35224255 return err;
35234256
3524
- memcpy(&old_dst, dst, sizeof(struct vxlan_rdst));
4257
+ err = vxlan_config_validate(vxlan->net, &conf, &lowerdev,
4258
+ vxlan, extack);
4259
+ if (err)
4260
+ return err;
35254261
3526
- err = vxlan_dev_configure(vxlan->net, dev, &conf, true, extack);
4262
+ if (dst->remote_dev == lowerdev)
4263
+ lowerdev = NULL;
4264
+
4265
+ err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev,
4266
+ extack);
35274267 if (err)
35284268 return err;
35294269
35304270 /* handle default dst entry */
3531
- if (!vxlan_addr_equal(&dst->remote_ip, &old_dst.remote_ip)) {
3532
- spin_lock_bh(&vxlan->hash_lock);
3533
- if (!vxlan_addr_any(&old_dst.remote_ip))
3534
- __vxlan_fdb_delete(vxlan, all_zeros_mac,
3535
- old_dst.remote_ip,
3536
- vxlan->cfg.dst_port,
3537
- old_dst.remote_vni,
3538
- old_dst.remote_vni,
3539
- old_dst.remote_ifindex, 0);
4271
+ if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
4272
+ u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
35404273
3541
- if (!vxlan_addr_any(&dst->remote_ip)) {
4274
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
4275
+ if (!vxlan_addr_any(&conf.remote_ip)) {
35424276 err = vxlan_fdb_update(vxlan, all_zeros_mac,
3543
- &dst->remote_ip,
4277
+ &conf.remote_ip,
35444278 NUD_REACHABLE | NUD_PERMANENT,
35454279 NLM_F_APPEND | NLM_F_CREATE,
35464280 vxlan->cfg.dst_port,
3547
- dst->remote_vni,
3548
- dst->remote_vni,
3549
- dst->remote_ifindex,
3550
- NTF_SELF);
4281
+ conf.vni, conf.vni,
4282
+ conf.remote_ifindex,
4283
+ NTF_SELF, 0, true, extack);
35514284 if (err) {
3552
- spin_unlock_bh(&vxlan->hash_lock);
4285
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4286
+ netdev_adjacent_change_abort(dst->remote_dev,
4287
+ lowerdev, dev);
35534288 return err;
35544289 }
35554290 }
3556
- spin_unlock_bh(&vxlan->hash_lock);
4291
+ if (!vxlan_addr_any(&dst->remote_ip))
4292
+ __vxlan_fdb_delete(vxlan, all_zeros_mac,
4293
+ dst->remote_ip,
4294
+ vxlan->cfg.dst_port,
4295
+ dst->remote_vni,
4296
+ dst->remote_vni,
4297
+ dst->remote_ifindex,
4298
+ true);
4299
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
35574300 }
35584301
4302
+ if (conf.age_interval != vxlan->cfg.age_interval)
4303
+ mod_timer(&vxlan->age_timer, jiffies);
4304
+
4305
+ netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev);
4306
+ if (lowerdev && lowerdev != dst->remote_dev)
4307
+ dst->remote_dev = lowerdev;
4308
+ vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
35594309 return 0;
35604310 }
35614311
....@@ -3567,6 +4317,8 @@
35674317
35684318 list_del(&vxlan->next);
35694319 unregister_netdevice_queue(dev, head);
4320
+ if (vxlan->default_dst.remote_dev)
4321
+ netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev);
35704322 }
35714323
35724324 static size_t vxlan_get_size(const struct net_device *dev)
....@@ -3579,6 +4331,7 @@
35794331 nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
35804332 nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */
35814333 nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
4334
+ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
35824335 nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
35834336 nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
35844337 nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
....@@ -3645,32 +4398,33 @@
36454398 nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
36464399 !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
36474400 nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
4401
+ nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
36484402 nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
36494403 nla_put_u8(skb, IFLA_VXLAN_LEARNING,
3650
- !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
4404
+ !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
36514405 nla_put_u8(skb, IFLA_VXLAN_PROXY,
3652
- !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
4406
+ !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
36534407 nla_put_u8(skb, IFLA_VXLAN_RSC,
36544408 !!(vxlan->cfg.flags & VXLAN_F_RSC)) ||
36554409 nla_put_u8(skb, IFLA_VXLAN_L2MISS,
3656
- !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
4410
+ !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
36574411 nla_put_u8(skb, IFLA_VXLAN_L3MISS,
3658
- !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
4412
+ !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
36594413 nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA,
36604414 !!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) ||
36614415 nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||
36624416 nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
36634417 nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
36644418 nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
3665
- !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
4419
+ !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
36664420 nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
3667
- !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
4421
+ !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
36684422 nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
3669
- !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
4423
+ !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
36704424 nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
3671
- !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
4425
+ !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
36724426 nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
3673
- !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
4427
+ !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
36744428 goto nla_put_failure;
36754429
36764430 if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
....@@ -3727,7 +4481,7 @@
37274481 memset(&tb, 0, sizeof(tb));
37284482
37294483 dev = rtnl_create_link(net, name, name_assign_type,
3730
- &vxlan_link_ops, tb);
4484
+ &vxlan_link_ops, tb, NULL);
37314485 if (IS_ERR(dev))
37324486 return dev;
37334487
....@@ -3779,10 +4533,12 @@
37794533 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
37804534
37814535 if (event == NETDEV_UNREGISTER) {
3782
- vxlan_offload_rx_ports(dev, false);
4536
+ if (!dev->udp_tunnel_nic_info)
4537
+ vxlan_offload_rx_ports(dev, false);
37834538 vxlan_handle_lowerdev_unregister(vn, dev);
37844539 } else if (event == NETDEV_REGISTER) {
3785
- vxlan_offload_rx_ports(dev, true);
4540
+ if (!dev->udp_tunnel_nic_info)
4541
+ vxlan_offload_rx_ports(dev, true);
37864542 } else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
37874543 event == NETDEV_UDP_TUNNEL_DROP_INFO) {
37884544 vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
....@@ -3793,6 +4549,169 @@
37934549
37944550 static struct notifier_block vxlan_notifier_block __read_mostly = {
37954551 .notifier_call = vxlan_netdevice_event,
4552
+};
4553
+
4554
+static void
4555
+vxlan_fdb_offloaded_set(struct net_device *dev,
4556
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
4557
+{
4558
+ struct vxlan_dev *vxlan = netdev_priv(dev);
4559
+ struct vxlan_rdst *rdst;
4560
+ struct vxlan_fdb *f;
4561
+ u32 hash_index;
4562
+
4563
+ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
4564
+
4565
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
4566
+
4567
+ f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
4568
+ if (!f)
4569
+ goto out;
4570
+
4571
+ rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip,
4572
+ fdb_info->remote_port,
4573
+ fdb_info->remote_vni,
4574
+ fdb_info->remote_ifindex);
4575
+ if (!rdst)
4576
+ goto out;
4577
+
4578
+ rdst->offloaded = fdb_info->offloaded;
4579
+
4580
+out:
4581
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4582
+}
4583
+
4584
+static int
4585
+vxlan_fdb_external_learn_add(struct net_device *dev,
4586
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
4587
+{
4588
+ struct vxlan_dev *vxlan = netdev_priv(dev);
4589
+ struct netlink_ext_ack *extack;
4590
+ u32 hash_index;
4591
+ int err;
4592
+
4593
+ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
4594
+ extack = switchdev_notifier_info_to_extack(&fdb_info->info);
4595
+
4596
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
4597
+ err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
4598
+ NUD_REACHABLE,
4599
+ NLM_F_CREATE | NLM_F_REPLACE,
4600
+ fdb_info->remote_port,
4601
+ fdb_info->vni,
4602
+ fdb_info->remote_vni,
4603
+ fdb_info->remote_ifindex,
4604
+ NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
4605
+ 0, false, extack);
4606
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4607
+
4608
+ return err;
4609
+}
4610
+
4611
+static int
4612
+vxlan_fdb_external_learn_del(struct net_device *dev,
4613
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
4614
+{
4615
+ struct vxlan_dev *vxlan = netdev_priv(dev);
4616
+ struct vxlan_fdb *f;
4617
+ u32 hash_index;
4618
+ int err = 0;
4619
+
4620
+ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
4621
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
4622
+
4623
+ f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
4624
+ if (!f)
4625
+ err = -ENOENT;
4626
+ else if (f->flags & NTF_EXT_LEARNED)
4627
+ err = __vxlan_fdb_delete(vxlan, fdb_info->eth_addr,
4628
+ fdb_info->remote_ip,
4629
+ fdb_info->remote_port,
4630
+ fdb_info->vni,
4631
+ fdb_info->remote_vni,
4632
+ fdb_info->remote_ifindex,
4633
+ false);
4634
+
4635
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4636
+
4637
+ return err;
4638
+}
4639
+
4640
+static int vxlan_switchdev_event(struct notifier_block *unused,
4641
+ unsigned long event, void *ptr)
4642
+{
4643
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
4644
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info;
4645
+ int err = 0;
4646
+
4647
+ switch (event) {
4648
+ case SWITCHDEV_VXLAN_FDB_OFFLOADED:
4649
+ vxlan_fdb_offloaded_set(dev, ptr);
4650
+ break;
4651
+ case SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE:
4652
+ fdb_info = ptr;
4653
+ err = vxlan_fdb_external_learn_add(dev, fdb_info);
4654
+ if (err) {
4655
+ err = notifier_from_errno(err);
4656
+ break;
4657
+ }
4658
+ fdb_info->offloaded = true;
4659
+ vxlan_fdb_offloaded_set(dev, fdb_info);
4660
+ break;
4661
+ case SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE:
4662
+ fdb_info = ptr;
4663
+ err = vxlan_fdb_external_learn_del(dev, fdb_info);
4664
+ if (err) {
4665
+ err = notifier_from_errno(err);
4666
+ break;
4667
+ }
4668
+ fdb_info->offloaded = false;
4669
+ vxlan_fdb_offloaded_set(dev, fdb_info);
4670
+ break;
4671
+ }
4672
+
4673
+ return err;
4674
+}
4675
+
4676
+static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
4677
+ .notifier_call = vxlan_switchdev_event,
4678
+};
4679
+
4680
+static void vxlan_fdb_nh_flush(struct nexthop *nh)
4681
+{
4682
+ struct vxlan_fdb *fdb;
4683
+ struct vxlan_dev *vxlan;
4684
+ u32 hash_index;
4685
+
4686
+ rcu_read_lock();
4687
+ list_for_each_entry_rcu(fdb, &nh->fdb_list, nh_list) {
4688
+ vxlan = rcu_dereference(fdb->vdev);
4689
+ WARN_ON(!vxlan);
4690
+ hash_index = fdb_head_index(vxlan, fdb->eth_addr,
4691
+ vxlan->default_dst.remote_vni);
4692
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
4693
+ if (!hlist_unhashed(&fdb->hlist))
4694
+ vxlan_fdb_destroy(vxlan, fdb, false, false);
4695
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
4696
+ }
4697
+ rcu_read_unlock();
4698
+}
4699
+
4700
+static int vxlan_nexthop_event(struct notifier_block *nb,
4701
+ unsigned long event, void *ptr)
4702
+{
4703
+ struct nexthop *nh = ptr;
4704
+
4705
+ if (!nh || event != NEXTHOP_EVENT_DEL)
4706
+ return NOTIFY_DONE;
4707
+
4708
+ vxlan_fdb_nh_flush(nh);
4709
+
4710
+ return NOTIFY_DONE;
4711
+}
4712
+
4713
+static struct notifier_block vxlan_nexthop_notifier_block __read_mostly = {
4714
+ .notifier_call = vxlan_nexthop_event,
37964715 };
37974716
37984717 static __net_init int vxlan_init_net(struct net *net)
....@@ -3806,7 +4725,7 @@
38064725 for (h = 0; h < PORT_HASH_SIZE; ++h)
38074726 INIT_HLIST_HEAD(&vn->sock_list[h]);
38084727
3809
- return 0;
4728
+ return register_nexthop_notifier(net, &vxlan_nexthop_notifier_block);
38104729 }
38114730
38124731 static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
....@@ -3836,6 +4755,8 @@
38364755 unsigned int h;
38374756
38384757 rtnl_lock();
4758
+ list_for_each_entry(net, net_list, exit_list)
4759
+ unregister_nexthop_notifier(net, &vxlan_nexthop_notifier_block);
38394760 list_for_each_entry(net, net_list, exit_list)
38404761 vxlan_destroy_tunnels(net, &list);
38414762
....@@ -3871,11 +4792,17 @@
38714792 if (rc)
38724793 goto out2;
38734794
3874
- rc = rtnl_link_register(&vxlan_link_ops);
4795
+ rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block);
38754796 if (rc)
38764797 goto out3;
38774798
4799
+ rc = rtnl_link_register(&vxlan_link_ops);
4800
+ if (rc)
4801
+ goto out4;
4802
+
38784803 return 0;
4804
+out4:
4805
+ unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
38794806 out3:
38804807 unregister_netdevice_notifier(&vxlan_notifier_block);
38814808 out2:
....@@ -3888,6 +4815,7 @@
38884815 static void __exit vxlan_cleanup_module(void)
38894816 {
38904817 rtnl_link_unregister(&vxlan_link_ops);
4818
+ unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
38914819 unregister_netdevice_notifier(&vxlan_notifier_block);
38924820 unregister_pernet_subsys(&vxlan_net_ops);
38934821 /* rcu_barrier() is called by netns */