From 95099d4622f8cb224d94e314c7a8e0df60b13f87 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 08:38:01 +0000
Subject: [PATCH] enable docker ppp
---
kernel/drivers/net/vxlan.c | 1636 +++++++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 1,282 insertions(+), 354 deletions(-)
diff --git a/kernel/drivers/net/vxlan.c b/kernel/drivers/net/vxlan.c
index eacc1e3..72d6706 100644
--- a/kernel/drivers/net/vxlan.c
+++ b/kernel/drivers/net/vxlan.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* VXLAN: Virtual eXtensible Local Area Network
*
* Copyright (c) 2012-2013 Vyatta Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -20,6 +17,7 @@
#include <linux/ethtool.h>
#include <net/arp.h>
#include <net/ndisc.h>
+#include <net/ipv6_stubs.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/rtnetlink.h>
@@ -28,6 +26,7 @@
#include <net/netns/generic.h>
#include <net/tun_proto.h>
#include <net/vxlan.h>
+#include <net/nexthop.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_tunnel.h>
@@ -79,8 +78,13 @@
u8 eth_addr[ETH_ALEN];
u16 state; /* see ndm_state */
__be32 vni;
- u8 flags; /* see ndm_flags */
+ u16 flags; /* see ndm_flags and below */
+ struct list_head nh_list;
+ struct nexthop __rcu *nh;
+ struct vxlan_dev __rcu *vdev;
};
+
+#define NTF_VXLAN_ADDED_BY_USER 0x100
/* salt for hash table */
static u32 vxlan_salt __read_mostly;
@@ -101,22 +105,6 @@
return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
else
return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
-}
-
-static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
-{
- if (ipa->sa.sa_family == AF_INET6)
- return ipv6_addr_any(&ipa->sin6.sin6_addr);
- else
- return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
-}
-
-static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
-{
- if (ipa->sa.sa_family == AF_INET6)
- return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
- else
- return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
}
static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
@@ -149,16 +137,6 @@
bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
{
return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
-}
-
-static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
-{
- return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
-}
-
-static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
-{
- return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
}
static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
@@ -200,19 +178,24 @@
*/
static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
{
+ if (rcu_access_pointer(fdb->nh))
+ return NULL;
return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
}
static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
{
+ if (rcu_access_pointer(fdb->nh))
+ return NULL;
return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
}
-/* Find VXLAN socket based on network namespace, address family and UDP port
- * and enabled unshareable flags.
+/* Find VXLAN socket based on network namespace, address family, UDP port,
+ * enabled unshareable flags and socket device binding (see l3mdev with
+ * non-default VRF).
*/
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
- __be16 port, u32 flags)
+ __be16 port, u32 flags, int ifindex)
{
struct vxlan_sock *vs;
@@ -221,7 +204,8 @@
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
if (inet_sk(vs->sock->sk)->inet_sport == port &&
vxlan_get_sk_family(vs) == family &&
- vs->flags == flags)
+ vs->flags == flags &&
+ vs->sock->sk->sk_bound_dev_if == ifindex)
return vs;
}
return NULL;
@@ -261,7 +245,7 @@
{
struct vxlan_sock *vs;
- vs = vxlan_find_sock(net, family, port, flags);
+ vs = vxlan_find_sock(net, family, port, flags, ifindex);
if (!vs)
return NULL;
@@ -276,9 +260,12 @@
{
unsigned long now = jiffies;
struct nda_cacheinfo ci;
- struct nlmsghdr *nlh;
- struct ndmsg *ndm;
bool send_ip, send_eth;
+ struct nlmsghdr *nlh;
+ struct nexthop *nh;
+ struct ndmsg *ndm;
+ int nh_family;
+ u32 nh_id;
nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
if (nlh == NULL)
@@ -289,15 +276,29 @@
send_eth = send_ip = true;
+ rcu_read_lock();
+ nh = rcu_dereference(fdb->nh);
+ if (nh) {
+ nh_family = nexthop_get_family(nh);
+ nh_id = nh->id;
+ }
+ rcu_read_unlock();
+
if (type == RTM_GETNEIGH) {
- send_ip = !vxlan_addr_any(&rdst->remote_ip);
+ if (rdst) {
+ send_ip = !vxlan_addr_any(&rdst->remote_ip);
+ ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
+ } else if (nh) {
+ ndm->ndm_family = nh_family;
+ }
send_eth = !is_zero_ether_addr(fdb->eth_addr);
- ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
} else
ndm->ndm_family = AF_BRIDGE;
ndm->ndm_state = fdb->state;
ndm->ndm_ifindex = vxlan->dev->ifindex;
ndm->ndm_flags = fdb->flags;
+ if (rdst && rdst->offloaded)
+ ndm->ndm_flags |= NTF_OFFLOADED;
ndm->ndm_type = RTN_UNICAST;
if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
@@ -307,22 +308,29 @@
if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
goto nla_put_failure;
+ if (nh) {
+ if (nla_put_u32(skb, NDA_NH_ID, nh_id))
+ goto nla_put_failure;
+ } else if (rdst) {
+ if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
+ &rdst->remote_ip))
+ goto nla_put_failure;
- if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip))
- goto nla_put_failure;
+ if (rdst->remote_port &&
+ rdst->remote_port != vxlan->cfg.dst_port &&
+ nla_put_be16(skb, NDA_PORT, rdst->remote_port))
+ goto nla_put_failure;
+ if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
+ nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
+ goto nla_put_failure;
+ if (rdst->remote_ifindex &&
+ nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
+ goto nla_put_failure;
+ }
- if (rdst->remote_port && rdst->remote_port != vxlan->cfg.dst_port &&
- nla_put_be16(skb, NDA_PORT, rdst->remote_port))
- goto nla_put_failure;
- if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
- nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
- goto nla_put_failure;
if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
nla_put_u32(skb, NDA_SRC_VNI,
be32_to_cpu(fdb->vni)))
- goto nla_put_failure;
- if (rdst->remote_ifindex &&
- nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
goto nla_put_failure;
ci.ndm_used = jiffies_to_clock_t(now - fdb->used);
@@ -353,8 +361,8 @@
+ nla_total_size(sizeof(struct nda_cacheinfo));
}
-static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
- struct vxlan_rdst *rd, int type)
+static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+ struct vxlan_rdst *rd, int type)
{
struct net *net = dev_net(vxlan->dev);
struct sk_buff *skb;
@@ -379,6 +387,70 @@
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
+/* Fill @fdb_info for a switchdev VXLAN FDB notification.
+ *
+ * The device and @extack go into the embedded generic info; the remote
+ * address, port, VNI, ifindex and offloaded mark are copied from @rd; the MAC,
+ * source VNI and the added-by-user indication come from @fdb.
+ *
+ * @rd is dereferenced unconditionally, so callers must pass a non-NULL remote.
+ */
+static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
+ const struct vxlan_fdb *fdb,
+ const struct vxlan_rdst *rd,
+ struct netlink_ext_ack *extack,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+ fdb_info->info.dev = vxlan->dev;
+ fdb_info->info.extack = extack;
+ fdb_info->remote_ip = rd->remote_ip;
+ fdb_info->remote_port = rd->remote_port;
+ fdb_info->remote_vni = rd->remote_vni;
+ fdb_info->remote_ifindex = rd->remote_ifindex;
+ memcpy(fdb_info->eth_addr, fdb->eth_addr, ETH_ALEN);
+ fdb_info->vni = fdb->vni;
+ fdb_info->offloaded = rd->offloaded;
+ /* Non-zero when the entry carries the driver-private user flag. */
+ fdb_info->added_by_user = fdb->flags & NTF_VXLAN_ADDED_BY_USER;
+}
+
+/* Emit a switchdev notification for one (fdb, remote) pair.
+ *
+ * @adding selects SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, otherwise
+ * SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE is sent.
+ *
+ * Returns 0 (after a WARN) when @rd is NULL, otherwise the notifier result
+ * converted with notifier_to_errno().
+ */
+static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
+ struct vxlan_fdb *fdb,
+ struct vxlan_rdst *rd,
+ bool adding,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_vxlan_fdb_info info;
+ enum switchdev_notifier_type notifier_type;
+ int ret;
+
+ if (WARN_ON(!rd))
+ return 0;
+
+ notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
+ : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
+ /* The info is built with a NULL extack here; @extack is handed to
+ * call_switchdev_notifiers() as a separate argument below.
+ */
+ vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info);
+ ret = call_switchdev_notifiers(notifier_type, vxlan->dev,
+ &info.info, extack);
+ return notifier_to_errno(ret);
+}
+
+/* Announce an fdb change of the given netlink message @type.
+ *
+ * When @swdev_notify is set and @rd is non-NULL, switchdev is notified first
+ * for RTM_NEWNEIGH and RTM_DELNEIGH; other types skip switchdev. The netlink
+ * notification is then sent through __vxlan_fdb_notify().
+ *
+ * Returns the switchdev error for a failed RTM_NEWNEIGH (in which case no
+ * netlink notification is sent), otherwise 0.
+ */
+static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+ struct vxlan_rdst *rd, int type, bool swdev_notify,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (swdev_notify && rd) {
+ switch (type) {
+ case RTM_NEWNEIGH:
+ err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
+ true, extack);
+ if (err)
+ return err;
+ break;
+ case RTM_DELNEIGH:
+ /* The result of a delete notification is ignored. */
+ vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
+ false, extack);
+ break;
+ }
+ }
+
+ __vxlan_fdb_notify(vxlan, fdb, rd, type);
+ return 0;
+}
+
static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -390,7 +462,7 @@
.remote_vni = cpu_to_be32(VXLAN_N_VID),
};
- vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
+ vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
}
static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
@@ -402,7 +474,7 @@
memcpy(f.eth_addr, eth_addr, ETH_ALEN);
- vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
+ vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
}
/* Hash Ethernet address */
@@ -427,14 +499,19 @@
return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
}
+/* Return the fdb hash bucket index for @mac: the MAC+VNI hash when the device
+ * has VXLAN_F_COLLECT_METADATA set, otherwise the MAC-only hash.
+ */
+static u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
+{
+ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
+ return eth_vni_hash(mac, vni);
+ else
+ return eth_hash(mac);
+}
+
/* Hash chain to use given mac address */
static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
const u8 *mac, __be32 vni)
{
- if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
- return &vxlan->fdb_head[eth_vni_hash(mac, vni)];
- else
- return &vxlan->fdb_head[eth_hash(mac)];
+ return &vxlan->fdb_head[fdb_head_index(vxlan, mac, vni)];
}
/* Look up Ethernet address in forwarding table */
@@ -464,7 +541,7 @@
struct vxlan_fdb *f;
f = __vxlan_find_mac(vxlan, mac, vni);
- if (f)
+ if (f && f->used != jiffies)
f->used = jiffies;
return f;
@@ -488,10 +565,117 @@
return NULL;
}
+/* Look up the unicast fdb entry for @mac / @vni on @dev and describe its first
+ * remote in @fdb_info.
+ *
+ * Returns 0 on success, -EINVAL when @mac is a multicast or all-zero address,
+ * and -ENOENT when no entry matches or the matching entry has no remote to
+ * report (first_remote_rcu() returns NULL for nexthop-backed entries).
+ */
+int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ /* Zero-padded private copy of the MAC used for the lookup.
+ * NOTE(review): presumably padded because the hash helper reads past
+ * ETH_ALEN bytes -- confirm against eth_hash().
+ */
+ u8 eth_addr[ETH_ALEN + 2] = { 0 };
+ struct vxlan_rdst *rdst;
+ struct vxlan_fdb *f;
+ int rc = 0;
+
+ if (is_multicast_ether_addr(mac) ||
+ is_zero_ether_addr(mac))
+ return -EINVAL;
+
+ ether_addr_copy(eth_addr, mac);
+
+ rcu_read_lock();
+
+ f = __vxlan_find_mac(vxlan, eth_addr, vni);
+ if (!f) {
+ rc = -ENOENT;
+ goto out;
+ }
+
+ rdst = first_remote_rcu(f);
+ /* A nexthop-backed entry has no first remote; the info helper
+ * dereferences the remote unconditionally, so bail out here instead
+ * of passing NULL down.
+ */
+ if (!rdst) {
+ rc = -ENOENT;
+ goto out;
+ }
+ vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, NULL, fdb_info);
+
+out:
+ rcu_read_unlock();
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
+
+/* Deliver a single SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE event for (@f, @rdst)
+ * directly to the notifier block @nb by invoking its callback.
+ *
+ * Returns the callback result converted with notifier_to_errno().
+ */
+static int vxlan_fdb_notify_one(struct notifier_block *nb,
+ const struct vxlan_dev *vxlan,
+ const struct vxlan_fdb *f,
+ const struct vxlan_rdst *rdst,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_vxlan_fdb_info fdb_info;
+ int rc;
+
+ vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, extack, &fdb_info);
+ rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
+ &fdb_info);
+ return notifier_to_errno(rc);
+}
+
+/* Replay to @nb, as add events, every remote of every fdb entry on @dev whose
+ * source VNI equals @vni.
+ *
+ * All FDB_HASH_SIZE buckets are walked; each bucket is traversed with its
+ * hash_lock held via spin_lock_bh().
+ *
+ * Returns -EINVAL when @dev is not a VXLAN device, the first error reported
+ * by the notifier (the walk stops there), or 0 when everything was replayed.
+ */
+int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
+ struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan;
+ struct vxlan_rdst *rdst;
+ struct vxlan_fdb *f;
+ unsigned int h;
+ int rc = 0;
+
+ if (!netif_is_vxlan(dev))
+ return -EINVAL;
+ vxlan = netdev_priv(dev);
+
+ for (h = 0; h < FDB_HASH_SIZE; ++h) {
+ spin_lock_bh(&vxlan->hash_lock[h]);
+ hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist) {
+ if (f->vni == vni) {
+ list_for_each_entry(rdst, &f->remotes, list) {
+ rc = vxlan_fdb_notify_one(nb, vxlan,
+ f, rdst,
+ extack);
+ if (rc)
+ goto unlock;
+ }
+ }
+ }
+ spin_unlock_bh(&vxlan->hash_lock[h]);
+ }
+ return 0;
+
+unlock:
+ /* Error exit from inside the walk: bucket @h is still locked. */
+ spin_unlock_bh(&vxlan->hash_lock[h]);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vxlan_fdb_replay);
+
+/* Clear the offloaded mark on every remote of every fdb entry on @dev whose
+ * source VNI equals @vni. Does nothing when @dev is not a VXLAN device.
+ *
+ * Each hash bucket is walked with its hash_lock held via spin_lock_bh().
+ */
+void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni)
+{
+ struct vxlan_dev *vxlan;
+ struct vxlan_rdst *rdst;
+ struct vxlan_fdb *f;
+ unsigned int h;
+
+ if (!netif_is_vxlan(dev))
+ return;
+ vxlan = netdev_priv(dev);
+
+ for (h = 0; h < FDB_HASH_SIZE; ++h) {
+ spin_lock_bh(&vxlan->hash_lock[h]);
+ hlist_for_each_entry(f, &vxlan->fdb_head[h], hlist)
+ if (f->vni == vni)
+ list_for_each_entry(rdst, &f->remotes, list)
+ rdst->offloaded = false;
+ spin_unlock_bh(&vxlan->hash_lock[h]);
+ }
+
+}
+EXPORT_SYMBOL_GPL(vxlan_fdb_clear_offload);
+
/* Replace destination of unicast mac */
static int vxlan_fdb_replace(struct vxlan_fdb *f,
union vxlan_addr *ip, __be16 port, __be32 vni,
- __u32 ifindex)
+ __u32 ifindex, struct vxlan_rdst *oldrd)
{
struct vxlan_rdst *rd;
@@ -503,11 +687,13 @@
if (!rd)
return 0;
+ *oldrd = *rd;
dst_cache_reset(&rd->dst_cache);
rd->remote_ip = *ip;
rd->remote_port = port;
rd->remote_vni = vni;
rd->remote_ifindex = ifindex;
+ rd->offloaded = false;
return 1;
}
@@ -524,15 +710,16 @@
rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
if (rd == NULL)
- return -ENOBUFS;
+ return -ENOMEM;
if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
kfree(rd);
- return -ENOBUFS;
+ return -ENOMEM;
}
rd->remote_ip = *ip;
rd->remote_port = port;
+ rd->offloaded = false;
rd->remote_vni = vni;
rd->remote_ifindex = ifindex;
@@ -637,9 +824,9 @@
return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}
-static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
- const u8 *mac, __u16 state,
- __be32 src_vni, __u8 ndm_flags)
+static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
+ __u16 state, __be32 src_vni,
+ __u16 ndm_flags)
{
struct vxlan_fdb *f;
@@ -650,17 +837,93 @@
f->flags = ndm_flags;
f->updated = f->used = jiffies;
f->vni = src_vni;
+ f->nh = NULL;
+ RCU_INIT_POINTER(f->vdev, vxlan);
+ INIT_LIST_HEAD(&f->nh_list);
INIT_LIST_HEAD(&f->remotes);
memcpy(f->eth_addr, mac, ETH_ALEN);
return f;
}
+/* Link @f at the head of the hash chain selected by (@mac, @src_vni) and
+ * account for it in the device's address count.
+ *
+ * NOTE(review): takes no lock itself; presumably the caller holds the
+ * matching hash_lock bucket -- confirm against callers.
+ */
+static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac,
+ __be32 src_vni, struct vxlan_fdb *f)
+{
+ ++vxlan->addrcnt;
+ hlist_add_head_rcu(&f->hlist,
+ vxlan_fdb_head(vxlan, mac, src_vni));
+}
+
+/* Point @fdb at the nexthop object identified by @nhid.
+ *
+ * The nexthop is looked up in vxlan->net and must be an fdb nexthop, a
+ * multipath group, and of a family compatible with the device's default
+ * remote (v4 members for AF_INET, no v4 members for AF_INET6). A reference is
+ * taken with nexthop_get() and dropped again on every error path.
+ *
+ * On success the previous nexthop (if any) is unlinked and released, and
+ * @fdb is added to the new nexthop's fdb_list.
+ *
+ * Returns 0 when @fdb already uses @nhid, 1 when the nexthop was changed, and
+ * a negative errno on failure (-EINVAL, or -EAFNOSUPPORT for a family
+ * mismatch) with a message set in @extack.
+ *
+ * NOTE(review): uses rtnl_dereference(), so callers are presumably expected
+ * to hold RTNL -- confirm.
+ */
+static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+ u32 nhid, struct netlink_ext_ack *extack)
+{
+ struct nexthop *old_nh = rtnl_dereference(fdb->nh);
+ struct nexthop *nh;
+ int err = -EINVAL;
+
+ if (old_nh && old_nh->id == nhid)
+ return 0;
+
+ nh = nexthop_find_by_id(vxlan->net, nhid);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ goto err_inval;
+ }
+
+ /* nh is always non-NULL here because of the check just above. */
+ if (nh) {
+ if (!nexthop_get(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ /* No reference was obtained, so make sure the error
+ * path does not put one.
+ */
+ nh = NULL;
+ goto err_inval;
+ }
+ if (!nexthop_is_fdb(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop");
+ goto err_inval;
+ }
+
+ if (!nexthop_is_multipath(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
+ goto err_inval;
+ }
+
+ /* check nexthop group family */
+ switch (vxlan->default_dst.remote_ip.sa.sa_family) {
+ case AF_INET:
+ if (!nexthop_has_v4(nh)) {
+ err = -EAFNOSUPPORT;
+ NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
+ goto err_inval;
+ }
+ break;
+ case AF_INET6:
+ if (nexthop_has_v4(nh)) {
+ err = -EAFNOSUPPORT;
+ NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
+ goto err_inval;
+ }
+ }
+ }
+
+ if (old_nh) {
+ list_del_rcu(&fdb->nh_list);
+ nexthop_put(old_nh);
+ }
+ rcu_assign_pointer(fdb->nh, nh);
+ list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list);
+ return 1;
+
+err_inval:
+ if (nh)
+ nexthop_put(nh);
+ return err;
+}
+
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __be16 port, __be32 src_vni,
- __be32 vni, __u32 ifindex, __u8 ndm_flags,
- struct vxlan_fdb **fdb)
+ __be32 vni, __u32 ifindex, __u16 ndm_flags,
+ u32 nhid, struct vxlan_fdb **fdb,
+ struct netlink_ext_ack *extack)
{
struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f;
@@ -675,98 +938,33 @@
if (!f)
return -ENOMEM;
- rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
- if (rc < 0) {
- kfree(f);
- return rc;
- }
-
- ++vxlan->addrcnt;
- hlist_add_head_rcu(&f->hlist,
- vxlan_fdb_head(vxlan, mac, src_vni));
+ if (nhid)
+ rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
+ else
+ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+ if (rc < 0)
+ goto errout;
*fdb = f;
return 0;
+
+errout:
+ kfree(f);
+ return rc;
}
-/* Add new entry to forwarding table -- assumes lock held */
-static int vxlan_fdb_update(struct vxlan_dev *vxlan,
- const u8 *mac, union vxlan_addr *ip,
- __u16 state, __u16 flags,
- __be16 port, __be32 src_vni, __be32 vni,
- __u32 ifindex, __u8 ndm_flags)
+static void __vxlan_fdb_free(struct vxlan_fdb *f)
{
- struct vxlan_rdst *rd = NULL;
- struct vxlan_fdb *f;
- int notify = 0;
- int rc;
-
- f = __vxlan_find_mac(vxlan, mac, src_vni);
- if (f) {
- if (flags & NLM_F_EXCL) {
- netdev_dbg(vxlan->dev,
- "lost race to create %pM\n", mac);
- return -EEXIST;
- }
- if (f->state != state) {
- f->state = state;
- f->updated = jiffies;
- notify = 1;
- }
- if (f->flags != ndm_flags) {
- f->flags = ndm_flags;
- f->updated = jiffies;
- notify = 1;
- }
- if ((flags & NLM_F_REPLACE)) {
- /* Only change unicasts */
- if (!(is_multicast_ether_addr(f->eth_addr) ||
- is_zero_ether_addr(f->eth_addr))) {
- notify |= vxlan_fdb_replace(f, ip, port, vni,
- ifindex);
- } else
- return -EOPNOTSUPP;
- }
- if ((flags & NLM_F_APPEND) &&
- (is_multicast_ether_addr(f->eth_addr) ||
- is_zero_ether_addr(f->eth_addr))) {
- rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
-
- if (rc < 0)
- return rc;
- notify |= rc;
- }
- } else {
- if (!(flags & NLM_F_CREATE))
- return -ENOENT;
-
- /* Disallow replace to add a multicast entry */
- if ((flags & NLM_F_REPLACE) &&
- (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
- return -EOPNOTSUPP;
-
- netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
- rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
- vni, ifindex, ndm_flags, &f);
- if (rc < 0)
- return rc;
- notify = 1;
- }
-
- if (notify) {
- if (rd == NULL)
- rd = first_remote_rtnl(f);
- vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH);
- }
-
- return 0;
-}
-
-static void vxlan_fdb_free(struct rcu_head *head)
-{
- struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
struct vxlan_rdst *rd, *nd;
+ struct nexthop *nh;
+
+ nh = rcu_dereference_raw(f->nh);
+ if (nh) {
+ rcu_assign_pointer(f->nh, NULL);
+ rcu_assign_pointer(f->vdev, NULL);
+ nexthop_put(nh);
+ }
list_for_each_entry_safe(rd, nd, &f->remotes, list) {
dst_cache_destroy(&rd->dst_cache);
@@ -775,17 +973,33 @@
kfree(f);
}
-static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
- bool do_notify)
+static void vxlan_fdb_free(struct rcu_head *head)
{
- netdev_dbg(vxlan->dev,
- "delete %pM\n", f->eth_addr);
+ struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
+
+ __vxlan_fdb_free(f);
+}
+
+static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
+ bool do_notify, bool swdev_notify)
+{
+ struct vxlan_rdst *rd;
+
+ netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);
--vxlan->addrcnt;
- if (do_notify)
- vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
+ if (do_notify) {
+ if (rcu_access_pointer(f->nh))
+ vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH,
+ swdev_notify, NULL);
+ else
+ list_for_each_entry(rd, &f->remotes, list)
+ vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
+ swdev_notify, NULL);
+ }
hlist_del_rcu(&f->hlist);
+ list_del_rcu(&f->nh_list);
call_rcu(&f->rcu, vxlan_fdb_free);
}
@@ -797,20 +1011,193 @@
kfree(rd);
}
+/* Apply an update request to the already existing fdb entry @f.
+ *
+ * @flags are the netlink NLM_F_* request flags; @ndm_flags are the neighbour
+ * flags to store (NTF_USE is stripped before storing and only refreshes
+ * f->used).
+ *
+ * - A nexthop id is refused for an entry that has no nexthop, and in
+ *   combination with NLM_F_APPEND (-EOPNOTSUPP).
+ * - State and flags are updated unless the request is NTF_EXT_LEARNED and the
+ *   entry was added by the user.
+ * - NLM_F_REPLACE is honoured only for unicast MACs and either switches the
+ *   nexthop or replaces the remote.
+ * - NLM_F_APPEND is honoured only for multicast / all-zero MACs and appends a
+ *   remote.
+ *
+ * If anything changed, an RTM_NEWNEIGH notification is sent; when that fails
+ * a remote replace or append is rolled back (nexthop requests return the
+ * error without rollback).
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
+ union vxlan_addr *ip,
+ __u16 state, __u16 flags,
+ __be16 port, __be32 vni,
+ __u32 ifindex, __u16 ndm_flags,
+ struct vxlan_fdb *f, u32 nhid,
+ bool swdev_notify,
+ struct netlink_ext_ack *extack)
+{
+ __u16 fdb_flags = (ndm_flags & ~NTF_USE);
+ struct vxlan_rdst *rd = NULL;
+ struct vxlan_rdst oldrd;
+ int notify = 0;
+ int rc = 0;
+ int err;
+
+ if (nhid && !rcu_access_pointer(f->nh)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot replace an existing non nexthop fdb with a nexthop");
+ return -EOPNOTSUPP;
+ }
+
+ if (nhid && (flags & NLM_F_APPEND)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot append to a nexthop fdb");
+ return -EOPNOTSUPP;
+ }
+
+ /* Do not allow an externally learned entry to take over an entry added
+ * by the user.
+ */
+ if (!(fdb_flags & NTF_EXT_LEARNED) ||
+ !(f->flags & NTF_VXLAN_ADDED_BY_USER)) {
+ if (f->state != state) {
+ f->state = state;
+ f->updated = jiffies;
+ notify = 1;
+ }
+ if (f->flags != fdb_flags) {
+ f->flags = fdb_flags;
+ f->updated = jiffies;
+ notify = 1;
+ }
+ }
+
+ if ((flags & NLM_F_REPLACE)) {
+ /* Only change unicasts */
+ if (!(is_multicast_ether_addr(f->eth_addr) ||
+ is_zero_ether_addr(f->eth_addr))) {
+ if (nhid) {
+ rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
+ if (rc < 0)
+ return rc;
+ } else {
+ /* oldrd receives a copy of the remote before it
+ * is overwritten, for rollback below.
+ */
+ rc = vxlan_fdb_replace(f, ip, port, vni,
+ ifindex, &oldrd);
+ }
+ notify |= rc;
+ } else {
+ NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries");
+ return -EOPNOTSUPP;
+ }
+ }
+ if ((flags & NLM_F_APPEND) &&
+ (is_multicast_ether_addr(f->eth_addr) ||
+ is_zero_ether_addr(f->eth_addr))) {
+ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+
+ if (rc < 0)
+ return rc;
+ notify |= rc;
+ }
+
+ if (ndm_flags & NTF_USE)
+ f->used = jiffies;
+
+ if (notify) {
+ /* No remote was appended: report the entry's first remote
+ * (NULL for a nexthop-backed entry).
+ */
+ if (rd == NULL)
+ rd = first_remote_rtnl(f);
+
+ err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
+ swdev_notify, extack);
+ if (err)
+ goto err_notify;
+ }
+
+ return 0;
+
+err_notify:
+ if (nhid)
+ return err;
+ /* Undo a remote replace by restoring the saved copy, or unlink a
+ * freshly appended remote and free it through call_rcu().
+ */
+ if ((flags & NLM_F_REPLACE) && rc)
+ *rd = oldrd;
+ else if ((flags & NLM_F_APPEND) && rc) {
+ list_del_rcu(&rd->list);
+ call_rcu(&rd->rcu, vxlan_dst_free);
+ }
+ return err;
+}
+
+/* Create a new fdb entry for @mac, insert it into the table and announce it.
+ *
+ * NLM_F_REPLACE in @flags is refused (-EOPNOTSUPP) for a multicast or
+ * all-zero @mac. NTF_USE is stripped from @ndm_flags before the entry is
+ * created. If the RTM_NEWNEIGH notification fails, the just-inserted entry is
+ * destroyed again without sending any notification.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
+ const u8 *mac, union vxlan_addr *ip,
+ __u16 state, __u16 flags,
+ __be16 port, __be32 src_vni, __be32 vni,
+ __u32 ifindex, __u16 ndm_flags, u32 nhid,
+ bool swdev_notify,
+ struct netlink_ext_ack *extack)
+{
+ __u16 fdb_flags = (ndm_flags & ~NTF_USE);
+ struct vxlan_fdb *f;
+ int rc;
+
+ /* Disallow replace to add a multicast entry */
+ if ((flags & NLM_F_REPLACE) &&
+ (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
+ return -EOPNOTSUPP;
+
+ netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
+ rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
+ vni, ifindex, fdb_flags, nhid, &f, extack);
+ if (rc < 0)
+ return rc;
+
+ vxlan_fdb_insert(vxlan, mac, src_vni, f);
+ rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH,
+ swdev_notify, extack);
+ if (rc)
+ goto err_notify;
+
+ return 0;
+
+err_notify:
+ vxlan_fdb_destroy(vxlan, f, false, false);
+ return rc;
+}
+
+/* Add new entry to forwarding table -- assumes lock held
+ *
+ * Looks up (@mac, @src_vni). An existing entry is updated through
+ * vxlan_fdb_update_existing(), unless NLM_F_EXCL is set in @flags, in which
+ * case -EEXIST is returned. A missing entry is created through
+ * vxlan_fdb_update_create() only when NLM_F_CREATE is set; otherwise -ENOENT
+ * is returned.
+ */
+static int vxlan_fdb_update(struct vxlan_dev *vxlan,
+ const u8 *mac, union vxlan_addr *ip,
+ __u16 state, __u16 flags,
+ __be16 port, __be32 src_vni, __be32 vni,
+ __u32 ifindex, __u16 ndm_flags, u32 nhid,
+ bool swdev_notify,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_fdb *f;
+
+ f = __vxlan_find_mac(vxlan, mac, src_vni);
+ if (f) {
+ if (flags & NLM_F_EXCL) {
+ netdev_dbg(vxlan->dev,
+ "lost race to create %pM\n", mac);
+ return -EEXIST;
+ }
+
+ return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
+ vni, ifindex, ndm_flags, f,
+ nhid, swdev_notify, extack);
+ } else {
+ if (!(flags & NLM_F_CREATE))
+ return -ENOENT;
+
+ return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
+ port, src_vni, vni, ifindex,
+ ndm_flags, nhid, swdev_notify,
+ extack);
+ }
+}
+
static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
- struct vxlan_rdst *rd)
+ struct vxlan_rdst *rd, bool swdev_notify)
{
list_del_rcu(&rd->list);
- vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
+ vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify, NULL);
call_rcu(&rd->rcu, vxlan_dst_free);
}
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
- __be32 *vni, u32 *ifindex)
+ __be32 *vni, u32 *ifindex, u32 *nhid)
{
struct net *net = dev_net(vxlan->dev);
int err;
+
+ if (tb[NDA_NH_ID] && (tb[NDA_DST] || tb[NDA_VNI] || tb[NDA_IFINDEX] ||
+ tb[NDA_PORT]))
+ return -EINVAL;
if (tb[NDA_DST]) {
err = vxlan_nla_get_addr(ip, tb[NDA_DST]);
@@ -818,6 +1205,7 @@
return err;
} else {
union vxlan_addr *remote = &vxlan->default_dst.remote_ip;
+
if (remote->sa.sa_family == AF_INET) {
ip->sin.sin_addr.s_addr = htonl(INADDR_ANY);
ip->sa.sa_family = AF_INET;
@@ -866,20 +1254,27 @@
*ifindex = 0;
}
+ if (tb[NDA_NH_ID])
+ *nhid = nla_get_u32(tb[NDA_NH_ID]);
+ else
+ *nhid = 0;
+
return 0;
}
/* Add static entry (via netlink) */
static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr, u16 vid, u16 flags)
+ const unsigned char *addr, u16 vid, u16 flags,
+ struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
/* struct net *net = dev_net(vxlan->dev); */
union vxlan_addr ip;
__be16 port;
__be32 src_vni, vni;
- u32 ifindex;
+ u32 ifindex, nhid;
+ u32 hash_index;
int err;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
@@ -888,20 +1283,24 @@
return -EINVAL;
}
- if (tb[NDA_DST] == NULL)
+ if (!tb || (!tb[NDA_DST] && !tb[NDA_NH_ID]))
return -EINVAL;
- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
+ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
+ &nhid);
if (err)
return err;
if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family)
return -EAFNOSUPPORT;
- spin_lock_bh(&vxlan->hash_lock);
+ hash_index = fdb_head_index(vxlan, addr, src_vni);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
- port, src_vni, vni, ifindex, ndm->ndm_flags);
- spin_unlock_bh(&vxlan->hash_lock);
+ port, src_vni, vni, ifindex,
+ ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
+ nhid, true, extack);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
return err;
}
@@ -909,10 +1308,10 @@
static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
const unsigned char *addr, union vxlan_addr ip,
__be16 port, __be32 src_vni, __be32 vni,
- u32 ifindex, u16 vid)
+ u32 ifindex, bool swdev_notify)
{
- struct vxlan_fdb *f;
struct vxlan_rdst *rd = NULL;
+ struct vxlan_fdb *f;
int err = -ENOENT;
f = vxlan_find_mac(vxlan, addr, src_vni);
@@ -929,11 +1328,11 @@
* otherwise destroy the fdb entry
*/
if (rd && !list_is_singular(&f->remotes)) {
- vxlan_fdb_dst_destroy(vxlan, f, rd);
+ vxlan_fdb_dst_destroy(vxlan, f, rd, swdev_notify);
goto out;
}
- vxlan_fdb_destroy(vxlan, f, true);
+ vxlan_fdb_destroy(vxlan, f, true, swdev_notify);
out:
return 0;
@@ -947,18 +1346,21 @@
struct vxlan_dev *vxlan = netdev_priv(dev);
union vxlan_addr ip;
__be32 src_vni, vni;
+ u32 ifindex, nhid;
+ u32 hash_index;
__be16 port;
- u32 ifindex;
int err;
- err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
+ err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
+ &nhid);
if (err)
return err;
- spin_lock_bh(&vxlan->hash_lock);
+ hash_index = fdb_head_index(vxlan, addr, src_vni);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex,
- vid);
- spin_unlock_bh(&vxlan->hash_lock);
+ true);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
return err;
}
@@ -978,6 +1380,23 @@
rcu_read_lock();
hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
struct vxlan_rdst *rd;
+
+ if (rcu_access_pointer(f->nh)) {
+ if (*idx < cb->args[2])
+ goto skip_nh;
+ err = vxlan_fdb_info(skb, vxlan, f,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH,
+ NLM_F_MULTI, NULL);
+ if (err < 0) {
+ rcu_read_unlock();
+ goto out;
+ }
+skip_nh:
+ *idx += 1;
+ continue;
+ }
list_for_each_entry_rcu(rd, &f->remotes, list) {
if (*idx < cb->args[2])
@@ -999,6 +1418,39 @@
rcu_read_unlock();
}
out:
+ return err;
+}
+
+/* Fill @skb with an RTM_NEWNEIGH message describing the fdb entry for @addr.
+ *
+ * The VNI used for the lookup is taken from NDA_VNI when that attribute is
+ * present, otherwise from the device's default remote VNI. @vid is not used
+ * by this function. The lookup and the message fill run under
+ * rcu_read_lock().
+ *
+ * Returns -ENOENT (with a message in @extack) when no entry matches,
+ * otherwise the result of vxlan_fdb_info().
+ */
+static int vxlan_fdb_get(struct sk_buff *skb,
+ struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr,
+ u16 vid, u32 portid, u32 seq,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_fdb *f;
+ __be32 vni;
+ int err;
+
+ if (tb[NDA_VNI])
+ vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
+ else
+ vni = vxlan->default_dst.remote_vni;
+
+ rcu_read_lock();
+
+ f = __vxlan_find_mac(vxlan, addr, vni);
+ if (!f) {
+ NL_SET_ERR_MSG(extack, "Fdb entry not found");
+ err = -ENOENT;
+ goto errout;
+ }
+
+ /* first_remote_rcu() yields NULL for a nexthop-backed entry. */
+ err = vxlan_fdb_info(skb, vxlan, f, portid, seq,
+ RTM_NEWNEIGH, 0, first_remote_rcu(f));
+errout:
+ rcu_read_unlock();
return err;
}
@@ -1032,6 +1484,10 @@
if (f->state & (NUD_PERMANENT | NUD_NOARP))
return true;
+ /* Don't override an fdb with nexthop with a learnt entry */
+ if (rcu_access_pointer(f->nh))
+ return true;
+
if (net_ratelimit())
netdev_info(dev,
"%pM migrated from %pIS to %pIS\n",
@@ -1039,10 +1495,12 @@
rdst->remote_ip = *src_ip;
f->updated = jiffies;
- vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH);
+ vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
} else {
+ u32 hash_index = fdb_head_index(vxlan, src_mac, vni);
+
/* learned new entry */
- spin_lock(&vxlan->hash_lock);
+ spin_lock(&vxlan->hash_lock[hash_index]);
/* close off race between vxlan_flush and incoming packets */
if (netif_running(dev))
@@ -1052,8 +1510,8 @@
vxlan->cfg.dst_port,
vni,
vxlan->default_dst.remote_vni,
- ifindex, NTF_SELF);
- spin_unlock(&vxlan->hash_lock);
+ ifindex, NTF_SELF, 0, true, NULL);
+ spin_unlock(&vxlan->hash_lock[hash_index]);
}
return false;
@@ -1368,7 +1826,6 @@
/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
{
- struct pcpu_sw_netstats *stats;
struct vxlan_dev *vxlan;
struct vxlan_sock *vs;
struct vxlanhdr unparsed;
@@ -1416,6 +1873,10 @@
if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
!net_eq(vxlan->net, dev_net(vxlan->dev))))
+ goto drop;
+
+ if (vs->flags & VXLAN_F_REMCSUM_RX)
+ if (unlikely(!vxlan_remcsum(&unparsed, skb, vs->flags)))
goto drop;
if (vxlan_collect_metadata(vs)) {
@@ -1434,9 +1895,6 @@
memset(md, 0, sizeof(*md));
}
- if (vs->flags & VXLAN_F_REMCSUM_RX)
- if (!vxlan_remcsum(&unparsed, skb, vs->flags))
- goto drop;
if (vs->flags & VXLAN_F_GBP)
vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
/* Note that GBP and GPE can never be active together. This is
@@ -1481,12 +1939,7 @@
goto drop;
}
- stats = this_cpu_ptr(vxlan->dev->tstats);
- u64_stats_update_begin(&stats->syncp);
- stats->rx_packets++;
- stats->rx_bytes += skb->len;
- u64_stats_update_end(&stats->syncp);
-
+ dev_sw_netstats_rx_add(vxlan->dev, skb->len);
gro_cells_receive(&vxlan->gro_cells, skb);
rcu_read_unlock();
@@ -1496,6 +1949,34 @@
drop:
/* Consume bad packet */
kfree_skb(skb);
+ return 0;
+}
+
+/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors
+ *
+ * Returns -EINVAL when the VXLAN header cannot be pulled or does not carry
+ * the VNI flag, -ENOENT when the socket has no vxlan_sock attached or no
+ * device matches the VNI on the skb's ifindex, and 0 when a device is found.
+ */
+static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
+{
+ struct vxlan_dev *vxlan;
+ struct vxlan_sock *vs;
+ struct vxlanhdr *hdr;
+ __be32 vni;
+
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + VXLAN_HLEN))
+ return -EINVAL;
+
+ hdr = vxlan_hdr(skb);
+
+ if (!(hdr->vx_flags & VXLAN_HF_VNI))
+ return -EINVAL;
+
+ vs = rcu_dereference_sk_user_data(sk);
+ if (!vs)
+ return -ENOENT;
+
+ vni = vxlan_vni(hdr->vx_vni);
+ vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
+ if (!vxlan)
+ return -ENOENT;
+
return 0;
}
@@ -1941,7 +2422,7 @@
fl4.fl4_sport = sport;
rt = ip_route_output_key(vxlan->net, &fl4);
- if (likely(!IS_ERR(rt))) {
+ if (!IS_ERR(rt)) {
if (rt->dst.dev == dev) {
netdev_dbg(dev, "circular route to %pI4\n", &daddr);
ip_rt_put(rt);
@@ -2017,7 +2498,8 @@
/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
- struct vxlan_dev *dst_vxlan, __be32 vni)
+ struct vxlan_dev *dst_vxlan, __be32 vni,
+ bool snoop)
{
struct pcpu_sw_netstats *tx_stats, *rx_stats;
union vxlan_addr loopback;
@@ -2049,7 +2531,7 @@
goto drop;
}
- if (dst_vxlan->cfg.flags & VXLAN_F_LEARN)
+ if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
u64_stats_update_begin(&tx_stats->syncp);
@@ -2098,7 +2580,7 @@
return -ENOENT;
}
- vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni);
+ vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
return 1;
}
@@ -2134,7 +2616,8 @@
if (vxlan_addr_any(dst)) {
if (did_rsc) {
/* short-circuited back to local bridge */
- vxlan_encap_bypass(skb, vxlan, vxlan, default_vni);
+ vxlan_encap_bypass(skb, vxlan, vxlan,
+ default_vni, true);
return;
}
goto drop;
@@ -2201,6 +2684,9 @@
struct rtable *rt;
__be16 df = 0;
+ if (!ifindex)
+ ifindex = sock4->sock->sk->sk_bound_dev_if;
+
rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
dst->sin.sin_addr.s_addr,
&local_ip.sin.sin_addr.s_addr,
@@ -2211,19 +2697,51 @@
goto tx_error;
}
- /* Bypass encapsulation if the destination is local */
if (!info) {
+ /* Bypass encapsulation if the destination is local */
err = encap_bypass_if_local(skb, dev, vxlan, dst,
dst_port, ifindex, vni,
&rt->dst, rt->rt_flags);
if (err)
goto out_unlock;
+
+ if (vxlan->cfg.df == VXLAN_DF_SET) {
+ df = htons(IP_DF);
+ } else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
+ struct ethhdr *eth = eth_hdr(skb);
+
+ if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
+ (ntohs(eth->h_proto) == ETH_P_IP &&
+ old_iph->frag_off & htons(IP_DF)))
+ df = htons(IP_DF);
+ }
} else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
df = htons(IP_DF);
}
ndst = &rt->dst;
- skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
+ err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ goto tx_error;
+ } else if (err) {
+ if (info) {
+ struct ip_tunnel_info *unclone;
+ struct in_addr src, dst;
+
+ unclone = skb_tunnel_info_unclone(skb);
+ if (unlikely(!unclone))
+ goto tx_error;
+
+ src = remote_ip.sin.sin_addr;
+ dst = local_ip.sin.sin_addr;
+ unclone->key.u.ipv4.src = src.s_addr;
+ unclone->key.u.ipv4.dst = dst.s_addr;
+ }
+ vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+ dst_release(ndst);
+ goto out_unlock;
+ }
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -2238,6 +2756,9 @@
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
+
+ if (!ifindex)
+ ifindex = sock6->sock->sk->sk_bound_dev_if;
ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
label, &dst->sin6.sin6_addr,
@@ -2260,7 +2781,29 @@
goto out_unlock;
}
- skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
+ err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ goto tx_error;
+ } else if (err) {
+ if (info) {
+ struct ip_tunnel_info *unclone;
+ struct in6_addr src, dst;
+
+ unclone = skb_tunnel_info_unclone(skb);
+ if (unlikely(!unclone))
+ goto tx_error;
+
+ src = remote_ip.sin6.sin6_addr;
+ dst = local_ip.sin6.sin6_addr;
+ unclone->key.u.ipv6.src = src;
+ unclone->key.u.ipv6.dst = dst;
+ }
+
+ vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+ dst_release(ndst);
+ goto out_unlock;
+ }
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
@@ -2294,6 +2837,38 @@
dst_release(ndst);
dev->stats.tx_errors++;
kfree_skb(skb);
+}
+
+static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
+ struct vxlan_fdb *f, __be32 vni, bool did_rsc)
+{
+ struct vxlan_rdst nh_rdst;
+ struct nexthop *nh;
+ bool do_xmit;
+ u32 hash;
+
+ memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
+ hash = skb_get_hash(skb);
+
+ rcu_read_lock();
+ nh = rcu_dereference(f->nh);
+ if (!nh) {
+ rcu_read_unlock();
+ goto drop;
+ }
+ do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
+ rcu_read_unlock();
+
+ if (likely(do_xmit))
+ vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc);
+ else
+ goto drop;
+
+ return;
+
+drop:
+ dev->stats.tx_dropped++;
+ dev_kfree_skb(skb);
}
/* Transmit local packets over Vxlan
@@ -2372,22 +2947,27 @@
}
}
- list_for_each_entry_rcu(rdst, &f->remotes, list) {
- struct sk_buff *skb1;
+ if (rcu_access_pointer(f->nh)) {
+ vxlan_xmit_nh(skb, dev, f,
+ (vni ? : vxlan->default_dst.remote_vni), did_rsc);
+ } else {
+ list_for_each_entry_rcu(rdst, &f->remotes, list) {
+ struct sk_buff *skb1;
- if (!fdst) {
- fdst = rdst;
- continue;
+ if (!fdst) {
+ fdst = rdst;
+ continue;
+ }
+ skb1 = skb_clone(skb, GFP_ATOMIC);
+ if (skb1)
+ vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
}
- skb1 = skb_clone(skb, GFP_ATOMIC);
- if (skb1)
- vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
+ if (fdst)
+ vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
+ else
+ kfree_skb(skb);
}
- if (fdst)
- vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
- else
- kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -2404,7 +2984,7 @@
for (h = 0; h < FDB_HASH_SIZE; ++h) {
struct hlist_node *p, *n;
- spin_lock_bh(&vxlan->hash_lock);
+ spin_lock(&vxlan->hash_lock[h]);
hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
struct vxlan_fdb *f
= container_of(p, struct vxlan_fdb, hlist);
@@ -2422,11 +3002,11 @@
"garbage collect %pM\n",
f->eth_addr);
f->state = NUD_STALE;
- vxlan_fdb_destroy(vxlan, f, true);
+ vxlan_fdb_destroy(vxlan, f, true, true);
} else if (time_before(timeout, next_timer))
next_timer = timeout;
}
- spin_unlock_bh(&vxlan->hash_lock);
+ spin_unlock(&vxlan->hash_lock[h]);
}
mod_timer(&vxlan->age_timer, next_timer);
@@ -2478,12 +3058,13 @@
static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
{
struct vxlan_fdb *f;
+ u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
- spin_lock_bh(&vxlan->hash_lock);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f)
- vxlan_fdb_destroy(vxlan, f, true);
- spin_unlock_bh(&vxlan->hash_lock);
+ vxlan_fdb_destroy(vxlan, f, true, true);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
}
static void vxlan_uninit(struct net_device *dev)
@@ -2528,20 +3109,23 @@
{
unsigned int h;
- spin_lock_bh(&vxlan->hash_lock);
for (h = 0; h < FDB_HASH_SIZE; ++h) {
struct hlist_node *p, *n;
+
+ spin_lock_bh(&vxlan->hash_lock[h]);
hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
struct vxlan_fdb *f
= container_of(p, struct vxlan_fdb, hlist);
if (!do_all && (f->state & (NUD_PERMANENT | NUD_NOARP)))
continue;
/* the all_zeros_mac entry is deleted at vxlan_uninit */
- if (!is_zero_ether_addr(f->eth_addr))
- vxlan_fdb_destroy(vxlan, f, true);
+ if (is_zero_ether_addr(f->eth_addr) &&
+ f->vni == vxlan->cfg.vni)
+ continue;
+ vxlan_fdb_destroy(vxlan, f, true, true);
}
+ spin_unlock_bh(&vxlan->hash_lock[h]);
}
- spin_unlock_bh(&vxlan->hash_lock);
}
/* Cleanup timer and forwarding table on shutdown */
@@ -2646,7 +3230,9 @@
.ndo_fdb_add = vxlan_fdb_add,
.ndo_fdb_del = vxlan_fdb_delete,
.ndo_fdb_dump = vxlan_fdb_dump,
+ .ndo_fdb_get = vxlan_fdb_get,
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
+ .ndo_change_proto_down = dev_change_proto_down_generic,
};
static const struct net_device_ops vxlan_netdev_raw_ops = {
@@ -2723,14 +3309,15 @@
dev->max_mtu = ETH_MAX_MTU;
INIT_LIST_HEAD(&vxlan->next);
- spin_lock_init(&vxlan->hash_lock);
timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE);
vxlan->dev = dev;
- for (h = 0; h < FDB_HASH_SIZE; ++h)
+ for (h = 0; h < FDB_HASH_SIZE; ++h) {
+ spin_lock_init(&vxlan->hash_lock[h]);
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
+ }
}
static void vxlan_ether_setup(struct net_device *dev)
@@ -2752,10 +3339,10 @@
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_ID] = { .type = NLA_U32 },
- [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_VXLAN_GROUP] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_LINK] = { .type = NLA_U32 },
- [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
+ [IFLA_VXLAN_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
[IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_TOS] = { .type = NLA_U8 },
[IFLA_VXLAN_TTL] = { .type = NLA_U8 },
@@ -2779,6 +3366,7 @@
[IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
[IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
+ [IFLA_VXLAN_DF] = { .type = NLA_U8 },
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -2818,7 +3406,7 @@
u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
if (id >= VXLAN_N_VID) {
- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_ID],
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_ID],
"VXLAN ID must be lower than 16777216");
return -ERANGE;
}
@@ -2829,8 +3417,18 @@
= nla_data(data[IFLA_VXLAN_PORT_RANGE]);
if (ntohs(p->high) < ntohs(p->low)) {
- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE],
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_PORT_RANGE],
"Invalid source port range");
+ return -EINVAL;
+ }
+ }
+
+ if (data[IFLA_VXLAN_DF]) {
+ enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);
+
+ if (df < 0 || df > VXLAN_DF_MAX) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_DF],
+ "Invalid DF attribute");
return -EINVAL;
}
}
@@ -2845,13 +3443,33 @@
strlcpy(drvinfo->driver, "vxlan", sizeof(drvinfo->driver));
}
+static int vxlan_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
+ dst->remote_ifindex);
+
+ if (!lowerdev) {
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ cmd->base.port = PORT_OTHER;
+ cmd->base.speed = SPEED_UNKNOWN;
+
+ return 0;
+ }
+
+ return __ethtool_get_link_ksettings(lowerdev, cmd);
+}
+
static const struct ethtool_ops vxlan_ethtool_ops = {
- .get_drvinfo = vxlan_get_drvinfo,
- .get_link = ethtool_op_get_link,
+ .get_drvinfo = vxlan_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_link_ksettings = vxlan_get_link_ksettings,
};
static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
- __be16 port, u32 flags)
+ __be16 port, u32 flags, int ifindex)
{
struct socket *sock;
struct udp_port_cfg udp_conf;
@@ -2869,6 +3487,7 @@
}
udp_conf.local_udp_port = port;
+ udp_conf.bind_ifindex = ifindex;
/* Open UDP socket */
err = udp_sock_create(net, &udp_conf, &sock);
@@ -2880,7 +3499,8 @@
/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
- __be16 port, u32 flags)
+ __be16 port, u32 flags,
+ int ifindex)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
@@ -2895,7 +3515,7 @@
for (h = 0; h < VNI_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vs->vni_list[h]);
- sock = vxlan_create_sock(net, ipv6, port, flags);
+ sock = vxlan_create_sock(net, ipv6, port, flags, ifindex);
if (IS_ERR(sock)) {
kfree(vs);
return ERR_CAST(sock);
@@ -2918,6 +3538,7 @@
tunnel_cfg.sk_user_data = vs;
tunnel_cfg.encap_type = 1;
tunnel_cfg.encap_rcv = vxlan_rcv;
+ tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
tunnel_cfg.encap_destroy = NULL;
tunnel_cfg.gro_receive = vxlan_gro_receive;
tunnel_cfg.gro_complete = vxlan_gro_complete;
@@ -2932,11 +3553,17 @@
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs = NULL;
struct vxlan_dev_node *node;
+ int l3mdev_index = 0;
+
+ if (vxlan->cfg.remote_ifindex)
+ l3mdev_index = l3mdev_master_upper_ifindex_by_index(
+ vxlan->net, vxlan->cfg.remote_ifindex);
if (!vxlan->cfg.no_share) {
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
- vxlan->cfg.dst_port, vxlan->cfg.flags);
+ vxlan->cfg.dst_port, vxlan->cfg.flags,
+ l3mdev_index);
if (vs && !refcount_inc_not_zero(&vs->refcnt)) {
spin_unlock(&vn->sock_lock);
return -EBUSY;
@@ -2945,7 +3572,8 @@
}
if (!vs)
vs = vxlan_socket_create(vxlan->net, ipv6,
- vxlan->cfg.dst_port, vxlan->cfg.flags);
+ vxlan->cfg.dst_port, vxlan->cfg.flags,
+ l3mdev_index);
if (IS_ERR(vs))
return PTR_ERR(vs);
#if IS_ENABLED(CONFIG_IPV6)
@@ -3230,10 +3858,13 @@
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct net_device *remote_dev = NULL;
struct vxlan_fdb *f = NULL;
bool unregister = false;
+ struct vxlan_rdst *dst;
int err;
+ dst = &vxlan->default_dst;
err = vxlan_dev_configure(net, dev, conf, false, extack);
if (err)
return err;
@@ -3241,15 +3872,15 @@
dev->ethtool_ops = &vxlan_ethtool_ops;
/* create an fdb entry for a valid default destination */
- if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
+ if (!vxlan_addr_any(&dst->remote_ip)) {
err = vxlan_fdb_create(vxlan, all_zeros_mac,
- &vxlan->default_dst.remote_ip,
+ &dst->remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
vxlan->cfg.dst_port,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_ifindex,
- NTF_SELF, &f);
+ dst->remote_vni,
+ dst->remote_vni,
+ dst->remote_ifindex,
+ NTF_SELF, 0, &f, extack);
if (err)
return err;
}
@@ -3259,34 +3890,90 @@
goto errout;
unregister = true;
- err = rtnl_configure_link(dev, NULL);
- if (err)
- goto errout;
+ if (dst->remote_ifindex) {
+ remote_dev = __dev_get_by_index(net, dst->remote_ifindex);
+ if (!remote_dev) {
+ err = -ENODEV;
+ goto errout;
+ }
- /* notify default fdb entry */
- if (f)
- vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
+ err = netdev_upper_dev_link(remote_dev, dev, extack);
+ if (err)
+ goto errout;
+ }
+
+ err = rtnl_configure_link(dev, NULL);
+ if (err < 0)
+ goto unlink;
+
+ if (f) {
+ vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f);
+
+ /* notify default fdb entry */
+ err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
+ RTM_NEWNEIGH, true, extack);
+ if (err) {
+ vxlan_fdb_destroy(vxlan, f, false, false);
+ if (remote_dev)
+ netdev_upper_dev_unlink(remote_dev, dev);
+ goto unregister;
+ }
+ }
list_add(&vxlan->next, &vn->vxlan_list);
+ if (remote_dev)
+ dst->remote_dev = remote_dev;
return 0;
-
+unlink:
+ if (remote_dev)
+ netdev_upper_dev_unlink(remote_dev, dev);
errout:
/* unregister_netdevice() destroys the default FDB entry with deletion
* notification. But the addition notification was not sent yet, so
* destroy the entry by hand here.
*/
if (f)
- vxlan_fdb_destroy(vxlan, f, false);
+ __vxlan_fdb_free(f);
+unregister:
if (unregister)
unregister_netdevice(dev);
return err;
}
+/* Set/clear flags based on attribute */
+static int vxlan_nl2flag(struct vxlan_config *conf, struct nlattr *tb[],
+ int attrtype, unsigned long mask, bool changelink,
+ bool changelink_supported,
+ struct netlink_ext_ack *extack)
+{
+ unsigned long flags;
+
+ if (!tb[attrtype])
+ return 0;
+
+ if (changelink && !changelink_supported) {
+ vxlan_flag_attr_error(attrtype, extack);
+ return -EOPNOTSUPP;
+ }
+
+ if (vxlan_policy[attrtype].type == NLA_FLAG)
+ flags = conf->flags | mask;
+ else if (nla_get_u8(tb[attrtype]))
+ flags = conf->flags | mask;
+ else
+ flags = conf->flags & ~mask;
+
+ conf->flags = flags;
+
+ return 0;
+}
+
static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
struct net_device *dev, struct vxlan_config *conf,
- bool changelink)
+ bool changelink, struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ int err = 0;
memset(conf, 0, sizeof(*conf));
@@ -3297,40 +3984,54 @@
if (data[IFLA_VXLAN_ID]) {
__be32 vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
- if (changelink && (vni != conf->vni))
+ if (changelink && (vni != conf->vni)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_ID], "Cannot change VNI");
return -EOPNOTSUPP;
+ }
conf->vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
}
if (data[IFLA_VXLAN_GROUP]) {
- if (changelink && (conf->remote_ip.sa.sa_family != AF_INET))
+ if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_GROUP], "New group address family does not match old group");
return -EOPNOTSUPP;
+ }
conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
conf->remote_ip.sa.sa_family = AF_INET;
} else if (data[IFLA_VXLAN_GROUP6]) {
- if (!IS_ENABLED(CONFIG_IPV6))
+ if (!IS_ENABLED(CONFIG_IPV6)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");
return -EPFNOSUPPORT;
+ }
- if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6))
+ if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_GROUP6], "New group address family does not match old group");
return -EOPNOTSUPP;
+ }
conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
conf->remote_ip.sa.sa_family = AF_INET6;
}
if (data[IFLA_VXLAN_LOCAL]) {
- if (changelink && (conf->saddr.sa.sa_family != AF_INET))
+ if (changelink && (conf->saddr.sa.sa_family != AF_INET)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_LOCAL], "New local address family does not match old");
return -EOPNOTSUPP;
+ }
conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
conf->saddr.sa.sa_family = AF_INET;
} else if (data[IFLA_VXLAN_LOCAL6]) {
- if (!IS_ENABLED(CONFIG_IPV6))
+ if (!IS_ENABLED(CONFIG_IPV6)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_LOCAL6], "IPv6 support not enabled in the kernel");
return -EPFNOSUPPORT;
+ }
- if (changelink && (conf->saddr.sa.sa_family != AF_INET6))
+ if (changelink && (conf->saddr.sa.sa_family != AF_INET6)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_LOCAL6], "New local address family does not match old");
return -EOPNOTSUPP;
+ }
/* TODO: respect scope id */
conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
@@ -3347,9 +4048,12 @@
conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
if (data[IFLA_VXLAN_TTL_INHERIT]) {
- if (changelink)
- return -EOPNOTSUPP;
- conf->flags |= VXLAN_F_TTL_INHERIT;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_TTL_INHERIT,
+ VXLAN_F_TTL_INHERIT, changelink, false,
+ extack);
+ if (err)
+ return err;
+
}
if (data[IFLA_VXLAN_LABEL])
@@ -3357,60 +4061,66 @@
IPV6_FLOWLABEL_MASK;
if (data[IFLA_VXLAN_LEARNING]) {
- if (nla_get_u8(data[IFLA_VXLAN_LEARNING]))
- conf->flags |= VXLAN_F_LEARN;
- else
- conf->flags &= ~VXLAN_F_LEARN;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
+ VXLAN_F_LEARN, changelink, true,
+ extack);
+ if (err)
+ return err;
} else if (!changelink) {
/* default to learn on a new device */
conf->flags |= VXLAN_F_LEARN;
}
- if (data[IFLA_VXLAN_AGEING]) {
- if (changelink)
- return -EOPNOTSUPP;
+ if (data[IFLA_VXLAN_AGEING])
conf->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
- }
if (data[IFLA_VXLAN_PROXY]) {
- if (changelink)
- return -EOPNOTSUPP;
- if (nla_get_u8(data[IFLA_VXLAN_PROXY]))
- conf->flags |= VXLAN_F_PROXY;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_PROXY,
+ VXLAN_F_PROXY, changelink, false,
+ extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_RSC]) {
- if (changelink)
- return -EOPNOTSUPP;
- if (nla_get_u8(data[IFLA_VXLAN_RSC]))
- conf->flags |= VXLAN_F_RSC;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_RSC,
+ VXLAN_F_RSC, changelink, false,
+ extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_L2MISS]) {
- if (changelink)
- return -EOPNOTSUPP;
- if (nla_get_u8(data[IFLA_VXLAN_L2MISS]))
- conf->flags |= VXLAN_F_L2MISS;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L2MISS,
+ VXLAN_F_L2MISS, changelink, false,
+ extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_L3MISS]) {
- if (changelink)
- return -EOPNOTSUPP;
- if (nla_get_u8(data[IFLA_VXLAN_L3MISS]))
- conf->flags |= VXLAN_F_L3MISS;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L3MISS,
+ VXLAN_F_L3MISS, changelink, false,
+ extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_LIMIT]) {
- if (changelink)
+ if (changelink) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_LIMIT],
+ "Cannot change limit");
return -EOPNOTSUPP;
+ }
conf->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
}
if (data[IFLA_VXLAN_COLLECT_METADATA]) {
- if (changelink)
- return -EOPNOTSUPP;
- if (nla_get_u8(data[IFLA_VXLAN_COLLECT_METADATA]))
- conf->flags |= VXLAN_F_COLLECT_METADATA;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_COLLECT_METADATA,
+ VXLAN_F_COLLECT_METADATA, changelink, false,
+ extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_PORT_RANGE]) {
@@ -3420,74 +4130,97 @@
conf->port_min = ntohs(p->low);
conf->port_max = ntohs(p->high);
} else {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_PORT_RANGE],
+ "Cannot change port range");
return -EOPNOTSUPP;
}
}
if (data[IFLA_VXLAN_PORT]) {
- if (changelink)
+ if (changelink) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_PORT],
+ "Cannot change port");
return -EOPNOTSUPP;
+ }
conf->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
}
if (data[IFLA_VXLAN_UDP_CSUM]) {
- if (changelink)
+ if (changelink) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_UDP_CSUM],
+ "Cannot change UDP_CSUM flag");
return -EOPNOTSUPP;
+ }
if (!nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
}
if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
- if (changelink)
- return -EOPNOTSUPP;
- if (nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
- conf->flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+ VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
+ false, extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]) {
- if (changelink)
- return -EOPNOTSUPP;
- if (nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
- conf->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+ VXLAN_F_UDP_ZERO_CSUM6_RX, changelink,
+ false, extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_REMCSUM_TX]) {
- if (changelink)
- return -EOPNOTSUPP;
- if (nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
- conf->flags |= VXLAN_F_REMCSUM_TX;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_TX,
+ VXLAN_F_REMCSUM_TX, changelink, false,
+ extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_REMCSUM_RX]) {
- if (changelink)
- return -EOPNOTSUPP;
- if (nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
- conf->flags |= VXLAN_F_REMCSUM_RX;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_RX,
+ VXLAN_F_REMCSUM_RX, changelink, false,
+ extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_GBP]) {
- if (changelink)
- return -EOPNOTSUPP;
- conf->flags |= VXLAN_F_GBP;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GBP,
+ VXLAN_F_GBP, changelink, false, extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_GPE]) {
- if (changelink)
- return -EOPNOTSUPP;
- conf->flags |= VXLAN_F_GPE;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GPE,
+ VXLAN_F_GPE, changelink, false,
+ extack);
+ if (err)
+ return err;
}
if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) {
- if (changelink)
- return -EOPNOTSUPP;
- conf->flags |= VXLAN_F_REMCSUM_NOPARTIAL;
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL,
+ VXLAN_F_REMCSUM_NOPARTIAL, changelink,
+ false, extack);
+ if (err)
+ return err;
}
if (tb[IFLA_MTU]) {
- if (changelink)
+ if (changelink) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
+ "Cannot change mtu");
return -EOPNOTSUPP;
+ }
conf->mtu = nla_get_u32(tb[IFLA_MTU]);
}
+
+ if (data[IFLA_VXLAN_DF])
+ conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
return 0;
}
@@ -3499,7 +4232,7 @@
struct vxlan_config conf;
int err;
- err = vxlan_nl2conf(tb, data, dev, &conf, false);
+ err = vxlan_nl2conf(tb, data, dev, &conf, false, extack);
if (err)
return err;
@@ -3511,51 +4244,68 @@
struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_rdst *dst = &vxlan->default_dst;
- struct vxlan_rdst old_dst;
+ struct net_device *lowerdev;
struct vxlan_config conf;
+ struct vxlan_rdst *dst;
int err;
- err = vxlan_nl2conf(tb, data,
- dev, &conf, true);
+ dst = &vxlan->default_dst;
+ err = vxlan_nl2conf(tb, data, dev, &conf, true, extack);
if (err)
return err;
- memcpy(&old_dst, dst, sizeof(struct vxlan_rdst));
+ err = vxlan_config_validate(vxlan->net, &conf, &lowerdev,
+ vxlan, extack);
+ if (err)
+ return err;
- err = vxlan_dev_configure(vxlan->net, dev, &conf, true, extack);
+ if (dst->remote_dev == lowerdev)
+ lowerdev = NULL;
+
+ err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev,
+ extack);
if (err)
return err;
/* handle default dst entry */
- if (!vxlan_addr_equal(&dst->remote_ip, &old_dst.remote_ip)) {
- spin_lock_bh(&vxlan->hash_lock);
- if (!vxlan_addr_any(&old_dst.remote_ip))
- __vxlan_fdb_delete(vxlan, all_zeros_mac,
- old_dst.remote_ip,
- vxlan->cfg.dst_port,
- old_dst.remote_vni,
- old_dst.remote_vni,
- old_dst.remote_ifindex, 0);
+ if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
+ u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
- if (!vxlan_addr_any(&dst->remote_ip)) {
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ if (!vxlan_addr_any(&conf.remote_ip)) {
err = vxlan_fdb_update(vxlan, all_zeros_mac,
- &dst->remote_ip,
+ &conf.remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
NLM_F_APPEND | NLM_F_CREATE,
vxlan->cfg.dst_port,
- dst->remote_vni,
- dst->remote_vni,
- dst->remote_ifindex,
- NTF_SELF);
+ conf.vni, conf.vni,
+ conf.remote_ifindex,
+ NTF_SELF, 0, true, extack);
if (err) {
- spin_unlock_bh(&vxlan->hash_lock);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ netdev_adjacent_change_abort(dst->remote_dev,
+ lowerdev, dev);
return err;
}
}
- spin_unlock_bh(&vxlan->hash_lock);
+ if (!vxlan_addr_any(&dst->remote_ip))
+ __vxlan_fdb_delete(vxlan, all_zeros_mac,
+ dst->remote_ip,
+ vxlan->cfg.dst_port,
+ dst->remote_vni,
+ dst->remote_vni,
+ dst->remote_ifindex,
+ true);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
}
+ if (conf.age_interval != vxlan->cfg.age_interval)
+ mod_timer(&vxlan->age_timer, jiffies);
+
+ netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev);
+ if (lowerdev && lowerdev != dst->remote_dev)
+ dst->remote_dev = lowerdev;
+ vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
return 0;
}
@@ -3567,6 +4317,8 @@
list_del(&vxlan->next);
unregister_netdevice_queue(dev, head);
+ if (vxlan->default_dst.remote_dev)
+ netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev);
}
static size_t vxlan_get_size(const struct net_device *dev)
@@ -3579,6 +4331,7 @@
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
+ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
@@ -3645,32 +4398,33 @@
nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
!!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
+ nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
- !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
nla_put_u8(skb, IFLA_VXLAN_PROXY,
- !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
nla_put_u8(skb, IFLA_VXLAN_RSC,
!!(vxlan->cfg.flags & VXLAN_F_RSC)) ||
nla_put_u8(skb, IFLA_VXLAN_L2MISS,
- !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
nla_put_u8(skb, IFLA_VXLAN_L3MISS,
- !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA,
!!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) ||
nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||
nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
- !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
+ !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
- !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
- !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
- !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
- !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
+ !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
goto nla_put_failure;
if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
@@ -3727,7 +4481,7 @@
memset(&tb, 0, sizeof(tb));
dev = rtnl_create_link(net, name, name_assign_type,
- &vxlan_link_ops, tb);
+ &vxlan_link_ops, tb, NULL);
if (IS_ERR(dev))
return dev;
@@ -3779,10 +4533,12 @@
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
if (event == NETDEV_UNREGISTER) {
- vxlan_offload_rx_ports(dev, false);
+ if (!dev->udp_tunnel_nic_info)
+ vxlan_offload_rx_ports(dev, false);
vxlan_handle_lowerdev_unregister(vn, dev);
} else if (event == NETDEV_REGISTER) {
- vxlan_offload_rx_ports(dev, true);
+ if (!dev->udp_tunnel_nic_info)
+ vxlan_offload_rx_ports(dev, true);
} else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
event == NETDEV_UDP_TUNNEL_DROP_INFO) {
vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
@@ -3793,6 +4549,169 @@
static struct notifier_block vxlan_notifier_block __read_mostly = {
.notifier_call = vxlan_netdevice_event,
+};
+
+static void
+vxlan_fdb_offloaded_set(struct net_device *dev,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst *rdst;
+ struct vxlan_fdb *f;
+ u32 hash_index;
+
+ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
+
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
+
+ f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
+ if (!f)
+ goto out;
+
+ rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip,
+ fdb_info->remote_port,
+ fdb_info->remote_vni,
+ fdb_info->remote_ifindex);
+ if (!rdst)
+ goto out;
+
+ rdst->offloaded = fdb_info->offloaded;
+
+out:
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+}
+
+static int
+vxlan_fdb_external_learn_add(struct net_device *dev,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct netlink_ext_ack *extack;
+ u32 hash_index;
+ int err;
+
+ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
+ extack = switchdev_notifier_info_to_extack(&fdb_info->info);
+
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
+ NUD_REACHABLE,
+ NLM_F_CREATE | NLM_F_REPLACE,
+ fdb_info->remote_port,
+ fdb_info->vni,
+ fdb_info->remote_vni,
+ fdb_info->remote_ifindex,
+ NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
+ 0, false, extack);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+
+ return err;
+}
+
+static int
+vxlan_fdb_external_learn_del(struct net_device *dev,
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_fdb *f;
+ u32 hash_index;
+ int err = 0;
+
+ hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
+
+ f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
+ if (!f)
+ err = -ENOENT;
+ else if (f->flags & NTF_EXT_LEARNED)
+ err = __vxlan_fdb_delete(vxlan, fdb_info->eth_addr,
+ fdb_info->remote_ip,
+ fdb_info->remote_port,
+ fdb_info->vni,
+ fdb_info->remote_vni,
+ fdb_info->remote_ifindex,
+ false);
+
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+
+ return err;
+}
+
+static int vxlan_switchdev_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_vxlan_fdb_info *fdb_info;
+ int err = 0;
+
+ switch (event) {
+ case SWITCHDEV_VXLAN_FDB_OFFLOADED:
+ vxlan_fdb_offloaded_set(dev, ptr);
+ break;
+ case SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE:
+ fdb_info = ptr;
+ err = vxlan_fdb_external_learn_add(dev, fdb_info);
+ if (err) {
+ err = notifier_from_errno(err);
+ break;
+ }
+ fdb_info->offloaded = true;
+ vxlan_fdb_offloaded_set(dev, fdb_info);
+ break;
+ case SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE:
+ fdb_info = ptr;
+ err = vxlan_fdb_external_learn_del(dev, fdb_info);
+ if (err) {
+ err = notifier_from_errno(err);
+ break;
+ }
+ fdb_info->offloaded = false;
+ vxlan_fdb_offloaded_set(dev, fdb_info);
+ break;
+ }
+
+ return err;
+}
+
+static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
+ .notifier_call = vxlan_switchdev_event,
+};
+
+static void vxlan_fdb_nh_flush(struct nexthop *nh)
+{
+ struct vxlan_fdb *fdb;
+ struct vxlan_dev *vxlan;
+ u32 hash_index;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(fdb, &nh->fdb_list, nh_list) {
+ vxlan = rcu_dereference(fdb->vdev);
+ WARN_ON(!vxlan);
+ hash_index = fdb_head_index(vxlan, fdb->eth_addr,
+ vxlan->default_dst.remote_vni);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ if (!hlist_unhashed(&fdb->hlist))
+ vxlan_fdb_destroy(vxlan, fdb, false, false);
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ }
+ rcu_read_unlock();
+}
+
+static int vxlan_nexthop_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct nexthop *nh = ptr;
+
+ if (!nh || event != NEXTHOP_EVENT_DEL)
+ return NOTIFY_DONE;
+
+ vxlan_fdb_nh_flush(nh);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block vxlan_nexthop_notifier_block __read_mostly = {
+ .notifier_call = vxlan_nexthop_event,
};
static __net_init int vxlan_init_net(struct net *net)
@@ -3806,7 +4725,7 @@
for (h = 0; h < PORT_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vn->sock_list[h]);
- return 0;
+ return register_nexthop_notifier(net, &vxlan_nexthop_notifier_block);
}
static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
@@ -3836,6 +4755,8 @@
unsigned int h;
rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list)
+ unregister_nexthop_notifier(net, &vxlan_nexthop_notifier_block);
list_for_each_entry(net, net_list, exit_list)
vxlan_destroy_tunnels(net, &list);
@@ -3871,11 +4792,17 @@
if (rc)
goto out2;
- rc = rtnl_link_register(&vxlan_link_ops);
+ rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block);
if (rc)
goto out3;
+ rc = rtnl_link_register(&vxlan_link_ops);
+ if (rc)
+ goto out4;
+
return 0;
+out4:
+ unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
out3:
unregister_netdevice_notifier(&vxlan_notifier_block);
out2:
@@ -3888,6 +4815,7 @@
static void __exit vxlan_cleanup_module(void)
{
rtnl_link_unregister(&vxlan_link_ops);
+ unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
unregister_netdevice_notifier(&vxlan_notifier_block);
unregister_pernet_subsys(&vxlan_net_ops);
/* rcu_barrier() is called by netns */
--
Gitblit v1.6.2