From 072de836f53be56a70cecf70b43ae43b7ce17376 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 10:08:36 +0000
Subject: [PATCH] mk-rootfs.sh
---
kernel/net/xfrm/xfrm_state.c | 703 +++++++++++++++++++++++++++++++++++++---------------------
1 files changed, 451 insertions(+), 252 deletions(-)
diff --git a/kernel/net/xfrm/xfrm_state.c b/kernel/net/xfrm/xfrm_state.c
index d739955..5f4b51e 100644
--- a/kernel/net/xfrm/xfrm_state.c
+++ b/kernel/net/xfrm/xfrm_state.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* xfrm_state.c
*
@@ -26,6 +27,8 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <crypto/aead.h>
+
#include "xfrm_hash.h"
#define xfrm_state_deref_prot(table, net) \
@@ -41,7 +44,6 @@
*/
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
-static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
static struct kmem_cache *xfrm_state_cache __ro_after_init;
static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
@@ -137,7 +139,7 @@
}
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- write_seqcount_begin(&xfrm_state_hash_generation);
+ write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
@@ -153,7 +155,7 @@
rcu_assign_pointer(net->xfrm.state_byspi, nspi);
net->xfrm.state_hmask = nhashmask;
- write_seqcount_end(&xfrm_state_hash_generation);
+ write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
osize = (ohashmask + 1) * sizeof(struct hlist_head);
@@ -173,66 +175,135 @@
int __xfrm_state_delete(struct xfrm_state *x);
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
-bool km_is_alive(const struct km_event *c);
+static bool km_is_alive(const struct km_event *c);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
-static DEFINE_SPINLOCK(xfrm_type_lock);
int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
{
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- const struct xfrm_type **typemap;
int err = 0;
- if (unlikely(afinfo == NULL))
+ if (!afinfo)
return -EAFNOSUPPORT;
- typemap = afinfo->type_map;
- spin_lock_bh(&xfrm_type_lock);
- if (likely(typemap[type->proto] == NULL))
- typemap[type->proto] = type;
- else
- err = -EEXIST;
- spin_unlock_bh(&xfrm_type_lock);
+#define X(afi, T, name) do { \
+ WARN_ON((afi)->type_ ## name); \
+ (afi)->type_ ## name = (T); \
+ } while (0)
+
+ switch (type->proto) {
+ case IPPROTO_COMP:
+ X(afinfo, type, comp);
+ break;
+ case IPPROTO_AH:
+ X(afinfo, type, ah);
+ break;
+ case IPPROTO_ESP:
+ X(afinfo, type, esp);
+ break;
+ case IPPROTO_IPIP:
+ X(afinfo, type, ipip);
+ break;
+ case IPPROTO_DSTOPTS:
+ X(afinfo, type, dstopts);
+ break;
+ case IPPROTO_ROUTING:
+ X(afinfo, type, routing);
+ break;
+ case IPPROTO_IPV6:
+ X(afinfo, type, ipip6);
+ break;
+ default:
+ WARN_ON(1);
+ err = -EPROTONOSUPPORT;
+ break;
+ }
+#undef X
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(xfrm_register_type);
-int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
+void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
{
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- const struct xfrm_type **typemap;
- int err = 0;
if (unlikely(afinfo == NULL))
- return -EAFNOSUPPORT;
- typemap = afinfo->type_map;
- spin_lock_bh(&xfrm_type_lock);
+ return;
- if (unlikely(typemap[type->proto] != type))
- err = -ENOENT;
- else
- typemap[type->proto] = NULL;
- spin_unlock_bh(&xfrm_type_lock);
+#define X(afi, T, name) do { \
+ WARN_ON((afi)->type_ ## name != (T)); \
+ (afi)->type_ ## name = NULL; \
+ } while (0)
+
+ switch (type->proto) {
+ case IPPROTO_COMP:
+ X(afinfo, type, comp);
+ break;
+ case IPPROTO_AH:
+ X(afinfo, type, ah);
+ break;
+ case IPPROTO_ESP:
+ X(afinfo, type, esp);
+ break;
+ case IPPROTO_IPIP:
+ X(afinfo, type, ipip);
+ break;
+ case IPPROTO_DSTOPTS:
+ X(afinfo, type, dstopts);
+ break;
+ case IPPROTO_ROUTING:
+ X(afinfo, type, routing);
+ break;
+ case IPPROTO_IPV6:
+ X(afinfo, type, ipip6);
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+#undef X
rcu_read_unlock();
- return err;
}
EXPORT_SYMBOL(xfrm_unregister_type);
static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
+ const struct xfrm_type *type = NULL;
struct xfrm_state_afinfo *afinfo;
- const struct xfrm_type **typemap;
- const struct xfrm_type *type;
int modload_attempted = 0;
retry:
afinfo = xfrm_state_get_afinfo(family);
if (unlikely(afinfo == NULL))
return NULL;
- typemap = afinfo->type_map;
- type = READ_ONCE(typemap[proto]);
+ switch (proto) {
+ case IPPROTO_COMP:
+ type = afinfo->type_comp;
+ break;
+ case IPPROTO_AH:
+ type = afinfo->type_ah;
+ break;
+ case IPPROTO_ESP:
+ type = afinfo->type_esp;
+ break;
+ case IPPROTO_IPIP:
+ type = afinfo->type_ipip;
+ break;
+ case IPPROTO_DSTOPTS:
+ type = afinfo->type_dstopts;
+ break;
+ case IPPROTO_ROUTING:
+ type = afinfo->type_routing;
+ break;
+ case IPPROTO_IPV6:
+ type = afinfo->type_ipip6;
+ break;
+ default:
+ break;
+ }
+
if (unlikely(type && !try_module_get(type->owner)))
type = NULL;
@@ -252,65 +323,71 @@
module_put(type->owner);
}
-static DEFINE_SPINLOCK(xfrm_type_offload_lock);
int xfrm_register_type_offload(const struct xfrm_type_offload *type,
unsigned short family)
{
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- const struct xfrm_type_offload **typemap;
int err = 0;
if (unlikely(afinfo == NULL))
return -EAFNOSUPPORT;
- typemap = afinfo->type_offload_map;
- spin_lock_bh(&xfrm_type_offload_lock);
- if (likely(typemap[type->proto] == NULL))
- typemap[type->proto] = type;
- else
- err = -EEXIST;
- spin_unlock_bh(&xfrm_type_offload_lock);
+ switch (type->proto) {
+ case IPPROTO_ESP:
+ WARN_ON(afinfo->type_offload_esp);
+ afinfo->type_offload_esp = type;
+ break;
+ default:
+ WARN_ON(1);
+ err = -EPROTONOSUPPORT;
+ break;
+ }
+
rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(xfrm_register_type_offload);
-int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
- unsigned short family)
+void xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
+ unsigned short family)
{
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- const struct xfrm_type_offload **typemap;
- int err = 0;
if (unlikely(afinfo == NULL))
- return -EAFNOSUPPORT;
- typemap = afinfo->type_offload_map;
- spin_lock_bh(&xfrm_type_offload_lock);
+ return;
- if (unlikely(typemap[type->proto] != type))
- err = -ENOENT;
- else
- typemap[type->proto] = NULL;
- spin_unlock_bh(&xfrm_type_offload_lock);
+ switch (type->proto) {
+ case IPPROTO_ESP:
+ WARN_ON(afinfo->type_offload_esp != type);
+ afinfo->type_offload_esp = NULL;
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
rcu_read_unlock();
- return err;
}
EXPORT_SYMBOL(xfrm_unregister_type_offload);
static const struct xfrm_type_offload *
xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
{
+ const struct xfrm_type_offload *type = NULL;
struct xfrm_state_afinfo *afinfo;
- const struct xfrm_type_offload **typemap;
- const struct xfrm_type_offload *type;
retry:
afinfo = xfrm_state_get_afinfo(family);
if (unlikely(afinfo == NULL))
return NULL;
- typemap = afinfo->type_offload_map;
- type = typemap[proto];
+ switch (proto) {
+ case IPPROTO_ESP:
+ type = afinfo->type_offload_esp;
+ break;
+ default:
+ break;
+ }
+
if ((type && !try_module_get(type->owner)))
type = NULL;
@@ -330,100 +407,67 @@
module_put(type->owner);
}
-static DEFINE_SPINLOCK(xfrm_mode_lock);
-int xfrm_register_mode(struct xfrm_mode *mode, int family)
+static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = { /* IPv4 mode descriptors indexed by encap; modes not listed here stay zero-initialized, so the .family check in xfrm_get_mode() rejects them */
+ [XFRM_MODE_BEET] = {
+ .encap = XFRM_MODE_BEET,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+ .family = AF_INET,
+ },
+ [XFRM_MODE_TRANSPORT] = {
+ .encap = XFRM_MODE_TRANSPORT,
+ .family = AF_INET,
+ },
+ [XFRM_MODE_TUNNEL] = {
+ .encap = XFRM_MODE_TUNNEL,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+ .family = AF_INET,
+ },
+};
+
+static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = { /* IPv6 mode descriptors indexed by encap; modes not listed here stay zero-initialized, so the .family check in xfrm_get_mode() rejects them */
+ [XFRM_MODE_BEET] = {
+ .encap = XFRM_MODE_BEET,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+ .family = AF_INET6,
+ },
+ [XFRM_MODE_ROUTEOPTIMIZATION] = { /* present only in the IPv6 table */
+ .encap = XFRM_MODE_ROUTEOPTIMIZATION,
+ .family = AF_INET6,
+ },
+ [XFRM_MODE_TRANSPORT] = {
+ .encap = XFRM_MODE_TRANSPORT,
+ .family = AF_INET6,
+ },
+ [XFRM_MODE_TUNNEL] = {
+ .encap = XFRM_MODE_TUNNEL,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+ .family = AF_INET6,
+ },
+};
+
+static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
{
- struct xfrm_state_afinfo *afinfo;
- struct xfrm_mode **modemap;
- int err;
-
- if (unlikely(mode->encap >= XFRM_MODE_MAX))
- return -EINVAL;
-
- afinfo = xfrm_state_get_afinfo(family);
- if (unlikely(afinfo == NULL))
- return -EAFNOSUPPORT;
-
- err = -EEXIST;
- modemap = afinfo->mode_map;
- spin_lock_bh(&xfrm_mode_lock);
- if (modemap[mode->encap])
- goto out;
-
- err = -ENOENT;
- if (!try_module_get(afinfo->owner))
- goto out;
-
- mode->afinfo = afinfo;
- modemap[mode->encap] = mode;
- err = 0;
-
-out:
- spin_unlock_bh(&xfrm_mode_lock);
- rcu_read_unlock();
- return err;
-}
-EXPORT_SYMBOL(xfrm_register_mode);
-
-int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
-{
- struct xfrm_state_afinfo *afinfo;
- struct xfrm_mode **modemap;
- int err;
-
- if (unlikely(mode->encap >= XFRM_MODE_MAX))
- return -EINVAL;
-
- afinfo = xfrm_state_get_afinfo(family);
- if (unlikely(afinfo == NULL))
- return -EAFNOSUPPORT;
-
- err = -ENOENT;
- modemap = afinfo->mode_map;
- spin_lock_bh(&xfrm_mode_lock);
- if (likely(modemap[mode->encap] == mode)) {
- modemap[mode->encap] = NULL;
- module_put(mode->afinfo->owner);
- err = 0;
- }
-
- spin_unlock_bh(&xfrm_mode_lock);
- rcu_read_unlock();
- return err;
-}
-EXPORT_SYMBOL(xfrm_unregister_mode);
-
-static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
-{
- struct xfrm_state_afinfo *afinfo;
- struct xfrm_mode *mode;
- int modload_attempted = 0;
+ const struct xfrm_mode *mode;
if (unlikely(encap >= XFRM_MODE_MAX))
return NULL;
-retry:
- afinfo = xfrm_state_get_afinfo(family);
- if (unlikely(afinfo == NULL))
- return NULL;
-
- mode = READ_ONCE(afinfo->mode_map[encap]);
- if (unlikely(mode && !try_module_get(mode->owner)))
- mode = NULL;
-
- rcu_read_unlock();
- if (!mode && !modload_attempted) {
- request_module("xfrm-mode-%d-%d", family, encap);
- modload_attempted = 1;
- goto retry;
+ switch (family) {
+ case AF_INET:
+ mode = &xfrm4_mode_map[encap];
+ if (mode->family == family)
+ return mode;
+ break;
+ case AF_INET6:
+ mode = &xfrm6_mode_map[encap];
+ if (mode->family == family)
+ return mode;
+ break;
+ default:
+ break;
}
- return mode;
-}
-
-static void xfrm_put_mode(struct xfrm_mode *mode)
-{
- module_put(mode->owner);
+ return NULL;
}
void xfrm_state_free(struct xfrm_state *x)
@@ -434,7 +478,7 @@
static void ___xfrm_state_destroy(struct xfrm_state *x)
{
- tasklet_hrtimer_cancel(&x->mtimer);
+ hrtimer_cancel(&x->mtimer);
del_timer_sync(&x->rtimer);
kfree(x->aead);
kfree(x->aalg);
@@ -444,12 +488,6 @@
kfree(x->coaddr);
kfree(x->replay_esn);
kfree(x->preplay_esn);
- if (x->inner_mode)
- xfrm_put_mode(x->inner_mode);
- if (x->inner_mode_iaf)
- xfrm_put_mode(x->inner_mode_iaf);
- if (x->outer_mode)
- xfrm_put_mode(x->outer_mode);
if (x->type_offload)
xfrm_put_type_offload(x->type_offload);
if (x->type) {
@@ -481,8 +519,8 @@
static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
{
- struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
- struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
+ struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer);
+ enum hrtimer_restart ret = HRTIMER_NORESTART;
time64_t now = ktime_get_real_seconds();
time64_t next = TIME64_MAX;
int warn = 0;
@@ -546,7 +584,8 @@
km_state_expired(x, 0, 0);
resched:
if (next != TIME64_MAX) {
- tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
+ hrtimer_forward_now(&x->mtimer, ktime_set(next, 0));
+ ret = HRTIMER_RESTART;
}
goto out;
@@ -563,7 +602,7 @@
out:
spin_unlock(&x->lock);
- return HRTIMER_NORESTART;
+ return ret;
}
static void xfrm_replay_timer_handler(struct timer_list *t);
@@ -572,7 +611,7 @@
{
struct xfrm_state *x;
- x = kmem_cache_alloc(xfrm_state_cache, GFP_ATOMIC | __GFP_ZERO);
+ x = kmem_cache_zalloc(xfrm_state_cache, GFP_ATOMIC);
if (x) {
write_pnet(&x->xs_net, net);
@@ -582,8 +621,8 @@
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
- tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
- CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
+ hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT);
+ x->mtimer.function = xfrm_timer_handler;
timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
x->curlft.add_time = ktime_get_real_seconds();
x->lft.soft_byte_limit = XFRM_INF;
@@ -592,8 +631,6 @@
x->lft.hard_packet_limit = XFRM_INF;
x->replay_maxage = 0;
x->replay_maxdiff = 0;
- x->inner_mode = NULL;
- x->inner_mode_iaf = NULL;
spin_lock_init(&x->lock);
}
return x;
@@ -631,6 +668,9 @@
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
spin_unlock(&net->xfrm.xfrm_state_lock);
+
+ if (x->encap_sk)
+ sock_put(rcu_dereference_raw(x->encap_sk));
xfrm_dev_state_delete(x);
@@ -811,24 +851,79 @@
EXPORT_SYMBOL(xfrm_sad_getinfo);
static void
+__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) /* fill *sel from the IPv4 part of flow *fl */
+{
+ const struct flowi4 *fl4 = &fl->u.ip4;
+
+ sel->daddr.a4 = fl4->daddr;
+ sel->saddr.a4 = fl4->saddr;
+ sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
+ sel->dport_mask = htons(0xffff); /* every port bit is set in both port masks */
+ sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
+ sel->sport_mask = htons(0xffff);
+ sel->family = AF_INET;
+ sel->prefixlen_d = 32; /* presumably full-length IPv4 prefixes, i.e. exact addresses — confirm */
+ sel->prefixlen_s = 32;
+ sel->proto = fl4->flowi4_proto;
+ sel->ifindex = fl4->flowi4_oif;
+}
+
+static void
+__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) /* fill *sel from the IPv6 part of flow *fl */
+{
+ const struct flowi6 *fl6 = &fl->u.ip6;
+
+ /* Initialize a temporary selector that matches only the current session. */
+ *(struct in6_addr *)&sel->daddr = fl6->daddr; /* assignment through the cast copies a whole struct in6_addr */
+ *(struct in6_addr *)&sel->saddr = fl6->saddr;
+ sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
+ sel->dport_mask = htons(0xffff); /* every port bit is set in both port masks */
+ sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
+ sel->sport_mask = htons(0xffff);
+ sel->family = AF_INET6;
+ sel->prefixlen_d = 128; /* presumably full-length IPv6 prefixes, i.e. exact addresses — confirm */
+ sel->prefixlen_s = 128;
+ sel->proto = fl6->flowi6_proto;
+ sel->ifindex = fl6->flowi6_oif;
+}
+
+static void
xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
const struct xfrm_tmpl *tmpl,
const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned short family)
{
- struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family);
-
- if (!afinfo)
- return;
-
- afinfo->init_tempsel(&x->sel, fl);
-
- if (family != tmpl->encap_family) {
- afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family);
- if (!afinfo)
- return;
+ switch (family) { /* the selector is built from the flow's own family; any other family leaves x->sel untouched */
+ case AF_INET:
+ __xfrm4_init_tempsel(&x->sel, fl);
+ break;
+ case AF_INET6:
+ __xfrm6_init_tempsel(&x->sel, fl);
+ break;
}
- afinfo->init_temprop(x, tmpl, daddr, saddr);
+
+ x->id = tmpl->id;
+
+ switch (tmpl->encap_family) { /* id/props addresses follow the template's encap_family, not the family argument */
+ case AF_INET:
+ if (x->id.daddr.a4 == 0) /* template daddr is zero: use the caller-supplied daddr */
+ x->id.daddr.a4 = daddr->a4;
+ x->props.saddr = tmpl->saddr;
+ if (x->props.saddr.a4 == 0) /* template saddr is zero: use the caller-supplied saddr */
+ x->props.saddr.a4 = saddr->a4;
+ break;
+ case AF_INET6:
+ if (ipv6_addr_any((struct in6_addr *)&x->id.daddr))
+ memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr)); /* NOTE(review): size is taken from x->sel.daddr, not x->id.daddr — presumably the same type; confirm */
+ memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
+ if (ipv6_addr_any((struct in6_addr *)&x->props.saddr))
+ memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
+ break;
+ }
+
+ x->props.mode = tmpl->mode;
+ x->props.reqid = tmpl->reqid;
+ x->props.family = tmpl->encap_family;
}
static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
@@ -925,7 +1020,8 @@
if ((x->sel.family &&
(x->sel.family != family ||
!xfrm_selector_match(&x->sel, fl, family))) ||
- !security_xfrm_state_pol_flow_match(x, pol, fl))
+ !security_xfrm_state_pol_flow_match(x, pol,
+ &fl->u.__fl_common))
return;
if (!*best ||
@@ -940,7 +1036,8 @@
if ((!x->sel.family ||
(x->sel.family == family &&
xfrm_selector_match(&x->sel, fl, family))) &&
- security_xfrm_state_pol_flow_match(x, pol, fl))
+ security_xfrm_state_pol_flow_match(x, pol,
+ &fl->u.__fl_common))
*error = -ESRCH;
}
}
@@ -965,7 +1062,7 @@
to_put = NULL;
- sequence = read_seqcount_begin(&xfrm_state_hash_generation);
+ sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
@@ -1052,7 +1149,9 @@
hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
}
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
- tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
+ hrtimer_start(&x->mtimer,
+ ktime_set(net->xfrm.sysctl_acq_expires, 0),
+ HRTIMER_MODE_REL_SOFT);
net->xfrm.state_num++;
xfrm_hash_grow_check(net, x->bydst.next != NULL);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
@@ -1076,7 +1175,7 @@
if (to_put)
xfrm_state_put(to_put);
- if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
+ if (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)) {
*err = -EAGAIN;
if (x) {
xfrm_state_put(x);
@@ -1164,7 +1263,7 @@
hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
}
- tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
+ hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
if (x->replay_maxage)
mod_timer(&x->rtimer, jiffies + x->replay_maxage);
@@ -1271,7 +1370,9 @@
x->mark.m = m->m;
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
xfrm_state_hold(x);
- tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
+ hrtimer_start(&x->mtimer,
+ ktime_set(net->xfrm.sysctl_acq_expires, 0),
+ HRTIMER_MODE_REL_SOFT);
list_add(&x->km.all, &net->xfrm.state_all);
hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
h = xfrm_src_hash(net, daddr, saddr, family);
@@ -1443,9 +1544,6 @@
memcpy(&x->mark, &orig->mark, sizeof(x->mark));
memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));
- if (xfrm_init_state(x) < 0)
- goto error;
-
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
@@ -1458,6 +1556,7 @@
x->km.seq = orig->km.seq;
x->replay = orig->replay;
x->preplay = orig->preplay;
+ x->lastused = orig->lastused;
return x;
@@ -1467,7 +1566,8 @@
return NULL;
}
-struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
+struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
+ u32 if_id)
{
unsigned int h;
struct xfrm_state *x = NULL;
@@ -1483,6 +1583,8 @@
continue;
if (m->reqid && x->props.reqid != m->reqid)
continue;
+ if (if_id != 0 && x->if_id != if_id)
+ continue;
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
@@ -1497,6 +1599,8 @@
hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
if (x->props.mode != m->mode ||
x->id.proto != m->proto)
+ continue;
+ if (if_id != 0 && x->if_id != if_id)
continue;
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
@@ -1523,6 +1627,11 @@
xc = xfrm_state_clone(x, encap);
if (!xc)
return NULL;
+
+ xc->props.family = m->new_family;
+
+ if (xfrm_init_state(xc) < 0)
+ goto error;
memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
@@ -1605,7 +1714,8 @@
memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
x1->km.dying = 0;
- tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
+ hrtimer_start(&x1->mtimer, ktime_set(1, 0),
+ HRTIMER_MODE_REL_SOFT);
if (x1->curlft.use_time)
xfrm_state_check_expire(x1);
@@ -1644,7 +1754,7 @@
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {
x->km.state = XFRM_STATE_EXPIRED;
- tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
+ hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL_SOFT);
return -EINVAL;
}
@@ -1701,51 +1811,129 @@
EXPORT_SYMBOL(xfrm_find_acq);
#ifdef CONFIG_XFRM_SUB_POLICY
-int
-xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
- unsigned short family, struct net *net)
+#if IS_ENABLED(CONFIG_IPV6)
+/* Distribution counting sort shared by xfrm_state and xfrm_tmpl arrays: copies src[0..n-1] into dst in ascending cmp() class order, keeping src order within a class, and clears src[]. */
+static void
+__xfrm6_sort(void **dst, void **src, int n,
+ int (*cmp)(const void *p), int maxclass)
+{
+ int count[XFRM_MAX_DEPTH] = { }; /* count[c]: number of elements in class c, later turned into running totals */
+ int class[XFRM_MAX_DEPTH]; /* class[i]: cached cmp() result for src[i]; sized for at most XFRM_MAX_DEPTH elements */
+ int i;
+
+ for (i = 0; i < n; i++) {
+ int c = cmp(src[i]); /* c indexes count[] and count[c - 1] is read below, so classes must start at 1 */
+
+ class[i] = c;
+ count[c]++;
+ }
+
+ for (i = 2; i < maxclass; i++) /* running totals: count[k] becomes the number of elements with class <= k */
+ count[i] += count[i - 1];
+
+ for (i = 0; i < n; i++) {
+ dst[count[class[i] - 1]++] = src[i]; /* count[c - 1] is the next free dst slot for class c */
+ src[i] = NULL; /* each source slot is cleared once its element has been moved */
+ }
+}
+
+/* Sort class for an xfrm_state; the number returned is the rule that matched, and __xfrm6_sort() places lower classes first:
+ *
+ * rule 1: select IPsec transport except AH
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec transport AH
+ * rule 4: select IPsec tunnel
+ * rule 5: others
+ */
+static int __xfrm6_state_sort_cmp(const void *p) /* p is read as a struct xfrm_state */
+{
+ const struct xfrm_state *v = p;
+
+ switch (v->props.mode) {
+ case XFRM_MODE_TRANSPORT:
+ if (v->id.proto != IPPROTO_AH)
+ return 1;
+ else
+ return 3;
+#if IS_ENABLED(CONFIG_IPV6_MIP6) /* without MIP6 these two modes fall through to rule 5 */
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ case XFRM_MODE_IN_TRIGGER:
+ return 2;
+#endif
+ case XFRM_MODE_TUNNEL:
+ case XFRM_MODE_BEET: /* BEET shares the tunnel class */
+ return 4;
+ }
+ return 5; /* rule 5: every mode not handled by the switch */
+}
+
+/* Sort class for an xfrm_tmpl; the number returned is the rule that matched, and __xfrm6_sort() places lower classes first:
+ *
+ * rule 1: select IPsec transport
+ * rule 2: select MIPv6 RO or inbound trigger
+ * rule 3: select IPsec tunnel
+ * rule 4: others
+ */
+static int __xfrm6_tmpl_sort_cmp(const void *p) /* p is read as a struct xfrm_tmpl */
+{
+ const struct xfrm_tmpl *v = p;
+
+ switch (v->mode) {
+ case XFRM_MODE_TRANSPORT: /* unlike the state comparator, AH is not split out here */
+ return 1;
+#if IS_ENABLED(CONFIG_IPV6_MIP6) /* without MIP6 these two modes fall through to rule 4 */
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ case XFRM_MODE_IN_TRIGGER:
+ return 2;
+#endif
+ case XFRM_MODE_TUNNEL:
+ case XFRM_MODE_BEET: /* BEET shares the tunnel class */
+ return 3;
+ }
+ return 4; /* rule 4: every mode not handled by the switch */
+}
+#else
+static inline int __xfrm6_state_sort_cmp(const void *p) { return 5; } /* stub for builds without CONFIG_IPV6: always the "others" class */
+static inline int __xfrm6_tmpl_sort_cmp(const void *p) { return 4; } /* stub for builds without CONFIG_IPV6: always the "others" class */
+
+static inline void
+__xfrm6_sort(void **dst, void **src, int n,
+ int (*cmp)(const void *p), int maxclass) /* cmp and maxclass are unused in this fallback for builds without CONFIG_IPV6 */
{
int i;
- int err = 0;
- struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- if (!afinfo)
- return -EAFNOSUPPORT;
- spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
- if (afinfo->tmpl_sort)
- err = afinfo->tmpl_sort(dst, src, n);
+ for (i = 0; i < n; i++)
+ dst[i] = src[i]; /* plain in-order copy; unlike the CONFIG_IPV6 version, src[] is not cleared */
+}
+#endif /* CONFIG_IPV6 */
+
+void
+xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
+ unsigned short family)
+{
+ int i;
+
+ if (family == AF_INET6) /* only AF_INET6 goes through the class sort; every other family is copied in order below */
+ __xfrm6_sort((void **)dst, (void **)src, n,
+ __xfrm6_tmpl_sort_cmp, 5); /* maxclass 5: the template comparator returns classes 1..4 */
else
for (i = 0; i < n; i++)
dst[i] = src[i];
- spin_unlock_bh(&net->xfrm.xfrm_state_lock);
- rcu_read_unlock();
- return err;
}
-EXPORT_SYMBOL(xfrm_tmpl_sort);
-int
+void
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
unsigned short family)
{
int i;
- int err = 0;
- struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- struct net *net = xs_net(*src);
- if (!afinfo)
- return -EAFNOSUPPORT;
-
- spin_lock_bh(&net->xfrm.xfrm_state_lock);
- if (afinfo->state_sort)
- err = afinfo->state_sort(dst, src, n);
+ if (family == AF_INET6) /* only AF_INET6 goes through the class sort; every other family is copied in order below */
+ __xfrm6_sort((void **)dst, (void **)src, n,
+ __xfrm6_state_sort_cmp, 6); /* maxclass 6: the state comparator returns classes 1..5 */
else
for (i = 0; i < n; i++)
dst[i] = src[i];
- spin_unlock_bh(&net->xfrm.xfrm_state_lock);
- rcu_read_unlock();
- return err;
}
-EXPORT_SYMBOL(xfrm_state_sort);
#endif
/* Silly enough, but I'm lazy to build resolution list */
@@ -2102,7 +2290,7 @@
}
EXPORT_SYMBOL(km_report);
-bool km_is_alive(const struct km_event *c)
+static bool km_is_alive(const struct km_event *c)
{
struct xfrm_mgr *km;
bool is_alive = false;
@@ -2118,7 +2306,6 @@
return is_alive;
}
-EXPORT_SYMBOL(km_is_alive);
#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
static DEFINE_SPINLOCK(xfrm_translator_lock);
@@ -2180,14 +2367,14 @@
EXPORT_SYMBOL_GPL(xfrm_unregister_translator);
#endif
-int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
+int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
{
int err;
u8 *data;
struct xfrm_mgr *km;
struct xfrm_policy *pol = NULL;
- if (!optval && !optlen) {
+ if (sockptr_is_null(optval) && !optlen) {
xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
__sk_dst_reset(sk);
@@ -2197,7 +2384,7 @@
if (optlen <= 0 || optlen > PAGE_SIZE)
return -EMSGSIZE;
- data = memdup_user(optval, optlen);
+ data = memdup_sockptr(optval, optlen);
if (IS_ERR(data))
return PTR_ERR(data);
@@ -2206,8 +2393,10 @@
if (in_compat_syscall()) {
struct xfrm_translator *xtr = xfrm_get_translator();
- if (!xtr)
+ if (!xtr) {
+ kfree(data);
return -EOPNOTSUPP;
+ }
err = xtr->xlate_user_policy_sockptr(&data, optlen);
xfrm_put_translator(xtr);
@@ -2305,6 +2494,7 @@
return rcu_dereference(xfrm_state_afinfo[family]);
}
+EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu);
struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
{
@@ -2339,37 +2529,49 @@
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-int xfrm_state_mtu(struct xfrm_state *x, int mtu)
+u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
const struct xfrm_type *type = READ_ONCE(x->type);
+ struct crypto_aead *aead;
+ u32 blksize, net_adj = 0;
- if (x->km.state == XFRM_STATE_VALID &&
- type && type->get_mtu)
- return type->get_mtu(x, mtu);
+ if (x->km.state != XFRM_STATE_VALID ||
+ !type || type->proto != IPPROTO_ESP)
+ return mtu - x->props.header_len; /* not a valid ESP state: only the fixed header length is subtracted */
- return mtu - x->props.header_len;
+ aead = x->data; /* NOTE(review): relies on ESP states keeping their crypto_aead in x->data — confirm */
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4); /* cipher block size rounded up to a multiple of 4 */
+
+ switch (x->props.mode) {
+ case XFRM_MODE_TRANSPORT:
+ case XFRM_MODE_BEET: /* for these two modes the IP header size is kept out of the block rounding below */
+ if (x->props.family == AF_INET)
+ net_adj = sizeof(struct iphdr);
+ else if (x->props.family == AF_INET6)
+ net_adj = sizeof(struct ipv6hdr);
+ break;
+ case XFRM_MODE_TUNNEL: /* net_adj stays 0 */
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
+ net_adj) & ~(blksize - 1)) + net_adj - 2; /* rounds the payload down to a blksize multiple; NOTE(review): the trailing "- 2" presumably reserves the ESP pad-length and next-header bytes — confirm */
}
+EXPORT_SYMBOL_GPL(xfrm_state_mtu);
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
{
- struct xfrm_state_afinfo *afinfo;
- struct xfrm_mode *inner_mode;
+ const struct xfrm_mode *inner_mode;
+ const struct xfrm_mode *outer_mode;
int family = x->props.family;
int err;
- err = -EAFNOSUPPORT;
- afinfo = xfrm_state_get_afinfo(family);
- if (!afinfo)
- goto error;
-
- err = 0;
- if (afinfo->init_flags)
- err = afinfo->init_flags(x);
-
- rcu_read_unlock();
-
- if (err)
- goto error;
+ if (family == AF_INET &&
+ READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
+ x->props.flags |= XFRM_STATE_NOPMTUDISC;
err = -EPROTONOSUPPORT;
@@ -2379,25 +2581,22 @@
goto error;
if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
- family != x->sel.family) {
- xfrm_put_mode(inner_mode);
+ family != x->sel.family)
goto error;
- }
- x->inner_mode = inner_mode;
+ x->inner_mode = *inner_mode;
} else {
- struct xfrm_mode *inner_mode_iaf;
+ const struct xfrm_mode *inner_mode_iaf;
int iafamily = AF_INET;
inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
if (inner_mode == NULL)
goto error;
- if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
- xfrm_put_mode(inner_mode);
+ if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL))
goto error;
- }
- x->inner_mode = inner_mode;
+
+ x->inner_mode = *inner_mode;
if (x->props.family == AF_INET)
iafamily = AF_INET6;
@@ -2405,9 +2604,7 @@
inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
if (inner_mode_iaf) {
if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
- x->inner_mode_iaf = inner_mode_iaf;
- else
- xfrm_put_mode(inner_mode_iaf);
+ x->inner_mode_iaf = *inner_mode_iaf;
}
}
@@ -2421,12 +2618,13 @@
if (err)
goto error;
- x->outer_mode = xfrm_get_mode(x->props.mode, family);
- if (x->outer_mode == NULL) {
+ outer_mode = xfrm_get_mode(x->props.mode, family);
+ if (!outer_mode) {
err = -EPROTONOSUPPORT;
goto error;
}
+ x->outer_mode = *outer_mode;
if (init_replay) {
err = xfrm_init_replay(x);
if (err)
@@ -2478,6 +2676,7 @@
net->xfrm.state_num = 0;
INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
spin_lock_init(&net->xfrm.xfrm_state_lock);
+ seqcount_init(&net->xfrm.xfrm_state_hash_generation);
return 0;
out_byspi:
--
Gitblit v1.6.2