From ee930fffee469d076998274a2ca55e13dc1efb67 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Fri, 10 May 2024 08:50:54 +0000 Subject: [PATCH] enable tun/tap/iptables --- kernel/include/net/xfrm.h | 291 ++++++++++++++++++++------------------------------------- 1 files changed, 104 insertions(+), 187 deletions(-) diff --git a/kernel/include/net/xfrm.h b/kernel/include/net/xfrm.h index 1078828..b4121a7 100644 --- a/kernel/include/net/xfrm.h +++ b/kernel/include/net/xfrm.h @@ -15,6 +15,7 @@ #include <linux/audit.h> #include <linux/slab.h> #include <linux/refcount.h> +#include <linux/sockptr.h> #include <net/sock.h> #include <net/dst.h> @@ -127,9 +128,21 @@ struct xfrm_state_offload { struct net_device *dev; + struct net_device *real_dev; unsigned long offload_handle; unsigned int num_exthdrs; u8 flags; +}; + +struct xfrm_mode { + u8 encap; + u8 family; + u8 flags; +}; + +/* Flags for xfrm_mode. */ +enum { + XFRM_MODE_FLAG_TUNNEL = 1, }; /* Full description of state of transformer. */ @@ -182,6 +195,7 @@ /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; + struct sock __rcu *encap_sk; /* Data for care-of address */ xfrm_address_t *coaddr; @@ -219,7 +233,7 @@ struct xfrm_stats stats; struct xfrm_lifetime_cur curlft; - struct tasklet_hrtimer mtimer; + struct hrtimer mtimer; struct xfrm_state_offload xso; @@ -234,9 +248,9 @@ /* Reference to data common to all the instances of this * transformer. */ const struct xfrm_type *type; - struct xfrm_mode *inner_mode; - struct xfrm_mode *inner_mode_iaf; - struct xfrm_mode *outer_mode; + struct xfrm_mode inner_mode; + struct xfrm_mode inner_mode_iaf; + struct xfrm_mode outer_mode; const struct xfrm_type_offload *type_offload; @@ -316,13 +330,6 @@ xfrm_address_t *saddr, xfrm_address_t *daddr, u32 mark); - void (*decode_session)(struct sk_buff *skb, - struct flowi *fl, - int reverse); - int (*get_tos)(const struct flowi *fl); - int (*init_path)(struct xfrm_dst *path, - struct dst_entry *dst, - int nfheader_len); int (*fill_dst)(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl); @@ -342,29 +349,20 @@ int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { - unsigned int family; - unsigned int proto; - __be16 eth_proto; - struct module *owner; - const struct xfrm_type *type_map[IPPROTO_MAX]; - const struct xfrm_type_offload *type_offload_map[IPPROTO_MAX]; - struct xfrm_mode *mode_map[XFRM_MODE_MAX]; + u8 family; + u8 proto; - int (*init_flags)(struct xfrm_state *x); - void (*init_tempsel)(struct xfrm_selector *sel, - const struct flowi *fl); - void (*init_temprop)(struct xfrm_state *x, - const struct xfrm_tmpl *tmpl, - const xfrm_address_t *daddr, - const xfrm_address_t *saddr); - int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); - int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); + const struct xfrm_type_offload *type_offload_esp; + + const struct xfrm_type *type_esp; + const struct xfrm_type *type_ipip; + const struct xfrm_type *type_ipip6; + const struct xfrm_type *type_comp; + const struct xfrm_type *type_ah; + const struct xfrm_type *type_routing; + const struct xfrm_type *type_dstopts; + int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); - int (*output_finish)(struct sock *sk, struct sk_buff *skb); - int (*extract_input)(struct xfrm_state *x, - struct sk_buff *skb); - int (*extract_output)(struct xfrm_state *x, - struct sk_buff *skb); int (*transport_finish)(struct sk_buff *skb, int async); void (*local_error)(struct sk_buff *skb, u32 mtu); @@ -376,7 +374,8 @@ struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family); struct xfrm_input_afinfo { - unsigned int family; + u8 family; + bool is_ipip; int (*callback)(struct sk_buff *skb, u8 protocol, int err); }; @@ -404,12 +403,10 @@ int (*reject)(struct xfrm_state *, struct sk_buff *, const struct flowi *); int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **); - /* Estimate maximal size of result of transformation of a dgram */ - u32 (*get_mtu)(struct xfrm_state *, int size); }; int xfrm_register_type(const struct xfrm_type *type, unsigned short family); -int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family); +void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family); struct xfrm_type_offload { char *description; @@ -421,79 +418,7 @@ }; int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); -int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); - -struct xfrm_mode { - /* - * Remove encapsulation header. - * - * The IP header will be moved over the top of the encapsulation - * header. - * - * On entry, the transport header shall point to where the IP header - * should be and the network header shall be set to where the IP - * header currently is. skb->data shall point to the start of the - * payload. - */ - int (*input2)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * This is the actual input entry point. - * - * For transport mode and equivalent this would be identical to - * input2 (which does not need to be set). While tunnel mode - * and equivalent would set this to the tunnel encapsulation function - * xfrm4_prepare_input that would in turn call input2. - */ - int (*input)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * Add encapsulation header. - * - * On exit, the transport header will be set to the start of the - * encapsulation header to be filled in by x->type->output and - * the mac header will be set to the nextheader (protocol for - * IPv4) field of the extension header directly preceding the - * encapsulation header, or in its absence, that of the top IP - * header. The value of the network header will always point - * to the top IP header while skb->data will point to the payload. - */ - int (*output2)(struct xfrm_state *x,struct sk_buff *skb); - - /* - * This is the actual output entry point. - * - * For transport mode and equivalent this would be identical to - * output2 (which does not need to be set). While tunnel mode - * and equivalent would set this to a tunnel encapsulation function - * (xfrm4_prepare_output or xfrm6_prepare_output) that would in turn - * call output2. - */ - int (*output)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * Adjust pointers into the packet and do GSO segmentation. - */ - struct sk_buff *(*gso_segment)(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features); - - /* - * Adjust pointers into the packet when IPsec is done at layer2. - */ - void (*xmit)(struct xfrm_state *x, struct sk_buff *skb); - - struct xfrm_state_afinfo *afinfo; - struct module *owner; - unsigned int encap; - int flags; -}; - -/* Flags for xfrm_mode. */ -enum { - XFRM_MODE_FLAG_TUNNEL = 1, -}; - -int xfrm_register_mode(struct xfrm_mode *mode, int family); -int xfrm_unregister_mode(struct xfrm_mode *mode, int family); +void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); static inline int xfrm_af2proto(unsigned int family) { @@ -507,13 +432,13 @@ } } -static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) +static inline const struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) { if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) || (ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6)) - return x->inner_mode; + return &x->inner_mode; else - return x->inner_mode_iaf; + return &x->inner_mode_iaf; } struct xfrm_tmpl { @@ -578,6 +503,7 @@ /* This lock only affects elements except for entry. */ rwlock_t lock; refcount_t refcnt; + u32 pos; struct timer_list timer; atomic_t genid; @@ -590,6 +516,7 @@ struct xfrm_lifetime_cur curlft; struct xfrm_policy_walk_entry walk; struct xfrm_policy_queue polq; + bool bydst_reinsert; u8 type; u8 action; u8 flags; @@ -597,6 +524,7 @@ u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; + struct hlist_node bydst_inexact_list; struct rcu_head rcu; }; @@ -1099,49 +1027,21 @@ }; struct sec_path { - refcount_t refcnt; int len; int olen; + int verified_cnt; struct xfrm_state *xvec[XFRM_MAX_DEPTH]; struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; }; -static inline int secpath_exists(struct sk_buff *skb) -{ -#ifdef CONFIG_XFRM - return skb->sp != NULL; -#else - return 0; -#endif -} - -static inline struct sec_path * -secpath_get(struct sec_path *sp) -{ - if (sp) - refcount_inc(&sp->refcnt); - return sp; -} - -void __secpath_destroy(struct sec_path *sp); - -static inline void -secpath_put(struct sec_path *sp) -{ - if (sp && refcount_dec_and_test(&sp->refcnt)) - __secpath_destroy(sp); -} - -struct sec_path *secpath_dup(struct sec_path *src); -int secpath_set(struct sk_buff *skb); +struct sec_path *secpath_set(struct sk_buff *skb); static inline void secpath_reset(struct sk_buff *skb) { #ifdef CONFIG_XFRM - secpath_put(skb->sp); - skb->sp = NULL; + skb_ext_del(skb, SKB_EXT_SEC_PATH); #endif } @@ -1197,8 +1097,8 @@ if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - return (!net->xfrm.policy_count[dir] && !skb->sp) || - (skb_dst(skb)->flags & DST_NOPOLICY) || + return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) || + (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) || __xfrm_policy_check(sk, ndir, skb, family); } @@ -1506,6 +1406,8 @@ struct xfrm6_protocol { int (*handler)(struct sk_buff *skb); + int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); int (*cb_handler)(struct sk_buff *skb, int err); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); @@ -1517,6 +1419,7 @@ /* XFRM tunnel handlers. */ struct xfrm_tunnel { int (*handler)(struct sk_buff *skb); + int (*cb_handler)(struct sk_buff *skb, int err); int (*err_handler)(struct sk_buff *skb, u32 info); struct xfrm_tunnel __rcu *next; @@ -1525,6 +1428,7 @@ struct xfrm6_tunnel { int (*handler)(struct sk_buff *skb); + int (*cb_handler)(struct sk_buff *skb, int err); int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info); struct xfrm6_tunnel __rcu *next; @@ -1602,21 +1506,19 @@ u8 proto, unsigned short family); #ifdef CONFIG_XFRM_SUB_POLICY -int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, - unsigned short family, struct net *net); -int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, +void xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, unsigned short family); +void xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, + unsigned short family); #else -static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, - int n, unsigned short family, struct net *net) -{ - return -ENOSYS; -} - -static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, +static inline void xfrm_tmpl_sort(struct xfrm_tmpl **d, struct xfrm_tmpl **s, int n, unsigned short family) { - return -ENOSYS; +} + +static inline void xfrm_state_sort(struct xfrm_state **d, struct xfrm_state **s, + int n, unsigned short family) +{ } #endif @@ -1645,20 +1547,23 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); -int xfrm_state_mtu(struct xfrm_state *x, int mtu); +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); -int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); +int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, + int (*finish)(struct net *, struct sock *, + struct sk_buff *)); int xfrm_trans_queue(struct sk_buff *skb, int (*finish)(struct net *, struct sock *, struct sk_buff *)); int xfrm_output_resume(struct sk_buff *skb, int err); int xfrm_output(struct sock *sk, struct sk_buff *skb); -int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); + +int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); + void xfrm_local_error(struct sk_buff *skb, int mtu); -int xfrm4_extract_header(struct sk_buff *skb); int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); @@ -1674,55 +1579,46 @@ return xfrm_input(skb, nexthdr, spi, 0); } -int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb); -int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol); int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_extract_header(struct sk_buff *skb); int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t); +int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type); int xfrm6_transport_finish(struct sk_buff *skb, int async); int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t); int xfrm6_rcv(struct sk_buff *skb); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); void xfrm6_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol); int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol); int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family); __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); -int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); #ifdef CONFIG_XFRM +void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); -int xfrm_user_policy(struct sock *sk, int optname, - u8 __user *optval, int optlen); +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); +int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, + int optlen); #else -static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) +static inline int xfrm_user_policy(struct sock *sk, int optname, + sockptr_t optval, int optlen) { return -ENOPROTOOPT; -} - -static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) -{ - /* should not happen */ - kfree_skb(skb); - return 0; } #endif @@ -1739,13 +1635,16 @@ void *); void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, - u8 type, int dir, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, + const struct xfrm_mark *mark, + u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8, - int dir, u32 id, int delete, int *err); +struct xfrm_policy *xfrm_policy_byid(struct net *net, + const struct xfrm_mark *mark, u32 if_id, + u8 type, int dir, u32 id, int delete, + int *err); int xfrm_policy_flush(struct net *net, u8 type, bool task_valid); void xfrm_policy_hash_rebuild(struct net *net); u32 xfrm_get_acqseq(void); @@ -1763,14 +1662,15 @@ const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap); -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, + u32 if_id); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m, struct xfrm_encap_tmpl *encap); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap); + struct xfrm_encap_tmpl *encap, u32 if_id); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); @@ -1922,14 +1822,16 @@ #ifdef CONFIG_XFRM static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { - return skb->sp->xvec[skb->sp->len - 1]; + struct sec_path *sp = skb_sec_path(skb); + + return sp->xvec[sp->len - 1]; } #endif static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) { #ifdef CONFIG_XFRM - struct sec_path *sp = skb->sp; + struct sec_path *sp = skb_sec_path(skb); if (!sp || !sp->olen || sp->len != sp->olen) return NULL; @@ -1987,7 +1889,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) { struct xfrm_state_offload *xso = &x->xso; - struct net_device *dev = xso->dev; + struct net_device *dev = xso->dev; if (dev && dev->xfrmdev_ops) { if (dev->xfrmdev_ops->xdo_dev_state_free) @@ -2088,7 +1990,7 @@ tunnel = true; break; } - if (tunnel && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)) + if (tunnel && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL)) return -EINVAL; return 0; @@ -2127,4 +2029,19 @@ } #endif +#if IS_ENABLED(CONFIG_IPV6) +static inline bool xfrm6_local_dontfrag(const struct sock *sk) +{ + int proto; + + if (!sk || sk->sk_family != AF_INET6) + return false; + + proto = sk->sk_protocol; + if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) + return inet6_sk(sk)->dontfrag; + + return false; +} +#endif #endif /* _NET_XFRM_H */ -- Gitblit v1.6.2