From 072de836f53be56a70cecf70b43ae43b7ce17376 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 10:08:36 +0000
Subject: [PATCH] mk-rootfs.sh
---
kernel/net/ipv6/ip6_output.c | 543 +++++++++++++++++++++++++++++++++++++-----------------
1 files changed, 371 insertions(+), 172 deletions(-)
diff --git a/kernel/net/ipv6/ip6_output.c b/kernel/net/ipv6/ip6_output.c
index 302c0da..e427f50 100644
--- a/kernel/net/ipv6/ip6_output.c
+++ b/kernel/net/ipv6/ip6_output.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IPv6 output functions
* Linux INET6 implementation
@@ -6,11 +7,6 @@
* Pedro Roque <roque@di.fc.ul.pt>
*
* Based on linux/net/ipv4/ip_output.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*
* Changes:
* A.N.Kuznetsov : airthmetics in fragmentation.
@@ -58,15 +54,43 @@
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
-#include <soc/rockchip/android-version.h>
+#include <net/ip_tunnels.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ unsigned int hh_len = LL_RESERVED_SPACE(dev);
+ int delta = hh_len - skb_headroom(skb);
+ const struct in6_addr *nexthop;
struct neighbour *neigh;
- struct in6_addr *nexthop;
int ret;
+
+ /* Be paranoid, rather than too clever. */
+ if (unlikely(delta > 0) && dev->header_ops) {
+ /* pskb_expand_head() might crash, if skb is shared */
+ if (skb_shared(skb)) {
+ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+ if (likely(nskb)) {
+ if (skb->sk)
+ skb_set_owner_w(nskb, skb->sk);
+ consume_skb(skb);
+ } else {
+ kfree_skb(skb);
+ }
+ skb = nskb;
+ }
+ if (skb &&
+ pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+ kfree_skb(skb);
+ skb = NULL;
+ }
+ if (!skb) {
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+ return -ENOMEM;
+ }
+ }
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -118,7 +142,7 @@
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb);
+ ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
return ret;
}
@@ -162,16 +186,9 @@
return ret;
}
-static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned int mtu;
- int ret;
-
- ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
- if (ret) {
- kfree_skb(skb);
- return ret;
- }
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
@@ -193,9 +210,25 @@
return ip6_finish_output2(net, sk, skb);
}
+static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ switch (ret) {
+ case NET_XMIT_SUCCESS:
+ return __ip6_finish_output(net, sk, skb);
+ case NET_XMIT_CN:
+ return __ip6_finish_output(net, sk, skb) ? : ret;
+ default:
+ kfree_skb(skb);
+ return ret;
+ }
+}
+
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
skb->protocol = htons(ETH_P_IPV6);
@@ -208,7 +241,7 @@
}
return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, dev,
+ net, sk, skb, indev, dev,
ip6_finish_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
@@ -228,7 +261,7 @@
* which are using proper atomic operations or spinlocks.
*/
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
- __u32 mark, struct ipv6_txoptions *opt, int tclass)
+ __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
struct net *net = sock_net(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -293,7 +326,7 @@
hdr->daddr = *first_hop;
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = sk->sk_priority;
+ skb->priority = priority;
skb->mark = mark;
mtu = dst_mtu(dst);
@@ -339,6 +372,12 @@
if (sk && ra->sel == sel &&
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (np && np->rtalert_isolate &&
+ !net_eq(sock_net(sk), dev_net(skb->dev))) {
+ continue;
+ }
if (last) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
@@ -417,6 +456,13 @@
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+#ifdef CONFIG_NET_SWITCHDEV
+ if (skb->offload_l3_fwd_mark) {
+ consume_skb(skb);
+ return 0;
+ }
+#endif
+
skb->tstamp = 0;
return dst_output(net, sk, skb);
}
@@ -441,13 +487,14 @@
int ip6_forward(struct sk_buff *skb)
{
- struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
struct net *net = dev_net(dst->dev);
+ struct inet6_dev *idev;
u32 mtu;
+ idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
if (net->ipv6.devconf_all->forwarding == 0)
goto error;
@@ -461,7 +508,7 @@
goto drop;
if (!net->ipv6.devconf_all->disable_policy &&
- !idev->cnf.disable_policy &&
+ (!idev || !idev->cnf.disable_policy) &&
!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
@@ -491,8 +538,6 @@
* check and decrement ttl
*/
if (hdr->hop_limit <= 1) {
- /* Force OUTPUT device used as source address */
- skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
@@ -616,8 +661,172 @@
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
+ skb_ext_copy(to, from);
skb_copy_secmark(to, from);
}
+
+int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
+ u8 nexthdr, __be32 frag_id,
+ struct ip6_fraglist_iter *iter)
+{
+ unsigned int first_len;
+ struct frag_hdr *fh;
+
+ /* BUILD HEADER */
+ *prevhdr = NEXTHDR_FRAGMENT;
+ iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
+ if (!iter->tmp_hdr)
+ return -ENOMEM;
+
+ iter->frag = skb_shinfo(skb)->frag_list;
+ skb_frag_list_init(skb);
+
+ iter->offset = 0;
+ iter->hlen = hlen;
+ iter->frag_id = frag_id;
+ iter->nexthdr = nexthdr;
+
+ __skb_pull(skb, hlen);
+ fh = __skb_push(skb, sizeof(struct frag_hdr));
+ __skb_push(skb, hlen);
+ skb_reset_network_header(skb);
+ memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
+
+ fh->nexthdr = nexthdr;
+ fh->reserved = 0;
+ fh->frag_off = htons(IP6_MF);
+ fh->identification = frag_id;
+
+ first_len = skb_pagelen(skb);
+ skb->data_len = first_len - skb_headlen(skb);
+ skb->len = first_len;
+ ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+
+ return 0;
+}
+EXPORT_SYMBOL(ip6_fraglist_init);
+
+void ip6_fraglist_prepare(struct sk_buff *skb,
+ struct ip6_fraglist_iter *iter)
+{
+ struct sk_buff *frag = iter->frag;
+ unsigned int hlen = iter->hlen;
+ struct frag_hdr *fh;
+
+ frag->ip_summed = CHECKSUM_NONE;
+ skb_reset_transport_header(frag);
+ fh = __skb_push(frag, sizeof(struct frag_hdr));
+ __skb_push(frag, hlen);
+ skb_reset_network_header(frag);
+ memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
+ iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
+ fh->nexthdr = iter->nexthdr;
+ fh->reserved = 0;
+ fh->frag_off = htons(iter->offset);
+ if (frag->next)
+ fh->frag_off |= htons(IP6_MF);
+ fh->identification = iter->frag_id;
+ ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+ ip6_copy_metadata(frag, skb);
+}
+EXPORT_SYMBOL(ip6_fraglist_prepare);
+
+void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
+ unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
+ u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
+{
+ state->prevhdr = prevhdr;
+ state->nexthdr = nexthdr;
+ state->frag_id = frag_id;
+
+ state->hlen = hlen;
+ state->mtu = mtu;
+
+ state->left = skb->len - hlen; /* Space per frame */
+ state->ptr = hlen; /* Where to start from */
+
+ state->hroom = hdr_room;
+ state->troom = needed_tailroom;
+
+ state->offset = 0;
+}
+EXPORT_SYMBOL(ip6_frag_init);
+
+struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
+{
+ u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
+ struct sk_buff *frag;
+ struct frag_hdr *fh;
+ unsigned int len;
+
+ len = state->left;
+ /* IF: it doesn't fit, use 'mtu' - the data space left */
+ if (len > state->mtu)
+ len = state->mtu;
+ /* IF: we are not sending up to and including the packet end
+ then align the next start on an eight byte boundary */
+ if (len < state->left)
+ len &= ~7;
+
+ /* Allocate buffer */
+ frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
+ state->hroom + state->troom, GFP_ATOMIC);
+ if (!frag)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Set up data on packet
+ */
+
+ ip6_copy_metadata(frag, skb);
+ skb_reserve(frag, state->hroom);
+ skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
+ skb_reset_network_header(frag);
+ fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
+ frag->transport_header = (frag->network_header + state->hlen +
+ sizeof(struct frag_hdr));
+
+ /*
+ * Charge the memory for the fragment to any owner
+ * it might possess
+ */
+ if (skb->sk)
+ skb_set_owner_w(frag, skb->sk);
+
+ /*
+ * Copy the packet header into the new buffer.
+ */
+ skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
+
+ fragnexthdr_offset = skb_network_header(frag);
+ fragnexthdr_offset += prevhdr - skb_network_header(skb);
+ *fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
+ /*
+ * Build fragment header.
+ */
+ fh->nexthdr = state->nexthdr;
+ fh->reserved = 0;
+ fh->identification = state->frag_id;
+
+ /*
+ * Copy a block of the IP datagram.
+ */
+ BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
+ len));
+ state->left -= len;
+
+ fh->frag_off = htons(state->offset);
+ if (state->left > 0)
+ fh->frag_off |= htons(IP6_MF);
+ ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+
+ state->ptr += len;
+ state->offset += len;
+
+ return frag;
+}
+EXPORT_SYMBOL(ip6_frag_next);
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
@@ -626,12 +835,11 @@
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
- struct ipv6hdr *tmp_hdr;
- struct frag_hdr *fh;
- unsigned int mtu, hlen, left, len, nexthdr_offset;
- int hroom, troom;
+ struct ip6_frag_state state;
+ unsigned int mtu, hlen, nexthdr_offset;
+ ktime_t tstamp = skb->tstamp;
+ int hroom, err = 0;
__be32 frag_id;
- int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
err = ip6_find_1stfragopt(skb, &prevhdr);
@@ -678,6 +886,7 @@
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
unsigned int first_len = skb_pagelen(skb);
+ struct ip6_fraglist_iter iter;
struct sk_buff *frag2;
if (first_len - hlen > mtu ||
@@ -705,85 +914,46 @@
skb->truesize -= frag->truesize;
}
- err = 0;
- offset = 0;
- /* BUILD HEADER */
-
- *prevhdr = NEXTHDR_FRAGMENT;
- tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
- if (!tmp_hdr) {
- err = -ENOMEM;
+ err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
+ &iter);
+ if (err < 0)
goto fail;
- }
- frag = skb_shinfo(skb)->frag_list;
- skb_frag_list_init(skb);
- __skb_pull(skb, hlen);
- fh = __skb_push(skb, sizeof(struct frag_hdr));
- __skb_push(skb, hlen);
- skb_reset_network_header(skb);
- memcpy(skb_network_header(skb), tmp_hdr, hlen);
-
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->frag_off = htons(IP6_MF);
- fh->identification = frag_id;
-
- first_len = skb_pagelen(skb);
- skb->data_len = first_len - skb_headlen(skb);
- skb->len = first_len;
- ipv6_hdr(skb)->payload_len = htons(first_len -
- sizeof(struct ipv6hdr));
+ /* We prevent @rt from being freed. */
+ rcu_read_lock();
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
- if (frag) {
- frag->ip_summed = CHECKSUM_NONE;
- skb_reset_transport_header(frag);
- fh = __skb_push(frag, sizeof(struct frag_hdr));
- __skb_push(frag, hlen);
- skb_reset_network_header(frag);
- memcpy(skb_network_header(frag), tmp_hdr,
- hlen);
- offset += skb->len - hlen - sizeof(struct frag_hdr);
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->frag_off = htons(offset);
- if (frag->next)
- fh->frag_off |= htons(IP6_MF);
- fh->identification = frag_id;
- ipv6_hdr(frag)->payload_len =
- htons(frag->len -
- sizeof(struct ipv6hdr));
- ip6_copy_metadata(frag, skb);
- }
+ if (iter.frag)
+ ip6_fraglist_prepare(skb, &iter);
+ skb->tstamp = tstamp;
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
- if (err || !frag)
+ if (err || !iter.frag)
break;
- skb = frag;
- frag = skb->next;
- skb->next = NULL;
+ skb = ip6_fraglist_next(&iter);
}
- kfree(tmp_hdr);
+ kfree(iter.tmp_hdr);
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGOKS);
+ rcu_read_unlock();
return 0;
}
- kfree_skb_list(frag);
+ kfree_skb_list(iter.frag);
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
+ rcu_read_unlock();
return err;
slow_path_clean:
@@ -797,93 +967,29 @@
}
slow_path:
- left = skb->len - hlen; /* Space per frame */
- ptr = hlen; /* Where to start from */
-
/*
* Fragment the datagram.
*/
- troom = rt->dst.dev->needed_tailroom;
+ ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
+ LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
+ &state);
/*
* Keep copying data until we run out.
*/
- while (left > 0) {
- u8 *fragnexthdr_offset;
- len = left;
- /* IF: it doesn't fit, use 'mtu' - the data space left */
- if (len > mtu)
- len = mtu;
- /* IF: we are not sending up to and including the packet end
- then align the next start on an eight byte boundary */
- if (len < left) {
- len &= ~7;
- }
-
- /* Allocate buffer */
- frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
- hroom + troom, GFP_ATOMIC);
- if (!frag) {
- err = -ENOMEM;
+ while (state.left > 0) {
+ frag = ip6_frag_next(skb, &state);
+ if (IS_ERR(frag)) {
+ err = PTR_ERR(frag);
goto fail;
}
/*
- * Set up data on packet
- */
-
- ip6_copy_metadata(frag, skb);
- skb_reserve(frag, hroom);
- skb_put(frag, len + hlen + sizeof(struct frag_hdr));
- skb_reset_network_header(frag);
- fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
- frag->transport_header = (frag->network_header + hlen +
- sizeof(struct frag_hdr));
-
- /*
- * Charge the memory for the fragment to any owner
- * it might possess
- */
- if (skb->sk)
- skb_set_owner_w(frag, skb->sk);
-
- /*
- * Copy the packet header into the new buffer.
- */
- skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
-
- fragnexthdr_offset = skb_network_header(frag);
- fragnexthdr_offset += prevhdr - skb_network_header(skb);
- *fragnexthdr_offset = NEXTHDR_FRAGMENT;
-
- /*
- * Build fragment header.
- */
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->identification = frag_id;
-
- /*
- * Copy a block of the IP datagram.
- */
- BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
- len));
- left -= len;
-
- fh->frag_off = htons(offset);
- if (left > 0)
- fh->frag_off |= htons(IP6_MF);
- ipv6_hdr(frag)->payload_len = htons(frag->len -
- sizeof(struct ipv6hdr));
-
- ptr += len;
- offset += len;
-
- /*
* Put this fragment into the sending queue.
*/
+ frag->tstamp = tstamp;
err = output(net, sk, frag);
if (err)
goto fail;
@@ -1066,14 +1172,11 @@
}
}
#endif
-#if defined(CONFIG_ANDROID_VERSION) && CONFIG_ANDROID_VERSION < ANDROID_VERSION(8, 0, 0, 0)
-#else
if (ipv6_addr_v4mapped(&fl6->saddr) &&
!(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
err = -EAFNOSUPPORT;
goto out_err_release;
}
-#endif
return 0;
@@ -1088,6 +1191,7 @@
/**
* ip6_dst_lookup - perform route lookup on flow
+ * @net: Network namespace to perform lookup in
* @sk: socket which provides route info
* @dst: pointer to dst_entry * for result
* @fl6: flow to lookup
@@ -1106,6 +1210,7 @@
/**
* ip6_dst_lookup_flow - perform route lookup on flow with ipsec
+ * @net: Network namespace to perform lookup in
* @sk: socket which provides route info
* @fl6: flow to lookup
* @final_dst: final destination address for ipsec lookup
@@ -1166,6 +1271,74 @@
return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
+
+/**
+ * ip6_dst_lookup_tunnel - perform route lookup on tunnel
+ * @skb: Packet for which lookup is done
+ * @dev: Tunnel device
+ * @net: Network namespace of tunnel device
+ * @sock: Socket which provides route info
+ * @saddr: Memory to store the src ip address
+ * @info: Tunnel information
+ * @protocol: IP protocol
+ * @use_cache: Flag to enable cache usage
+ * This function performs a route lookup on a tunnel
+ *
+ * It returns a valid dst pointer and stores src address to be used in
+ * tunnel in param saddr on success, else a pointer encoded error code.
+ */
+
+struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
+ struct net_device *dev,
+ struct net *net,
+ struct socket *sock,
+ struct in6_addr *saddr,
+ const struct ip_tunnel_info *info,
+ u8 protocol,
+ bool use_cache)
+{
+ struct dst_entry *dst = NULL;
+#ifdef CONFIG_DST_CACHE
+ struct dst_cache *dst_cache;
+#endif
+ struct flowi6 fl6;
+ __u8 prio;
+
+#ifdef CONFIG_DST_CACHE
+ dst_cache = (struct dst_cache *)&info->dst_cache;
+ if (use_cache) {
+ dst = dst_cache_get_ip6(dst_cache, saddr);
+ if (dst)
+ return dst;
+ }
+#endif
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = protocol;
+ fl6.daddr = info->key.u.ipv6.dst;
+ fl6.saddr = info->key.u.ipv6.src;
+ prio = info->key.tos;
+ fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
+ NULL);
+ if (IS_ERR(dst)) {
+ netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
+ return ERR_PTR(-ENETUNREACH);
+ }
+ if (dst->dev == dev) { /* is this necessary? */
+ netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
+ dst_release(dst);
+ return ERR_PTR(-ELOOP);
+ }
+#ifdef CONFIG_DST_CACHE
+ if (use_cache)
+ dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
+#endif
+ *saddr = fl6.saddr;
+ return dst;
+}
+EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
gfp_t gfp)
@@ -1263,11 +1436,10 @@
if (np->frag_size)
mtu = np->frag_size;
}
- if (mtu < IPV6_MIN_MTU)
- return -EINVAL;
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
+ cork->base.mark = ipc6->sockc.mark;
sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
if (dst_allfrag(xfrm_dst_path(&rt->dst)))
@@ -1292,6 +1464,7 @@
{
struct sk_buff *skb, *skb_prev = NULL;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
+ struct ubuf_info *uarg = NULL;
int exthdrlen = 0;
int dst_exthdrlen = 0;
int hh_len;
@@ -1304,7 +1477,7 @@
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
- bool paged;
+ bool paged, extra_uref = false;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1324,14 +1497,19 @@
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
(opt ? opt->opt_nflen : 0);
- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
- sizeof(struct frag_hdr);
headersize = sizeof(struct ipv6hdr) +
(opt ? opt->opt_flen + opt->opt_nflen : 0) +
(dst_allfrag(&rt->dst) ?
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
+
+ if (mtu <= fragheaderlen ||
+ ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
+ goto emsgsize;
+
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
+ sizeof(struct frag_hdr);
/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
* the first fragment
@@ -1369,13 +1547,27 @@
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
csummode = CHECKSUM_PARTIAL;
+ if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+ extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
+ if (rt->dst.dev->features & NETIF_F_SG &&
+ csummode == CHECKSUM_PARTIAL) {
+ paged = true;
+ } else {
+ uarg->zerocopy = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+ }
+ }
+
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
* Otherwise, we need to reserve fragment header and
* fragment alignment (= 8-15 octects, in total).
*
- * Note that we may need to "move" the data from the tail of
+ * Note that we may need to "move" the data from the tail
* of the buffer to the new fragment when we split
* the message.
*
@@ -1489,12 +1681,6 @@
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
dst_exthdrlen);
- /* Only the initial fragment is time stamped */
- skb_shinfo(skb)->tx_flags = cork->tx_flags;
- cork->tx_flags = 0;
- skb_shinfo(skb)->tskey = tskey;
- tskey = 0;
-
/*
* Find where to start putting bytes
*/
@@ -1506,7 +1692,7 @@
if (fraggap) {
skb->csum = skb_copy_and_csum_bits(
skb_prev, maxfraglen,
- data + transhdrlen, fraggap, 0);
+ data + transhdrlen, fraggap);
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
data += fraggap;
@@ -1525,6 +1711,13 @@
transhdrlen = 0;
exthdrlen = 0;
dst_exthdrlen = 0;
+
+ /* Only the initial fragment is time stamped */
+ skb_shinfo(skb)->tx_flags = cork->tx_flags;
+ cork->tx_flags = 0;
+ skb_shinfo(skb)->tskey = tskey;
+ tskey = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
if ((flags & MSG_CONFIRM) && !skb_prev)
skb_set_dst_pending_confirm(skb, 1);
@@ -1555,7 +1748,7 @@
err = -EFAULT;
goto error;
}
- } else {
+ } else if (!uarg || !uarg->zerocopy) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
@@ -1585,6 +1778,10 @@
skb->data_len += copy;
skb->truesize += copy;
wmem_alloc_delta += copy;
+ } else {
+ err = skb_zerocopy_iter_dgram(skb, from, copy);
+ if (err < 0)
+ goto error;
}
offset += copy;
length -= copy;
@@ -1597,6 +1794,8 @@
error_efault:
err = -EFAULT;
error:
+ if (uarg)
+ sock_zerocopy_put_abort(uarg, extra_uref);
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
@@ -1718,7 +1917,7 @@
hdr->daddr = *final_dst;
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = cork->base.mark;
skb->tstamp = cork->base.transmit_time;
--
Gitblit v1.6.2