From 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 22 Oct 2024 10:36:11 +0000 Subject: [PATCH] 修改4g拨号为QMI,需要在系统里后台执行quectel-CM --- kernel/net/core/skbuff.c | 1367 ++++++++++++++++++++++++++++++++++++++++++++++------------- 1 files changed, 1,066 insertions(+), 301 deletions(-) diff --git a/kernel/net/core/skbuff.c b/kernel/net/core/skbuff.c index 6cca6a0..0c8b8ab 100644 --- a/kernel/net/core/skbuff.c +++ b/kernel/net/core/skbuff.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Routines having to do with the 'struct sk_buff' memory handlers. * @@ -25,11 +26,6 @@ * disabled, or you better be *real* sure that the operation is atomic * with respect to whatever list is being frobbed (e.g. via lock_sock() * or via disabling bottom half handlers, etc). - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ /* @@ -63,6 +59,7 @@ #include <linux/errqueue.h> #include <linux/prefetch.h> #include <linux/if_vlan.h> +#include <linux/mpls.h> #include <net/protocol.h> #include <net/dst.h> @@ -70,15 +67,24 @@ #include <net/checksum.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> +#include <net/mpls.h> +#include <net/mptcp.h> #include <linux/uaccess.h> #include <trace/events/skb.h> #include <linux/highmem.h> #include <linux/capability.h> #include <linux/user_namespace.h> +#include <linux/indirect_call_wrapper.h> +#include <trace/hooks/net.h> + +#include "datagram.h" struct kmem_cache *skbuff_head_cache __ro_after_init; static struct kmem_cache *skbuff_fclone_cache __ro_after_init; +#ifdef CONFIG_SKB_EXTENSIONS +static struct kmem_cache *skbuff_ext_cache __ro_after_init; +#endif int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); @@ -97,7 +103,7 @@ static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr, const char msg[]) { - pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n", + pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n", msg, addr, skb->len, sz, skb->head, skb->data, (unsigned long)skb->tail, (unsigned long)skb->end, skb->dev ? skb->dev->name : "<NULL>"); @@ -244,6 +250,9 @@ fclones->skb2.fclone = SKB_FCLONE_CLONE; } + + skb_set_kcov_handle(skb, kcov_common_handle()); + out: return skb; nodata: @@ -252,6 +261,35 @@ goto out; } EXPORT_SYMBOL(__alloc_skb); + +/* Caller must provide SKB that is memset cleared */ +static struct sk_buff *__build_skb_around(struct sk_buff *skb, + void *data, unsigned int frag_size) +{ + struct skb_shared_info *shinfo; + unsigned int size = frag_size ? 
: ksize(data); + + size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + /* Assumes caller memset cleared SKB */ + skb->truesize = SKB_TRUESIZE(size); + refcount_set(&skb->users, 1); + skb->head = data; + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; + + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); + atomic_set(&shinfo->dataref, 1); + + skb_set_kcov_handle(skb, kcov_common_handle()); + + return skb; +} /** * __build_skb - build a network buffer @@ -274,32 +312,15 @@ */ struct sk_buff *__build_skb(void *data, unsigned int frag_size) { - struct skb_shared_info *shinfo; struct sk_buff *skb; - unsigned int size = frag_size ? : ksize(data); skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); - if (!skb) + if (unlikely(!skb)) return NULL; - size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->truesize = SKB_TRUESIZE(size); - refcount_set(&skb->users, 1); - skb->head = data; - skb->data = data; - skb_reset_tail_pointer(skb); - skb->end = skb->tail + size; - skb->mac_header = (typeof(skb->mac_header))~0U; - skb->transport_header = (typeof(skb->transport_header))~0U; - /* make sure we initialize shinfo sequentially */ - shinfo = skb_shinfo(skb); - memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); - atomic_set(&shinfo->dataref, 1); - - return skb; + return __build_skb_around(skb, data, frag_size); } /* build_skb() is wrapper over __build_skb(), that specifically @@ -320,6 +341,29 @@ } EXPORT_SYMBOL(build_skb); +/** + * build_skb_around - build a network buffer around provided skb + * @skb: sk_buff provide by caller, must be memset cleared + * @data: data buffer provided by caller + * @frag_size: size of data, or 0 if head was kmalloced + */ +struct sk_buff *build_skb_around(struct sk_buff *skb, + void *data, unsigned int frag_size) +{ + if (unlikely(!skb)) + return NULL; + + skb = __build_skb_around(skb, data, frag_size); + + if (skb && frag_size) { + skb->head_frag = 1; + if (page_is_pfmemalloc(virt_to_head_page(data))) + skb->pfmemalloc = 1; + } + return skb; +} +EXPORT_SYMBOL(build_skb_around); + #define NAPI_SKB_CACHE_SIZE 64 struct napi_alloc_cache { @@ -330,34 +374,6 @@ static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); - -static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) -{ - struct page_frag_cache *nc; - unsigned long flags; - void *data; - - local_irq_save(flags); - nc = this_cpu_ptr(&netdev_alloc_cache); - data = page_frag_alloc(nc, fragsz, gfp_mask); - local_irq_restore(flags); - return data; -} - -/** - * netdev_alloc_frag - allocate a page fragment - * @fragsz: fragment size - * - * Allocates a frag from a page for receive buffer. - * Uses GFP_ATOMIC allocations. - */ -void *netdev_alloc_frag(unsigned int fragsz) -{ - fragsz = SKB_DATA_ALIGN(fragsz); - - return __netdev_alloc_frag(fragsz, GFP_ATOMIC); -} -EXPORT_SYMBOL(netdev_alloc_frag); static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { @@ -373,6 +389,31 @@ return __napi_alloc_frag(fragsz, GFP_ATOMIC); } EXPORT_SYMBOL(napi_alloc_frag); + +/** + * netdev_alloc_frag - allocate a page fragment + * @fragsz: fragment size + * + * Allocates a frag from a page for receive buffer. 
+ * Uses GFP_ATOMIC allocations. + */ +void *netdev_alloc_frag(unsigned int fragsz) +{ + struct page_frag_cache *nc; + void *data; + + fragsz = SKB_DATA_ALIGN(fragsz); + if (in_irq() || irqs_disabled()) { + nc = this_cpu_ptr(&netdev_alloc_cache); + data = page_frag_alloc(nc, fragsz, GFP_ATOMIC); + } else { + local_bh_disable(); + data = __napi_alloc_frag(fragsz, GFP_ATOMIC); + local_bh_enable(); + } + return data; +} +EXPORT_SYMBOL(netdev_alloc_frag); /** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device @@ -391,7 +432,6 @@ gfp_t gfp_mask) { struct page_frag_cache *nc; - unsigned long flags; struct sk_buff *skb; bool pfmemalloc; void *data; @@ -416,13 +456,17 @@ if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; - local_irq_save(flags); - - nc = this_cpu_ptr(&netdev_alloc_cache); - data = page_frag_alloc(nc, len, gfp_mask); - pfmemalloc = nc->pfmemalloc; - - local_irq_restore(flags); + if (in_irq() || irqs_disabled()) { + nc = this_cpu_ptr(&netdev_alloc_cache); + data = page_frag_alloc(nc, len, gfp_mask); + pfmemalloc = nc->pfmemalloc; + } else { + local_bh_disable(); + nc = this_cpu_ptr(&napi_alloc_cache.page); + data = page_frag_alloc(nc, len, gfp_mask); + pfmemalloc = nc->pfmemalloc; + local_bh_enable(); + } if (unlikely(!data)) return NULL; @@ -433,7 +477,6 @@ return NULL; } - /* use OR instead of assignment to avoid clearing of bits in mask */ if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -498,7 +541,6 @@ return NULL; } - /* use OR instead of assignment to avoid clearing of bits in mask */ if (nc->page.pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -619,7 +661,6 @@ void skb_release_head_state(struct sk_buff *skb) { skb_dst_drop(skb); - secpath_reset(skb); if (skb->destructor) { WARN_ON(in_irq()); skb->destructor(skb); @@ -627,9 +668,7 @@ #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb_nfct(skb)); #endif -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - nf_bridge_put(skb->nf_bridge); -#endif + skb_ext_put(skb); } /* Free everything but the sk_buff shell. */ @@ -668,6 +707,7 @@ if (!skb_unref(skb)) return; + trace_android_vh_kfree_skb(skb); trace_kfree_skb(skb, __builtin_return_address(0)); __kfree_skb(skb); } @@ -684,6 +724,101 @@ } EXPORT_SYMBOL(kfree_skb_list); +/* Dump skb information and contents. + * + * Must only be called from net_ratelimit()-ed paths. + * + * Dumps whole packets if full_pkt, only headers otherwise. + */ +void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + struct net_device *dev = skb->dev; + struct sock *sk = skb->sk; + struct sk_buff *list_skb; + bool has_mac, has_trans; + int headroom, tailroom; + int i, len, seg_len; + + if (full_pkt) + len = skb->len; + else + len = min_t(int, skb->len, MAX_HEADER + 128); + + headroom = skb_headroom(skb); + tailroom = skb_tailroom(skb); + + has_mac = skb_mac_header_was_set(skb); + has_trans = skb_transport_header_was_set(skb); + + printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n" + "mac=(%d,%d) net=(%d,%d) trans=%d\n" + "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n" + "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n" + "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n", + level, skb->len, headroom, skb_headlen(skb), tailroom, + has_mac ? skb->mac_header : -1, + has_mac ? skb_mac_header_len(skb) : -1, + skb->network_header, + has_trans ? skb_network_header_len(skb) : -1, + has_trans ? 
skb->transport_header : -1, + sh->tx_flags, sh->nr_frags, + sh->gso_size, sh->gso_type, sh->gso_segs, + skb->csum, skb->ip_summed, skb->csum_complete_sw, + skb->csum_valid, skb->csum_level, + skb->hash, skb->sw_hash, skb->l4_hash, + ntohs(skb->protocol), skb->pkt_type, skb->skb_iif); + + if (dev) + printk("%sdev name=%s feat=%pNF\n", + level, dev->name, &dev->features); + if (sk) + printk("%ssk family=%hu type=%u proto=%u\n", + level, sk->sk_family, sk->sk_type, sk->sk_protocol); + + if (full_pkt && headroom) + print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET, + 16, 1, skb->head, headroom, false); + + seg_len = min_t(int, skb_headlen(skb), len); + if (seg_len) + print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET, + 16, 1, skb->data, seg_len, false); + len -= seg_len; + + if (full_pkt && tailroom) + print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET, + 16, 1, skb_tail_pointer(skb), tailroom, false); + + for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + u32 p_off, p_len, copied; + struct page *p; + u8 *vaddr; + + skb_frag_foreach_page(frag, skb_frag_off(frag), + skb_frag_size(frag), p, p_off, p_len, + copied) { + seg_len = min_t(int, p_len, len); + vaddr = kmap_atomic(p); + print_hex_dump(level, "skb frag: ", + DUMP_PREFIX_OFFSET, + 16, 1, vaddr + p_off, seg_len, false); + kunmap_atomic(vaddr); + len -= seg_len; + if (!len) + break; + } + } + + if (full_pkt && skb_has_frag_list(skb)) { + printk("skb fraglist:\n"); + skb_walk_frags(skb, list_skb) + skb_dump(level, list_skb, true); + } +} +EXPORT_SYMBOL(skb_dump); + /** * skb_tx_error - report an sk_buff xmit error * @skb: buffer that triggered an error @@ -697,6 +832,7 @@ } EXPORT_SYMBOL(skb_tx_error); +#ifdef CONFIG_TRACEPOINTS /** * consume_skb - free an skbuff * @skb: buffer to free @@ -714,6 +850,7 @@ __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); +#endif /** * consume_stateless_skb - free an skbuff, assuming it is stateless @@ -770,9 +907,6 @@ void napi_consume_skb(struct sk_buff *skb, int budget) { - if (unlikely(!skb)) - return; - /* Zero budget indicate non-NAPI context called us, like netpoll */ if (unlikely(!budget)) { dev_consume_skb_any(skb); @@ -809,9 +943,7 @@ new->dev = old->dev; memcpy(new->cb, old->cb, sizeof(old->cb)); skb_dst_copy(new, old); -#ifdef CONFIG_XFRM - new->sp = secpath_get(old->sp); -#endif + __skb_ext_copy(new, old); __nf_copy(new, old, false); /* Note : this field could be in headers_start/headers_end section @@ -887,6 +1019,31 @@ return n; #undef C } + +/** + * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg + * @first: first sk_buff of the msg + */ +struct sk_buff *alloc_skb_for_msg(struct sk_buff *first) +{ + struct sk_buff *n; + + n = alloc_skb(0, GFP_ATOMIC); + if (!n) + return NULL; + + n->len = first->len; + n->data_len = first->len; + n->truesize = first->truesize; + + skb_shinfo(n)->frag_list = first; + + __copy_skb_header(n, first); + n->destructor = NULL; + + return n; +} +EXPORT_SYMBOL_GPL(alloc_skb_for_msg); /** * skb_morph - morph one skb into another @@ -1012,7 +1169,11 @@ uarg->len++; uarg->bytelen = bytelen; atomic_set(&sk->sk_zckey, ++next); - sock_zerocopy_get(uarg); + + /* no extra ref when appending to datagram (MSG_MORE) */ + if (sk->sk_type == SOCK_STREAM) + sock_zerocopy_get(uarg); + return uarg; } } @@ -1102,7 +1263,7 @@ } EXPORT_SYMBOL_GPL(sock_zerocopy_put); -void sock_zerocopy_put_abort(struct ubuf_info *uarg) +void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) 
{ if (uarg) { struct sock *sk = skb_from_uarg(uarg)->sk; @@ -1110,13 +1271,17 @@ atomic_dec(&sk->sk_zckey); uarg->len--; - sock_zerocopy_put(uarg); + if (have_uref) + sock_zerocopy_put(uarg); } } EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort); -extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, - struct iov_iter *from, size_t length); +int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) +{ + return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); +} +EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram); int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, @@ -1144,7 +1309,7 @@ return err; } - skb_zcopy_set(skb, uarg); + skb_zcopy_set(skb, uarg, NULL); return skb->len - orig_len; } EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream); @@ -1164,7 +1329,7 @@ if (skb_copy_ubufs(nskb, GFP_ATOMIC)) return -EIO; } - skb_zcopy_set(nskb, skb_uarg(orig)); + skb_zcopy_set(nskb, skb_uarg(orig), NULL); } return 0; } @@ -1220,7 +1385,7 @@ struct page *p; u8 *vaddr; - skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f), + skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f), p, p_off, p_len, copied) { u32 copy, done = 0; vaddr = kmap_atomic(p); @@ -1510,11 +1675,10 @@ skb->head = data; skb->head_frag = 0; skb->data += off; + + skb_set_end_offset(skb, size); #ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; off = nhead; -#else - skb->end = skb->head + size; #endif skb->tail += off; skb_headers_offset_update(skb, nhead); @@ -1561,6 +1725,38 @@ return skb2; } EXPORT_SYMBOL(skb_realloc_headroom); + +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) +{ + unsigned int saved_end_offset, saved_truesize; + struct skb_shared_info *shinfo; + int res; + + saved_end_offset = skb_end_offset(skb); + saved_truesize = skb->truesize; + + res = pskb_expand_head(skb, 0, 0, pri); + if (res) + return res; + + skb->truesize = saved_truesize; + + if (likely(skb_end_offset(skb) == saved_end_offset)) + return 0; + + shinfo = skb_shinfo(skb); + + /* We are about to change back skb->end, + * we need to move skb_shinfo() to its new location. + */ + memmove(skb->head + saved_end_offset, + shinfo, + offsetof(struct skb_shared_info, frags[shinfo->nr_frags])); + + skb_set_end_offset(skb, saved_end_offset); + + return 0; +} /** * skb_copy_expand - copy and expand sk_buff @@ -1944,8 +2140,6 @@ struct sk_buff *insp = NULL; do { - BUG_ON(!list); - if (list->len <= eat) { /* Eaten as whole. */ eat -= list->len; @@ -1953,6 +2147,9 @@ insp = list; } else { /* Eaten partially. */ + if (skb_is_gso(skb) && !list->head_frag && + skb_headlen(list)) + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; if (skb_shared(list)) { /* Sucks! We need to fork list. 
:-( */ @@ -1997,10 +2194,12 @@ skb_frag_unref(skb, i); eat -= size; } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[k]; + + *frag = skb_shinfo(skb)->frags[i]; if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); + skb_frag_off_add(frag, eat); + skb_frag_size_sub(frag, eat); if (!i) goto end; eat = 0; @@ -2072,7 +2271,7 @@ copy = len; skb_frag_foreach_page(f, - f->page_offset + offset - start, + skb_frag_off(f) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(to + copied, vaddr + p_off, p_len); @@ -2248,7 +2447,7 @@ const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(skb_frag_page(f), - f->page_offset, skb_frag_size(f), + skb_frag_off(f), skb_frag_size(f), offset, len, spd, false, sk, pipe)) return true; } @@ -2338,20 +2537,20 @@ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; - if (offset < frag->size) + if (offset < skb_frag_size(frag)) break; - offset -= frag->size; + offset -= skb_frag_size(frag); } for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; - slen = min_t(size_t, len, frag->size - offset); + slen = min_t(size_t, len, skb_frag_size(frag) - offset); while (slen) { - ret = kernel_sendpage_locked(sk, frag->page.p, - frag->page_offset + offset, + ret = kernel_sendpage_locked(sk, skb_frag_page(frag), + skb_frag_off(frag) + offset, slen, MSG_DONTWAIT); if (ret <= 0) goto error; @@ -2385,19 +2584,6 @@ return orig_len == len ? ret : orig_len - len; } EXPORT_SYMBOL_GPL(skb_send_sock_locked); - -/* Send skb data on a socket. */ -int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len) -{ - int ret = 0; - - lock_sock(sk); - ret = skb_send_sock_locked(sk, skb, offset, len); - release_sock(sk); - - return ret; -} -EXPORT_SYMBOL_GPL(skb_send_sock); /** * skb_store_bits - store bits from kernel buffer to skb @@ -2446,7 +2632,7 @@ copy = len; skb_frag_foreach_page(frag, - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(vaddr + p_off, from + copied, p_len); @@ -2501,7 +2687,8 @@ if (copy > 0) { if (copy > len) copy = len; - csum = ops->update(skb->data + offset, copy, csum); + csum = INDIRECT_CALL_1(ops->update, csum_partial_ext, + skb->data + offset, copy, csum); if ((len -= copy) == 0) return csum; offset += copy; @@ -2525,12 +2712,16 @@ copy = len; skb_frag_foreach_page(frag, - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); - csum2 = ops->update(vaddr + p_off, p_len, 0); + csum2 = INDIRECT_CALL_1(ops->update, + csum_partial_ext, + vaddr + p_off, p_len, 0); kunmap_atomic(vaddr); - csum = ops->combine(csum, csum2, pos, p_len); + csum = INDIRECT_CALL_1(ops->combine, + csum_block_add_ext, csum, + csum2, pos, p_len); pos += p_len; } @@ -2553,7 +2744,8 @@ copy = len; csum2 = __skb_checksum(frag_iter, offset - start, copy, 0, ops); - csum = ops->combine(csum, csum2, pos, copy); + csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext, + csum, csum2, pos, copy); if ((len -= copy) == 0) return csum; offset += copy; @@ -2582,19 +2774,20 @@ /* Both of above in one bottle. 
*/ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, - u8 *to, int len, __wsum csum) + u8 *to, int len) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int pos = 0; + __wsum csum = 0; /* Copy header. */ if (copy > 0) { if (copy > len) copy = len; csum = csum_partial_copy_nocheck(skb->data + offset, to, - copy, csum); + copy); if ((len -= copy) == 0) return csum; offset += copy; @@ -2619,12 +2812,12 @@ copy = len; skb_frag_foreach_page(frag, - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = csum_partial_copy_nocheck(vaddr + p_off, to + copied, - p_len, 0); + p_len); kunmap_atomic(vaddr); csum = csum_block_add(csum, csum2, pos); pos += p_len; @@ -2650,7 +2843,7 @@ copy = len; csum2 = skb_copy_and_csum_bits(frag_iter, offset - start, - to, copy, 0); + to, copy); csum = csum_block_add(csum, csum2, pos); if ((len -= copy) == 0) return csum; @@ -2664,6 +2857,65 @@ return csum; } EXPORT_SYMBOL(skb_copy_and_csum_bits); + +__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) +{ + __sum16 sum; + + sum = csum_fold(skb_checksum(skb, 0, len, skb->csum)); + /* See comments in __skb_checksum_complete(). */ + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev, skb); + } + if (!skb_shared(skb)) + skb->csum_valid = !sum; + return sum; +} +EXPORT_SYMBOL(__skb_checksum_complete_head); + +/* This function assumes skb->csum already holds pseudo header's checksum, + * which has been changed from the hardware checksum, for example, by + * __skb_checksum_validate_complete(). And, the original skb->csum must + * have been validated unsuccessfully for CHECKSUM_COMPLETE case. + * + * It returns non-zero if the recomputed checksum is still invalid, otherwise + * zero. The new checksum is stored back into skb->csum unless the skb is + * shared. + */ +__sum16 __skb_checksum_complete(struct sk_buff *skb) +{ + __wsum csum; + __sum16 sum; + + csum = skb_checksum(skb, 0, skb->len, 0); + + sum = csum_fold(csum_add(skb->csum, csum)); + /* This check is inverted, because we already knew the hardware + * checksum is invalid before calling this function. So, if the + * re-computed checksum is valid instead, then we have a mismatch + * between the original skb->csum and skb_checksum(). This means either + * the original hardware checksum is incorrect or we screw up skb->csum + * when moving skb->data around. 
+ */ + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev, skb); + } + + if (!skb_shared(skb)) { + /* Save full packet checksum */ + skb->csum = csum; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum_complete_sw = 1; + skb->csum_valid = !sum; + } + + return sum; +} +EXPORT_SYMBOL(__skb_checksum_complete); static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum) { @@ -2779,11 +3031,15 @@ skb_zerocopy_clone(to, from, GFP_ATOMIC); for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + int size; + if (!len) break; skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; - skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); - len -= skb_shinfo(to)->frags[j].size; + size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]), + len); + skb_frag_size_set(&skb_shinfo(to)->frags[j], size); + len -= size; skb_frag_ref(to, j); j++; } @@ -2810,7 +3066,7 @@ csum = 0; if (csstart != skb->len) csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, - skb->len - csstart, 0); + skb->len - csstart); if (skb->ip_summed == CHECKSUM_PARTIAL) { long csstuff = csstart + skb->csum_offset; @@ -2985,28 +3241,6 @@ } EXPORT_SYMBOL(skb_append); -/** - * skb_insert - insert a buffer - * @old: buffer to insert before - * @newsk: buffer to insert - * @list: list to use - * - * Place a packet before a given packet in a list. The list locks are - * taken and this function is atomic with respect to other list locked - * calls. - * - * A buffer cannot be placed on two lists at the same time. - */ -void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_insert(newsk, old->prev, old, list); - spin_unlock_irqrestore(&list->lock, flags); -} -EXPORT_SYMBOL(skb_insert); - static inline void skb_split_inside_header(struct sk_buff *skb, struct sk_buff* skb1, const u32 len, const int pos) @@ -3056,7 +3290,7 @@ * 2. Split is accurately. We make this. */ skb_frag_ref(skb, i); - skb_shinfo(skb1)->frags[0].page_offset += len - pos; + skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); skb_shinfo(skb)->nr_frags++; @@ -3095,19 +3329,7 @@ */ static int skb_prepare_for_shift(struct sk_buff *skb) { - int ret = 0; - - if (skb_cloned(skb)) { - /* Save and restore truesize: pskb_expand_head() may reallocate - * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we - * cannot change truesize at this point. 
- */ - unsigned int save_truesize = skb->truesize; - - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - skb->truesize = save_truesize; - } - return ret; + return skb_unclone_keeptruesize(skb, GFP_ATOMIC); } /** @@ -3131,7 +3353,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) { int from, to, merge, todo; - struct skb_frag_struct *fragfrom, *fragto; + skb_frag_t *fragfrom, *fragto; BUG_ON(shiftlen > skb->len); @@ -3150,7 +3372,7 @@ */ if (!to || !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), - fragfrom->page_offset)) { + skb_frag_off(fragfrom))) { merge = -1; } else { merge = to - 1; @@ -3167,7 +3389,7 @@ skb_frag_size_add(fragto, shiftlen); skb_frag_size_sub(fragfrom, shiftlen); - fragfrom->page_offset += shiftlen; + skb_frag_off_add(fragfrom, shiftlen); goto onlymerged; } @@ -3198,11 +3420,11 @@ } else { __skb_frag_ref(fragfrom); - fragto->page = fragfrom->page; - fragto->page_offset = fragfrom->page_offset; + skb_frag_page_copy(fragto, fragfrom); + skb_frag_off_copy(fragto, fragfrom); skb_frag_size_set(fragto, todo); - fragfrom->page_offset += todo; + skb_frag_off_add(fragfrom, todo); skb_frag_size_sub(fragfrom, todo); todo = 0; @@ -3327,7 +3549,7 @@ if (!st->frag_data) st->frag_data = kmap_atomic(skb_frag_page(frag)); - *data = (u8 *) st->frag_data + frag->page_offset + + *data = (u8 *) st->frag_data + skb_frag_off(frag) + (abs_offset - st->stepped_offset); return block_limit - abs_offset; @@ -3417,64 +3639,6 @@ } EXPORT_SYMBOL(skb_find_text); -/** - * skb_append_datato_frags - append the user data to a skb - * @sk: sock structure - * @skb: skb structure to be appended with user data. - * @getfrag: call back function to be used for getting the user data - * @from: pointer to user message iov - * @length: length of the iov message - * - * Description: This procedure append the user data in the fragment part - * of the skb if any page alloc fails user this procedure returns -ENOMEM - */ -int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int (*getfrag)(void *from, char *to, int offset, - int len, int odd, struct sk_buff *skb), - void *from, int length) -{ - int frg_cnt = skb_shinfo(skb)->nr_frags; - int copy; - int offset = 0; - int ret; - struct page_frag *pfrag = ¤t->task_frag; - - do { - /* Return error if we don't have space for new frag */ - if (frg_cnt >= MAX_SKB_FRAGS) - return -EMSGSIZE; - - if (!sk_page_frag_refill(sk, pfrag)) - return -ENOMEM; - - /* copy the user data to page */ - copy = min_t(int, length, pfrag->size - pfrag->offset); - - ret = getfrag(from, page_address(pfrag->page) + pfrag->offset, - offset, copy, 0, skb); - if (ret < 0) - return -EFAULT; - - /* copy was successful so update the size parameters */ - skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset, - copy); - frg_cnt++; - pfrag->offset += copy; - get_page(pfrag->page); - - skb->truesize += copy; - refcount_add(copy, &sk->sk_wmem_alloc); - skb->len += copy; - skb->data_len += copy; - offset += copy; - length -= copy; - - } while (length > 0); - - return 0; -} -EXPORT_SYMBOL(skb_append_datato_frags); - int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size) { @@ -3521,11 +3685,126 @@ struct page *page; page = virt_to_head_page(frag_skb->head); - head_frag.page.p = page; - head_frag.page_offset = frag_skb->data - - (unsigned char *)page_address(page); - head_frag.size = skb_headlen(frag_skb); + __skb_frag_set_page(&head_frag, page); + skb_frag_off_set(&head_frag, frag_skb->data - + (unsigned char *)page_address(page)); 
+ skb_frag_size_set(&head_frag, skb_headlen(frag_skb)); return head_frag; +} + +struct sk_buff *skb_segment_list(struct sk_buff *skb, + netdev_features_t features, + unsigned int offset) +{ + struct sk_buff *list_skb = skb_shinfo(skb)->frag_list; + unsigned int tnl_hlen = skb_tnl_header_len(skb); + unsigned int delta_truesize = 0; + unsigned int delta_len = 0; + struct sk_buff *tail = NULL; + struct sk_buff *nskb, *tmp; + int len_diff, err; + + skb_push(skb, -skb_network_offset(skb) + offset); + + /* Ensure the head is writeable before touching the shared info */ + err = skb_unclone(skb, GFP_ATOMIC); + if (err) + goto err_linearize; + + skb_shinfo(skb)->frag_list = NULL; + + while (list_skb) { + nskb = list_skb; + list_skb = list_skb->next; + + err = 0; + delta_truesize += nskb->truesize; + if (skb_shared(nskb)) { + tmp = skb_clone(nskb, GFP_ATOMIC); + if (tmp) { + consume_skb(nskb); + nskb = tmp; + err = skb_unclone(nskb, GFP_ATOMIC); + } else { + err = -ENOMEM; + } + } + + if (!tail) + skb->next = nskb; + else + tail->next = nskb; + + if (unlikely(err)) { + nskb->next = list_skb; + goto err_linearize; + } + + tail = nskb; + + delta_len += nskb->len; + + skb_push(nskb, -skb_network_offset(nskb) + offset); + + skb_release_head_state(nskb); + len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb); + __copy_skb_header(nskb, skb); + + skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); + nskb->transport_header += len_diff; + skb_copy_from_linear_data_offset(skb, -tnl_hlen, + nskb->data - tnl_hlen, + offset + tnl_hlen); + + if (skb_needs_linearize(nskb, features) && + __skb_linearize(nskb)) + goto err_linearize; + } + + skb->truesize = skb->truesize - delta_truesize; + skb->data_len = skb->data_len - delta_len; + skb->len = skb->len - delta_len; + + skb_gso_reset(skb); + + skb->prev = tail; + + if (skb_needs_linearize(skb, features) && + __skb_linearize(skb)) + goto err_linearize; + + skb_get(skb); + + return skb; + +err_linearize: + kfree_skb_list(skb->next); + skb->next = NULL; + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL_GPL(skb_segment_list); + +int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) +{ + if (unlikely(p->len + skb->len >= 65536)) + return -E2BIG; + + if (NAPI_GRO_CB(p)->last == p) + skb_shinfo(p)->frag_list = skb; + else + NAPI_GRO_CB(p)->last->next = skb; + + skb_pull(skb, skb_gro_offset(skb)); + + NAPI_GRO_CB(p)->last = skb; + NAPI_GRO_CB(p)->count++; + p->data_len += skb->len; + p->truesize += skb->truesize; + p->len += skb->len; + + NAPI_GRO_CB(skb)->same_flow = 1; + + return 0; } /** @@ -3543,44 +3822,44 @@ struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; - skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); - struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int partial_segs = 0; unsigned int headroom; unsigned int len = head_skb->len; + struct sk_buff *frag_skb; + skb_frag_t *frag; __be16 proto; bool csum, sg; - int nfrags = skb_shinfo(head_skb)->nr_frags; int err = -ENOMEM; int i = 0; - int pos; - int dummy; + int nfrags, pos; - if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && - (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { - /* gso_size is untrusted, and we have a frag_list with a linear - * non head_frag head. 
- * - * (we assume checking the first list_skb member suffices; - * i.e if either of the list_skb members have non head_frag - * head, then the first one has too). - * - * If head_skb's headlen does not fit requested gso_size, it - * means that the frag_list members do NOT terminate on exact - * gso_size boundaries. Hence we cannot perform skb_frag_t page - * sharing. Therefore we must fallback to copying the frag_list - * skbs; we do so by disabling SG. - */ - if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) - features &= ~NETIF_F_SG; + if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) && + mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) { + struct sk_buff *check_skb; + + for (check_skb = list_skb; check_skb; check_skb = check_skb->next) { + if (skb_headlen(check_skb) && !check_skb->head_frag) { + /* gso_size is untrusted, and we have a frag_list with + * a linear non head_frag item. + * + * If head_skb's headlen does not fit requested gso_size, + * it means that the frag_list members do NOT terminate + * on exact gso_size boundaries. Hence we cannot perform + * skb_frag_t page sharing. Therefore we must fallback to + * copying the frag_list skbs; we do so by disabling SG. + */ + features &= ~NETIF_F_SG; + break; + } + } } __skb_push(head_skb, doffset); - proto = skb_network_protocol(head_skb, &dummy); + proto = skb_network_protocol(head_skb, NULL); if (unlikely(!proto)) return ERR_PTR(-EINVAL); @@ -3633,6 +3912,13 @@ headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); + if (skb_orphan_frags(head_skb, GFP_ATOMIC)) + return ERR_PTR(-ENOMEM); + + nfrags = skb_shinfo(head_skb)->nr_frags; + frag = skb_shinfo(head_skb)->frags; + frag_skb = head_skb; + do { struct sk_buff *nskb; skb_frag_t *nskb_frag; @@ -3657,6 +3943,10 @@ (skb_headlen(list_skb) == len || sg)) { BUG_ON(skb_headlen(list_skb) > len); + nskb = skb_clone(list_skb, GFP_ATOMIC); + if (unlikely(!nskb)) + goto err; + i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; @@ -3675,11 +3965,7 @@ frag++; } - nskb = skb_clone(list_skb, GFP_ATOMIC); list_skb = list_skb->next; - - if (unlikely(!nskb)) - goto err; if (unlikely(pskb_trim(nskb, len))) { kfree_skb(nskb); @@ -3726,14 +4012,20 @@ goto perform_csum_check; if (!sg) { - if (!nskb->remcsum_offload) - nskb->ip_summed = CHECKSUM_NONE; - SKB_GSO_CB(nskb)->csum = - skb_copy_and_csum_bits(head_skb, offset, - skb_put(nskb, len), - len, 0); - SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + if (!csum) { + if (!nskb->remcsum_offload) + nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum = + skb_copy_and_csum_bits(head_skb, offset, + skb_put(nskb, + len), + len); + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + doffset; + } else { + if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len)) + goto err; + } continue; } @@ -3745,12 +4037,16 @@ skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & SKBTX_SHARED_FRAG; - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || - skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) + if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) goto err; while (pos < offset + len) { if (i >= nfrags) { + if (skb_orphan_frags(list_skb, GFP_ATOMIC) || + skb_zerocopy_clone(nskb, list_skb, + GFP_ATOMIC)) + goto err; + i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; @@ -3764,10 +4060,6 @@ i--; frag--; } - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || - skb_zerocopy_clone(nskb, frag_skb, - GFP_ATOMIC)) - goto err; list_skb = list_skb->next; } @@ 
-3786,7 +4078,7 @@ size = skb_frag_size(nskb_frag); if (pos < offset) { - nskb_frag->page_offset += offset - pos; + skb_frag_off_add(nskb_frag, offset - pos); skb_frag_size_sub(nskb_frag, offset - pos); } @@ -3907,7 +4199,7 @@ *--frag = *--frag2; } while (--i); - frag->page_offset += offset; + skb_frag_off_add(frag, offset); skb_frag_size_sub(frag, offset); /* all fragments truesize : remove (head size + sk_buff) */ @@ -3936,8 +4228,8 @@ pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; - frag->page.p = page; - frag->page_offset = first_offset; + __skb_frag_set_page(frag, page); + skb_frag_off_set(frag, first_offset); skb_frag_size_set(frag, first_size); memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); @@ -3953,7 +4245,7 @@ if (offset > headlen) { unsigned int eat = offset - headlen; - skbinfo->frags[0].page_offset += eat; + skb_frag_off_add(&skbinfo->frags[0], eat); skb_frag_size_sub(&skbinfo->frags[0], eat); skb->data_len -= eat; skb->len -= eat; @@ -3983,7 +4275,58 @@ NAPI_GRO_CB(skb)->same_flow = 1; return 0; } -EXPORT_SYMBOL_GPL(skb_gro_receive); + +#ifdef CONFIG_SKB_EXTENSIONS +#define SKB_EXT_ALIGN_VALUE 8 +#define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE) + +static const u8 skb_ext_type_len[] = { +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + [SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info), +#endif +#ifdef CONFIG_XFRM + [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path), +#endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), +#endif +#if IS_ENABLED(CONFIG_MPTCP) + [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), +#endif +}; + +static __always_inline unsigned int skb_ext_total_length(void) +{ + return SKB_EXT_CHUNKSIZEOF(struct skb_ext) + +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + skb_ext_type_len[SKB_EXT_BRIDGE_NF] + +#endif +#ifdef CONFIG_XFRM + skb_ext_type_len[SKB_EXT_SEC_PATH] + +#endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + skb_ext_type_len[TC_SKB_EXT] + +#endif +#if IS_ENABLED(CONFIG_MPTCP) + skb_ext_type_len[SKB_EXT_MPTCP] + +#endif + 0; +} + +static void skb_extensions_init(void) +{ + BUILD_BUG_ON(SKB_EXT_NUM >= 8); + BUILD_BUG_ON(skb_ext_total_length() > 255); + + skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", + SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), + 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL); +} +#else +static void skb_extensions_init(void) {} +#endif void __init skb_init(void) { @@ -3999,6 +4342,7 @@ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + skb_extensions_init(); } static int @@ -4037,7 +4381,7 @@ if (copy > len) copy = len; sg_set_page(&sg[elt], skb_frag_page(frag), copy, - frag->page_offset+offset-start); + skb_frag_off(frag) + offset - start); elt++; if (!(len -= copy)) return elt; @@ -4154,7 +4498,7 @@ * at the moment even if they are anonymous). */ if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && - __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) + !__pskb_pull_tail(skb, __skb_pagelen(skb))) return -ENOMEM; /* Easy case. Most of packets will go this way. 
*/ @@ -4258,7 +4602,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned int)sk->sk_rcvbuf) + (unsigned int)READ_ONCE(sk->sk_rcvbuf)) return -ENOMEM; skb_orphan(skb); @@ -4377,7 +4721,7 @@ { bool ret; - if (likely(sysctl_tstamp_allow_data || tsonly)) + if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly)) return true; read_lock_bh(&sk->sk_callback_lock); @@ -4433,13 +4777,18 @@ if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) { - skb = tcp_get_timestamping_opt_stats(sk); + skb = tcp_get_timestamping_opt_stats(sk, orig_skb); opt_stats = true; } else #endif skb = alloc_skb(0, GFP_ATOMIC); } else { skb = skb_clone(orig_skb, GFP_ATOMIC); + + if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) { + kfree_skb(skb); + return; + } } if (!skb) return; @@ -4550,9 +4899,9 @@ typeof(IPPROTO_IP) proto, unsigned int off) { - switch (proto) { - int err; + int err; + switch (proto) { case IPPROTO_TCP: err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), off + MAX_TCP_HDR_LEN); @@ -4595,7 +4944,7 @@ if (err < 0) goto out; - if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) + if (ip_is_fragment(ip_hdr(skb))) fragment = true; off = ip_hdrlen(skb); @@ -4962,13 +5311,13 @@ skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); - secpath_reset(skb); - nf_reset(skb); + skb_ext_reset(skb); + nf_reset_ct(skb); nf_reset_trace(skb); #ifdef CONFIG_NET_SWITCHDEV skb->offload_fwd_mark = 0; - skb->offload_mr_fwd_mark = 0; + skb->offload_l3_fwd_mark = 0; #endif if (!xnet) @@ -5060,6 +5409,8 @@ * - L2+L3+L4+payload size (e.g. sanity check before passing to driver) * * This is a helper to do that correctly considering GSO_BY_FRAGS. + * + * @skb: GSO skb * * @seg_len: The segmented length (from skb_gso_*_seglen). In the * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. @@ -5246,7 +5597,7 @@ int err; if (likely(skb_vlan_tag_present(skb))) { - skb->vlan_tci = 0; + __vlan_hwaccel_clear_tag(skb); } else { if (unlikely(!eth_type_vlan(skb->protocol))) return 0; @@ -5298,6 +5649,252 @@ return 0; } EXPORT_SYMBOL(skb_vlan_push); + +/** + * skb_eth_pop() - Drop the Ethernet header at the head of a packet + * + * @skb: Socket buffer to modify + * + * Drop the Ethernet header of @skb. + * + * Expects that skb->data points to the mac header and that no VLAN tags are + * present. + * + * Returns 0 on success, -errno otherwise. + */ +int skb_eth_pop(struct sk_buff *skb) +{ + if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) || + skb_network_offset(skb) < ETH_HLEN) + return -EPROTO; + + skb_pull_rcsum(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + return 0; +} +EXPORT_SYMBOL(skb_eth_pop); + +/** + * skb_eth_push() - Add a new Ethernet header at the head of a packet + * + * @skb: Socket buffer to modify + * @dst: Destination MAC address of the new header + * @src: Source MAC address of the new header + * + * Prepend @skb with a new Ethernet header. + * + * Expects that skb->data points to the mac header, which must be empty. + * + * Returns 0 on success, -errno otherwise. 
+ */ +int skb_eth_push(struct sk_buff *skb, const unsigned char *dst, + const unsigned char *src) +{ + struct ethhdr *eth; + int err; + + if (skb_network_offset(skb) || skb_vlan_tag_present(skb)) + return -EPROTO; + + err = skb_cow_head(skb, sizeof(*eth)); + if (err < 0) + return err; + + skb_push(skb, sizeof(*eth)); + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + eth = eth_hdr(skb); + ether_addr_copy(eth->h_dest, dst); + ether_addr_copy(eth->h_source, src); + eth->h_proto = skb->protocol; + + skb_postpush_rcsum(skb, eth, sizeof(*eth)); + + return 0; +} +EXPORT_SYMBOL(skb_eth_push); + +/* Update the ethertype of hdr and the skb csum value if required. */ +static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr, + __be16 ethertype) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __be16 diff[] = { ~hdr->h_proto, ethertype }; + + skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); + } + + hdr->h_proto = ethertype; +} + +/** + * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of + * the packet + * + * @skb: buffer + * @mpls_lse: MPLS label stack entry to push + * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848) + * @mac_len: length of the MAC header + * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is + * ethernet + * + * Expects skb->data at mac header. + * + * Returns 0 on success, -errno otherwise. + */ +int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, + int mac_len, bool ethernet) +{ + struct mpls_shim_hdr *lse; + int err; + + if (unlikely(!eth_p_mpls(mpls_proto))) + return -EINVAL; + + /* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */ + if (skb->encapsulation) + return -EINVAL; + + err = skb_cow_head(skb, MPLS_HLEN); + if (unlikely(err)) + return err; + + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_protocol(skb, skb->protocol); + } + + skb_push(skb, MPLS_HLEN); + memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), + mac_len); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_reset_mac_len(skb); + + lse = mpls_hdr(skb); + lse->label_stack_entry = mpls_lse; + skb_postpush_rcsum(skb, lse, MPLS_HLEN); + + if (ethernet && mac_len >= ETH_HLEN) + skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto); + skb->protocol = mpls_proto; + + return 0; +} +EXPORT_SYMBOL_GPL(skb_mpls_push); + +/** + * skb_mpls_pop() - pop the outermost MPLS header + * + * @skb: buffer + * @next_proto: ethertype of header after popped MPLS header + * @mac_len: length of the MAC header + * @ethernet: flag to indicate if the packet is ethernet + * + * Expects skb->data at mac header. + * + * Returns 0 on success, -errno otherwise. + */ +int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, + bool ethernet) +{ + int err; + + if (unlikely(!eth_p_mpls(skb->protocol))) + return 0; + + err = skb_ensure_writable(skb, mac_len + MPLS_HLEN); + if (unlikely(err)) + return err; + + skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); + memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), + mac_len); + + __skb_pull(skb, MPLS_HLEN); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + + if (ethernet && mac_len >= ETH_HLEN) { + struct ethhdr *hdr; + + /* use mpls_hdr() to get ethertype to account for VLANs. 
*/ + hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); + skb_mod_eth_type(skb, hdr, next_proto); + } + skb->protocol = next_proto; + + return 0; +} +EXPORT_SYMBOL_GPL(skb_mpls_pop); + +/** + * skb_mpls_update_lse() - modify outermost MPLS header and update csum + * + * @skb: buffer + * @mpls_lse: new MPLS label stack entry to update to + * + * Expects skb->data at mac header. + * + * Returns 0 on success, -errno otherwise. + */ +int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse) +{ + int err; + + if (unlikely(!eth_p_mpls(skb->protocol))) + return -EINVAL; + + err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); + if (unlikely(err)) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse }; + + skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); + } + + mpls_hdr(skb)->label_stack_entry = mpls_lse; + + return 0; +} +EXPORT_SYMBOL_GPL(skb_mpls_update_lse); + +/** + * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header + * + * @skb: buffer + * + * Expects skb->data at mac header. + * + * Returns 0 on success, -errno otherwise. + */ +int skb_mpls_dec_ttl(struct sk_buff *skb) +{ + u32 lse; + u8 ttl; + + if (unlikely(!eth_p_mpls(skb->protocol))) + return -EINVAL; + + if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN)) + return -ENOMEM; + + lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry); + ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; + if (!--ttl) + return -EINVAL; + + lse &= ~MPLS_LS_TTL_MASK; + lse |= ttl << MPLS_LS_TTL_SHIFT; + + return skb_mpls_update_lse(skb, cpu_to_be32(lse)); +} +EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl); /** * alloc_skb_with_frags - allocate skb with page frags @@ -5421,11 +6018,7 @@ skb->head = data; skb->data = data; skb->head_frag = 0; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; -#else - skb->end = skb->head + size; -#endif + skb_set_end_offset(skb, size); skb_set_tail_pointer(skb, skb_headlen(skb)); skb_headers_offset_update(skb, 0); skb->cloned = 0; @@ -5517,8 +6110,7 @@ size = SKB_WITH_OVERHEAD(ksize(data)); memcpy((struct skb_shared_info *)(data + size), - skb_shinfo(skb), offsetof(struct skb_shared_info, - frags[skb_shinfo(skb)->nr_frags])); + skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0])); if (skb_orphan_frags(skb, gfp_mask)) { kfree(data); return -ENOMEM; @@ -5539,7 +6131,7 @@ * where splitting is expensive. * 2. Split is accurately. We make this. */ - shinfo->frags[0].page_offset += off - pos; + skb_frag_off_add(&shinfo->frags[0], off - pos); skb_frag_size_sub(&shinfo->frags[0], off - pos); } skb_frag_ref(skb, i); @@ -5564,11 +6156,7 @@ skb->head = data; skb->head_frag = 0; skb->data = data; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->end = size; -#else - skb->end = skb->head + size; -#endif + skb_set_end_offset(skb, size); skb_reset_tail_pointer(skb); skb_headers_offset_update(skb, 0); skb->cloned = 0; @@ -5642,4 +6230,181 @@ */ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); } -EXPORT_SYMBOL_GPL(skb_condense); + +#ifdef CONFIG_SKB_EXTENSIONS +static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) +{ + return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE); +} + +/** + * __skb_ext_alloc - allocate a new skb extensions storage + * + * @flags: See kmalloc(). + * + * Returns the newly allocated pointer. The pointer can later attached to a + * skb via __skb_ext_set(). + * Note: caller must handle the skb_ext as an opaque data. 
+ */ +struct skb_ext *__skb_ext_alloc(gfp_t flags) +{ + struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags); + + if (new) { + memset(new->offset, 0, sizeof(new->offset)); + refcount_set(&new->refcnt, 1); + } + + return new; +} + +static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old, + unsigned int old_active) +{ + struct skb_ext *new; + + if (refcount_read(&old->refcnt) == 1) + return old; + + new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); + if (!new) + return NULL; + + memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE); + refcount_set(&new->refcnt, 1); + +#ifdef CONFIG_XFRM + if (old_active & (1 << SKB_EXT_SEC_PATH)) { + struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH); + unsigned int i; + + for (i = 0; i < sp->len; i++) + xfrm_state_hold(sp->xvec[i]); + } +#endif + __skb_ext_put(old); + return new; +} + +/** + * __skb_ext_set - attach the specified extension storage to this skb + * @skb: buffer + * @id: extension id + * @ext: extension storage previously allocated via __skb_ext_alloc() + * + * Existing extensions, if any, are cleared. + * + * Returns the pointer to the extension. + */ +void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, + struct skb_ext *ext) +{ + unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext); + + skb_ext_put(skb); + newlen = newoff + skb_ext_type_len[id]; + ext->chunks = newlen; + ext->offset[id] = newoff; + skb->extensions = ext; + skb->active_extensions = 1 << id; + return skb_ext_get_ptr(ext, id); +} + +/** + * skb_ext_add - allocate space for given extension, COW if needed + * @skb: buffer + * @id: extension to allocate space for + * + * Allocates enough space for the given extension. + * If the extension is already present, a pointer to that extension + * is returned. + * + * If the skb was cloned, COW applies and the returned memory can be + * modified without changing the extension space of clones buffers. + * + * Returns pointer to the extension or NULL on allocation failure. + */ +void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) +{ + struct skb_ext *new, *old = NULL; + unsigned int newlen, newoff; + + if (skb->active_extensions) { + old = skb->extensions; + + new = skb_ext_maybe_cow(old, skb->active_extensions); + if (!new) + return NULL; + + if (__skb_ext_exist(new, id)) + goto set_active; + + newoff = new->chunks; + } else { + newoff = SKB_EXT_CHUNKSIZEOF(*new); + + new = __skb_ext_alloc(GFP_ATOMIC); + if (!new) + return NULL; + } + + newlen = newoff + skb_ext_type_len[id]; + new->chunks = newlen; + new->offset[id] = newoff; +set_active: + skb->extensions = new; + skb->active_extensions |= 1 << id; + return skb_ext_get_ptr(new, id); +} +EXPORT_SYMBOL(skb_ext_add); + +#ifdef CONFIG_XFRM +static void skb_ext_put_sp(struct sec_path *sp) +{ + unsigned int i; + + for (i = 0; i < sp->len; i++) + xfrm_state_put(sp->xvec[i]); +} +#endif + +void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) +{ + struct skb_ext *ext = skb->extensions; + + skb->active_extensions &= ~(1 << id); + if (skb->active_extensions == 0) { + skb->extensions = NULL; + __skb_ext_put(ext); +#ifdef CONFIG_XFRM + } else if (id == SKB_EXT_SEC_PATH && + refcount_read(&ext->refcnt) == 1) { + struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH); + + skb_ext_put_sp(sp); + sp->len = 0; +#endif + } +} +EXPORT_SYMBOL(__skb_ext_del); + +void __skb_ext_put(struct skb_ext *ext) +{ + /* If this is last clone, nothing can increment + * it after check passes. Avoids one atomic op. 
+ */ + if (refcount_read(&ext->refcnt) == 1) + goto free_now; + + if (!refcount_dec_and_test(&ext->refcnt)) + return; +free_now: +#ifdef CONFIG_XFRM + if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH)) + skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH)); +#endif + + kmem_cache_free(skbuff_ext_cache, ext); +} +EXPORT_SYMBOL(__skb_ext_put); +#endif /* CONFIG_SKB_EXTENSIONS */ -- Gitblit v1.6.2