2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/net/core/skbuff.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Routines having to do with the 'struct sk_buff' memory handlers.
34 *
....@@ -25,11 +26,6 @@
2526 * disabled, or you better be *real* sure that the operation is atomic
2627 * with respect to whatever list is being frobbed (e.g. via lock_sock()
2728 * or via disabling bottom half handlers, etc).
28
- *
29
- * This program is free software; you can redistribute it and/or
30
- * modify it under the terms of the GNU General Public License
31
- * as published by the Free Software Foundation; either version
32
- * 2 of the License, or (at your option) any later version.
3329 */
3430
3531 /*
....@@ -63,7 +59,7 @@
6359 #include <linux/errqueue.h>
6460 #include <linux/prefetch.h>
6561 #include <linux/if_vlan.h>
66
-#include <linux/locallock.h>
62
+#include <linux/mpls.h>
6763
6864 #include <net/protocol.h>
6965 #include <net/dst.h>
....@@ -71,15 +67,24 @@
7167 #include <net/checksum.h>
7268 #include <net/ip6_checksum.h>
7369 #include <net/xfrm.h>
70
+#include <net/mpls.h>
71
+#include <net/mptcp.h>
7472
7573 #include <linux/uaccess.h>
7674 #include <trace/events/skb.h>
7775 #include <linux/highmem.h>
7876 #include <linux/capability.h>
7977 #include <linux/user_namespace.h>
78
+#include <linux/indirect_call_wrapper.h>
79
+#include <trace/hooks/net.h>
80
+
81
+#include "datagram.h"
8082
8183 struct kmem_cache *skbuff_head_cache __ro_after_init;
8284 static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
85
+#ifdef CONFIG_SKB_EXTENSIONS
86
+static struct kmem_cache *skbuff_ext_cache __ro_after_init;
87
+#endif
8388 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
8489 EXPORT_SYMBOL(sysctl_max_skb_frags);
8590
....@@ -98,7 +103,7 @@
98103 static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
99104 const char msg[])
100105 {
101
- pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
106
+ pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n",
102107 msg, addr, skb->len, sz, skb->head, skb->data,
103108 (unsigned long)skb->tail, (unsigned long)skb->end,
104109 skb->dev ? skb->dev->name : "<NULL>");
....@@ -245,6 +250,9 @@
245250
246251 fclones->skb2.fclone = SKB_FCLONE_CLONE;
247252 }
253
+
254
+ skb_set_kcov_handle(skb, kcov_common_handle());
255
+
248256 out:
249257 return skb;
250258 nodata:
....@@ -253,6 +261,35 @@
253261 goto out;
254262 }
255263 EXPORT_SYMBOL(__alloc_skb);
264
+
265
+/* Caller must provide SKB that is memset cleared */
266
+static struct sk_buff *__build_skb_around(struct sk_buff *skb,
267
+ void *data, unsigned int frag_size)
268
+{
269
+ struct skb_shared_info *shinfo;
270
+ unsigned int size = frag_size ? : ksize(data);
271
+
272
+ size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
273
+
274
+ /* Assumes caller memset cleared SKB */
275
+ skb->truesize = SKB_TRUESIZE(size);
276
+ refcount_set(&skb->users, 1);
277
+ skb->head = data;
278
+ skb->data = data;
279
+ skb_reset_tail_pointer(skb);
280
+ skb->end = skb->tail + size;
281
+ skb->mac_header = (typeof(skb->mac_header))~0U;
282
+ skb->transport_header = (typeof(skb->transport_header))~0U;
283
+
284
+ /* make sure we initialize shinfo sequentially */
285
+ shinfo = skb_shinfo(skb);
286
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
287
+ atomic_set(&shinfo->dataref, 1);
288
+
289
+ skb_set_kcov_handle(skb, kcov_common_handle());
290
+
291
+ return skb;
292
+}
256293
257294 /**
258295 * __build_skb - build a network buffer
....@@ -275,32 +312,15 @@
275312 */
276313 struct sk_buff *__build_skb(void *data, unsigned int frag_size)
277314 {
278
- struct skb_shared_info *shinfo;
279315 struct sk_buff *skb;
280
- unsigned int size = frag_size ? : ksize(data);
281316
282317 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
283
- if (!skb)
318
+ if (unlikely(!skb))
284319 return NULL;
285320
286
- size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
287
-
288321 memset(skb, 0, offsetof(struct sk_buff, tail));
289
- skb->truesize = SKB_TRUESIZE(size);
290
- refcount_set(&skb->users, 1);
291
- skb->head = data;
292
- skb->data = data;
293
- skb_reset_tail_pointer(skb);
294
- skb->end = skb->tail + size;
295
- skb->mac_header = (typeof(skb->mac_header))~0U;
296
- skb->transport_header = (typeof(skb->transport_header))~0U;
297322
298
- /* make sure we initialize shinfo sequentially */
299
- shinfo = skb_shinfo(skb);
300
- memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
301
- atomic_set(&shinfo->dataref, 1);
302
-
303
- return skb;
323
+ return __build_skb_around(skb, data, frag_size);
304324 }
305325
306326 /* build_skb() is wrapper over __build_skb(), that specifically
....@@ -321,6 +341,29 @@
321341 }
322342 EXPORT_SYMBOL(build_skb);
323343
344
+/**
345
+ * build_skb_around - build a network buffer around provided skb
346
+ * @skb: sk_buff provide by caller, must be memset cleared
347
+ * @data: data buffer provided by caller
348
+ * @frag_size: size of data, or 0 if head was kmalloced
349
+ */
350
+struct sk_buff *build_skb_around(struct sk_buff *skb,
351
+ void *data, unsigned int frag_size)
352
+{
353
+ if (unlikely(!skb))
354
+ return NULL;
355
+
356
+ skb = __build_skb_around(skb, data, frag_size);
357
+
358
+ if (skb && frag_size) {
359
+ skb->head_frag = 1;
360
+ if (page_is_pfmemalloc(virt_to_head_page(data)))
361
+ skb->pfmemalloc = 1;
362
+ }
363
+ return skb;
364
+}
365
+EXPORT_SYMBOL(build_skb_around);
366
+
324367 #define NAPI_SKB_CACHE_SIZE 64
325368
326369 struct napi_alloc_cache {
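Illustration only, not part of the patch: a minimal sketch of how a driver might use the new build_skb_around() helper, assuming the caller owns both a zeroed sk_buff and a page-fragment data buffer; rx_wrap_buffer() and RX_HEADROOM are hypothetical names.

/* Sketch, not from this patch: wrap a caller-owned, cleared sk_buff around an
 * existing page-fragment buffer instead of allocating a fresh skb head.
 */
static struct sk_buff *rx_wrap_buffer(struct sk_buff *skb, void *data,
				      unsigned int frag_size)
{
	memset(skb, 0, offsetof(struct sk_buff, tail));	/* helper expects a memset-cleared skb */
	skb = build_skb_around(skb, data, frag_size);
	if (likely(skb))
		skb_reserve(skb, RX_HEADROOM);	/* hypothetical headroom policy */
	return skb;
}

build_skb_around() fills in head/data/tail and the shared info exactly as __build_skb() does via __build_skb_around(), but skips the skbuff_head_cache allocation.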
....@@ -331,21 +374,21 @@
331374
332375 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
333376 static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
334
-static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock);
335
-static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock);
336377
337
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
378
+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
338379 {
339
- struct page_frag_cache *nc;
340
- unsigned long flags;
341
- void *data;
380
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
342381
343
- local_lock_irqsave(netdev_alloc_lock, flags);
344
- nc = this_cpu_ptr(&netdev_alloc_cache);
345
- data = page_frag_alloc(nc, fragsz, gfp_mask);
346
- local_unlock_irqrestore(netdev_alloc_lock, flags);
347
- return data;
382
+ return page_frag_alloc(&nc->page, fragsz, gfp_mask);
348383 }
384
+
385
+void *napi_alloc_frag(unsigned int fragsz)
386
+{
387
+ fragsz = SKB_DATA_ALIGN(fragsz);
388
+
389
+ return __napi_alloc_frag(fragsz, GFP_ATOMIC);
390
+}
391
+EXPORT_SYMBOL(napi_alloc_frag);
349392
350393 /**
351394 * netdev_alloc_frag - allocate a page fragment
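Illustration only, not part of the patch: a sketch of the per-CPU fragment allocator from NAPI (softirq) context; rx_refill_one() and RX_FRAG_SIZE are hypothetical driver-side names.

/* Sketch, not from this patch: refill one RX ring slot from softirq context. */
static void *rx_refill_one(void)
{
	unsigned int fragsz = SKB_DATA_ALIGN(RX_FRAG_SIZE) +
			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	return napi_alloc_frag(fragsz);	/* uses GFP_ATOMIC internally */
}

Reserving room for skb_shared_info lets the same buffer later be handed to build_skb() or build_skb_around(), as in the sketch above.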
....@@ -356,30 +399,21 @@
356399 */
357400 void *netdev_alloc_frag(unsigned int fragsz)
358401 {
359
- fragsz = SKB_DATA_ALIGN(fragsz);
360
-
361
- return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
362
-}
363
-EXPORT_SYMBOL(netdev_alloc_frag);
364
-
365
-static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
366
-{
367
- struct napi_alloc_cache *nc;
402
+ struct page_frag_cache *nc;
368403 void *data;
369404
370
- nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
371
- data = page_frag_alloc(&nc->page, fragsz, gfp_mask);
372
- put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
405
+ fragsz = SKB_DATA_ALIGN(fragsz);
406
+ if (in_irq() || irqs_disabled()) {
407
+ nc = this_cpu_ptr(&netdev_alloc_cache);
408
+ data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
409
+ } else {
410
+ local_bh_disable();
411
+ data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
412
+ local_bh_enable();
413
+ }
373414 return data;
374415 }
375
-
376
-void *napi_alloc_frag(unsigned int fragsz)
377
-{
378
- fragsz = SKB_DATA_ALIGN(fragsz);
379
-
380
- return __napi_alloc_frag(fragsz, GFP_ATOMIC);
381
-}
382
-EXPORT_SYMBOL(napi_alloc_frag);
416
+EXPORT_SYMBOL(netdev_alloc_frag);
383417
384418 /**
385419 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
....@@ -398,7 +432,6 @@
398432 gfp_t gfp_mask)
399433 {
400434 struct page_frag_cache *nc;
401
- unsigned long flags;
402435 struct sk_buff *skb;
403436 bool pfmemalloc;
404437 void *data;
....@@ -423,13 +456,17 @@
423456 if (sk_memalloc_socks())
424457 gfp_mask |= __GFP_MEMALLOC;
425458
426
- local_lock_irqsave(netdev_alloc_lock, flags);
427
-
428
- nc = this_cpu_ptr(&netdev_alloc_cache);
429
- data = page_frag_alloc(nc, len, gfp_mask);
430
- pfmemalloc = nc->pfmemalloc;
431
-
432
- local_unlock_irqrestore(netdev_alloc_lock, flags);
459
+ if (in_irq() || irqs_disabled()) {
460
+ nc = this_cpu_ptr(&netdev_alloc_cache);
461
+ data = page_frag_alloc(nc, len, gfp_mask);
462
+ pfmemalloc = nc->pfmemalloc;
463
+ } else {
464
+ local_bh_disable();
465
+ nc = this_cpu_ptr(&napi_alloc_cache.page);
466
+ data = page_frag_alloc(nc, len, gfp_mask);
467
+ pfmemalloc = nc->pfmemalloc;
468
+ local_bh_enable();
469
+ }
433470
434471 if (unlikely(!data))
435472 return NULL;
....@@ -440,7 +477,6 @@
440477 return NULL;
441478 }
442479
443
- /* use OR instead of assignment to avoid clearing of bits in mask */
444480 if (pfmemalloc)
445481 skb->pfmemalloc = 1;
446482 skb->head_frag = 1;
....@@ -473,7 +509,6 @@
473509 struct napi_alloc_cache *nc;
474510 struct sk_buff *skb;
475511 void *data;
476
- bool pfmemalloc;
477512
478513 len += NET_SKB_PAD + NET_IP_ALIGN;
479514
....@@ -496,10 +531,7 @@
496531 if (sk_memalloc_socks())
497532 gfp_mask |= __GFP_MEMALLOC;
498533
499
- nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
500534 data = page_frag_alloc(&nc->page, len, gfp_mask);
501
- pfmemalloc = nc->page.pfmemalloc;
502
- put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
503535 if (unlikely(!data))
504536 return NULL;
505537
....@@ -509,8 +541,7 @@
509541 return NULL;
510542 }
511543
512
- /* use OR instead of assignment to avoid clearing of bits in mask */
513
- if (pfmemalloc)
544
+ if (nc->page.pfmemalloc)
514545 skb->pfmemalloc = 1;
515546 skb->head_frag = 1;
516547
....@@ -630,7 +661,6 @@
630661 void skb_release_head_state(struct sk_buff *skb)
631662 {
632663 skb_dst_drop(skb);
633
- secpath_reset(skb);
634664 if (skb->destructor) {
635665 WARN_ON(in_irq());
636666 skb->destructor(skb);
....@@ -638,9 +668,7 @@
638668 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
639669 nf_conntrack_put(skb_nfct(skb));
640670 #endif
641
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
642
- nf_bridge_put(skb->nf_bridge);
643
-#endif
671
+ skb_ext_put(skb);
644672 }
645673
646674 /* Free everything but the sk_buff shell. */
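Illustration only, not part of the patch: a sketch of how an skb extension gets attached in the first place, assuming the skb_ext_add() helper declared in include/linux/skbuff.h by the same series; the reference is dropped again by the skb_ext_put() call added above in skb_release_head_state().

/* Sketch, not from this patch: record a TC chain index as an skb extension. */
static int record_tc_chain(struct sk_buff *skb, u32 chain_index)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	struct tc_skb_ext *ext = skb_ext_add(skb, TC_SKB_EXT);

	if (!ext)
		return -ENOMEM;
	ext->chain = chain_index;
#endif
	return 0;
}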
....@@ -679,6 +707,7 @@
679707 if (!skb_unref(skb))
680708 return;
681709
710
+ trace_android_vh_kfree_skb(skb);
682711 trace_kfree_skb(skb, __builtin_return_address(0));
683712 __kfree_skb(skb);
684713 }
....@@ -695,6 +724,101 @@
695724 }
696725 EXPORT_SYMBOL(kfree_skb_list);
697726
727
+/* Dump skb information and contents.
728
+ *
729
+ * Must only be called from net_ratelimit()-ed paths.
730
+ *
731
+ * Dumps whole packets if full_pkt, only headers otherwise.
732
+ */
733
+void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
734
+{
735
+ struct skb_shared_info *sh = skb_shinfo(skb);
736
+ struct net_device *dev = skb->dev;
737
+ struct sock *sk = skb->sk;
738
+ struct sk_buff *list_skb;
739
+ bool has_mac, has_trans;
740
+ int headroom, tailroom;
741
+ int i, len, seg_len;
742
+
743
+ if (full_pkt)
744
+ len = skb->len;
745
+ else
746
+ len = min_t(int, skb->len, MAX_HEADER + 128);
747
+
748
+ headroom = skb_headroom(skb);
749
+ tailroom = skb_tailroom(skb);
750
+
751
+ has_mac = skb_mac_header_was_set(skb);
752
+ has_trans = skb_transport_header_was_set(skb);
753
+
754
+ printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
755
+ "mac=(%d,%d) net=(%d,%d) trans=%d\n"
756
+ "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
757
+ "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
758
+ "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
759
+ level, skb->len, headroom, skb_headlen(skb), tailroom,
760
+ has_mac ? skb->mac_header : -1,
761
+ has_mac ? skb_mac_header_len(skb) : -1,
762
+ skb->network_header,
763
+ has_trans ? skb_network_header_len(skb) : -1,
764
+ has_trans ? skb->transport_header : -1,
765
+ sh->tx_flags, sh->nr_frags,
766
+ sh->gso_size, sh->gso_type, sh->gso_segs,
767
+ skb->csum, skb->ip_summed, skb->csum_complete_sw,
768
+ skb->csum_valid, skb->csum_level,
769
+ skb->hash, skb->sw_hash, skb->l4_hash,
770
+ ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
771
+
772
+ if (dev)
773
+ printk("%sdev name=%s feat=%pNF\n",
774
+ level, dev->name, &dev->features);
775
+ if (sk)
776
+ printk("%ssk family=%hu type=%u proto=%u\n",
777
+ level, sk->sk_family, sk->sk_type, sk->sk_protocol);
778
+
779
+ if (full_pkt && headroom)
780
+ print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET,
781
+ 16, 1, skb->head, headroom, false);
782
+
783
+ seg_len = min_t(int, skb_headlen(skb), len);
784
+ if (seg_len)
785
+ print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET,
786
+ 16, 1, skb->data, seg_len, false);
787
+ len -= seg_len;
788
+
789
+ if (full_pkt && tailroom)
790
+ print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET,
791
+ 16, 1, skb_tail_pointer(skb), tailroom, false);
792
+
793
+ for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) {
794
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
795
+ u32 p_off, p_len, copied;
796
+ struct page *p;
797
+ u8 *vaddr;
798
+
799
+ skb_frag_foreach_page(frag, skb_frag_off(frag),
800
+ skb_frag_size(frag), p, p_off, p_len,
801
+ copied) {
802
+ seg_len = min_t(int, p_len, len);
803
+ vaddr = kmap_atomic(p);
804
+ print_hex_dump(level, "skb frag: ",
805
+ DUMP_PREFIX_OFFSET,
806
+ 16, 1, vaddr + p_off, seg_len, false);
807
+ kunmap_atomic(vaddr);
808
+ len -= seg_len;
809
+ if (!len)
810
+ break;
811
+ }
812
+ }
813
+
814
+ if (full_pkt && skb_has_frag_list(skb)) {
815
+ printk("skb fraglist:\n");
816
+ skb_walk_frags(skb, list_skb)
817
+ skb_dump(level, list_skb, true);
818
+ }
819
+}
820
+EXPORT_SYMBOL(skb_dump);
821
+
698822 /**
699823 * skb_tx_error - report an sk_buff xmit error
700824 * @skb: buffer that triggered an error
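Illustration only, not part of the patch: the smallest sensible call site for the new skb_dump() helper, respecting the net_ratelimit() requirement stated in its comment.

/* Sketch, not from this patch: dump a suspicious packet from a protocol handler. */
if (net_ratelimit())
	skb_dump(KERN_WARNING, skb, false);	/* headers only; pass true to dump the full payload */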
....@@ -708,6 +832,7 @@
708832 }
709833 EXPORT_SYMBOL(skb_tx_error);
710834
835
+#ifdef CONFIG_TRACEPOINTS
711836 /**
712837 * consume_skb - free an skbuff
713838 * @skb: buffer to free
....@@ -725,6 +850,7 @@
725850 __kfree_skb(skb);
726851 }
727852 EXPORT_SYMBOL(consume_skb);
853
+#endif
728854
729855 /**
730856 * consume_stateless_skb - free an skbuff, assuming it is stateless
....@@ -742,26 +868,23 @@
742868
743869 void __kfree_skb_flush(void)
744870 {
745
- struct napi_alloc_cache *nc;
871
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
746872
747
- nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
748873 /* flush skb_cache if containing objects */
749874 if (nc->skb_count) {
750875 kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
751876 nc->skb_cache);
752877 nc->skb_count = 0;
753878 }
754
- put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
755879 }
756880
757881 static inline void _kfree_skb_defer(struct sk_buff *skb)
758882 {
759
- struct napi_alloc_cache *nc;
883
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
760884
761885 /* drop skb->head and call any destructors for packet */
762886 skb_release_all(skb);
763887
764
- nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
765888 /* record skb to CPU local list */
766889 nc->skb_cache[nc->skb_count++] = skb;
767890
....@@ -776,7 +899,6 @@
776899 nc->skb_cache);
777900 nc->skb_count = 0;
778901 }
779
- put_locked_var(napi_alloc_cache_lock, napi_alloc_cache);
780902 }
781903 void __kfree_skb_defer(struct sk_buff *skb)
782904 {
....@@ -785,9 +907,6 @@
785907
786908 void napi_consume_skb(struct sk_buff *skb, int budget)
787909 {
788
- if (unlikely(!skb))
789
- return;
790
-
791910 /* Zero budget indicate non-NAPI context called us, like netpoll */
792911 if (unlikely(!budget)) {
793912 dev_consume_skb_any(skb);
....@@ -824,9 +943,7 @@
824943 new->dev = old->dev;
825944 memcpy(new->cb, old->cb, sizeof(old->cb));
826945 skb_dst_copy(new, old);
827
-#ifdef CONFIG_XFRM
828
- new->sp = secpath_get(old->sp);
829
-#endif
946
+ __skb_ext_copy(new, old);
830947 __nf_copy(new, old, false);
831948
832949 /* Note : this field could be in headers_start/headers_end section
....@@ -902,6 +1019,31 @@
9021019 return n;
9031020 #undef C
9041021 }
1022
+
1023
+/**
1024
+ * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
1025
+ * @first: first sk_buff of the msg
1026
+ */
1027
+struct sk_buff *alloc_skb_for_msg(struct sk_buff *first)
1028
+{
1029
+ struct sk_buff *n;
1030
+
1031
+ n = alloc_skb(0, GFP_ATOMIC);
1032
+ if (!n)
1033
+ return NULL;
1034
+
1035
+ n->len = first->len;
1036
+ n->data_len = first->len;
1037
+ n->truesize = first->truesize;
1038
+
1039
+ skb_shinfo(n)->frag_list = first;
1040
+
1041
+ __copy_skb_header(n, first);
1042
+ n->destructor = NULL;
1043
+
1044
+ return n;
1045
+}
1046
+EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
9051047
9061048 /**
9071049 * skb_morph - morph one skb into another
....@@ -1027,7 +1169,11 @@
10271169 uarg->len++;
10281170 uarg->bytelen = bytelen;
10291171 atomic_set(&sk->sk_zckey, ++next);
1030
- sock_zerocopy_get(uarg);
1172
+
1173
+ /* no extra ref when appending to datagram (MSG_MORE) */
1174
+ if (sk->sk_type == SOCK_STREAM)
1175
+ sock_zerocopy_get(uarg);
1176
+
10311177 return uarg;
10321178 }
10331179 }
....@@ -1117,7 +1263,7 @@
11171263 }
11181264 EXPORT_SYMBOL_GPL(sock_zerocopy_put);
11191265
1120
-void sock_zerocopy_put_abort(struct ubuf_info *uarg)
1266
+void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
11211267 {
11221268 if (uarg) {
11231269 struct sock *sk = skb_from_uarg(uarg)->sk;
....@@ -1125,13 +1271,17 @@
11251271 atomic_dec(&sk->sk_zckey);
11261272 uarg->len--;
11271273
1128
- sock_zerocopy_put(uarg);
1274
+ if (have_uref)
1275
+ sock_zerocopy_put(uarg);
11291276 }
11301277 }
11311278 EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
11321279
1133
-extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
1134
- struct iov_iter *from, size_t length);
1280
+int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
1281
+{
1282
+ return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
1283
+}
1284
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
11351285
11361286 int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
11371287 struct msghdr *msg, int len,
....@@ -1159,7 +1309,7 @@
11591309 return err;
11601310 }
11611311
1162
- skb_zcopy_set(skb, uarg);
1312
+ skb_zcopy_set(skb, uarg, NULL);
11631313 return skb->len - orig_len;
11641314 }
11651315 EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
....@@ -1179,7 +1329,7 @@
11791329 if (skb_copy_ubufs(nskb, GFP_ATOMIC))
11801330 return -EIO;
11811331 }
1182
- skb_zcopy_set(nskb, skb_uarg(orig));
1332
+ skb_zcopy_set(nskb, skb_uarg(orig), NULL);
11831333 }
11841334 return 0;
11851335 }
....@@ -1235,7 +1385,7 @@
12351385 struct page *p;
12361386 u8 *vaddr;
12371387
1238
- skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
1388
+ skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
12391389 p, p_off, p_len, copied) {
12401390 u32 copy, done = 0;
12411391 vaddr = kmap_atomic(p);
....@@ -1525,11 +1675,10 @@
15251675 skb->head = data;
15261676 skb->head_frag = 0;
15271677 skb->data += off;
1678
+
1679
+ skb_set_end_offset(skb, size);
15281680 #ifdef NET_SKBUFF_DATA_USES_OFFSET
1529
- skb->end = size;
15301681 off = nhead;
1531
-#else
1532
- skb->end = skb->head + size;
15331682 #endif
15341683 skb->tail += off;
15351684 skb_headers_offset_update(skb, nhead);
....@@ -1576,6 +1725,38 @@
15761725 return skb2;
15771726 }
15781727 EXPORT_SYMBOL(skb_realloc_headroom);
1728
+
1729
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
1730
+{
1731
+ unsigned int saved_end_offset, saved_truesize;
1732
+ struct skb_shared_info *shinfo;
1733
+ int res;
1734
+
1735
+ saved_end_offset = skb_end_offset(skb);
1736
+ saved_truesize = skb->truesize;
1737
+
1738
+ res = pskb_expand_head(skb, 0, 0, pri);
1739
+ if (res)
1740
+ return res;
1741
+
1742
+ skb->truesize = saved_truesize;
1743
+
1744
+ if (likely(skb_end_offset(skb) == saved_end_offset))
1745
+ return 0;
1746
+
1747
+ shinfo = skb_shinfo(skb);
1748
+
1749
+ /* We are about to change back skb->end,
1750
+ * we need to move skb_shinfo() to its new location.
1751
+ */
1752
+ memmove(skb->head + saved_end_offset,
1753
+ shinfo,
1754
+ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
1755
+
1756
+ skb_set_end_offset(skb, saved_end_offset);
1757
+
1758
+ return 0;
1759
+}
15791760
15801761 /**
15811762 * skb_copy_expand - copy and expand sk_buff
....@@ -1959,8 +2140,6 @@
19592140 struct sk_buff *insp = NULL;
19602141
19612142 do {
1962
- BUG_ON(!list);
1963
-
19642143 if (list->len <= eat) {
19652144 /* Eaten as whole. */
19662145 eat -= list->len;
....@@ -1968,6 +2147,9 @@
19682147 insp = list;
19692148 } else {
19702149 /* Eaten partially. */
2150
+ if (skb_is_gso(skb) && !list->head_frag &&
2151
+ skb_headlen(list))
2152
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
19712153
19722154 if (skb_shared(list)) {
19732155 /* Sucks! We need to fork list. :-( */
....@@ -2012,10 +2194,12 @@
20122194 skb_frag_unref(skb, i);
20132195 eat -= size;
20142196 } else {
2015
- skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
2197
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
2198
+
2199
+ *frag = skb_shinfo(skb)->frags[i];
20162200 if (eat) {
2017
- skb_shinfo(skb)->frags[k].page_offset += eat;
2018
- skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
2201
+ skb_frag_off_add(frag, eat);
2202
+ skb_frag_size_sub(frag, eat);
20192203 if (!i)
20202204 goto end;
20212205 eat = 0;
....@@ -2087,7 +2271,7 @@
20872271 copy = len;
20882272
20892273 skb_frag_foreach_page(f,
2090
- f->page_offset + offset - start,
2274
+ skb_frag_off(f) + offset - start,
20912275 copy, p, p_off, p_len, copied) {
20922276 vaddr = kmap_atomic(p);
20932277 memcpy(to + copied, vaddr + p_off, p_len);
....@@ -2263,7 +2447,7 @@
22632447 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
22642448
22652449 if (__splice_segment(skb_frag_page(f),
2266
- f->page_offset, skb_frag_size(f),
2450
+ skb_frag_off(f), skb_frag_size(f),
22672451 offset, len, spd, false, sk, pipe))
22682452 return true;
22692453 }
....@@ -2353,20 +2537,20 @@
23532537 for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
23542538 skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
23552539
2356
- if (offset < frag->size)
2540
+ if (offset < skb_frag_size(frag))
23572541 break;
23582542
2359
- offset -= frag->size;
2543
+ offset -= skb_frag_size(frag);
23602544 }
23612545
23622546 for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
23632547 skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
23642548
2365
- slen = min_t(size_t, len, frag->size - offset);
2549
+ slen = min_t(size_t, len, skb_frag_size(frag) - offset);
23662550
23672551 while (slen) {
2368
- ret = kernel_sendpage_locked(sk, frag->page.p,
2369
- frag->page_offset + offset,
2552
+ ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
2553
+ skb_frag_off(frag) + offset,
23702554 slen, MSG_DONTWAIT);
23712555 if (ret <= 0)
23722556 goto error;
....@@ -2400,19 +2584,6 @@
24002584 return orig_len == len ? ret : orig_len - len;
24012585 }
24022586 EXPORT_SYMBOL_GPL(skb_send_sock_locked);
2403
-
2404
-/* Send skb data on a socket. */
2405
-int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
2406
-{
2407
- int ret = 0;
2408
-
2409
- lock_sock(sk);
2410
- ret = skb_send_sock_locked(sk, skb, offset, len);
2411
- release_sock(sk);
2412
-
2413
- return ret;
2414
-}
2415
-EXPORT_SYMBOL_GPL(skb_send_sock);
24162587
24172588 /**
24182589 * skb_store_bits - store bits from kernel buffer to skb
....@@ -2461,7 +2632,7 @@
24612632 copy = len;
24622633
24632634 skb_frag_foreach_page(frag,
2464
- frag->page_offset + offset - start,
2635
+ skb_frag_off(frag) + offset - start,
24652636 copy, p, p_off, p_len, copied) {
24662637 vaddr = kmap_atomic(p);
24672638 memcpy(vaddr + p_off, from + copied, p_len);
....@@ -2516,7 +2687,8 @@
25162687 if (copy > 0) {
25172688 if (copy > len)
25182689 copy = len;
2519
- csum = ops->update(skb->data + offset, copy, csum);
2690
+ csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
2691
+ skb->data + offset, copy, csum);
25202692 if ((len -= copy) == 0)
25212693 return csum;
25222694 offset += copy;
....@@ -2540,12 +2712,16 @@
25402712 copy = len;
25412713
25422714 skb_frag_foreach_page(frag,
2543
- frag->page_offset + offset - start,
2715
+ skb_frag_off(frag) + offset - start,
25442716 copy, p, p_off, p_len, copied) {
25452717 vaddr = kmap_atomic(p);
2546
- csum2 = ops->update(vaddr + p_off, p_len, 0);
2718
+ csum2 = INDIRECT_CALL_1(ops->update,
2719
+ csum_partial_ext,
2720
+ vaddr + p_off, p_len, 0);
25472721 kunmap_atomic(vaddr);
2548
- csum = ops->combine(csum, csum2, pos, p_len);
2722
+ csum = INDIRECT_CALL_1(ops->combine,
2723
+ csum_block_add_ext, csum,
2724
+ csum2, pos, p_len);
25492725 pos += p_len;
25502726 }
25512727
....@@ -2568,7 +2744,8 @@
25682744 copy = len;
25692745 csum2 = __skb_checksum(frag_iter, offset - start,
25702746 copy, 0, ops);
2571
- csum = ops->combine(csum, csum2, pos, copy);
2747
+ csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
2748
+ csum, csum2, pos, copy);
25722749 if ((len -= copy) == 0)
25732750 return csum;
25742751 offset += copy;
....@@ -2597,19 +2774,20 @@
25972774 /* Both of above in one bottle. */
25982775
25992776 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
2600
- u8 *to, int len, __wsum csum)
2777
+ u8 *to, int len)
26012778 {
26022779 int start = skb_headlen(skb);
26032780 int i, copy = start - offset;
26042781 struct sk_buff *frag_iter;
26052782 int pos = 0;
2783
+ __wsum csum = 0;
26062784
26072785 /* Copy header. */
26082786 if (copy > 0) {
26092787 if (copy > len)
26102788 copy = len;
26112789 csum = csum_partial_copy_nocheck(skb->data + offset, to,
2612
- copy, csum);
2790
+ copy);
26132791 if ((len -= copy) == 0)
26142792 return csum;
26152793 offset += copy;
....@@ -2634,12 +2812,12 @@
26342812 copy = len;
26352813
26362814 skb_frag_foreach_page(frag,
2637
- frag->page_offset + offset - start,
2815
+ skb_frag_off(frag) + offset - start,
26382816 copy, p, p_off, p_len, copied) {
26392817 vaddr = kmap_atomic(p);
26402818 csum2 = csum_partial_copy_nocheck(vaddr + p_off,
26412819 to + copied,
2642
- p_len, 0);
2820
+ p_len);
26432821 kunmap_atomic(vaddr);
26442822 csum = csum_block_add(csum, csum2, pos);
26452823 pos += p_len;
....@@ -2665,7 +2843,7 @@
26652843 copy = len;
26662844 csum2 = skb_copy_and_csum_bits(frag_iter,
26672845 offset - start,
2668
- to, copy, 0);
2846
+ to, copy);
26692847 csum = csum_block_add(csum, csum2, pos);
26702848 if ((len -= copy) == 0)
26712849 return csum;
....@@ -2679,6 +2857,65 @@
26792857 return csum;
26802858 }
26812859 EXPORT_SYMBOL(skb_copy_and_csum_bits);
2860
+
2861
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
2862
+{
2863
+ __sum16 sum;
2864
+
2865
+ sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
2866
+ /* See comments in __skb_checksum_complete(). */
2867
+ if (likely(!sum)) {
2868
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
2869
+ !skb->csum_complete_sw)
2870
+ netdev_rx_csum_fault(skb->dev, skb);
2871
+ }
2872
+ if (!skb_shared(skb))
2873
+ skb->csum_valid = !sum;
2874
+ return sum;
2875
+}
2876
+EXPORT_SYMBOL(__skb_checksum_complete_head);
2877
+
2878
+/* This function assumes skb->csum already holds pseudo header's checksum,
2879
+ * which has been changed from the hardware checksum, for example, by
2880
+ * __skb_checksum_validate_complete(). And, the original skb->csum must
2881
+ * have been validated unsuccessfully for CHECKSUM_COMPLETE case.
2882
+ *
2883
+ * It returns non-zero if the recomputed checksum is still invalid, otherwise
2884
+ * zero. The new checksum is stored back into skb->csum unless the skb is
2885
+ * shared.
2886
+ */
2887
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
2888
+{
2889
+ __wsum csum;
2890
+ __sum16 sum;
2891
+
2892
+ csum = skb_checksum(skb, 0, skb->len, 0);
2893
+
2894
+ sum = csum_fold(csum_add(skb->csum, csum));
2895
+ /* This check is inverted, because we already knew the hardware
2896
+ * checksum is invalid before calling this function. So, if the
2897
+ * re-computed checksum is valid instead, then we have a mismatch
2898
+ * between the original skb->csum and skb_checksum(). This means either
2899
+ * the original hardware checksum is incorrect or we screw up skb->csum
2900
+ * when moving skb->data around.
2901
+ */
2902
+ if (likely(!sum)) {
2903
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
2904
+ !skb->csum_complete_sw)
2905
+ netdev_rx_csum_fault(skb->dev, skb);
2906
+ }
2907
+
2908
+ if (!skb_shared(skb)) {
2909
+ /* Save full packet checksum */
2910
+ skb->csum = csum;
2911
+ skb->ip_summed = CHECKSUM_COMPLETE;
2912
+ skb->csum_complete_sw = 1;
2913
+ skb->csum_valid = !sum;
2914
+ }
2915
+
2916
+ return sum;
2917
+}
2918
+EXPORT_SYMBOL(__skb_checksum_complete);
26822919
26832920 static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
26842921 {
....@@ -2794,11 +3031,15 @@
27943031 skb_zerocopy_clone(to, from, GFP_ATOMIC);
27953032
27963033 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
3034
+ int size;
3035
+
27973036 if (!len)
27983037 break;
27993038 skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
2800
- skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
2801
- len -= skb_shinfo(to)->frags[j].size;
3039
+ size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]),
3040
+ len);
3041
+ skb_frag_size_set(&skb_shinfo(to)->frags[j], size);
3042
+ len -= size;
28023043 skb_frag_ref(to, j);
28033044 j++;
28043045 }
....@@ -2825,7 +3066,7 @@
28253066 csum = 0;
28263067 if (csstart != skb->len)
28273068 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
2828
- skb->len - csstart, 0);
3069
+ skb->len - csstart);
28293070
28303071 if (skb->ip_summed == CHECKSUM_PARTIAL) {
28313072 long csstuff = csstart + skb->csum_offset;
....@@ -3000,28 +3241,6 @@
30003241 }
30013242 EXPORT_SYMBOL(skb_append);
30023243
3003
-/**
3004
- * skb_insert - insert a buffer
3005
- * @old: buffer to insert before
3006
- * @newsk: buffer to insert
3007
- * @list: list to use
3008
- *
3009
- * Place a packet before a given packet in a list. The list locks are
3010
- * taken and this function is atomic with respect to other list locked
3011
- * calls.
3012
- *
3013
- * A buffer cannot be placed on two lists at the same time.
3014
- */
3015
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
3016
-{
3017
- unsigned long flags;
3018
-
3019
- spin_lock_irqsave(&list->lock, flags);
3020
- __skb_insert(newsk, old->prev, old, list);
3021
- spin_unlock_irqrestore(&list->lock, flags);
3022
-}
3023
-EXPORT_SYMBOL(skb_insert);
3024
-
30253244 static inline void skb_split_inside_header(struct sk_buff *skb,
30263245 struct sk_buff* skb1,
30273246 const u32 len, const int pos)
....@@ -3071,7 +3290,7 @@
30713290 * 2. Split is accurately. We make this.
30723291 */
30733292 skb_frag_ref(skb, i);
3074
- skb_shinfo(skb1)->frags[0].page_offset += len - pos;
3293
+ skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos);
30753294 skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
30763295 skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
30773296 skb_shinfo(skb)->nr_frags++;
....@@ -3110,19 +3329,7 @@
31103329 */
31113330 static int skb_prepare_for_shift(struct sk_buff *skb)
31123331 {
3113
- int ret = 0;
3114
-
3115
- if (skb_cloned(skb)) {
3116
- /* Save and restore truesize: pskb_expand_head() may reallocate
3117
- * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
3118
- * cannot change truesize at this point.
3119
- */
3120
- unsigned int save_truesize = skb->truesize;
3121
-
3122
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3123
- skb->truesize = save_truesize;
3124
- }
3125
- return ret;
3332
+ return skb_unclone_keeptruesize(skb, GFP_ATOMIC);
31263333 }
31273334
31283335 /**
....@@ -3146,7 +3353,7 @@
31463353 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
31473354 {
31483355 int from, to, merge, todo;
3149
- struct skb_frag_struct *fragfrom, *fragto;
3356
+ skb_frag_t *fragfrom, *fragto;
31503357
31513358 BUG_ON(shiftlen > skb->len);
31523359
....@@ -3165,7 +3372,7 @@
31653372 */
31663373 if (!to ||
31673374 !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
3168
- fragfrom->page_offset)) {
3375
+ skb_frag_off(fragfrom))) {
31693376 merge = -1;
31703377 } else {
31713378 merge = to - 1;
....@@ -3182,7 +3389,7 @@
31823389
31833390 skb_frag_size_add(fragto, shiftlen);
31843391 skb_frag_size_sub(fragfrom, shiftlen);
3185
- fragfrom->page_offset += shiftlen;
3392
+ skb_frag_off_add(fragfrom, shiftlen);
31863393
31873394 goto onlymerged;
31883395 }
....@@ -3213,11 +3420,11 @@
32133420
32143421 } else {
32153422 __skb_frag_ref(fragfrom);
3216
- fragto->page = fragfrom->page;
3217
- fragto->page_offset = fragfrom->page_offset;
3423
+ skb_frag_page_copy(fragto, fragfrom);
3424
+ skb_frag_off_copy(fragto, fragfrom);
32183425 skb_frag_size_set(fragto, todo);
32193426
3220
- fragfrom->page_offset += todo;
3427
+ skb_frag_off_add(fragfrom, todo);
32213428 skb_frag_size_sub(fragfrom, todo);
32223429 todo = 0;
32233430
....@@ -3342,7 +3549,7 @@
33423549 if (!st->frag_data)
33433550 st->frag_data = kmap_atomic(skb_frag_page(frag));
33443551
3345
- *data = (u8 *) st->frag_data + frag->page_offset +
3552
+ *data = (u8 *) st->frag_data + skb_frag_off(frag) +
33463553 (abs_offset - st->stepped_offset);
33473554
33483555 return block_limit - abs_offset;
....@@ -3432,64 +3639,6 @@
34323639 }
34333640 EXPORT_SYMBOL(skb_find_text);
34343641
3435
-/**
3436
- * skb_append_datato_frags - append the user data to a skb
3437
- * @sk: sock structure
3438
- * @skb: skb structure to be appended with user data.
3439
- * @getfrag: call back function to be used for getting the user data
3440
- * @from: pointer to user message iov
3441
- * @length: length of the iov message
3442
- *
3443
- * Description: This procedure append the user data in the fragment part
3444
- * of the skb if any page alloc fails user this procedure returns -ENOMEM
3445
- */
3446
-int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
3447
- int (*getfrag)(void *from, char *to, int offset,
3448
- int len, int odd, struct sk_buff *skb),
3449
- void *from, int length)
3450
-{
3451
- int frg_cnt = skb_shinfo(skb)->nr_frags;
3452
- int copy;
3453
- int offset = 0;
3454
- int ret;
3455
- struct page_frag *pfrag = &current->task_frag;
3456
-
3457
- do {
3458
- /* Return error if we don't have space for new frag */
3459
- if (frg_cnt >= MAX_SKB_FRAGS)
3460
- return -EMSGSIZE;
3461
-
3462
- if (!sk_page_frag_refill(sk, pfrag))
3463
- return -ENOMEM;
3464
-
3465
- /* copy the user data to page */
3466
- copy = min_t(int, length, pfrag->size - pfrag->offset);
3467
-
3468
- ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
3469
- offset, copy, 0, skb);
3470
- if (ret < 0)
3471
- return -EFAULT;
3472
-
3473
- /* copy was successful so update the size parameters */
3474
- skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
3475
- copy);
3476
- frg_cnt++;
3477
- pfrag->offset += copy;
3478
- get_page(pfrag->page);
3479
-
3480
- skb->truesize += copy;
3481
- refcount_add(copy, &sk->sk_wmem_alloc);
3482
- skb->len += copy;
3483
- skb->data_len += copy;
3484
- offset += copy;
3485
- length -= copy;
3486
-
3487
- } while (length > 0);
3488
-
3489
- return 0;
3490
-}
3491
-EXPORT_SYMBOL(skb_append_datato_frags);
3492
-
34933642 int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
34943643 int offset, size_t size)
34953644 {
....@@ -3536,11 +3685,126 @@
35363685 struct page *page;
35373686
35383687 page = virt_to_head_page(frag_skb->head);
3539
- head_frag.page.p = page;
3540
- head_frag.page_offset = frag_skb->data -
3541
- (unsigned char *)page_address(page);
3542
- head_frag.size = skb_headlen(frag_skb);
3688
+ __skb_frag_set_page(&head_frag, page);
3689
+ skb_frag_off_set(&head_frag, frag_skb->data -
3690
+ (unsigned char *)page_address(page));
3691
+ skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
35433692 return head_frag;
3693
+}
3694
+
3695
+struct sk_buff *skb_segment_list(struct sk_buff *skb,
3696
+ netdev_features_t features,
3697
+ unsigned int offset)
3698
+{
3699
+ struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
3700
+ unsigned int tnl_hlen = skb_tnl_header_len(skb);
3701
+ unsigned int delta_truesize = 0;
3702
+ unsigned int delta_len = 0;
3703
+ struct sk_buff *tail = NULL;
3704
+ struct sk_buff *nskb, *tmp;
3705
+ int len_diff, err;
3706
+
3707
+ skb_push(skb, -skb_network_offset(skb) + offset);
3708
+
3709
+ /* Ensure the head is writeable before touching the shared info */
3710
+ err = skb_unclone(skb, GFP_ATOMIC);
3711
+ if (err)
3712
+ goto err_linearize;
3713
+
3714
+ skb_shinfo(skb)->frag_list = NULL;
3715
+
3716
+ while (list_skb) {
3717
+ nskb = list_skb;
3718
+ list_skb = list_skb->next;
3719
+
3720
+ err = 0;
3721
+ delta_truesize += nskb->truesize;
3722
+ if (skb_shared(nskb)) {
3723
+ tmp = skb_clone(nskb, GFP_ATOMIC);
3724
+ if (tmp) {
3725
+ consume_skb(nskb);
3726
+ nskb = tmp;
3727
+ err = skb_unclone(nskb, GFP_ATOMIC);
3728
+ } else {
3729
+ err = -ENOMEM;
3730
+ }
3731
+ }
3732
+
3733
+ if (!tail)
3734
+ skb->next = nskb;
3735
+ else
3736
+ tail->next = nskb;
3737
+
3738
+ if (unlikely(err)) {
3739
+ nskb->next = list_skb;
3740
+ goto err_linearize;
3741
+ }
3742
+
3743
+ tail = nskb;
3744
+
3745
+ delta_len += nskb->len;
3746
+
3747
+ skb_push(nskb, -skb_network_offset(nskb) + offset);
3748
+
3749
+ skb_release_head_state(nskb);
3750
+ len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
3751
+ __copy_skb_header(nskb, skb);
3752
+
3753
+ skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
3754
+ nskb->transport_header += len_diff;
3755
+ skb_copy_from_linear_data_offset(skb, -tnl_hlen,
3756
+ nskb->data - tnl_hlen,
3757
+ offset + tnl_hlen);
3758
+
3759
+ if (skb_needs_linearize(nskb, features) &&
3760
+ __skb_linearize(nskb))
3761
+ goto err_linearize;
3762
+ }
3763
+
3764
+ skb->truesize = skb->truesize - delta_truesize;
3765
+ skb->data_len = skb->data_len - delta_len;
3766
+ skb->len = skb->len - delta_len;
3767
+
3768
+ skb_gso_reset(skb);
3769
+
3770
+ skb->prev = tail;
3771
+
3772
+ if (skb_needs_linearize(skb, features) &&
3773
+ __skb_linearize(skb))
3774
+ goto err_linearize;
3775
+
3776
+ skb_get(skb);
3777
+
3778
+ return skb;
3779
+
3780
+err_linearize:
3781
+ kfree_skb_list(skb->next);
3782
+ skb->next = NULL;
3783
+ return ERR_PTR(-ENOMEM);
3784
+}
3785
+EXPORT_SYMBOL_GPL(skb_segment_list);
3786
+
3787
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
3788
+{
3789
+ if (unlikely(p->len + skb->len >= 65536))
3790
+ return -E2BIG;
3791
+
3792
+ if (NAPI_GRO_CB(p)->last == p)
3793
+ skb_shinfo(p)->frag_list = skb;
3794
+ else
3795
+ NAPI_GRO_CB(p)->last->next = skb;
3796
+
3797
+ skb_pull(skb, skb_gro_offset(skb));
3798
+
3799
+ NAPI_GRO_CB(p)->last = skb;
3800
+ NAPI_GRO_CB(p)->count++;
3801
+ p->data_len += skb->len;
3802
+ p->truesize += skb->truesize;
3803
+ p->len += skb->len;
3804
+
3805
+ NAPI_GRO_CB(skb)->same_flow = 1;
3806
+
3807
+ return 0;
35443808 }
35453809
35463810 /**
....@@ -3558,44 +3822,44 @@
35583822 struct sk_buff *segs = NULL;
35593823 struct sk_buff *tail = NULL;
35603824 struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
3561
- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
35623825 unsigned int mss = skb_shinfo(head_skb)->gso_size;
35633826 unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
3564
- struct sk_buff *frag_skb = head_skb;
35653827 unsigned int offset = doffset;
35663828 unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
35673829 unsigned int partial_segs = 0;
35683830 unsigned int headroom;
35693831 unsigned int len = head_skb->len;
3832
+ struct sk_buff *frag_skb;
3833
+ skb_frag_t *frag;
35703834 __be16 proto;
35713835 bool csum, sg;
3572
- int nfrags = skb_shinfo(head_skb)->nr_frags;
35733836 int err = -ENOMEM;
35743837 int i = 0;
3575
- int pos;
3576
- int dummy;
3838
+ int nfrags, pos;
35773839
3578
- if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
3579
- (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
3580
- /* gso_size is untrusted, and we have a frag_list with a linear
3581
- * non head_frag head.
3582
- *
3583
- * (we assume checking the first list_skb member suffices;
3584
- * i.e if either of the list_skb members have non head_frag
3585
- * head, then the first one has too).
3586
- *
3587
- * If head_skb's headlen does not fit requested gso_size, it
3588
- * means that the frag_list members do NOT terminate on exact
3589
- * gso_size boundaries. Hence we cannot perform skb_frag_t page
3590
- * sharing. Therefore we must fallback to copying the frag_list
3591
- * skbs; we do so by disabling SG.
3592
- */
3593
- if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
3594
- features &= ~NETIF_F_SG;
3840
+ if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
3841
+ mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
3842
+ struct sk_buff *check_skb;
3843
+
3844
+ for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
3845
+ if (skb_headlen(check_skb) && !check_skb->head_frag) {
3846
+ /* gso_size is untrusted, and we have a frag_list with
3847
+ * a linear non head_frag item.
3848
+ *
3849
+ * If head_skb's headlen does not fit requested gso_size,
3850
+ * it means that the frag_list members do NOT terminate
3851
+ * on exact gso_size boundaries. Hence we cannot perform
3852
+ * skb_frag_t page sharing. Therefore we must fallback to
3853
+ * copying the frag_list skbs; we do so by disabling SG.
3854
+ */
3855
+ features &= ~NETIF_F_SG;
3856
+ break;
3857
+ }
3858
+ }
35953859 }
35963860
35973861 __skb_push(head_skb, doffset);
3598
- proto = skb_network_protocol(head_skb, &dummy);
3862
+ proto = skb_network_protocol(head_skb, NULL);
35993863 if (unlikely(!proto))
36003864 return ERR_PTR(-EINVAL);
36013865
....@@ -3648,6 +3912,13 @@
36483912 headroom = skb_headroom(head_skb);
36493913 pos = skb_headlen(head_skb);
36503914
3915
+ if (skb_orphan_frags(head_skb, GFP_ATOMIC))
3916
+ return ERR_PTR(-ENOMEM);
3917
+
3918
+ nfrags = skb_shinfo(head_skb)->nr_frags;
3919
+ frag = skb_shinfo(head_skb)->frags;
3920
+ frag_skb = head_skb;
3921
+
36513922 do {
36523923 struct sk_buff *nskb;
36533924 skb_frag_t *nskb_frag;
....@@ -3672,6 +3943,10 @@
36723943 (skb_headlen(list_skb) == len || sg)) {
36733944 BUG_ON(skb_headlen(list_skb) > len);
36743945
3946
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
3947
+ if (unlikely(!nskb))
3948
+ goto err;
3949
+
36753950 i = 0;
36763951 nfrags = skb_shinfo(list_skb)->nr_frags;
36773952 frag = skb_shinfo(list_skb)->frags;
....@@ -3690,11 +3965,7 @@
36903965 frag++;
36913966 }
36923967
3693
- nskb = skb_clone(list_skb, GFP_ATOMIC);
36943968 list_skb = list_skb->next;
3695
-
3696
- if (unlikely(!nskb))
3697
- goto err;
36983969
36993970 if (unlikely(pskb_trim(nskb, len))) {
37003971 kfree_skb(nskb);
....@@ -3741,14 +4012,20 @@
37414012 goto perform_csum_check;
37424013
37434014 if (!sg) {
3744
- if (!nskb->remcsum_offload)
3745
- nskb->ip_summed = CHECKSUM_NONE;
3746
- SKB_GSO_CB(nskb)->csum =
3747
- skb_copy_and_csum_bits(head_skb, offset,
3748
- skb_put(nskb, len),
3749
- len, 0);
3750
- SKB_GSO_CB(nskb)->csum_start =
3751
- skb_headroom(nskb) + doffset;
4015
+ if (!csum) {
4016
+ if (!nskb->remcsum_offload)
4017
+ nskb->ip_summed = CHECKSUM_NONE;
4018
+ SKB_GSO_CB(nskb)->csum =
4019
+ skb_copy_and_csum_bits(head_skb, offset,
4020
+ skb_put(nskb,
4021
+ len),
4022
+ len);
4023
+ SKB_GSO_CB(nskb)->csum_start =
4024
+ skb_headroom(nskb) + doffset;
4025
+ } else {
4026
+ if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len))
4027
+ goto err;
4028
+ }
37524029 continue;
37534030 }
37544031
....@@ -3760,12 +4037,16 @@
37604037 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
37614038 SKBTX_SHARED_FRAG;
37624039
3763
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
3764
- skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
4040
+ if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
37654041 goto err;
37664042
37674043 while (pos < offset + len) {
37684044 if (i >= nfrags) {
4045
+ if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
4046
+ skb_zerocopy_clone(nskb, list_skb,
4047
+ GFP_ATOMIC))
4048
+ goto err;
4049
+
37694050 i = 0;
37704051 nfrags = skb_shinfo(list_skb)->nr_frags;
37714052 frag = skb_shinfo(list_skb)->frags;
....@@ -3779,10 +4060,6 @@
37794060 i--;
37804061 frag--;
37814062 }
3782
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
3783
- skb_zerocopy_clone(nskb, frag_skb,
3784
- GFP_ATOMIC))
3785
- goto err;
37864063
37874064 list_skb = list_skb->next;
37884065 }
....@@ -3801,7 +4078,7 @@
38014078 size = skb_frag_size(nskb_frag);
38024079
38034080 if (pos < offset) {
3804
- nskb_frag->page_offset += offset - pos;
4081
+ skb_frag_off_add(nskb_frag, offset - pos);
38054082 skb_frag_size_sub(nskb_frag, offset - pos);
38064083 }
38074084
....@@ -3922,7 +4199,7 @@
39224199 *--frag = *--frag2;
39234200 } while (--i);
39244201
3925
- frag->page_offset += offset;
4202
+ skb_frag_off_add(frag, offset);
39264203 skb_frag_size_sub(frag, offset);
39274204
39284205 /* all fragments truesize : remove (head size + sk_buff) */
....@@ -3951,8 +4228,8 @@
39514228
39524229 pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
39534230
3954
- frag->page.p = page;
3955
- frag->page_offset = first_offset;
4231
+ __skb_frag_set_page(frag, page);
4232
+ skb_frag_off_set(frag, first_offset);
39564233 skb_frag_size_set(frag, first_size);
39574234
39584235 memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
....@@ -3968,7 +4245,7 @@
39684245 if (offset > headlen) {
39694246 unsigned int eat = offset - headlen;
39704247
3971
- skbinfo->frags[0].page_offset += eat;
4248
+ skb_frag_off_add(&skbinfo->frags[0], eat);
39724249 skb_frag_size_sub(&skbinfo->frags[0], eat);
39734250 skb->data_len -= eat;
39744251 skb->len -= eat;
....@@ -3998,7 +4275,58 @@
39984275 NAPI_GRO_CB(skb)->same_flow = 1;
39994276 return 0;
40004277 }
4001
-EXPORT_SYMBOL_GPL(skb_gro_receive);
4278
+
4279
+#ifdef CONFIG_SKB_EXTENSIONS
4280
+#define SKB_EXT_ALIGN_VALUE 8
4281
+#define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)
4282
+
4283
+static const u8 skb_ext_type_len[] = {
4284
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
4285
+ [SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
4286
+#endif
4287
+#ifdef CONFIG_XFRM
4288
+ [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
4289
+#endif
4290
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4291
+ [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
4292
+#endif
4293
+#if IS_ENABLED(CONFIG_MPTCP)
4294
+ [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
4295
+#endif
4296
+};
4297
+
4298
+static __always_inline unsigned int skb_ext_total_length(void)
4299
+{
4300
+ return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
4301
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
4302
+ skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
4303
+#endif
4304
+#ifdef CONFIG_XFRM
4305
+ skb_ext_type_len[SKB_EXT_SEC_PATH] +
4306
+#endif
4307
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4308
+ skb_ext_type_len[TC_SKB_EXT] +
4309
+#endif
4310
+#if IS_ENABLED(CONFIG_MPTCP)
4311
+ skb_ext_type_len[SKB_EXT_MPTCP] +
4312
+#endif
4313
+ 0;
4314
+}
4315
+
4316
+static void skb_extensions_init(void)
4317
+{
4318
+ BUILD_BUG_ON(SKB_EXT_NUM >= 8);
4319
+ BUILD_BUG_ON(skb_ext_total_length() > 255);
4320
+
4321
+ skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
4322
+ SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
4323
+ 0,
4324
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
4325
+ NULL);
4326
+}
4327
+#else
4328
+static void skb_extensions_init(void) {}
4329
+#endif
40024330
40034331 void __init skb_init(void)
40044332 {
....@@ -4014,6 +4342,7 @@
40144342 0,
40154343 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
40164344 NULL);
4345
+ skb_extensions_init();
40174346 }
40184347
40194348 static int
....@@ -4052,7 +4381,7 @@
40524381 if (copy > len)
40534382 copy = len;
40544383 sg_set_page(&sg[elt], skb_frag_page(frag), copy,
4055
- frag->page_offset+offset-start);
4384
+ skb_frag_off(frag) + offset - start);
40564385 elt++;
40574386 if (!(len -= copy))
40584387 return elt;
....@@ -4169,7 +4498,7 @@
41694498 * at the moment even if they are anonymous).
41704499 */
41714500 if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
4172
- __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
4501
+ !__pskb_pull_tail(skb, __skb_pagelen(skb)))
41734502 return -ENOMEM;
41744503
41754504 /* Easy case. Most of packets will go this way. */
....@@ -4273,7 +4602,7 @@
42734602 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
42744603 {
42754604 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
4276
- (unsigned int)sk->sk_rcvbuf)
4605
+ (unsigned int)READ_ONCE(sk->sk_rcvbuf))
42774606 return -ENOMEM;
42784607
42794608 skb_orphan(skb);
....@@ -4392,7 +4721,7 @@
43924721 {
43934722 bool ret;
43944723
4395
- if (likely(sysctl_tstamp_allow_data || tsonly))
4724
+ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
43964725 return true;
43974726
43984727 read_lock_bh(&sk->sk_callback_lock);
....@@ -4448,13 +4777,18 @@
44484777 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
44494778 sk->sk_protocol == IPPROTO_TCP &&
44504779 sk->sk_type == SOCK_STREAM) {
4451
- skb = tcp_get_timestamping_opt_stats(sk);
4780
+ skb = tcp_get_timestamping_opt_stats(sk, orig_skb);
44524781 opt_stats = true;
44534782 } else
44544783 #endif
44554784 skb = alloc_skb(0, GFP_ATOMIC);
44564785 } else {
44574786 skb = skb_clone(orig_skb, GFP_ATOMIC);
4787
+
4788
+ if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
4789
+ kfree_skb(skb);
4790
+ return;
4791
+ }
44584792 }
44594793 if (!skb)
44604794 return;
....@@ -4565,9 +4899,9 @@
45654899 typeof(IPPROTO_IP) proto,
45664900 unsigned int off)
45674901 {
4568
- switch (proto) {
4569
- int err;
4902
+ int err;
45704903
4904
+ switch (proto) {
45714905 case IPPROTO_TCP:
45724906 err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
45734907 off + MAX_TCP_HDR_LEN);
....@@ -4610,7 +4944,7 @@
46104944 if (err < 0)
46114945 goto out;
46124946
4613
- if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
4947
+ if (ip_is_fragment(ip_hdr(skb)))
46144948 fragment = true;
46154949
46164950 off = ip_hdrlen(skb);
....@@ -4977,13 +5311,13 @@
49775311 skb->skb_iif = 0;
49785312 skb->ignore_df = 0;
49795313 skb_dst_drop(skb);
4980
- secpath_reset(skb);
4981
- nf_reset(skb);
5314
+ skb_ext_reset(skb);
5315
+ nf_reset_ct(skb);
49825316 nf_reset_trace(skb);
49835317
49845318 #ifdef CONFIG_NET_SWITCHDEV
49855319 skb->offload_fwd_mark = 0;
4986
- skb->offload_mr_fwd_mark = 0;
5320
+ skb->offload_l3_fwd_mark = 0;
49875321 #endif
49885322
49895323 if (!xnet)
....@@ -5075,6 +5409,8 @@
50755409 * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
50765410 *
50775411 * This is a helper to do that correctly considering GSO_BY_FRAGS.
5412
+ *
5413
+ * @skb: GSO skb
50785414 *
50795415 * @seg_len: The segmented length (from skb_gso_*_seglen). In the
50805416 * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
....@@ -5261,7 +5597,7 @@
52615597 int err;
52625598
52635599 if (likely(skb_vlan_tag_present(skb))) {
5264
- skb->vlan_tci = 0;
5600
+ __vlan_hwaccel_clear_tag(skb);
52655601 } else {
52665602 if (unlikely(!eth_type_vlan(skb->protocol)))
52675603 return 0;
....@@ -5313,6 +5649,252 @@
53135649 return 0;
53145650 }
53155651 EXPORT_SYMBOL(skb_vlan_push);
5652
+
5653
+/**
5654
+ * skb_eth_pop() - Drop the Ethernet header at the head of a packet
5655
+ *
5656
+ * @skb: Socket buffer to modify
5657
+ *
5658
+ * Drop the Ethernet header of @skb.
5659
+ *
5660
+ * Expects that skb->data points to the mac header and that no VLAN tags are
5661
+ * present.
5662
+ *
5663
+ * Returns 0 on success, -errno otherwise.
5664
+ */
5665
+int skb_eth_pop(struct sk_buff *skb)
5666
+{
5667
+ if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) ||
5668
+ skb_network_offset(skb) < ETH_HLEN)
5669
+ return -EPROTO;
5670
+
5671
+ skb_pull_rcsum(skb, ETH_HLEN);
5672
+ skb_reset_mac_header(skb);
5673
+ skb_reset_mac_len(skb);
5674
+
5675
+ return 0;
5676
+}
5677
+EXPORT_SYMBOL(skb_eth_pop);
5678
+
5679
+/**
5680
+ * skb_eth_push() - Add a new Ethernet header at the head of a packet
5681
+ *
5682
+ * @skb: Socket buffer to modify
5683
+ * @dst: Destination MAC address of the new header
5684
+ * @src: Source MAC address of the new header
5685
+ *
5686
+ * Prepend @skb with a new Ethernet header.
5687
+ *
5688
+ * Expects that skb->data points to the mac header, which must be empty.
5689
+ *
5690
+ * Returns 0 on success, -errno otherwise.
5691
+ */
5692
+int skb_eth_push(struct sk_buff *skb, const unsigned char *dst,
5693
+ const unsigned char *src)
5694
+{
5695
+ struct ethhdr *eth;
5696
+ int err;
5697
+
5698
+ if (skb_network_offset(skb) || skb_vlan_tag_present(skb))
5699
+ return -EPROTO;
5700
+
5701
+ err = skb_cow_head(skb, sizeof(*eth));
5702
+ if (err < 0)
5703
+ return err;
5704
+
5705
+ skb_push(skb, sizeof(*eth));
5706
+ skb_reset_mac_header(skb);
5707
+ skb_reset_mac_len(skb);
5708
+
5709
+ eth = eth_hdr(skb);
5710
+ ether_addr_copy(eth->h_dest, dst);
5711
+ ether_addr_copy(eth->h_source, src);
5712
+ eth->h_proto = skb->protocol;
5713
+
5714
+ skb_postpush_rcsum(skb, eth, sizeof(*eth));
5715
+
5716
+ return 0;
5717
+}
5718
+EXPORT_SYMBOL(skb_eth_push);
5719
+
5720
+/* Update the ethertype of hdr and the skb csum value if required. */
5721
+static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
5722
+ __be16 ethertype)
5723
+{
5724
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
5725
+ __be16 diff[] = { ~hdr->h_proto, ethertype };
5726
+
5727
+ skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
5728
+ }
5729
+
5730
+ hdr->h_proto = ethertype;
5731
+}
5732
+
5733
+/**
5734
+ * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
5735
+ * the packet
5736
+ *
5737
+ * @skb: buffer
5738
+ * @mpls_lse: MPLS label stack entry to push
5739
+ * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
5740
+ * @mac_len: length of the MAC header
5741
+ * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
5742
+ * ethernet
5743
+ *
5744
+ * Expects skb->data at mac header.
5745
+ *
5746
+ * Returns 0 on success, -errno otherwise.
5747
+ */
5748
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
5749
+ int mac_len, bool ethernet)
5750
+{
5751
+ struct mpls_shim_hdr *lse;
5752
+ int err;
5753
+
5754
+ if (unlikely(!eth_p_mpls(mpls_proto)))
5755
+ return -EINVAL;
5756
+
5757
+ /* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */
5758
+ if (skb->encapsulation)
5759
+ return -EINVAL;
5760
+
5761
+ err = skb_cow_head(skb, MPLS_HLEN);
5762
+ if (unlikely(err))
5763
+ return err;
5764
+
5765
+ if (!skb->inner_protocol) {
5766
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
5767
+ skb_set_inner_protocol(skb, skb->protocol);
5768
+ }
5769
+
5770
+ skb_push(skb, MPLS_HLEN);
5771
+ memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
5772
+ mac_len);
5773
+ skb_reset_mac_header(skb);
5774
+ skb_set_network_header(skb, mac_len);
5775
+ skb_reset_mac_len(skb);
5776
+
5777
+ lse = mpls_hdr(skb);
5778
+ lse->label_stack_entry = mpls_lse;
5779
+ skb_postpush_rcsum(skb, lse, MPLS_HLEN);
5780
+
5781
+ if (ethernet && mac_len >= ETH_HLEN)
5782
+ skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
5783
+ skb->protocol = mpls_proto;
5784
+
5785
+ return 0;
5786
+}
5787
+EXPORT_SYMBOL_GPL(skb_mpls_push);
5788
+
5789
+/**
5790
+ * skb_mpls_pop() - pop the outermost MPLS header
5791
+ *
5792
+ * @skb: buffer
5793
+ * @next_proto: ethertype of header after popped MPLS header
5794
+ * @mac_len: length of the MAC header
5795
+ * @ethernet: flag to indicate if the packet is ethernet
5796
+ *
5797
+ * Expects skb->data at mac header.
5798
+ *
5799
+ * Returns 0 on success, -errno otherwise.
5800
+ */
5801
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
5802
+ bool ethernet)
5803
+{
5804
+ int err;
5805
+
5806
+ if (unlikely(!eth_p_mpls(skb->protocol)))
5807
+ return 0;
5808
+
5809
+ err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
5810
+ if (unlikely(err))
5811
+ return err;
5812
+
5813
+ skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
5814
+ memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
5815
+ mac_len);
5816
+
5817
+ __skb_pull(skb, MPLS_HLEN);
5818
+ skb_reset_mac_header(skb);
5819
+ skb_set_network_header(skb, mac_len);
5820
+
5821
+ if (ethernet && mac_len >= ETH_HLEN) {
5822
+ struct ethhdr *hdr;
5823
+
5824
+ /* use mpls_hdr() to locate the ethertype field, so VLAN tags are accounted for. */
5825
+ hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
5826
+ skb_mod_eth_type(skb, hdr, next_proto);
5827
+ }
5828
+ skb->protocol = next_proto;
5829
+
5830
+ return 0;
5831
+}
5832
+EXPORT_SYMBOL_GPL(skb_mpls_pop);
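
A hedged decapsulation sketch (not part of this patch): the ethertype to restore has to come from the caller, for example htons(ETH_P_IP) once the popped entry had the bottom-of-stack bit set, which mirrors how tc/openvswitch-style users are expected to drive this helper. It assumes skb->dev is set:

static int example_mpls_decap(struct sk_buff *skb, __be16 next_proto)
{
        /* skb_mpls_pop() is a no-op (returns 0) if the packet is not MPLS. */
        return skb_mpls_pop(skb, next_proto, skb->mac_len,
                            skb->dev->type == ARPHRD_ETHER);
}
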
5833
+
5834
+/**
5835
+ * skb_mpls_update_lse() - modify outermost MPLS header and update csum
5836
+ *
5837
+ * @skb: buffer
5838
+ * @mpls_lse: new MPLS label stack entry to update to
5839
+ *
5840
+ * Expects skb->data at mac header.
5841
+ *
5842
+ * Returns 0 on success, -errno otherwise.
5843
+ */
5844
+int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse)
5845
+{
5846
+ int err;
5847
+
5848
+ if (unlikely(!eth_p_mpls(skb->protocol)))
5849
+ return -EINVAL;
5850
+
5851
+ err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
5852
+ if (unlikely(err))
5853
+ return err;
5854
+
5855
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
5856
+ __be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse };
5857
+
5858
+ skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
5859
+ }
5860
+
5861
+ mpls_hdr(skb)->label_stack_entry = mpls_lse;
5862
+
5863
+ return 0;
5864
+}
5865
+EXPORT_SYMBOL_GPL(skb_mpls_update_lse);
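
A hedged label-swap sketch (not part of this patch): read the outermost entry, replace only the 20-bit label and let skb_mpls_update_lse() fix up CHECKSUM_COMPLETE. The pskb_may_pull() mirrors the check used by skb_mpls_dec_ttl() below:

static int example_mpls_swap_label(struct sk_buff *skb, u32 new_label)
{
        u32 lse;

        if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
                return -ENOMEM;

        lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
        lse &= ~MPLS_LS_LABEL_MASK;
        lse |= (new_label << MPLS_LS_LABEL_SHIFT) & MPLS_LS_LABEL_MASK;

        return skb_mpls_update_lse(skb, cpu_to_be32(lse));
}
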
5866
+
5867
+/**
5868
+ * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header
5869
+ *
5870
+ * @skb: buffer
5871
+ *
5872
+ * Expects skb->data at mac header.
5873
+ *
5874
+ * Returns 0 on success, -errno otherwise.
5875
+ */
5876
+int skb_mpls_dec_ttl(struct sk_buff *skb)
5877
+{
5878
+ u32 lse;
5879
+ u8 ttl;
5880
+
5881
+ if (unlikely(!eth_p_mpls(skb->protocol)))
5882
+ return -EINVAL;
5883
+
5884
+ if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
5885
+ return -ENOMEM;
5886
+
5887
+ lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
5888
+ ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
5889
+ if (!--ttl)
5890
+ return -EINVAL;
5891
+
5892
+ lse &= ~MPLS_LS_TTL_MASK;
5893
+ lse |= ttl << MPLS_LS_TTL_SHIFT;
5894
+
5895
+ return skb_mpls_update_lse(skb, cpu_to_be32(lse));
5896
+}
5897
+EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
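
Tying the two helpers together, a hedged LSR-style transit sketch (not part of this patch; it reuses the hypothetical example_mpls_swap_label() from the previous sketch): decrement the TTL first so that an expiring packet is rejected before the label is rewritten.

static int example_mpls_transit(struct sk_buff *skb, u32 out_label)
{
        int err;

        err = skb_mpls_dec_ttl(skb);    /* -EINVAL once the TTL would reach 0 */
        if (err)
                return err;             /* caller is expected to drop the skb */

        return example_mpls_swap_label(skb, out_label);
}
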
53165898
53175899 /**
53185900 * alloc_skb_with_frags - allocate skb with page frags
....@@ -5436,11 +6018,7 @@
54366018 skb->head = data;
54376019 skb->data = data;
54386020 skb->head_frag = 0;
5439
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
5440
- skb->end = size;
5441
-#else
5442
- skb->end = skb->head + size;
5443
-#endif
6021
+ skb_set_end_offset(skb, size);
54446022 skb_set_tail_pointer(skb, skb_headlen(skb));
54456023 skb_headers_offset_update(skb, 0);
54466024 skb->cloned = 0;
....@@ -5532,8 +6110,7 @@
55326110 size = SKB_WITH_OVERHEAD(ksize(data));
55336111
55346112 memcpy((struct skb_shared_info *)(data + size),
5535
- skb_shinfo(skb), offsetof(struct skb_shared_info,
5536
- frags[skb_shinfo(skb)->nr_frags]));
6113
+ skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
55376114 if (skb_orphan_frags(skb, gfp_mask)) {
55386115 kfree(data);
55396116 return -ENOMEM;
....@@ -5554,7 +6131,7 @@
55546131 * where splitting is expensive.
55556132 * 2. Split is accurately. We make this.
55566133 */
5557
- shinfo->frags[0].page_offset += off - pos;
6134
+ skb_frag_off_add(&shinfo->frags[0], off - pos);
55586135 skb_frag_size_sub(&shinfo->frags[0], off - pos);
55596136 }
55606137 skb_frag_ref(skb, i);
....@@ -5579,11 +6156,7 @@
55796156 skb->head = data;
55806157 skb->head_frag = 0;
55816158 skb->data = data;
5582
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
5583
- skb->end = size;
5584
-#else
5585
- skb->end = skb->head + size;
5586
-#endif
6159
+ skb_set_end_offset(skb, size);
55876160 skb_reset_tail_pointer(skb);
55886161 skb_headers_offset_update(skb, 0);
55896162 skb->cloned = 0;
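
Both pskb_carve hunks above replace the open-coded NET_SKBUFF_DATA_USES_OFFSET conditional with skb_set_end_offset(). The helper itself lives in include/linux/skbuff.h and is not shown in this diff; based on the removed lines it is expected to behave like the sketch below (the name example_set_end_offset() is only illustrative):

static inline void example_set_end_offset(struct sk_buff *skb, unsigned int offset)
{
#ifdef NET_SKBUFF_DATA_USES_OFFSET
        skb->end = offset;                /* skb->end stored as an offset */
#else
        skb->end = skb->head + offset;    /* skb->end stored as a pointer */
#endif
}
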
....@@ -5657,4 +6230,181 @@
56576230 */
56586231 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
56596232 }
5660
-EXPORT_SYMBOL_GPL(skb_condense);
6233
+
6234
+#ifdef CONFIG_SKB_EXTENSIONS
6235
+static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
6236
+{
6237
+ return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
6238
+}
6239
+
6240
+/**
6241
+ * __skb_ext_alloc - allocate new skb extension storage
6242
+ *
6243
+ * @flags: See kmalloc().
6244
+ *
6245
+ * Returns the newly allocated pointer. The pointer can later be attached to a
6246
+ * skb via __skb_ext_set().
6247
+ * Note: callers must treat the skb_ext as opaque data.
6248
+ */
6249
+struct skb_ext *__skb_ext_alloc(gfp_t flags)
6250
+{
6251
+ struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags);
6252
+
6253
+ if (new) {
6254
+ memset(new->offset, 0, sizeof(new->offset));
6255
+ refcount_set(&new->refcnt, 1);
6256
+ }
6257
+
6258
+ return new;
6259
+}
6260
+
6261
+static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
6262
+ unsigned int old_active)
6263
+{
6264
+ struct skb_ext *new;
6265
+
6266
+ if (refcount_read(&old->refcnt) == 1)
6267
+ return old;
6268
+
6269
+ new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
6270
+ if (!new)
6271
+ return NULL;
6272
+
6273
+ memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
6274
+ refcount_set(&new->refcnt, 1);
6275
+
6276
+#ifdef CONFIG_XFRM
6277
+ if (old_active & (1 << SKB_EXT_SEC_PATH)) {
6278
+ struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH);
6279
+ unsigned int i;
6280
+
6281
+ for (i = 0; i < sp->len; i++)
6282
+ xfrm_state_hold(sp->xvec[i]);
6283
+ }
6284
+#endif
6285
+ __skb_ext_put(old);
6286
+ return new;
6287
+}
6288
+
6289
+/**
6290
+ * __skb_ext_set - attach the specified extension storage to this skb
6291
+ * @skb: buffer
6292
+ * @id: extension id
6293
+ * @ext: extension storage previously allocated via __skb_ext_alloc()
6294
+ *
6295
+ * Existing extensions, if any, are cleared.
6296
+ *
6297
+ * Returns the pointer to the extension.
6298
+ */
6299
+void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
6300
+ struct skb_ext *ext)
6301
+{
6302
+ unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);
6303
+
6304
+ skb_ext_put(skb);
6305
+ newlen = newoff + skb_ext_type_len[id];
6306
+ ext->chunks = newlen;
6307
+ ext->offset[id] = newoff;
6308
+ skb->extensions = ext;
6309
+ skb->active_extensions = 1 << id;
6310
+ return skb_ext_get_ptr(ext, id);
6311
+}
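
A hedged sketch of the intended two-step use of __skb_ext_alloc()/__skb_ext_set() (not part of this patch): allocate the storage where a sleeping allocation is acceptable, then attach it on a path that must not fail. Here @id stands for whichever extension type the kernel configuration provides (SKB_EXT_SEC_PATH, SKB_EXT_MPTCP, ...), and the helper name is hypothetical:

static void *example_ext_preattach(struct sk_buff *skb, enum skb_ext_id id,
                                   gfp_t gfp)
{
        struct skb_ext *ext = __skb_ext_alloc(gfp);

        if (!ext)
                return NULL;

        /* __skb_ext_set() clears whatever extensions the skb already
         * carried and takes over the reference held on @ext.
         */
        return __skb_ext_set(skb, id, ext);
}
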
6312
+
6313
+/**
6314
+ * skb_ext_add - allocate space for given extension, COW if needed
6315
+ * @skb: buffer
6316
+ * @id: extension to allocate space for
6317
+ *
6318
+ * Allocates enough space for the given extension.
6319
+ * If the extension is already present, a pointer to that extension
6320
+ * is returned.
6321
+ *
6322
+ * If the skb was cloned, COW applies and the returned memory can be
6323
+ * modified without changing the extension space of cloned buffers.
6324
+ *
6325
+ * Returns pointer to the extension or NULL on allocation failure.
6326
+ */
6327
+void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
6328
+{
6329
+ struct skb_ext *new, *old = NULL;
6330
+ unsigned int newlen, newoff;
6331
+
6332
+ if (skb->active_extensions) {
6333
+ old = skb->extensions;
6334
+
6335
+ new = skb_ext_maybe_cow(old, skb->active_extensions);
6336
+ if (!new)
6337
+ return NULL;
6338
+
6339
+ if (__skb_ext_exist(new, id))
6340
+ goto set_active;
6341
+
6342
+ newoff = new->chunks;
6343
+ } else {
6344
+ newoff = SKB_EXT_CHUNKSIZEOF(*new);
6345
+
6346
+ new = __skb_ext_alloc(GFP_ATOMIC);
6347
+ if (!new)
6348
+ return NULL;
6349
+ }
6350
+
6351
+ newlen = newoff + skb_ext_type_len[id];
6352
+ new->chunks = newlen;
6353
+ new->offset[id] = newoff;
6354
+set_active:
6355
+ skb->extensions = new;
6356
+ skb->active_extensions |= 1 << id;
6357
+ return skb_ext_get_ptr(new, id);
6358
+}
6359
+EXPORT_SYMBOL(skb_ext_add);
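
A hedged consumer sketch (not part of this patch), modelled on how an XFRM-style user is expected to drive skb_ext_add(): reuse an existing extension when the skb already carries one (possibly after COW), and initialise the payload only when the storage was freshly added, since skb_ext_add() does not zero it:

#ifdef CONFIG_XFRM
static struct sec_path *example_secpath_attach(struct sk_buff *skb)
{
        struct sec_path *old = skb_ext_find(skb, SKB_EXT_SEC_PATH);
        struct sec_path *sp;

        sp = skb_ext_add(skb, SKB_EXT_SEC_PATH);
        if (!sp)
                return NULL;    /* GFP_ATOMIC allocation or COW failed */

        if (!old) {             /* freshly added: payload is uninitialised */
                sp->len = 0;
                sp->olen = 0;
        }
        return sp;
}
#endif
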
6360
+
6361
+#ifdef CONFIG_XFRM
6362
+static void skb_ext_put_sp(struct sec_path *sp)
6363
+{
6364
+ unsigned int i;
6365
+
6366
+ for (i = 0; i < sp->len; i++)
6367
+ xfrm_state_put(sp->xvec[i]);
6368
+}
6369
+#endif
6370
+
6371
+void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
6372
+{
6373
+ struct skb_ext *ext = skb->extensions;
6374
+
6375
+ skb->active_extensions &= ~(1 << id);
6376
+ if (skb->active_extensions == 0) {
6377
+ skb->extensions = NULL;
6378
+ __skb_ext_put(ext);
6379
+#ifdef CONFIG_XFRM
6380
+ } else if (id == SKB_EXT_SEC_PATH &&
6381
+ refcount_read(&ext->refcnt) == 1) {
6382
+ struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH);
6383
+
6384
+ skb_ext_put_sp(sp);
6385
+ sp->len = 0;
6386
+#endif
6387
+ }
6388
+}
6389
+EXPORT_SYMBOL(__skb_ext_del);
6390
+
6391
+void __skb_ext_put(struct skb_ext *ext)
6392
+{
6393
+ /* If this is the last clone, nothing can increment
6394
+ * it after the check passes. Avoids one atomic op.
6395
+ */
6396
+ if (refcount_read(&ext->refcnt) == 1)
6397
+ goto free_now;
6398
+
6399
+ if (!refcount_dec_and_test(&ext->refcnt))
6400
+ return;
6401
+free_now:
6402
+#ifdef CONFIG_XFRM
6403
+ if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
6404
+ skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
6405
+#endif
6406
+
6407
+ kmem_cache_free(skbuff_ext_cache, ext);
6408
+}
6409
+EXPORT_SYMBOL(__skb_ext_put);
6410
+#endif /* CONFIG_SKB_EXTENSIONS */
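
Finally, a hedged walk-through of the extension lifetime from a caller's point of view (not part of this patch; skb_ext_find() and skb_ext_del() are the inline wrappers from <linux/skbuff.h>): the skb_ext block is shared by reference, so the kmem_cache_free() in __skb_ext_put() only happens once the last skb or clone lets go of it.

static void example_ext_lifetime(struct sk_buff *skb, enum skb_ext_id id)
{
        /* 1. Attach: allocates new storage or COWs a shared block. */
        if (!skb_ext_add(skb, id))
                return;

        /* 2. Clones made from this skb share the same block; the clone
         *    path is expected to only take an extra reference on it.
         */

        /* 3. Drop this id again: the block is freed via __skb_ext_put()
         *    only when no extension id and no clone still references it.
         */
        skb_ext_del(skb, id);
}
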