hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/core/skbuff.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Routines having to do with the 'struct sk_buff' memory handlers.
34 *
....@@ -25,11 +26,6 @@
2526 * disabled, or you better be *real* sure that the operation is atomic
2627 * with respect to whatever list is being frobbed (e.g. via lock_sock()
2728 * or via disabling bottom half handlers, etc).
28
- *
29
- * This program is free software; you can redistribute it and/or
30
- * modify it under the terms of the GNU General Public License
31
- * as published by the Free Software Foundation; either version
32
- * 2 of the License, or (at your option) any later version.
3329 */
3430
3531 /*
....@@ -63,6 +59,7 @@
6359 #include <linux/errqueue.h>
6460 #include <linux/prefetch.h>
6561 #include <linux/if_vlan.h>
62
+#include <linux/mpls.h>
6663
6764 #include <net/protocol.h>
6865 #include <net/dst.h>
....@@ -70,15 +67,24 @@
7067 #include <net/checksum.h>
7168 #include <net/ip6_checksum.h>
7269 #include <net/xfrm.h>
70
+#include <net/mpls.h>
71
+#include <net/mptcp.h>
7372
7473 #include <linux/uaccess.h>
7574 #include <trace/events/skb.h>
7675 #include <linux/highmem.h>
7776 #include <linux/capability.h>
7877 #include <linux/user_namespace.h>
78
+#include <linux/indirect_call_wrapper.h>
79
+#include <trace/hooks/net.h>
80
+
81
+#include "datagram.h"
7982
8083 struct kmem_cache *skbuff_head_cache __ro_after_init;
8184 static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
85
+#ifdef CONFIG_SKB_EXTENSIONS
86
+static struct kmem_cache *skbuff_ext_cache __ro_after_init;
87
+#endif
8288 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
8389 EXPORT_SYMBOL(sysctl_max_skb_frags);
8490
....@@ -97,7 +103,7 @@
97103 static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
98104 const char msg[])
99105 {
100
- pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
106
+ pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n",
101107 msg, addr, skb->len, sz, skb->head, skb->data,
102108 (unsigned long)skb->tail, (unsigned long)skb->end,
103109 skb->dev ? skb->dev->name : "<NULL>");
....@@ -244,6 +250,9 @@
244250
245251 fclones->skb2.fclone = SKB_FCLONE_CLONE;
246252 }
253
+
254
+ skb_set_kcov_handle(skb, kcov_common_handle());
255
+
247256 out:
248257 return skb;
249258 nodata:
....@@ -252,6 +261,35 @@
252261 goto out;
253262 }
254263 EXPORT_SYMBOL(__alloc_skb);
264
+
265
+/* Caller must provide SKB that is memset cleared */
266
+static struct sk_buff *__build_skb_around(struct sk_buff *skb,
267
+ void *data, unsigned int frag_size)
268
+{
269
+ struct skb_shared_info *shinfo;
270
+ unsigned int size = frag_size ? : ksize(data);
271
+
272
+ size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
273
+
274
+ /* Assumes caller memset cleared SKB */
275
+ skb->truesize = SKB_TRUESIZE(size);
276
+ refcount_set(&skb->users, 1);
277
+ skb->head = data;
278
+ skb->data = data;
279
+ skb_reset_tail_pointer(skb);
280
+ skb->end = skb->tail + size;
281
+ skb->mac_header = (typeof(skb->mac_header))~0U;
282
+ skb->transport_header = (typeof(skb->transport_header))~0U;
283
+
284
+ /* make sure we initialize shinfo sequentially */
285
+ shinfo = skb_shinfo(skb);
286
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
287
+ atomic_set(&shinfo->dataref, 1);
288
+
289
+ skb_set_kcov_handle(skb, kcov_common_handle());
290
+
291
+ return skb;
292
+}
255293
256294 /**
257295 * __build_skb - build a network buffer
....@@ -274,32 +312,15 @@
274312 */
275313 struct sk_buff *__build_skb(void *data, unsigned int frag_size)
276314 {
277
- struct skb_shared_info *shinfo;
278315 struct sk_buff *skb;
279
- unsigned int size = frag_size ? : ksize(data);
280316
281317 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
282
- if (!skb)
318
+ if (unlikely(!skb))
283319 return NULL;
284320
285
- size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
286
-
287321 memset(skb, 0, offsetof(struct sk_buff, tail));
288
- skb->truesize = SKB_TRUESIZE(size);
289
- refcount_set(&skb->users, 1);
290
- skb->head = data;
291
- skb->data = data;
292
- skb_reset_tail_pointer(skb);
293
- skb->end = skb->tail + size;
294
- skb->mac_header = (typeof(skb->mac_header))~0U;
295
- skb->transport_header = (typeof(skb->transport_header))~0U;
296322
297
- /* make sure we initialize shinfo sequentially */
298
- shinfo = skb_shinfo(skb);
299
- memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
300
- atomic_set(&shinfo->dataref, 1);
301
-
302
- return skb;
323
+ return __build_skb_around(skb, data, frag_size);
303324 }
304325
305326 /* build_skb() is wrapper over __build_skb(), that specifically
....@@ -320,6 +341,29 @@
320341 }
321342 EXPORT_SYMBOL(build_skb);
322343
344
+/**
345
+ * build_skb_around - build a network buffer around provided skb
346
+ * @skb: sk_buff provide by caller, must be memset cleared
347
+ * @data: data buffer provided by caller
348
+ * @frag_size: size of data, or 0 if head was kmalloced
349
+ */
350
+struct sk_buff *build_skb_around(struct sk_buff *skb,
351
+ void *data, unsigned int frag_size)
352
+{
353
+ if (unlikely(!skb))
354
+ return NULL;
355
+
356
+ skb = __build_skb_around(skb, data, frag_size);
357
+
358
+ if (skb && frag_size) {
359
+ skb->head_frag = 1;
360
+ if (page_is_pfmemalloc(virt_to_head_page(data)))
361
+ skb->pfmemalloc = 1;
362
+ }
363
+ return skb;
364
+}
365
+EXPORT_SYMBOL(build_skb_around);
366
+
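
A minimal usage sketch for the new build_skb_around() helper. This is hypothetical driver code, not part of the diff: rx_wrap_skb(), rx_buf and truesize are illustrative names, assuming a caller-owned sk_buff and a page-fragment receive buffer.

#include <linux/skbuff.h>

/* Sketch only: wrap a caller-owned, zeroed sk_buff around an existing
 * receive buffer. A non-zero frag_size marks the head as a page frag,
 * so build_skb_around() also sets head_frag and pfmemalloc for us.
 */
static struct sk_buff *rx_wrap_skb(struct sk_buff *skb, void *rx_buf,
				   unsigned int truesize)
{
	memset(skb, 0, offsetof(struct sk_buff, tail));

	skb = build_skb_around(skb, rx_buf, truesize);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, NET_SKB_PAD);
	return skb;
}
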
323367 #define NAPI_SKB_CACHE_SIZE 64
324368
325369 struct napi_alloc_cache {
....@@ -330,34 +374,6 @@
330374
331375 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
332376 static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
333
-
334
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
335
-{
336
- struct page_frag_cache *nc;
337
- unsigned long flags;
338
- void *data;
339
-
340
- local_irq_save(flags);
341
- nc = this_cpu_ptr(&netdev_alloc_cache);
342
- data = page_frag_alloc(nc, fragsz, gfp_mask);
343
- local_irq_restore(flags);
344
- return data;
345
-}
346
-
347
-/**
348
- * netdev_alloc_frag - allocate a page fragment
349
- * @fragsz: fragment size
350
- *
351
- * Allocates a frag from a page for receive buffer.
352
- * Uses GFP_ATOMIC allocations.
353
- */
354
-void *netdev_alloc_frag(unsigned int fragsz)
355
-{
356
- fragsz = SKB_DATA_ALIGN(fragsz);
357
-
358
- return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
359
-}
360
-EXPORT_SYMBOL(netdev_alloc_frag);
361377
362378 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
363379 {
....@@ -373,6 +389,31 @@
373389 return __napi_alloc_frag(fragsz, GFP_ATOMIC);
374390 }
375391 EXPORT_SYMBOL(napi_alloc_frag);
392
+
393
+/**
394
+ * netdev_alloc_frag - allocate a page fragment
395
+ * @fragsz: fragment size
396
+ *
397
+ * Allocates a frag from a page for receive buffer.
398
+ * Uses GFP_ATOMIC allocations.
399
+ */
400
+void *netdev_alloc_frag(unsigned int fragsz)
401
+{
402
+ struct page_frag_cache *nc;
403
+ void *data;
404
+
405
+ fragsz = SKB_DATA_ALIGN(fragsz);
406
+ if (in_irq() || irqs_disabled()) {
407
+ nc = this_cpu_ptr(&netdev_alloc_cache);
408
+ data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
409
+ } else {
410
+ local_bh_disable();
411
+ data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
412
+ local_bh_enable();
413
+ }
414
+ return data;
415
+}
416
+EXPORT_SYMBOL(netdev_alloc_frag);
376417
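
With this change netdev_alloc_frag() itself picks the right per-CPU cache (netdev_alloc_cache in hard-IRQ or IRQs-disabled context, the NAPI cache under local_bh_disable() otherwise), so callers no longer need an IRQ-safe wrapper. A hedged sketch of a typical refill call; rx_alloc_buffer() and the buffer layout are assumptions for illustration only.

/* Sketch: reserve room for the data plus the shared info so the frag
 * can later be handed to build_skb()/build_skb_around().
 */
static void *rx_alloc_buffer(unsigned int data_len)
{
	unsigned int sz = SKB_DATA_ALIGN(data_len) +
			  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	return netdev_alloc_frag(sz);
}
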
377418 /**
378419 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
....@@ -391,7 +432,6 @@
391432 gfp_t gfp_mask)
392433 {
393434 struct page_frag_cache *nc;
394
- unsigned long flags;
395435 struct sk_buff *skb;
396436 bool pfmemalloc;
397437 void *data;
....@@ -416,13 +456,17 @@
416456 if (sk_memalloc_socks())
417457 gfp_mask |= __GFP_MEMALLOC;
418458
419
- local_irq_save(flags);
420
-
421
- nc = this_cpu_ptr(&netdev_alloc_cache);
422
- data = page_frag_alloc(nc, len, gfp_mask);
423
- pfmemalloc = nc->pfmemalloc;
424
-
425
- local_irq_restore(flags);
459
+ if (in_irq() || irqs_disabled()) {
460
+ nc = this_cpu_ptr(&netdev_alloc_cache);
461
+ data = page_frag_alloc(nc, len, gfp_mask);
462
+ pfmemalloc = nc->pfmemalloc;
463
+ } else {
464
+ local_bh_disable();
465
+ nc = this_cpu_ptr(&napi_alloc_cache.page);
466
+ data = page_frag_alloc(nc, len, gfp_mask);
467
+ pfmemalloc = nc->pfmemalloc;
468
+ local_bh_enable();
469
+ }
426470
427471 if (unlikely(!data))
428472 return NULL;
....@@ -433,7 +477,6 @@
433477 return NULL;
434478 }
435479
436
- /* use OR instead of assignment to avoid clearing of bits in mask */
437480 if (pfmemalloc)
438481 skb->pfmemalloc = 1;
439482 skb->head_frag = 1;
....@@ -498,7 +541,6 @@
498541 return NULL;
499542 }
500543
501
- /* use OR instead of assignment to avoid clearing of bits in mask */
502544 if (nc->page.pfmemalloc)
503545 skb->pfmemalloc = 1;
504546 skb->head_frag = 1;
....@@ -619,7 +661,6 @@
619661 void skb_release_head_state(struct sk_buff *skb)
620662 {
621663 skb_dst_drop(skb);
622
- secpath_reset(skb);
623664 if (skb->destructor) {
624665 WARN_ON(in_irq());
625666 skb->destructor(skb);
....@@ -627,9 +668,7 @@
627668 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
628669 nf_conntrack_put(skb_nfct(skb));
629670 #endif
630
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
631
- nf_bridge_put(skb->nf_bridge);
632
-#endif
671
+ skb_ext_put(skb);
633672 }
634673
635674 /* Free everything but the sk_buff shell. */
....@@ -668,6 +707,7 @@
668707 if (!skb_unref(skb))
669708 return;
670709
710
+ trace_android_vh_kfree_skb(skb);
671711 trace_kfree_skb(skb, __builtin_return_address(0));
672712 __kfree_skb(skb);
673713 }
....@@ -684,6 +724,101 @@
684724 }
685725 EXPORT_SYMBOL(kfree_skb_list);
686726
727
+/* Dump skb information and contents.
728
+ *
729
+ * Must only be called from net_ratelimit()-ed paths.
730
+ *
731
+ * Dumps whole packets if full_pkt, only headers otherwise.
732
+ */
733
+void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
734
+{
735
+ struct skb_shared_info *sh = skb_shinfo(skb);
736
+ struct net_device *dev = skb->dev;
737
+ struct sock *sk = skb->sk;
738
+ struct sk_buff *list_skb;
739
+ bool has_mac, has_trans;
740
+ int headroom, tailroom;
741
+ int i, len, seg_len;
742
+
743
+ if (full_pkt)
744
+ len = skb->len;
745
+ else
746
+ len = min_t(int, skb->len, MAX_HEADER + 128);
747
+
748
+ headroom = skb_headroom(skb);
749
+ tailroom = skb_tailroom(skb);
750
+
751
+ has_mac = skb_mac_header_was_set(skb);
752
+ has_trans = skb_transport_header_was_set(skb);
753
+
754
+ printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
755
+ "mac=(%d,%d) net=(%d,%d) trans=%d\n"
756
+ "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
757
+ "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
758
+ "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
759
+ level, skb->len, headroom, skb_headlen(skb), tailroom,
760
+ has_mac ? skb->mac_header : -1,
761
+ has_mac ? skb_mac_header_len(skb) : -1,
762
+ skb->network_header,
763
+ has_trans ? skb_network_header_len(skb) : -1,
764
+ has_trans ? skb->transport_header : -1,
765
+ sh->tx_flags, sh->nr_frags,
766
+ sh->gso_size, sh->gso_type, sh->gso_segs,
767
+ skb->csum, skb->ip_summed, skb->csum_complete_sw,
768
+ skb->csum_valid, skb->csum_level,
769
+ skb->hash, skb->sw_hash, skb->l4_hash,
770
+ ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
771
+
772
+ if (dev)
773
+ printk("%sdev name=%s feat=%pNF\n",
774
+ level, dev->name, &dev->features);
775
+ if (sk)
776
+ printk("%ssk family=%hu type=%u proto=%u\n",
777
+ level, sk->sk_family, sk->sk_type, sk->sk_protocol);
778
+
779
+ if (full_pkt && headroom)
780
+ print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET,
781
+ 16, 1, skb->head, headroom, false);
782
+
783
+ seg_len = min_t(int, skb_headlen(skb), len);
784
+ if (seg_len)
785
+ print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET,
786
+ 16, 1, skb->data, seg_len, false);
787
+ len -= seg_len;
788
+
789
+ if (full_pkt && tailroom)
790
+ print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET,
791
+ 16, 1, skb_tail_pointer(skb), tailroom, false);
792
+
793
+ for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) {
794
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
795
+ u32 p_off, p_len, copied;
796
+ struct page *p;
797
+ u8 *vaddr;
798
+
799
+ skb_frag_foreach_page(frag, skb_frag_off(frag),
800
+ skb_frag_size(frag), p, p_off, p_len,
801
+ copied) {
802
+ seg_len = min_t(int, p_len, len);
803
+ vaddr = kmap_atomic(p);
804
+ print_hex_dump(level, "skb frag: ",
805
+ DUMP_PREFIX_OFFSET,
806
+ 16, 1, vaddr + p_off, seg_len, false);
807
+ kunmap_atomic(vaddr);
808
+ len -= seg_len;
809
+ if (!len)
810
+ break;
811
+ }
812
+ }
813
+
814
+ if (full_pkt && skb_has_frag_list(skb)) {
815
+ printk("skb fraglist:\n");
816
+ skb_walk_frags(skb, list_skb)
817
+ skb_dump(level, list_skb, true);
818
+ }
819
+}
820
+EXPORT_SYMBOL(skb_dump);
821
+
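
A short, hedged example of calling the new skb_dump() from a rate-limited error path; drop_and_dump() is an illustrative helper, not part of the diff.

/* Sketch: dump only the headers of a packet we are about to drop,
 * honouring the net_ratelimit() requirement documented above.
 */
static void drop_and_dump(struct sk_buff *skb)
{
	if (net_ratelimit())
		skb_dump(KERN_WARNING, skb, false);	/* headers only */
	kfree_skb(skb);
}
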
687822 /**
688823 * skb_tx_error - report an sk_buff xmit error
689824 * @skb: buffer that triggered an error
....@@ -697,6 +832,7 @@
697832 }
698833 EXPORT_SYMBOL(skb_tx_error);
699834
835
+#ifdef CONFIG_TRACEPOINTS
700836 /**
701837 * consume_skb - free an skbuff
702838 * @skb: buffer to free
....@@ -714,6 +850,7 @@
714850 __kfree_skb(skb);
715851 }
716852 EXPORT_SYMBOL(consume_skb);
853
+#endif
717854
718855 /**
719856 * consume_stateless_skb - free an skbuff, assuming it is stateless
....@@ -770,9 +907,6 @@
770907
771908 void napi_consume_skb(struct sk_buff *skb, int budget)
772909 {
773
- if (unlikely(!skb))
774
- return;
775
-
776910 /* Zero budget indicate non-NAPI context called us, like netpoll */
777911 if (unlikely(!budget)) {
778912 dev_consume_skb_any(skb);
....@@ -809,9 +943,7 @@
809943 new->dev = old->dev;
810944 memcpy(new->cb, old->cb, sizeof(old->cb));
811945 skb_dst_copy(new, old);
812
-#ifdef CONFIG_XFRM
813
- new->sp = secpath_get(old->sp);
814
-#endif
946
+ __skb_ext_copy(new, old);
815947 __nf_copy(new, old, false);
816948
817949 /* Note : this field could be in headers_start/headers_end section
....@@ -887,6 +1019,31 @@
8871019 return n;
8881020 #undef C
8891021 }
1022
+
1023
+/**
1024
+ * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
1025
+ * @first: first sk_buff of the msg
1026
+ */
1027
+struct sk_buff *alloc_skb_for_msg(struct sk_buff *first)
1028
+{
1029
+ struct sk_buff *n;
1030
+
1031
+ n = alloc_skb(0, GFP_ATOMIC);
1032
+ if (!n)
1033
+ return NULL;
1034
+
1035
+ n->len = first->len;
1036
+ n->data_len = first->len;
1037
+ n->truesize = first->truesize;
1038
+
1039
+ skb_shinfo(n)->frag_list = first;
1040
+
1041
+ __copy_skb_header(n, first);
1042
+ n->destructor = NULL;
1043
+
1044
+ return n;
1045
+}
1046
+EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
8901047
8911048 /**
8921049 * skb_morph - morph one skb into another
....@@ -1012,7 +1169,11 @@
10121169 uarg->len++;
10131170 uarg->bytelen = bytelen;
10141171 atomic_set(&sk->sk_zckey, ++next);
1015
- sock_zerocopy_get(uarg);
1172
+
1173
+ /* no extra ref when appending to datagram (MSG_MORE) */
1174
+ if (sk->sk_type == SOCK_STREAM)
1175
+ sock_zerocopy_get(uarg);
1176
+
10161177 return uarg;
10171178 }
10181179 }
....@@ -1102,7 +1263,7 @@
11021263 }
11031264 EXPORT_SYMBOL_GPL(sock_zerocopy_put);
11041265
1105
-void sock_zerocopy_put_abort(struct ubuf_info *uarg)
1266
+void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
11061267 {
11071268 if (uarg) {
11081269 struct sock *sk = skb_from_uarg(uarg)->sk;
....@@ -1110,13 +1271,17 @@
11101271 atomic_dec(&sk->sk_zckey);
11111272 uarg->len--;
11121273
1113
- sock_zerocopy_put(uarg);
1274
+ if (have_uref)
1275
+ sock_zerocopy_put(uarg);
11141276 }
11151277 }
11161278 EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
11171279
1118
-extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
1119
- struct iov_iter *from, size_t length);
1280
+int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
1281
+{
1282
+ return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
1283
+}
1284
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
11201285
11211286 int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
11221287 struct msghdr *msg, int len,
....@@ -1144,7 +1309,7 @@
11441309 return err;
11451310 }
11461311
1147
- skb_zcopy_set(skb, uarg);
1312
+ skb_zcopy_set(skb, uarg, NULL);
11481313 return skb->len - orig_len;
11491314 }
11501315 EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
....@@ -1164,7 +1329,7 @@
11641329 if (skb_copy_ubufs(nskb, GFP_ATOMIC))
11651330 return -EIO;
11661331 }
1167
- skb_zcopy_set(nskb, skb_uarg(orig));
1332
+ skb_zcopy_set(nskb, skb_uarg(orig), NULL);
11681333 }
11691334 return 0;
11701335 }
....@@ -1220,7 +1385,7 @@
12201385 struct page *p;
12211386 u8 *vaddr;
12221387
1223
- skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
1388
+ skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
12241389 p, p_off, p_len, copied) {
12251390 u32 copy, done = 0;
12261391 vaddr = kmap_atomic(p);
....@@ -1510,11 +1675,10 @@
15101675 skb->head = data;
15111676 skb->head_frag = 0;
15121677 skb->data += off;
1678
+
1679
+ skb_set_end_offset(skb, size);
15131680 #ifdef NET_SKBUFF_DATA_USES_OFFSET
1514
- skb->end = size;
15151681 off = nhead;
1516
-#else
1517
- skb->end = skb->head + size;
15181682 #endif
15191683 skb->tail += off;
15201684 skb_headers_offset_update(skb, nhead);
....@@ -1561,6 +1725,38 @@
15611725 return skb2;
15621726 }
15631727 EXPORT_SYMBOL(skb_realloc_headroom);
1728
+
1729
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
1730
+{
1731
+ unsigned int saved_end_offset, saved_truesize;
1732
+ struct skb_shared_info *shinfo;
1733
+ int res;
1734
+
1735
+ saved_end_offset = skb_end_offset(skb);
1736
+ saved_truesize = skb->truesize;
1737
+
1738
+ res = pskb_expand_head(skb, 0, 0, pri);
1739
+ if (res)
1740
+ return res;
1741
+
1742
+ skb->truesize = saved_truesize;
1743
+
1744
+ if (likely(skb_end_offset(skb) == saved_end_offset))
1745
+ return 0;
1746
+
1747
+ shinfo = skb_shinfo(skb);
1748
+
1749
+ /* We are about to change back skb->end,
1750
+ * we need to move skb_shinfo() to its new location.
1751
+ */
1752
+ memmove(skb->head + saved_end_offset,
1753
+ shinfo,
1754
+ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
1755
+
1756
+ skb_set_end_offset(skb, saved_end_offset);
1757
+
1758
+ return 0;
1759
+}
15641760
15651761 /**
15661762 * skb_copy_expand - copy and expand sk_buff
....@@ -1944,8 +2140,6 @@
19442140 struct sk_buff *insp = NULL;
19452141
19462142 do {
1947
- BUG_ON(!list);
1948
-
19492143 if (list->len <= eat) {
19502144 /* Eaten as whole. */
19512145 eat -= list->len;
....@@ -1953,6 +2147,9 @@
19532147 insp = list;
19542148 } else {
19552149 /* Eaten partially. */
2150
+ if (skb_is_gso(skb) && !list->head_frag &&
2151
+ skb_headlen(list))
2152
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
19562153
19572154 if (skb_shared(list)) {
19582155 /* Sucks! We need to fork list. :-( */
....@@ -1997,10 +2194,12 @@
19972194 skb_frag_unref(skb, i);
19982195 eat -= size;
19992196 } else {
2000
- skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
2197
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
2198
+
2199
+ *frag = skb_shinfo(skb)->frags[i];
20012200 if (eat) {
2002
- skb_shinfo(skb)->frags[k].page_offset += eat;
2003
- skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
2201
+ skb_frag_off_add(frag, eat);
2202
+ skb_frag_size_sub(frag, eat);
20042203 if (!i)
20052204 goto end;
20062205 eat = 0;
....@@ -2072,7 +2271,7 @@
20722271 copy = len;
20732272
20742273 skb_frag_foreach_page(f,
2075
- f->page_offset + offset - start,
2274
+ skb_frag_off(f) + offset - start,
20762275 copy, p, p_off, p_len, copied) {
20772276 vaddr = kmap_atomic(p);
20782277 memcpy(to + copied, vaddr + p_off, p_len);
....@@ -2248,7 +2447,7 @@
22482447 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
22492448
22502449 if (__splice_segment(skb_frag_page(f),
2251
- f->page_offset, skb_frag_size(f),
2450
+ skb_frag_off(f), skb_frag_size(f),
22522451 offset, len, spd, false, sk, pipe))
22532452 return true;
22542453 }
....@@ -2338,20 +2537,20 @@
23382537 for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
23392538 skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
23402539
2341
- if (offset < frag->size)
2540
+ if (offset < skb_frag_size(frag))
23422541 break;
23432542
2344
- offset -= frag->size;
2543
+ offset -= skb_frag_size(frag);
23452544 }
23462545
23472546 for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
23482547 skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
23492548
2350
- slen = min_t(size_t, len, frag->size - offset);
2549
+ slen = min_t(size_t, len, skb_frag_size(frag) - offset);
23512550
23522551 while (slen) {
2353
- ret = kernel_sendpage_locked(sk, frag->page.p,
2354
- frag->page_offset + offset,
2552
+ ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
2553
+ skb_frag_off(frag) + offset,
23552554 slen, MSG_DONTWAIT);
23562555 if (ret <= 0)
23572556 goto error;
....@@ -2385,19 +2584,6 @@
23852584 return orig_len == len ? ret : orig_len - len;
23862585 }
23872586 EXPORT_SYMBOL_GPL(skb_send_sock_locked);
2388
-
2389
-/* Send skb data on a socket. */
2390
-int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
2391
-{
2392
- int ret = 0;
2393
-
2394
- lock_sock(sk);
2395
- ret = skb_send_sock_locked(sk, skb, offset, len);
2396
- release_sock(sk);
2397
-
2398
- return ret;
2399
-}
2400
-EXPORT_SYMBOL_GPL(skb_send_sock);
24012587
24022588 /**
24032589 * skb_store_bits - store bits from kernel buffer to skb
....@@ -2446,7 +2632,7 @@
24462632 copy = len;
24472633
24482634 skb_frag_foreach_page(frag,
2449
- frag->page_offset + offset - start,
2635
+ skb_frag_off(frag) + offset - start,
24502636 copy, p, p_off, p_len, copied) {
24512637 vaddr = kmap_atomic(p);
24522638 memcpy(vaddr + p_off, from + copied, p_len);
....@@ -2501,7 +2687,8 @@
25012687 if (copy > 0) {
25022688 if (copy > len)
25032689 copy = len;
2504
- csum = ops->update(skb->data + offset, copy, csum);
2690
+ csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
2691
+ skb->data + offset, copy, csum);
25052692 if ((len -= copy) == 0)
25062693 return csum;
25072694 offset += copy;
....@@ -2525,12 +2712,16 @@
25252712 copy = len;
25262713
25272714 skb_frag_foreach_page(frag,
2528
- frag->page_offset + offset - start,
2715
+ skb_frag_off(frag) + offset - start,
25292716 copy, p, p_off, p_len, copied) {
25302717 vaddr = kmap_atomic(p);
2531
- csum2 = ops->update(vaddr + p_off, p_len, 0);
2718
+ csum2 = INDIRECT_CALL_1(ops->update,
2719
+ csum_partial_ext,
2720
+ vaddr + p_off, p_len, 0);
25322721 kunmap_atomic(vaddr);
2533
- csum = ops->combine(csum, csum2, pos, p_len);
2722
+ csum = INDIRECT_CALL_1(ops->combine,
2723
+ csum_block_add_ext, csum,
2724
+ csum2, pos, p_len);
25342725 pos += p_len;
25352726 }
25362727
....@@ -2553,7 +2744,8 @@
25532744 copy = len;
25542745 csum2 = __skb_checksum(frag_iter, offset - start,
25552746 copy, 0, ops);
2556
- csum = ops->combine(csum, csum2, pos, copy);
2747
+ csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
2748
+ csum, csum2, pos, copy);
25572749 if ((len -= copy) == 0)
25582750 return csum;
25592751 offset += copy;
....@@ -2582,19 +2774,20 @@
25822774 /* Both of above in one bottle. */
25832775
25842776 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
2585
- u8 *to, int len, __wsum csum)
2777
+ u8 *to, int len)
25862778 {
25872779 int start = skb_headlen(skb);
25882780 int i, copy = start - offset;
25892781 struct sk_buff *frag_iter;
25902782 int pos = 0;
2783
+ __wsum csum = 0;
25912784
25922785 /* Copy header. */
25932786 if (copy > 0) {
25942787 if (copy > len)
25952788 copy = len;
25962789 csum = csum_partial_copy_nocheck(skb->data + offset, to,
2597
- copy, csum);
2790
+ copy);
25982791 if ((len -= copy) == 0)
25992792 return csum;
26002793 offset += copy;
....@@ -2619,12 +2812,12 @@
26192812 copy = len;
26202813
26212814 skb_frag_foreach_page(frag,
2622
- frag->page_offset + offset - start,
2815
+ skb_frag_off(frag) + offset - start,
26232816 copy, p, p_off, p_len, copied) {
26242817 vaddr = kmap_atomic(p);
26252818 csum2 = csum_partial_copy_nocheck(vaddr + p_off,
26262819 to + copied,
2627
- p_len, 0);
2820
+ p_len);
26282821 kunmap_atomic(vaddr);
26292822 csum = csum_block_add(csum, csum2, pos);
26302823 pos += p_len;
....@@ -2650,7 +2843,7 @@
26502843 copy = len;
26512844 csum2 = skb_copy_and_csum_bits(frag_iter,
26522845 offset - start,
2653
- to, copy, 0);
2846
+ to, copy);
26542847 csum = csum_block_add(csum, csum2, pos);
26552848 if ((len -= copy) == 0)
26562849 return csum;
....@@ -2664,6 +2857,65 @@
26642857 return csum;
26652858 }
26662859 EXPORT_SYMBOL(skb_copy_and_csum_bits);
2860
+
2861
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
2862
+{
2863
+ __sum16 sum;
2864
+
2865
+ sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
2866
+ /* See comments in __skb_checksum_complete(). */
2867
+ if (likely(!sum)) {
2868
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
2869
+ !skb->csum_complete_sw)
2870
+ netdev_rx_csum_fault(skb->dev, skb);
2871
+ }
2872
+ if (!skb_shared(skb))
2873
+ skb->csum_valid = !sum;
2874
+ return sum;
2875
+}
2876
+EXPORT_SYMBOL(__skb_checksum_complete_head);
2877
+
2878
+/* This function assumes skb->csum already holds pseudo header's checksum,
2879
+ * which has been changed from the hardware checksum, for example, by
2880
+ * __skb_checksum_validate_complete(). And, the original skb->csum must
2881
+ * have been validated unsuccessfully for CHECKSUM_COMPLETE case.
2882
+ *
2883
+ * It returns non-zero if the recomputed checksum is still invalid, otherwise
2884
+ * zero. The new checksum is stored back into skb->csum unless the skb is
2885
+ * shared.
2886
+ */
2887
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
2888
+{
2889
+ __wsum csum;
2890
+ __sum16 sum;
2891
+
2892
+ csum = skb_checksum(skb, 0, skb->len, 0);
2893
+
2894
+ sum = csum_fold(csum_add(skb->csum, csum));
2895
+ /* This check is inverted, because we already knew the hardware
2896
+ * checksum is invalid before calling this function. So, if the
2897
+ * re-computed checksum is valid instead, then we have a mismatch
2898
+ * between the original skb->csum and skb_checksum(). This means either
2899
+ * the original hardware checksum is incorrect or we screw up skb->csum
2900
+ * when moving skb->data around.
2901
+ */
2902
+ if (likely(!sum)) {
2903
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
2904
+ !skb->csum_complete_sw)
2905
+ netdev_rx_csum_fault(skb->dev, skb);
2906
+ }
2907
+
2908
+ if (!skb_shared(skb)) {
2909
+ /* Save full packet checksum */
2910
+ skb->csum = csum;
2911
+ skb->ip_summed = CHECKSUM_COMPLETE;
2912
+ skb->csum_complete_sw = 1;
2913
+ skb->csum_valid = !sum;
2914
+ }
2915
+
2916
+ return sum;
2917
+}
2918
+EXPORT_SYMBOL(__skb_checksum_complete);
26672919
26682920 static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
26692921 {
....@@ -2779,11 +3031,15 @@
27793031 skb_zerocopy_clone(to, from, GFP_ATOMIC);
27803032
27813033 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
3034
+ int size;
3035
+
27823036 if (!len)
27833037 break;
27843038 skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
2785
- skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
2786
- len -= skb_shinfo(to)->frags[j].size;
3039
+ size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]),
3040
+ len);
3041
+ skb_frag_size_set(&skb_shinfo(to)->frags[j], size);
3042
+ len -= size;
27873043 skb_frag_ref(to, j);
27883044 j++;
27893045 }
....@@ -2810,7 +3066,7 @@
28103066 csum = 0;
28113067 if (csstart != skb->len)
28123068 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
2813
- skb->len - csstart, 0);
3069
+ skb->len - csstart);
28143070
28153071 if (skb->ip_summed == CHECKSUM_PARTIAL) {
28163072 long csstuff = csstart + skb->csum_offset;
....@@ -2985,28 +3241,6 @@
29853241 }
29863242 EXPORT_SYMBOL(skb_append);
29873243
2988
-/**
2989
- * skb_insert - insert a buffer
2990
- * @old: buffer to insert before
2991
- * @newsk: buffer to insert
2992
- * @list: list to use
2993
- *
2994
- * Place a packet before a given packet in a list. The list locks are
2995
- * taken and this function is atomic with respect to other list locked
2996
- * calls.
2997
- *
2998
- * A buffer cannot be placed on two lists at the same time.
2999
- */
3000
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
3001
-{
3002
- unsigned long flags;
3003
-
3004
- spin_lock_irqsave(&list->lock, flags);
3005
- __skb_insert(newsk, old->prev, old, list);
3006
- spin_unlock_irqrestore(&list->lock, flags);
3007
-}
3008
-EXPORT_SYMBOL(skb_insert);
3009
-
30103244 static inline void skb_split_inside_header(struct sk_buff *skb,
30113245 struct sk_buff* skb1,
30123246 const u32 len, const int pos)
....@@ -3056,7 +3290,7 @@
30563290 * 2. Split is accurately. We make this.
30573291 */
30583292 skb_frag_ref(skb, i);
3059
- skb_shinfo(skb1)->frags[0].page_offset += len - pos;
3293
+ skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos);
30603294 skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
30613295 skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
30623296 skb_shinfo(skb)->nr_frags++;
....@@ -3095,19 +3329,7 @@
30953329 */
30963330 static int skb_prepare_for_shift(struct sk_buff *skb)
30973331 {
3098
- int ret = 0;
3099
-
3100
- if (skb_cloned(skb)) {
3101
- /* Save and restore truesize: pskb_expand_head() may reallocate
3102
- * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
3103
- * cannot change truesize at this point.
3104
- */
3105
- unsigned int save_truesize = skb->truesize;
3106
-
3107
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3108
- skb->truesize = save_truesize;
3109
- }
3110
- return ret;
3332
+ return skb_unclone_keeptruesize(skb, GFP_ATOMIC);
31113333 }
31123334
31133335 /**
....@@ -3131,7 +3353,7 @@
31313353 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
31323354 {
31333355 int from, to, merge, todo;
3134
- struct skb_frag_struct *fragfrom, *fragto;
3356
+ skb_frag_t *fragfrom, *fragto;
31353357
31363358 BUG_ON(shiftlen > skb->len);
31373359
....@@ -3150,7 +3372,7 @@
31503372 */
31513373 if (!to ||
31523374 !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
3153
- fragfrom->page_offset)) {
3375
+ skb_frag_off(fragfrom))) {
31543376 merge = -1;
31553377 } else {
31563378 merge = to - 1;
....@@ -3167,7 +3389,7 @@
31673389
31683390 skb_frag_size_add(fragto, shiftlen);
31693391 skb_frag_size_sub(fragfrom, shiftlen);
3170
- fragfrom->page_offset += shiftlen;
3392
+ skb_frag_off_add(fragfrom, shiftlen);
31713393
31723394 goto onlymerged;
31733395 }
....@@ -3198,11 +3420,11 @@
31983420
31993421 } else {
32003422 __skb_frag_ref(fragfrom);
3201
- fragto->page = fragfrom->page;
3202
- fragto->page_offset = fragfrom->page_offset;
3423
+ skb_frag_page_copy(fragto, fragfrom);
3424
+ skb_frag_off_copy(fragto, fragfrom);
32033425 skb_frag_size_set(fragto, todo);
32043426
3205
- fragfrom->page_offset += todo;
3427
+ skb_frag_off_add(fragfrom, todo);
32063428 skb_frag_size_sub(fragfrom, todo);
32073429 todo = 0;
32083430
....@@ -3327,7 +3549,7 @@
33273549 if (!st->frag_data)
33283550 st->frag_data = kmap_atomic(skb_frag_page(frag));
33293551
3330
- *data = (u8 *) st->frag_data + frag->page_offset +
3552
+ *data = (u8 *) st->frag_data + skb_frag_off(frag) +
33313553 (abs_offset - st->stepped_offset);
33323554
33333555 return block_limit - abs_offset;
....@@ -3417,64 +3639,6 @@
34173639 }
34183640 EXPORT_SYMBOL(skb_find_text);
34193641
3420
-/**
3421
- * skb_append_datato_frags - append the user data to a skb
3422
- * @sk: sock structure
3423
- * @skb: skb structure to be appended with user data.
3424
- * @getfrag: call back function to be used for getting the user data
3425
- * @from: pointer to user message iov
3426
- * @length: length of the iov message
3427
- *
3428
- * Description: This procedure append the user data in the fragment part
3429
- * of the skb if any page alloc fails user this procedure returns -ENOMEM
3430
- */
3431
-int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
3432
- int (*getfrag)(void *from, char *to, int offset,
3433
- int len, int odd, struct sk_buff *skb),
3434
- void *from, int length)
3435
-{
3436
- int frg_cnt = skb_shinfo(skb)->nr_frags;
3437
- int copy;
3438
- int offset = 0;
3439
- int ret;
3440
- struct page_frag *pfrag = &current->task_frag;
3441
-
3442
- do {
3443
- /* Return error if we don't have space for new frag */
3444
- if (frg_cnt >= MAX_SKB_FRAGS)
3445
- return -EMSGSIZE;
3446
-
3447
- if (!sk_page_frag_refill(sk, pfrag))
3448
- return -ENOMEM;
3449
-
3450
- /* copy the user data to page */
3451
- copy = min_t(int, length, pfrag->size - pfrag->offset);
3452
-
3453
- ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
3454
- offset, copy, 0, skb);
3455
- if (ret < 0)
3456
- return -EFAULT;
3457
-
3458
- /* copy was successful so update the size parameters */
3459
- skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
3460
- copy);
3461
- frg_cnt++;
3462
- pfrag->offset += copy;
3463
- get_page(pfrag->page);
3464
-
3465
- skb->truesize += copy;
3466
- refcount_add(copy, &sk->sk_wmem_alloc);
3467
- skb->len += copy;
3468
- skb->data_len += copy;
3469
- offset += copy;
3470
- length -= copy;
3471
-
3472
- } while (length > 0);
3473
-
3474
- return 0;
3475
-}
3476
-EXPORT_SYMBOL(skb_append_datato_frags);
3477
-
34783642 int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
34793643 int offset, size_t size)
34803644 {
....@@ -3521,11 +3685,126 @@
35213685 struct page *page;
35223686
35233687 page = virt_to_head_page(frag_skb->head);
3524
- head_frag.page.p = page;
3525
- head_frag.page_offset = frag_skb->data -
3526
- (unsigned char *)page_address(page);
3527
- head_frag.size = skb_headlen(frag_skb);
3688
+ __skb_frag_set_page(&head_frag, page);
3689
+ skb_frag_off_set(&head_frag, frag_skb->data -
3690
+ (unsigned char *)page_address(page));
3691
+ skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
35283692 return head_frag;
3693
+}
3694
+
3695
+struct sk_buff *skb_segment_list(struct sk_buff *skb,
3696
+ netdev_features_t features,
3697
+ unsigned int offset)
3698
+{
3699
+ struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
3700
+ unsigned int tnl_hlen = skb_tnl_header_len(skb);
3701
+ unsigned int delta_truesize = 0;
3702
+ unsigned int delta_len = 0;
3703
+ struct sk_buff *tail = NULL;
3704
+ struct sk_buff *nskb, *tmp;
3705
+ int len_diff, err;
3706
+
3707
+ skb_push(skb, -skb_network_offset(skb) + offset);
3708
+
3709
+ /* Ensure the head is writeable before touching the shared info */
3710
+ err = skb_unclone(skb, GFP_ATOMIC);
3711
+ if (err)
3712
+ goto err_linearize;
3713
+
3714
+ skb_shinfo(skb)->frag_list = NULL;
3715
+
3716
+ while (list_skb) {
3717
+ nskb = list_skb;
3718
+ list_skb = list_skb->next;
3719
+
3720
+ err = 0;
3721
+ delta_truesize += nskb->truesize;
3722
+ if (skb_shared(nskb)) {
3723
+ tmp = skb_clone(nskb, GFP_ATOMIC);
3724
+ if (tmp) {
3725
+ consume_skb(nskb);
3726
+ nskb = tmp;
3727
+ err = skb_unclone(nskb, GFP_ATOMIC);
3728
+ } else {
3729
+ err = -ENOMEM;
3730
+ }
3731
+ }
3732
+
3733
+ if (!tail)
3734
+ skb->next = nskb;
3735
+ else
3736
+ tail->next = nskb;
3737
+
3738
+ if (unlikely(err)) {
3739
+ nskb->next = list_skb;
3740
+ goto err_linearize;
3741
+ }
3742
+
3743
+ tail = nskb;
3744
+
3745
+ delta_len += nskb->len;
3746
+
3747
+ skb_push(nskb, -skb_network_offset(nskb) + offset);
3748
+
3749
+ skb_release_head_state(nskb);
3750
+ len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
3751
+ __copy_skb_header(nskb, skb);
3752
+
3753
+ skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
3754
+ nskb->transport_header += len_diff;
3755
+ skb_copy_from_linear_data_offset(skb, -tnl_hlen,
3756
+ nskb->data - tnl_hlen,
3757
+ offset + tnl_hlen);
3758
+
3759
+ if (skb_needs_linearize(nskb, features) &&
3760
+ __skb_linearize(nskb))
3761
+ goto err_linearize;
3762
+ }
3763
+
3764
+ skb->truesize = skb->truesize - delta_truesize;
3765
+ skb->data_len = skb->data_len - delta_len;
3766
+ skb->len = skb->len - delta_len;
3767
+
3768
+ skb_gso_reset(skb);
3769
+
3770
+ skb->prev = tail;
3771
+
3772
+ if (skb_needs_linearize(skb, features) &&
3773
+ __skb_linearize(skb))
3774
+ goto err_linearize;
3775
+
3776
+ skb_get(skb);
3777
+
3778
+ return skb;
3779
+
3780
+err_linearize:
3781
+ kfree_skb_list(skb->next);
3782
+ skb->next = NULL;
3783
+ return ERR_PTR(-ENOMEM);
3784
+}
3785
+EXPORT_SYMBOL_GPL(skb_segment_list);
3786
+
3787
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
3788
+{
3789
+ if (unlikely(p->len + skb->len >= 65536))
3790
+ return -E2BIG;
3791
+
3792
+ if (NAPI_GRO_CB(p)->last == p)
3793
+ skb_shinfo(p)->frag_list = skb;
3794
+ else
3795
+ NAPI_GRO_CB(p)->last->next = skb;
3796
+
3797
+ skb_pull(skb, skb_gro_offset(skb));
3798
+
3799
+ NAPI_GRO_CB(p)->last = skb;
3800
+ NAPI_GRO_CB(p)->count++;
3801
+ p->data_len += skb->len;
3802
+ p->truesize += skb->truesize;
3803
+ p->len += skb->len;
3804
+
3805
+ NAPI_GRO_CB(skb)->same_flow = 1;
3806
+
3807
+ return 0;
35293808 }
35303809
35313810 /**
....@@ -3543,44 +3822,44 @@
35433822 struct sk_buff *segs = NULL;
35443823 struct sk_buff *tail = NULL;
35453824 struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
3546
- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
35473825 unsigned int mss = skb_shinfo(head_skb)->gso_size;
35483826 unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
3549
- struct sk_buff *frag_skb = head_skb;
35503827 unsigned int offset = doffset;
35513828 unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
35523829 unsigned int partial_segs = 0;
35533830 unsigned int headroom;
35543831 unsigned int len = head_skb->len;
3832
+ struct sk_buff *frag_skb;
3833
+ skb_frag_t *frag;
35553834 __be16 proto;
35563835 bool csum, sg;
3557
- int nfrags = skb_shinfo(head_skb)->nr_frags;
35583836 int err = -ENOMEM;
35593837 int i = 0;
3560
- int pos;
3561
- int dummy;
3838
+ int nfrags, pos;
35623839
3563
- if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
3564
- (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
3565
- /* gso_size is untrusted, and we have a frag_list with a linear
3566
- * non head_frag head.
3567
- *
3568
- * (we assume checking the first list_skb member suffices;
3569
- * i.e if either of the list_skb members have non head_frag
3570
- * head, then the first one has too).
3571
- *
3572
- * If head_skb's headlen does not fit requested gso_size, it
3573
- * means that the frag_list members do NOT terminate on exact
3574
- * gso_size boundaries. Hence we cannot perform skb_frag_t page
3575
- * sharing. Therefore we must fallback to copying the frag_list
3576
- * skbs; we do so by disabling SG.
3577
- */
3578
- if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
3579
- features &= ~NETIF_F_SG;
3840
+ if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
3841
+ mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
3842
+ struct sk_buff *check_skb;
3843
+
3844
+ for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
3845
+ if (skb_headlen(check_skb) && !check_skb->head_frag) {
3846
+ /* gso_size is untrusted, and we have a frag_list with
3847
+ * a linear non head_frag item.
3848
+ *
3849
+ * If head_skb's headlen does not fit requested gso_size,
3850
+ * it means that the frag_list members do NOT terminate
3851
+ * on exact gso_size boundaries. Hence we cannot perform
3852
+ * skb_frag_t page sharing. Therefore we must fallback to
3853
+ * copying the frag_list skbs; we do so by disabling SG.
3854
+ */
3855
+ features &= ~NETIF_F_SG;
3856
+ break;
3857
+ }
3858
+ }
35803859 }
35813860
35823861 __skb_push(head_skb, doffset);
3583
- proto = skb_network_protocol(head_skb, &dummy);
3862
+ proto = skb_network_protocol(head_skb, NULL);
35843863 if (unlikely(!proto))
35853864 return ERR_PTR(-EINVAL);
35863865
....@@ -3633,6 +3912,13 @@
36333912 headroom = skb_headroom(head_skb);
36343913 pos = skb_headlen(head_skb);
36353914
3915
+ if (skb_orphan_frags(head_skb, GFP_ATOMIC))
3916
+ return ERR_PTR(-ENOMEM);
3917
+
3918
+ nfrags = skb_shinfo(head_skb)->nr_frags;
3919
+ frag = skb_shinfo(head_skb)->frags;
3920
+ frag_skb = head_skb;
3921
+
36363922 do {
36373923 struct sk_buff *nskb;
36383924 skb_frag_t *nskb_frag;
....@@ -3657,6 +3943,10 @@
36573943 (skb_headlen(list_skb) == len || sg)) {
36583944 BUG_ON(skb_headlen(list_skb) > len);
36593945
3946
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
3947
+ if (unlikely(!nskb))
3948
+ goto err;
3949
+
36603950 i = 0;
36613951 nfrags = skb_shinfo(list_skb)->nr_frags;
36623952 frag = skb_shinfo(list_skb)->frags;
....@@ -3675,11 +3965,7 @@
36753965 frag++;
36763966 }
36773967
3678
- nskb = skb_clone(list_skb, GFP_ATOMIC);
36793968 list_skb = list_skb->next;
3680
-
3681
- if (unlikely(!nskb))
3682
- goto err;
36833969
36843970 if (unlikely(pskb_trim(nskb, len))) {
36853971 kfree_skb(nskb);
....@@ -3726,14 +4012,20 @@
37264012 goto perform_csum_check;
37274013
37284014 if (!sg) {
3729
- if (!nskb->remcsum_offload)
3730
- nskb->ip_summed = CHECKSUM_NONE;
3731
- SKB_GSO_CB(nskb)->csum =
3732
- skb_copy_and_csum_bits(head_skb, offset,
3733
- skb_put(nskb, len),
3734
- len, 0);
3735
- SKB_GSO_CB(nskb)->csum_start =
3736
- skb_headroom(nskb) + doffset;
4015
+ if (!csum) {
4016
+ if (!nskb->remcsum_offload)
4017
+ nskb->ip_summed = CHECKSUM_NONE;
4018
+ SKB_GSO_CB(nskb)->csum =
4019
+ skb_copy_and_csum_bits(head_skb, offset,
4020
+ skb_put(nskb,
4021
+ len),
4022
+ len);
4023
+ SKB_GSO_CB(nskb)->csum_start =
4024
+ skb_headroom(nskb) + doffset;
4025
+ } else {
4026
+ if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len))
4027
+ goto err;
4028
+ }
37374029 continue;
37384030 }
37394031
....@@ -3745,12 +4037,16 @@
37454037 skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
37464038 SKBTX_SHARED_FRAG;
37474039
3748
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
3749
- skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
4040
+ if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
37504041 goto err;
37514042
37524043 while (pos < offset + len) {
37534044 if (i >= nfrags) {
4045
+ if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
4046
+ skb_zerocopy_clone(nskb, list_skb,
4047
+ GFP_ATOMIC))
4048
+ goto err;
4049
+
37544050 i = 0;
37554051 nfrags = skb_shinfo(list_skb)->nr_frags;
37564052 frag = skb_shinfo(list_skb)->frags;
....@@ -3764,10 +4060,6 @@
37644060 i--;
37654061 frag--;
37664062 }
3767
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
3768
- skb_zerocopy_clone(nskb, frag_skb,
3769
- GFP_ATOMIC))
3770
- goto err;
37714063
37724064 list_skb = list_skb->next;
37734065 }
....@@ -3786,7 +4078,7 @@
37864078 size = skb_frag_size(nskb_frag);
37874079
37884080 if (pos < offset) {
3789
- nskb_frag->page_offset += offset - pos;
4081
+ skb_frag_off_add(nskb_frag, offset - pos);
37904082 skb_frag_size_sub(nskb_frag, offset - pos);
37914083 }
37924084
....@@ -3907,7 +4199,7 @@
39074199 *--frag = *--frag2;
39084200 } while (--i);
39094201
3910
- frag->page_offset += offset;
4202
+ skb_frag_off_add(frag, offset);
39114203 skb_frag_size_sub(frag, offset);
39124204
39134205 /* all fragments truesize : remove (head size + sk_buff) */
....@@ -3936,8 +4228,8 @@
39364228
39374229 pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
39384230
3939
- frag->page.p = page;
3940
- frag->page_offset = first_offset;
4231
+ __skb_frag_set_page(frag, page);
4232
+ skb_frag_off_set(frag, first_offset);
39414233 skb_frag_size_set(frag, first_size);
39424234
39434235 memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
....@@ -3953,7 +4245,7 @@
39534245 if (offset > headlen) {
39544246 unsigned int eat = offset - headlen;
39554247
3956
- skbinfo->frags[0].page_offset += eat;
4248
+ skb_frag_off_add(&skbinfo->frags[0], eat);
39574249 skb_frag_size_sub(&skbinfo->frags[0], eat);
39584250 skb->data_len -= eat;
39594251 skb->len -= eat;
....@@ -3983,7 +4275,58 @@
39834275 NAPI_GRO_CB(skb)->same_flow = 1;
39844276 return 0;
39854277 }
3986
-EXPORT_SYMBOL_GPL(skb_gro_receive);
4278
+
4279
+#ifdef CONFIG_SKB_EXTENSIONS
4280
+#define SKB_EXT_ALIGN_VALUE 8
4281
+#define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)
4282
+
4283
+static const u8 skb_ext_type_len[] = {
4284
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
4285
+ [SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
4286
+#endif
4287
+#ifdef CONFIG_XFRM
4288
+ [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
4289
+#endif
4290
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4291
+ [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
4292
+#endif
4293
+#if IS_ENABLED(CONFIG_MPTCP)
4294
+ [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
4295
+#endif
4296
+};
4297
+
4298
+static __always_inline unsigned int skb_ext_total_length(void)
4299
+{
4300
+ return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
4301
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
4302
+ skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
4303
+#endif
4304
+#ifdef CONFIG_XFRM
4305
+ skb_ext_type_len[SKB_EXT_SEC_PATH] +
4306
+#endif
4307
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4308
+ skb_ext_type_len[TC_SKB_EXT] +
4309
+#endif
4310
+#if IS_ENABLED(CONFIG_MPTCP)
4311
+ skb_ext_type_len[SKB_EXT_MPTCP] +
4312
+#endif
4313
+ 0;
4314
+}
4315
+
4316
+static void skb_extensions_init(void)
4317
+{
4318
+ BUILD_BUG_ON(SKB_EXT_NUM >= 8);
4319
+ BUILD_BUG_ON(skb_ext_total_length() > 255);
4320
+
4321
+ skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
4322
+ SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
4323
+ 0,
4324
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
4325
+ NULL);
4326
+}
4327
+#else
4328
+static void skb_extensions_init(void) {}
4329
+#endif
39874330
39884331 void __init skb_init(void)
39894332 {
....@@ -3999,6 +4342,7 @@
39994342 0,
40004343 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
40014344 NULL);
4345
+ skb_extensions_init();
40024346 }
40034347
40044348 static int
....@@ -4037,7 +4381,7 @@
40374381 if (copy > len)
40384382 copy = len;
40394383 sg_set_page(&sg[elt], skb_frag_page(frag), copy,
4040
- frag->page_offset+offset-start);
4384
+ skb_frag_off(frag) + offset - start);
40414385 elt++;
40424386 if (!(len -= copy))
40434387 return elt;
....@@ -4154,7 +4498,7 @@
41544498 * at the moment even if they are anonymous).
41554499 */
41564500 if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
4157
- __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
4501
+ !__pskb_pull_tail(skb, __skb_pagelen(skb)))
41584502 return -ENOMEM;
41594503
41604504 /* Easy case. Most of packets will go this way. */
....@@ -4258,7 +4602,7 @@
42584602 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
42594603 {
42604604 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
4261
- (unsigned int)sk->sk_rcvbuf)
4605
+ (unsigned int)READ_ONCE(sk->sk_rcvbuf))
42624606 return -ENOMEM;
42634607
42644608 skb_orphan(skb);
....@@ -4377,7 +4721,7 @@
43774721 {
43784722 bool ret;
43794723
4380
- if (likely(sysctl_tstamp_allow_data || tsonly))
4724
+ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
43814725 return true;
43824726
43834727 read_lock_bh(&sk->sk_callback_lock);
....@@ -4433,13 +4777,18 @@
44334777 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
44344778 sk->sk_protocol == IPPROTO_TCP &&
44354779 sk->sk_type == SOCK_STREAM) {
4436
- skb = tcp_get_timestamping_opt_stats(sk);
4780
+ skb = tcp_get_timestamping_opt_stats(sk, orig_skb);
44374781 opt_stats = true;
44384782 } else
44394783 #endif
44404784 skb = alloc_skb(0, GFP_ATOMIC);
44414785 } else {
44424786 skb = skb_clone(orig_skb, GFP_ATOMIC);
4787
+
4788
+ if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
4789
+ kfree_skb(skb);
4790
+ return;
4791
+ }
44434792 }
44444793 if (!skb)
44454794 return;
....@@ -4550,9 +4899,9 @@
45504899 typeof(IPPROTO_IP) proto,
45514900 unsigned int off)
45524901 {
4553
- switch (proto) {
4554
- int err;
4902
+ int err;
45554903
4904
+ switch (proto) {
45564905 case IPPROTO_TCP:
45574906 err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
45584907 off + MAX_TCP_HDR_LEN);
....@@ -4595,7 +4944,7 @@
45954944 if (err < 0)
45964945 goto out;
45974946
4598
- if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
4947
+ if (ip_is_fragment(ip_hdr(skb)))
45994948 fragment = true;
46004949
46014950 off = ip_hdrlen(skb);
....@@ -4962,13 +5311,13 @@
49625311 skb->skb_iif = 0;
49635312 skb->ignore_df = 0;
49645313 skb_dst_drop(skb);
4965
- secpath_reset(skb);
4966
- nf_reset(skb);
5314
+ skb_ext_reset(skb);
5315
+ nf_reset_ct(skb);
49675316 nf_reset_trace(skb);
49685317
49695318 #ifdef CONFIG_NET_SWITCHDEV
49705319 skb->offload_fwd_mark = 0;
4971
- skb->offload_mr_fwd_mark = 0;
5320
+ skb->offload_l3_fwd_mark = 0;
49725321 #endif
49735322
49745323 if (!xnet)
....@@ -5060,6 +5409,8 @@
50605409 * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
50615410 *
50625411 * This is a helper to do that correctly considering GSO_BY_FRAGS.
5412
+ *
5413
+ * @skb: GSO skb
50635414 *
50645415 * @seg_len: The segmented length (from skb_gso_*_seglen). In the
50655416 * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
....@@ -5246,7 +5597,7 @@
52465597 int err;
52475598
52485599 if (likely(skb_vlan_tag_present(skb))) {
5249
- skb->vlan_tci = 0;
5600
+ __vlan_hwaccel_clear_tag(skb);
52505601 } else {
52515602 if (unlikely(!eth_type_vlan(skb->protocol)))
52525603 return 0;
....@@ -5298,6 +5649,252 @@
52985649 return 0;
52995650 }
53005651 EXPORT_SYMBOL(skb_vlan_push);
5652
+
5653
+/**
5654
+ * skb_eth_pop() - Drop the Ethernet header at the head of a packet
5655
+ *
5656
+ * @skb: Socket buffer to modify
5657
+ *
5658
+ * Drop the Ethernet header of @skb.
5659
+ *
5660
+ * Expects that skb->data points to the mac header and that no VLAN tags are
5661
+ * present.
5662
+ *
5663
+ * Returns 0 on success, -errno otherwise.
5664
+ */
5665
+int skb_eth_pop(struct sk_buff *skb)
5666
+{
5667
+ if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) ||
5668
+ skb_network_offset(skb) < ETH_HLEN)
5669
+ return -EPROTO;
5670
+
5671
+ skb_pull_rcsum(skb, ETH_HLEN);
5672
+ skb_reset_mac_header(skb);
5673
+ skb_reset_mac_len(skb);
5674
+
5675
+ return 0;
5676
+}
5677
+EXPORT_SYMBOL(skb_eth_pop);
5678
+
5679
+/**
5680
+ * skb_eth_push() - Add a new Ethernet header at the head of a packet
5681
+ *
5682
+ * @skb: Socket buffer to modify
5683
+ * @dst: Destination MAC address of the new header
5684
+ * @src: Source MAC address of the new header
5685
+ *
5686
+ * Prepend @skb with a new Ethernet header.
5687
+ *
5688
+ * Expects that skb->data points to the mac header, which must be empty.
5689
+ *
5690
+ * Returns 0 on success, -errno otherwise.
5691
+ */
5692
+int skb_eth_push(struct sk_buff *skb, const unsigned char *dst,
5693
+ const unsigned char *src)
5694
+{
5695
+ struct ethhdr *eth;
5696
+ int err;
5697
+
5698
+ if (skb_network_offset(skb) || skb_vlan_tag_present(skb))
5699
+ return -EPROTO;
5700
+
5701
+ err = skb_cow_head(skb, sizeof(*eth));
5702
+ if (err < 0)
5703
+ return err;
5704
+
5705
+ skb_push(skb, sizeof(*eth));
5706
+ skb_reset_mac_header(skb);
5707
+ skb_reset_mac_len(skb);
5708
+
5709
+ eth = eth_hdr(skb);
5710
+ ether_addr_copy(eth->h_dest, dst);
5711
+ ether_addr_copy(eth->h_source, src);
5712
+ eth->h_proto = skb->protocol;
5713
+
5714
+ skb_postpush_rcsum(skb, eth, sizeof(*eth));
5715
+
5716
+ return 0;
5717
+}
5718
+EXPORT_SYMBOL(skb_eth_push);
5719
+
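
A hedged sketch of how a datapath action might pair the new skb_eth_pop()/skb_eth_push() helpers; rewrap_eth(), the L3 processing step and the MAC addresses are assumptions for illustration.

/* Sketch: strip the Ethernet header, operate on the L3 packet, then
 * prepend a fresh header with rewritten addresses.
 */
static int rewrap_eth(struct sk_buff *skb, const unsigned char *new_dst,
		      const unsigned char *new_src)
{
	int err;

	err = skb_eth_pop(skb);		/* expects skb->data at mac header */
	if (err)
		return err;

	/* ... L3 processing on the now Ethernet-less packet ... */

	return skb_eth_push(skb, new_dst, new_src);
}
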
5720
+/* Update the ethertype of hdr and the skb csum value if required. */
5721
+static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
5722
+ __be16 ethertype)
5723
+{
5724
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
5725
+ __be16 diff[] = { ~hdr->h_proto, ethertype };
5726
+
5727
+ skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
5728
+ }
5729
+
5730
+ hdr->h_proto = ethertype;
5731
+}
5732
+
5733
+/**
5734
+ * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
5735
+ * the packet
5736
+ *
5737
+ * @skb: buffer
5738
+ * @mpls_lse: MPLS label stack entry to push
5739
+ * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
5740
+ * @mac_len: length of the MAC header
5741
+ * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
5742
+ * ethernet
5743
+ *
5744
+ * Expects skb->data at mac header.
5745
+ *
5746
+ * Returns 0 on success, -errno otherwise.
5747
+ */
5748
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
5749
+ int mac_len, bool ethernet)
5750
+{
5751
+ struct mpls_shim_hdr *lse;
5752
+ int err;
5753
+
5754
+ if (unlikely(!eth_p_mpls(mpls_proto)))
5755
+ return -EINVAL;
5756
+
5757
+ /* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */
5758
+ if (skb->encapsulation)
5759
+ return -EINVAL;
5760
+
5761
+ err = skb_cow_head(skb, MPLS_HLEN);
5762
+ if (unlikely(err))
5763
+ return err;
5764
+
5765
+ if (!skb->inner_protocol) {
5766
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
5767
+ skb_set_inner_protocol(skb, skb->protocol);
5768
+ }
5769
+
5770
+ skb_push(skb, MPLS_HLEN);
5771
+ memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
5772
+ mac_len);
5773
+ skb_reset_mac_header(skb);
5774
+ skb_set_network_header(skb, mac_len);
5775
+ skb_reset_mac_len(skb);
5776
+
5777
+ lse = mpls_hdr(skb);
5778
+ lse->label_stack_entry = mpls_lse;
5779
+ skb_postpush_rcsum(skb, lse, MPLS_HLEN);
5780
+
5781
+ if (ethernet && mac_len >= ETH_HLEN)
5782
+ skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
5783
+ skb->protocol = mpls_proto;
5784
+
5785
+ return 0;
5786
+}
5787
+EXPORT_SYMBOL_GPL(skb_mpls_push);
5788
+
5789
+/**
5790
+ * skb_mpls_pop() - pop the outermost MPLS header
5791
+ *
5792
+ * @skb: buffer
5793
+ * @next_proto: ethertype of header after popped MPLS header
5794
+ * @mac_len: length of the MAC header
5795
+ * @ethernet: flag to indicate if the packet is ethernet
5796
+ *
5797
+ * Expects skb->data at mac header.
5798
+ *
5799
+ * Returns 0 on success, -errno otherwise.
5800
+ */
5801
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
5802
+ bool ethernet)
5803
+{
5804
+ int err;
5805
+
5806
+ if (unlikely(!eth_p_mpls(skb->protocol)))
5807
+ return 0;
5808
+
5809
+ err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
5810
+ if (unlikely(err))
5811
+ return err;
5812
+
5813
+ skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
5814
+ memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
5815
+ mac_len);
5816
+
5817
+ __skb_pull(skb, MPLS_HLEN);
5818
+ skb_reset_mac_header(skb);
5819
+ skb_set_network_header(skb, mac_len);
5820
+
5821
+ if (ethernet && mac_len >= ETH_HLEN) {
5822
+ struct ethhdr *hdr;
5823
+
5824
+ /* use mpls_hdr() to get ethertype to account for VLANs. */
5825
+ hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
5826
+ skb_mod_eth_type(skb, hdr, next_proto);
5827
+ }
5828
+ skb->protocol = next_proto;
5829
+
5830
+ return 0;
5831
+}
5832
+EXPORT_SYMBOL_GPL(skb_mpls_pop);
5833
+
5834
+/**
5835
+ * skb_mpls_update_lse() - modify outermost MPLS header and update csum
5836
+ *
5837
+ * @skb: buffer
5838
+ * @mpls_lse: new MPLS label stack entry to update to
5839
+ *
5840
+ * Expects skb->data at mac header.
5841
+ *
5842
+ * Returns 0 on success, -errno otherwise.
5843
+ */
5844
+int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse)
5845
+{
5846
+ int err;
5847
+
5848
+ if (unlikely(!eth_p_mpls(skb->protocol)))
5849
+ return -EINVAL;
5850
+
5851
+ err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
5852
+ if (unlikely(err))
5853
+ return err;
5854
+
5855
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
5856
+ __be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse };
5857
+
5858
+ skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
5859
+ }
5860
+
5861
+ mpls_hdr(skb)->label_stack_entry = mpls_lse;
5862
+
5863
+ return 0;
5864
+}
5865
+EXPORT_SYMBOL_GPL(skb_mpls_update_lse);
5866
+
5867
+/**
5868
+ * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header
5869
+ *
5870
+ * @skb: buffer
5871
+ *
5872
+ * Expects skb->data at mac header.
5873
+ *
5874
+ * Returns 0 on success, -errno otherwise.
5875
+ */
5876
+int skb_mpls_dec_ttl(struct sk_buff *skb)
5877
+{
5878
+ u32 lse;
5879
+ u8 ttl;
5880
+
5881
+ if (unlikely(!eth_p_mpls(skb->protocol)))
5882
+ return -EINVAL;
5883
+
5884
+ if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
5885
+ return -ENOMEM;
5886
+
5887
+ lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
5888
+ ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
5889
+ if (!--ttl)
5890
+ return -EINVAL;
5891
+
5892
+ lse &= ~MPLS_LS_TTL_MASK;
5893
+ lse |= ttl << MPLS_LS_TTL_SHIFT;
5894
+
5895
+ return skb_mpls_update_lse(skb, cpu_to_be32(lse));
5896
+}
5897
+EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
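/*
 * Illustrative usage sketch, not part of the patch: an MPLS forwarding path
 * would typically decrement the TTL before re-emitting the packet and drop
 * on any failure (TTL expired, non-MPLS skb, or pull failure); "drop" is a
 * hypothetical error label.
 *
 *	if (skb_mpls_dec_ttl(skb))
 *		goto drop;
 */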
53015898
53025899 /**
53035900 * alloc_skb_with_frags - allocate skb with page frags
....@@ -5421,11 +6018,7 @@
54216018 skb->head = data;
54226019 skb->data = data;
54236020 skb->head_frag = 0;
5424
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
5425
- skb->end = size;
5426
-#else
5427
- skb->end = skb->head + size;
5428
-#endif
6021
+ skb_set_end_offset(skb, size);
54296022 skb_set_tail_pointer(skb, skb_headlen(skb));
54306023 skb_headers_offset_update(skb, 0);
54316024 skb->cloned = 0;
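/*
 * The removed #ifdef is presumably folded into the new helper; a hedged
 * sketch of what skb_set_end_offset() is expected to look like in
 * <linux/skbuff.h>, assuming it simply wraps the code it replaces:
 *
 *	#ifdef NET_SKBUFF_DATA_USES_OFFSET
 *	static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
 *	{
 *		skb->end = offset;
 *	}
 *	#else
 *	static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset)
 *	{
 *		skb->end = skb->head + offset;
 *	}
 *	#endif
 */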
....@@ -5517,8 +6110,7 @@
55176110 size = SKB_WITH_OVERHEAD(ksize(data));
55186111
55196112 memcpy((struct skb_shared_info *)(data + size),
5520
- skb_shinfo(skb), offsetof(struct skb_shared_info,
5521
- frags[skb_shinfo(skb)->nr_frags]));
6113
+ skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
55226114 if (skb_orphan_frags(skb, gfp_mask)) {
55236115 kfree(data);
55246116 return -ENOMEM;
....@@ -5539,7 +6131,7 @@
55396131 * where splitting is expensive.
55406132 * 2. Split accurately. This is what we do.
55416133 */
5542
- shinfo->frags[0].page_offset += off - pos;
6134
+ skb_frag_off_add(&shinfo->frags[0], off - pos);
55436135 skb_frag_size_sub(&shinfo->frags[0], off - pos);
55446136 }
55456137 skb_frag_ref(skb, i);
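/*
 * The direct ->page_offset manipulation above is replaced by an accessor;
 * a hedged sketch of the helper, assuming skb_frag_t is now laid out like a
 * struct bio_vec with a bv_offset member:
 *
 *	static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
 *	{
 *		frag->bv_offset += delta;
 *	}
 */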
....@@ -5564,11 +6156,7 @@
55646156 skb->head = data;
55656157 skb->head_frag = 0;
55666158 skb->data = data;
5567
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
5568
- skb->end = size;
5569
-#else
5570
- skb->end = skb->head + size;
5571
-#endif
6159
+ skb_set_end_offset(skb, size);
55726160 skb_reset_tail_pointer(skb);
55736161 skb_headers_offset_update(skb, 0);
55746162 skb->cloned = 0;
....@@ -5642,4 +6230,181 @@
56426230 */
56436231 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
56446232 }
5645
-EXPORT_SYMBOL_GPL(skb_condense);
6233
+
6234
+#ifdef CONFIG_SKB_EXTENSIONS
6235
+static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
6236
+{
6237
+ return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
6238
+}
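/*
 * ext->offset[] and ext->chunks are measured in SKB_EXT_ALIGN_VALUE units,
 * not bytes.  A worked example with assumed sizes: if the struct skb_ext
 * header rounds up to 16 bytes (2 chunks of 8) and the stored extension is
 * 40 bytes (5 chunks), then offset[id] == 2, ->chunks == 7, and
 * skb_ext_get_ptr() resolves to (void *)ext + 2 * 8.
 */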
6239
+
6240
+/**
6241
+ * __skb_ext_alloc - allocate a new skb extensions storage
6242
+ *
6243
+ * @flags: See kmalloc().
6244
+ *
6245
+ * Returns the newly allocated pointer. The pointer can later be attached to a
6246
+ * skb via __skb_ext_set().
6247
+ * Note: caller must handle the skb_ext as opaque data.
6248
+ */
6249
+struct skb_ext *__skb_ext_alloc(gfp_t flags)
6250
+{
6251
+ struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags);
6252
+
6253
+ if (new) {
6254
+ memset(new->offset, 0, sizeof(new->offset));
6255
+ refcount_set(&new->refcnt, 1);
6256
+ }
6257
+
6258
+ return new;
6259
+}
6260
+
6261
+static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
6262
+ unsigned int old_active)
6263
+{
6264
+ struct skb_ext *new;
6265
+
6266
+ if (refcount_read(&old->refcnt) == 1)
6267
+ return old;
6268
+
6269
+ new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
6270
+ if (!new)
6271
+ return NULL;
6272
+
6273
+ memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
6274
+ refcount_set(&new->refcnt, 1);
6275
+
6276
+#ifdef CONFIG_XFRM
6277
+ if (old_active & (1 << SKB_EXT_SEC_PATH)) {
6278
+ struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH);
6279
+ unsigned int i;
6280
+
6281
+ for (i = 0; i < sp->len; i++)
6282
+ xfrm_state_hold(sp->xvec[i]);
6283
+ }
6284
+#endif
6285
+ __skb_ext_put(old);
6286
+ return new;
6287
+}
6288
+
6289
+/**
6290
+ * __skb_ext_set - attach the specified extension storage to this skb
6291
+ * @skb: buffer
6292
+ * @id: extension id
6293
+ * @ext: extension storage previously allocated via __skb_ext_alloc()
6294
+ *
6295
+ * Existing extensions, if any, are cleared.
6296
+ *
6297
+ * Returns the pointer to the extension.
6298
+ */
6299
+void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
6300
+ struct skb_ext *ext)
6301
+{
6302
+ unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);
6303
+
6304
+ skb_ext_put(skb);
6305
+ newlen = newoff + skb_ext_type_len[id];
6306
+ ext->chunks = newlen;
6307
+ ext->offset[id] = newoff;
6308
+ skb->extensions = ext;
6309
+ skb->active_extensions = 1 << id;
6310
+ return skb_ext_get_ptr(ext, id);
6311
+}
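/*
 * Illustrative usage sketch, not part of the patch: the preallocation
 * pattern these two helpers enable -- allocate the storage in a context
 * where sleeping is allowed, then attach it later from a path that must not
 * fail.  SKB_EXT_MPTCP is only an example id; "opts" is hypothetical.
 *
 *	struct skb_ext *ext = __skb_ext_alloc(GFP_KERNEL);
 *	void *opts;
 *
 *	if (!ext)
 *		return -ENOMEM;
 *	...
 *	opts = __skb_ext_set(skb, SKB_EXT_MPTCP, ext);
 */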
6312
+
6313
+/**
6314
+ * skb_ext_add - allocate space for given extension, COW if needed
6315
+ * @skb: buffer
6316
+ * @id: extension to allocate space for
6317
+ *
6318
+ * Allocates enough space for the given extension.
6319
+ * If the extension is already present, a pointer to that extension
6320
+ * is returned.
6321
+ *
6322
+ * If the skb was cloned, COW applies and the returned memory can be
6323
+ * modified without changing the extension space of cloned buffers.
6324
+ *
6325
+ * Returns pointer to the extension or NULL on allocation failure.
6326
+ */
6327
+void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
6328
+{
6329
+ struct skb_ext *new, *old = NULL;
6330
+ unsigned int newlen, newoff;
6331
+
6332
+ if (skb->active_extensions) {
6333
+ old = skb->extensions;
6334
+
6335
+ new = skb_ext_maybe_cow(old, skb->active_extensions);
6336
+ if (!new)
6337
+ return NULL;
6338
+
6339
+ if (__skb_ext_exist(new, id))
6340
+ goto set_active;
6341
+
6342
+ newoff = new->chunks;
6343
+ } else {
6344
+ newoff = SKB_EXT_CHUNKSIZEOF(*new);
6345
+
6346
+ new = __skb_ext_alloc(GFP_ATOMIC);
6347
+ if (!new)
6348
+ return NULL;
6349
+ }
6350
+
6351
+ newlen = newoff + skb_ext_type_len[id];
6352
+ new->chunks = newlen;
6353
+ new->offset[id] = newoff;
6354
+set_active:
6355
+ skb->extensions = new;
6356
+ skb->active_extensions |= 1 << id;
6357
+ return skb_ext_get_ptr(new, id);
6358
+}
6359
+EXPORT_SYMBOL(skb_ext_add);
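/*
 * Illustrative usage sketch, not part of the patch: loosely modelled on the
 * pattern the XFRM secpath code is expected to use.  skb_ext_find() is
 * assumed to be the inline lookup counterpart in <linux/skbuff.h>.
 *
 *	struct sec_path *old = skb_ext_find(skb, SKB_EXT_SEC_PATH);
 *	struct sec_path *sp = skb_ext_add(skb, SKB_EXT_SEC_PATH);
 *
 *	if (!sp)
 *		return NULL;
 *	if (!old)
 *		sp->len = 0;	(initialise a freshly added extension)
 *	return sp;
 */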
6360
+
6361
+#ifdef CONFIG_XFRM
6362
+static void skb_ext_put_sp(struct sec_path *sp)
6363
+{
6364
+ unsigned int i;
6365
+
6366
+ for (i = 0; i < sp->len; i++)
6367
+ xfrm_state_put(sp->xvec[i]);
6368
+}
6369
+#endif
6370
+
6371
+void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
6372
+{
6373
+ struct skb_ext *ext = skb->extensions;
6374
+
6375
+ skb->active_extensions &= ~(1 << id);
6376
+ if (skb->active_extensions == 0) {
6377
+ skb->extensions = NULL;
6378
+ __skb_ext_put(ext);
6379
+#ifdef CONFIG_XFRM
6380
+ } else if (id == SKB_EXT_SEC_PATH &&
6381
+ refcount_read(&ext->refcnt) == 1) {
6382
+ struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH);
6383
+
6384
+ skb_ext_put_sp(sp);
6385
+ sp->len = 0;
6386
+#endif
6387
+ }
6388
+}
6389
+EXPORT_SYMBOL(__skb_ext_del);
6390
+
6391
+void __skb_ext_put(struct skb_ext *ext)
6392
+{
6393
+ /* If this is the last clone, nothing can increment
6394
+ * it after check passes. Avoids one atomic op.
6395
+ */
6396
+ if (refcount_read(&ext->refcnt) == 1)
6397
+ goto free_now;
6398
+
6399
+ if (!refcount_dec_and_test(&ext->refcnt))
6400
+ return;
6401
+free_now:
6402
+#ifdef CONFIG_XFRM
6403
+ if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
6404
+ skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
6405
+#endif
6406
+
6407
+ kmem_cache_free(skbuff_ext_cache, ext);
6408
+}
6409
+EXPORT_SYMBOL(__skb_ext_put);
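/*
 * For reference, a hedged sketch of the non-__ wrapper assumed to live in
 * <linux/skbuff.h>: callers go through a thin inline so that skbs without
 * extensions never take the refcount hit.
 *
 *	static inline void skb_ext_put(struct sk_buff *skb)
 *	{
 *		if (skb->active_extensions)
 *			__skb_ext_put(skb->extensions);
 *	}
 */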
6410
+#endif /* CONFIG_SKB_EXTENSIONS */