forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/net/core/skbuff.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Routines having to do with the 'struct sk_buff' memory handlers.
34 *
....@@ -25,11 +26,6 @@
2526 * disabled, or you better be *real* sure that the operation is atomic
2627 * with respect to whatever list is being frobbed (e.g. via lock_sock()
2728 * or via disabling bottom half handlers, etc).
28
- *
29
- * This program is free software; you can redistribute it and/or
30
- * modify it under the terms of the GNU General Public License
31
- * as published by the Free Software Foundation; either version
32
- * 2 of the License, or (at your option) any later version.
3329 */
3430
3531 /*
....@@ -63,6 +59,7 @@
6359 #include <linux/errqueue.h>
6460 #include <linux/prefetch.h>
6561 #include <linux/if_vlan.h>
62
+#include <linux/mpls.h>
6663
6764 #include <net/protocol.h>
6865 #include <net/dst.h>
....@@ -70,15 +67,24 @@
7067 #include <net/checksum.h>
7168 #include <net/ip6_checksum.h>
7269 #include <net/xfrm.h>
70
+#include <net/mpls.h>
71
+#include <net/mptcp.h>
7372
7473 #include <linux/uaccess.h>
7574 #include <trace/events/skb.h>
7675 #include <linux/highmem.h>
7776 #include <linux/capability.h>
7877 #include <linux/user_namespace.h>
78
+#include <linux/indirect_call_wrapper.h>
79
+#include <trace/hooks/net.h>
80
+
81
+#include "datagram.h"
7982
8083 struct kmem_cache *skbuff_head_cache __ro_after_init;
8184 static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
85
+#ifdef CONFIG_SKB_EXTENSIONS
86
+static struct kmem_cache *skbuff_ext_cache __ro_after_init;
87
+#endif
8288 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
8389 EXPORT_SYMBOL(sysctl_max_skb_frags);
8490
....@@ -97,7 +103,7 @@
97103 static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
98104 const char msg[])
99105 {
100
- pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
106
+ pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n",
101107 msg, addr, skb->len, sz, skb->head, skb->data,
102108 (unsigned long)skb->tail, (unsigned long)skb->end,
103109 skb->dev ? skb->dev->name : "<NULL>");
....@@ -244,6 +250,9 @@
244250
245251 fclones->skb2.fclone = SKB_FCLONE_CLONE;
246252 }
253
+
254
+ skb_set_kcov_handle(skb, kcov_common_handle());
255
+
247256 out:
248257 return skb;
249258 nodata:
....@@ -252,6 +261,35 @@
252261 goto out;
253262 }
254263 EXPORT_SYMBOL(__alloc_skb);
264
+
265
+/* Caller must provide SKB that is memset cleared */
266
+static struct sk_buff *__build_skb_around(struct sk_buff *skb,
267
+ void *data, unsigned int frag_size)
268
+{
269
+ struct skb_shared_info *shinfo;
270
+ unsigned int size = frag_size ? : ksize(data);
271
+
272
+ size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
273
+
274
+ /* Assumes caller memset cleared SKB */
275
+ skb->truesize = SKB_TRUESIZE(size);
276
+ refcount_set(&skb->users, 1);
277
+ skb->head = data;
278
+ skb->data = data;
279
+ skb_reset_tail_pointer(skb);
280
+ skb->end = skb->tail + size;
281
+ skb->mac_header = (typeof(skb->mac_header))~0U;
282
+ skb->transport_header = (typeof(skb->transport_header))~0U;
283
+
284
+ /* make sure we initialize shinfo sequentially */
285
+ shinfo = skb_shinfo(skb);
286
+ memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
287
+ atomic_set(&shinfo->dataref, 1);
288
+
289
+ skb_set_kcov_handle(skb, kcov_common_handle());
290
+
291
+ return skb;
292
+}
255293
256294 /**
257295 * __build_skb - build a network buffer
....@@ -274,32 +312,15 @@
274312 */
275313 struct sk_buff *__build_skb(void *data, unsigned int frag_size)
276314 {
277
- struct skb_shared_info *shinfo;
278315 struct sk_buff *skb;
279
- unsigned int size = frag_size ? : ksize(data);
280316
281317 skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
282
- if (!skb)
318
+ if (unlikely(!skb))
283319 return NULL;
284320
285
- size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
286
-
287321 memset(skb, 0, offsetof(struct sk_buff, tail));
288
- skb->truesize = SKB_TRUESIZE(size);
289
- refcount_set(&skb->users, 1);
290
- skb->head = data;
291
- skb->data = data;
292
- skb_reset_tail_pointer(skb);
293
- skb->end = skb->tail + size;
294
- skb->mac_header = (typeof(skb->mac_header))~0U;
295
- skb->transport_header = (typeof(skb->transport_header))~0U;
296322
297
- /* make sure we initialize shinfo sequentially */
298
- shinfo = skb_shinfo(skb);
299
- memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
300
- atomic_set(&shinfo->dataref, 1);
301
-
302
- return skb;
323
+ return __build_skb_around(skb, data, frag_size);
303324 }
304325
305326 /* build_skb() is wrapper over __build_skb(), that specifically
....@@ -320,6 +341,29 @@
320341 }
321342 EXPORT_SYMBOL(build_skb);
322343
344
+/**
345
+ * build_skb_around - build a network buffer around provided skb
346
+ * @skb: sk_buff provided by caller, must be memset cleared
347
+ * @data: data buffer provided by caller
348
+ * @frag_size: size of data, or 0 if head was kmalloced
349
+ */
350
+struct sk_buff *build_skb_around(struct sk_buff *skb,
351
+ void *data, unsigned int frag_size)
352
+{
353
+ if (unlikely(!skb))
354
+ return NULL;
355
+
356
+ skb = __build_skb_around(skb, data, frag_size);
357
+
358
+ if (skb && frag_size) {
359
+ skb->head_frag = 1;
360
+ if (page_is_pfmemalloc(virt_to_head_page(data)))
361
+ skb->pfmemalloc = 1;
362
+ }
363
+ return skb;
364
+}
365
+EXPORT_SYMBOL(build_skb_around);
366
+
323367 #define NAPI_SKB_CACHE_SIZE 64
324368
325369 struct napi_alloc_cache {
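For reference, the build_skb_around() helper added above lets a caller that already holds a memset-cleared struct sk_buff (for example one taken from a bulk kmem_cache allocation) wrap it around an existing data buffer instead of allocating a new head. A minimal usage sketch, illustrative only; the rx_wrap() helper and the NET_SKB_PAD reserve policy are assumptions, not part of this change:

#include <linux/skbuff.h>

/* Illustrative sketch: wrap a pre-allocated, zeroed sk_buff around a
 * page-fragment receive buffer of 'truesize' bytes.
 */
static struct sk_buff *rx_wrap(struct sk_buff *skb, void *buf,
                               unsigned int truesize)
{
        /* skb must already be zeroed up to offsetof(struct sk_buff, tail) */
        skb = build_skb_around(skb, buf, truesize);
        if (unlikely(!skb))
                return NULL;            /* caller still owns 'buf' */

        skb_reserve(skb, NET_SKB_PAD);  /* assumed headroom policy */
        return skb;
}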
....@@ -330,34 +374,6 @@
330374
331375 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
332376 static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
333
-
334
-static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
335
-{
336
- struct page_frag_cache *nc;
337
- unsigned long flags;
338
- void *data;
339
-
340
- local_irq_save(flags);
341
- nc = this_cpu_ptr(&netdev_alloc_cache);
342
- data = page_frag_alloc(nc, fragsz, gfp_mask);
343
- local_irq_restore(flags);
344
- return data;
345
-}
346
-
347
-/**
348
- * netdev_alloc_frag - allocate a page fragment
349
- * @fragsz: fragment size
350
- *
351
- * Allocates a frag from a page for receive buffer.
352
- * Uses GFP_ATOMIC allocations.
353
- */
354
-void *netdev_alloc_frag(unsigned int fragsz)
355
-{
356
- fragsz = SKB_DATA_ALIGN(fragsz);
357
-
358
- return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
359
-}
360
-EXPORT_SYMBOL(netdev_alloc_frag);
361377
362378 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
363379 {
....@@ -373,6 +389,31 @@
373389 return __napi_alloc_frag(fragsz, GFP_ATOMIC);
374390 }
375391 EXPORT_SYMBOL(napi_alloc_frag);
392
+
393
+/**
394
+ * netdev_alloc_frag - allocate a page fragment
395
+ * @fragsz: fragment size
396
+ *
397
+ * Allocates a frag from a page for receive buffer.
398
+ * Uses GFP_ATOMIC allocations.
399
+ */
400
+void *netdev_alloc_frag(unsigned int fragsz)
401
+{
402
+ struct page_frag_cache *nc;
403
+ void *data;
404
+
405
+ fragsz = SKB_DATA_ALIGN(fragsz);
406
+ if (in_irq() || irqs_disabled()) {
407
+ nc = this_cpu_ptr(&netdev_alloc_cache);
408
+ data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
409
+ } else {
410
+ local_bh_disable();
411
+ data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
412
+ local_bh_enable();
413
+ }
414
+ return data;
415
+}
416
+EXPORT_SYMBOL(netdev_alloc_frag);
376417
377418 /**
378419 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
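The reworked netdev_alloc_frag() above now chooses the per-CPU cache by context (hard IRQ / IRQs disabled versus BH-disabled), so the same helper is safe from both IRQ handlers and process context. A hedged sketch of the usual pairing with build_skb(); the rx_alloc() wrapper and its size arithmetic are typical driver practice, not taken from this diff:

#include <linux/skbuff.h>

/* Illustrative sketch: allocate a receive buffer as a page fragment and
 * turn it into an skb.  'len' is the payload room the driver needs.
 */
static struct sk_buff *rx_alloc(unsigned int len)
{
        unsigned int truesize = SKB_DATA_ALIGN(len) +
                                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        struct sk_buff *skb;
        void *buf;

        buf = netdev_alloc_frag(truesize);
        if (!buf)
                return NULL;

        skb = build_skb(buf, truesize);
        if (!skb) {
                skb_free_frag(buf);     /* hand the fragment back */
                return NULL;
        }
        return skb;
}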
....@@ -391,7 +432,6 @@
391432 gfp_t gfp_mask)
392433 {
393434 struct page_frag_cache *nc;
394
- unsigned long flags;
395435 struct sk_buff *skb;
396436 bool pfmemalloc;
397437 void *data;
....@@ -416,13 +456,17 @@
416456 if (sk_memalloc_socks())
417457 gfp_mask |= __GFP_MEMALLOC;
418458
419
- local_irq_save(flags);
420
-
421
- nc = this_cpu_ptr(&netdev_alloc_cache);
422
- data = page_frag_alloc(nc, len, gfp_mask);
423
- pfmemalloc = nc->pfmemalloc;
424
-
425
- local_irq_restore(flags);
459
+ if (in_irq() || irqs_disabled()) {
460
+ nc = this_cpu_ptr(&netdev_alloc_cache);
461
+ data = page_frag_alloc(nc, len, gfp_mask);
462
+ pfmemalloc = nc->pfmemalloc;
463
+ } else {
464
+ local_bh_disable();
465
+ nc = this_cpu_ptr(&napi_alloc_cache.page);
466
+ data = page_frag_alloc(nc, len, gfp_mask);
467
+ pfmemalloc = nc->pfmemalloc;
468
+ local_bh_enable();
469
+ }
426470
427471 if (unlikely(!data))
428472 return NULL;
....@@ -433,7 +477,6 @@
433477 return NULL;
434478 }
435479
436
- /* use OR instead of assignment to avoid clearing of bits in mask */
437480 if (pfmemalloc)
438481 skb->pfmemalloc = 1;
439482 skb->head_frag = 1;
....@@ -498,7 +541,6 @@
498541 return NULL;
499542 }
500543
501
- /* use OR instead of assignment to avoid clearing of bits in mask */
502544 if (nc->page.pfmemalloc)
503545 skb->pfmemalloc = 1;
504546 skb->head_frag = 1;
....@@ -618,8 +660,8 @@
618660
619661 void skb_release_head_state(struct sk_buff *skb)
620662 {
663
+ nf_reset_ct(skb);
621664 skb_dst_drop(skb);
622
- secpath_reset(skb);
623665 if (skb->destructor) {
624666 WARN_ON(in_irq());
625667 skb->destructor(skb);
....@@ -627,9 +669,7 @@
627669 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
628670 nf_conntrack_put(skb_nfct(skb));
629671 #endif
630
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
631
- nf_bridge_put(skb->nf_bridge);
632
-#endif
672
+ skb_ext_put(skb);
633673 }
634674
635675 /* Free everything but the sk_buff shell. */
....@@ -668,6 +708,7 @@
668708 if (!skb_unref(skb))
669709 return;
670710
711
+ trace_android_vh_kfree_skb(skb);
671712 trace_kfree_skb(skb, __builtin_return_address(0));
672713 __kfree_skb(skb);
673714 }
....@@ -684,6 +725,101 @@
684725 }
685726 EXPORT_SYMBOL(kfree_skb_list);
686727
728
+/* Dump skb information and contents.
729
+ *
730
+ * Must only be called from net_ratelimit()-ed paths.
731
+ *
732
+ * Dumps whole packets if full_pkt, only headers otherwise.
733
+ */
734
+void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
735
+{
736
+ struct skb_shared_info *sh = skb_shinfo(skb);
737
+ struct net_device *dev = skb->dev;
738
+ struct sock *sk = skb->sk;
739
+ struct sk_buff *list_skb;
740
+ bool has_mac, has_trans;
741
+ int headroom, tailroom;
742
+ int i, len, seg_len;
743
+
744
+ if (full_pkt)
745
+ len = skb->len;
746
+ else
747
+ len = min_t(int, skb->len, MAX_HEADER + 128);
748
+
749
+ headroom = skb_headroom(skb);
750
+ tailroom = skb_tailroom(skb);
751
+
752
+ has_mac = skb_mac_header_was_set(skb);
753
+ has_trans = skb_transport_header_was_set(skb);
754
+
755
+ printk("%sskb len=%u headroom=%u headlen=%u tailroom=%u\n"
756
+ "mac=(%d,%d) net=(%d,%d) trans=%d\n"
757
+ "shinfo(txflags=%u nr_frags=%u gso(size=%hu type=%u segs=%hu))\n"
758
+ "csum(0x%x ip_summed=%u complete_sw=%u valid=%u level=%u)\n"
759
+ "hash(0x%x sw=%u l4=%u) proto=0x%04x pkttype=%u iif=%d\n",
760
+ level, skb->len, headroom, skb_headlen(skb), tailroom,
761
+ has_mac ? skb->mac_header : -1,
762
+ has_mac ? skb_mac_header_len(skb) : -1,
763
+ skb->network_header,
764
+ has_trans ? skb_network_header_len(skb) : -1,
765
+ has_trans ? skb->transport_header : -1,
766
+ sh->tx_flags, sh->nr_frags,
767
+ sh->gso_size, sh->gso_type, sh->gso_segs,
768
+ skb->csum, skb->ip_summed, skb->csum_complete_sw,
769
+ skb->csum_valid, skb->csum_level,
770
+ skb->hash, skb->sw_hash, skb->l4_hash,
771
+ ntohs(skb->protocol), skb->pkt_type, skb->skb_iif);
772
+
773
+ if (dev)
774
+ printk("%sdev name=%s feat=%pNF\n",
775
+ level, dev->name, &dev->features);
776
+ if (sk)
777
+ printk("%ssk family=%hu type=%u proto=%u\n",
778
+ level, sk->sk_family, sk->sk_type, sk->sk_protocol);
779
+
780
+ if (full_pkt && headroom)
781
+ print_hex_dump(level, "skb headroom: ", DUMP_PREFIX_OFFSET,
782
+ 16, 1, skb->head, headroom, false);
783
+
784
+ seg_len = min_t(int, skb_headlen(skb), len);
785
+ if (seg_len)
786
+ print_hex_dump(level, "skb linear: ", DUMP_PREFIX_OFFSET,
787
+ 16, 1, skb->data, seg_len, false);
788
+ len -= seg_len;
789
+
790
+ if (full_pkt && tailroom)
791
+ print_hex_dump(level, "skb tailroom: ", DUMP_PREFIX_OFFSET,
792
+ 16, 1, skb_tail_pointer(skb), tailroom, false);
793
+
794
+ for (i = 0; len && i < skb_shinfo(skb)->nr_frags; i++) {
795
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
796
+ u32 p_off, p_len, copied;
797
+ struct page *p;
798
+ u8 *vaddr;
799
+
800
+ skb_frag_foreach_page(frag, skb_frag_off(frag),
801
+ skb_frag_size(frag), p, p_off, p_len,
802
+ copied) {
803
+ seg_len = min_t(int, p_len, len);
804
+ vaddr = kmap_atomic(p);
805
+ print_hex_dump(level, "skb frag: ",
806
+ DUMP_PREFIX_OFFSET,
807
+ 16, 1, vaddr + p_off, seg_len, false);
808
+ kunmap_atomic(vaddr);
809
+ len -= seg_len;
810
+ if (!len)
811
+ break;
812
+ }
813
+ }
814
+
815
+ if (full_pkt && skb_has_frag_list(skb)) {
816
+ printk("skb fraglist:\n");
817
+ skb_walk_frags(skb, list_skb)
818
+ skb_dump(level, list_skb, true);
819
+ }
820
+}
821
+EXPORT_SYMBOL(skb_dump);
822
+
687823 /**
688824 * skb_tx_error - report an sk_buff xmit error
689825 * @skb: buffer that triggered an error
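skb_dump() is intended for rate-limited debug paths, as its comment notes. A minimal sketch of a caller; the debug_drop() wrapper is hypothetical:

/* Illustrative sketch: dump header info for a packet about to be dropped. */
static void debug_drop(struct sk_buff *skb)
{
        if (net_ratelimit())
                skb_dump(KERN_WARNING, skb, false);     /* headers only */
        kfree_skb(skb);
}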
....@@ -697,6 +833,7 @@
697833 }
698834 EXPORT_SYMBOL(skb_tx_error);
699835
836
+#ifdef CONFIG_TRACEPOINTS
700837 /**
701838 * consume_skb - free an skbuff
702839 * @skb: buffer to free
....@@ -714,6 +851,7 @@
714851 __kfree_skb(skb);
715852 }
716853 EXPORT_SYMBOL(consume_skb);
854
+#endif
717855
718856 /**
719857 * consume_stateless_skb - free an skbuff, assuming it is stateless
....@@ -770,9 +908,6 @@
770908
771909 void napi_consume_skb(struct sk_buff *skb, int budget)
772910 {
773
- if (unlikely(!skb))
774
- return;
775
-
776911 /* Zero budget indicate non-NAPI context called us, like netpoll */
777912 if (unlikely(!budget)) {
778913 dev_consume_skb_any(skb);
....@@ -809,9 +944,7 @@
809944 new->dev = old->dev;
810945 memcpy(new->cb, old->cb, sizeof(old->cb));
811946 skb_dst_copy(new, old);
812
-#ifdef CONFIG_XFRM
813
- new->sp = secpath_get(old->sp);
814
-#endif
947
+ __skb_ext_copy(new, old);
815948 __nf_copy(new, old, false);
816949
817950 /* Note : this field could be in headers_start/headers_end section
....@@ -887,6 +1020,31 @@
8871020 return n;
8881021 #undef C
8891022 }
1023
+
1024
+/**
1025
+ * alloc_skb_for_msg() - allocate sk_buff to wrap frag list forming a msg
1026
+ * @first: first sk_buff of the msg
1027
+ */
1028
+struct sk_buff *alloc_skb_for_msg(struct sk_buff *first)
1029
+{
1030
+ struct sk_buff *n;
1031
+
1032
+ n = alloc_skb(0, GFP_ATOMIC);
1033
+ if (!n)
1034
+ return NULL;
1035
+
1036
+ n->len = first->len;
1037
+ n->data_len = first->len;
1038
+ n->truesize = first->truesize;
1039
+
1040
+ skb_shinfo(n)->frag_list = first;
1041
+
1042
+ __copy_skb_header(n, first);
1043
+ n->destructor = NULL;
1044
+
1045
+ return n;
1046
+}
1047
+EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
8901048
8911049 /**
8921050 * skb_morph - morph one skb into another
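alloc_skb_for_msg() builds a zero-length head skb whose frag_list points at an existing message, so strparser-style receive paths can queue the whole message as one unit without copying. A hedged sketch; queue_msg() and its ownership handling are illustrative:

/* Illustrative sketch: wrap message 'first' and queue the wrapper. */
static int queue_msg(struct sk_buff_head *q, struct sk_buff *first)
{
        struct sk_buff *wrapper = alloc_skb_for_msg(first);

        if (!wrapper)
                return -ENOMEM;

        skb_queue_tail(q, wrapper);
        return 0;
}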
....@@ -1012,7 +1170,11 @@
10121170 uarg->len++;
10131171 uarg->bytelen = bytelen;
10141172 atomic_set(&sk->sk_zckey, ++next);
1015
- sock_zerocopy_get(uarg);
1173
+
1174
+ /* no extra ref when appending to datagram (MSG_MORE) */
1175
+ if (sk->sk_type == SOCK_STREAM)
1176
+ sock_zerocopy_get(uarg);
1177
+
10161178 return uarg;
10171179 }
10181180 }
....@@ -1102,7 +1264,7 @@
11021264 }
11031265 EXPORT_SYMBOL_GPL(sock_zerocopy_put);
11041266
1105
-void sock_zerocopy_put_abort(struct ubuf_info *uarg)
1267
+void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
11061268 {
11071269 if (uarg) {
11081270 struct sock *sk = skb_from_uarg(uarg)->sk;
....@@ -1110,13 +1272,17 @@
11101272 atomic_dec(&sk->sk_zckey);
11111273 uarg->len--;
11121274
1113
- sock_zerocopy_put(uarg);
1275
+ if (have_uref)
1276
+ sock_zerocopy_put(uarg);
11141277 }
11151278 }
11161279 EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
11171280
1118
-extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
1119
- struct iov_iter *from, size_t length);
1281
+int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
1282
+{
1283
+ return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
1284
+}
1285
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
11201286
11211287 int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
11221288 struct msghdr *msg, int len,
....@@ -1144,7 +1310,7 @@
11441310 return err;
11451311 }
11461312
1147
- skb_zcopy_set(skb, uarg);
1313
+ skb_zcopy_set(skb, uarg, NULL);
11481314 return skb->len - orig_len;
11491315 }
11501316 EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
....@@ -1164,7 +1330,7 @@
11641330 if (skb_copy_ubufs(nskb, GFP_ATOMIC))
11651331 return -EIO;
11661332 }
1167
- skb_zcopy_set(nskb, skb_uarg(orig));
1333
+ skb_zcopy_set(nskb, skb_uarg(orig), NULL);
11681334 }
11691335 return 0;
11701336 }
....@@ -1220,7 +1386,7 @@
12201386 struct page *p;
12211387 u8 *vaddr;
12221388
1223
- skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
1389
+ skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f),
12241390 p, p_off, p_len, copied) {
12251391 u32 copy, done = 0;
12261392 vaddr = kmap_atomic(p);
....@@ -1510,11 +1676,10 @@
15101676 skb->head = data;
15111677 skb->head_frag = 0;
15121678 skb->data += off;
1679
+
1680
+ skb_set_end_offset(skb, size);
15131681 #ifdef NET_SKBUFF_DATA_USES_OFFSET
1514
- skb->end = size;
15151682 off = nhead;
1516
-#else
1517
- skb->end = skb->head + size;
15181683 #endif
15191684 skb->tail += off;
15201685 skb_headers_offset_update(skb, nhead);
....@@ -1561,6 +1726,38 @@
15611726 return skb2;
15621727 }
15631728 EXPORT_SYMBOL(skb_realloc_headroom);
1729
+
1730
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
1731
+{
1732
+ unsigned int saved_end_offset, saved_truesize;
1733
+ struct skb_shared_info *shinfo;
1734
+ int res;
1735
+
1736
+ saved_end_offset = skb_end_offset(skb);
1737
+ saved_truesize = skb->truesize;
1738
+
1739
+ res = pskb_expand_head(skb, 0, 0, pri);
1740
+ if (res)
1741
+ return res;
1742
+
1743
+ skb->truesize = saved_truesize;
1744
+
1745
+ if (likely(skb_end_offset(skb) == saved_end_offset))
1746
+ return 0;
1747
+
1748
+ shinfo = skb_shinfo(skb);
1749
+
1750
+ /* We are about to change back skb->end,
1751
+ * we need to move skb_shinfo() to its new location.
1752
+ */
1753
+ memmove(skb->head + saved_end_offset,
1754
+ shinfo,
1755
+ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
1756
+
1757
+ skb_set_end_offset(skb, saved_end_offset);
1758
+
1759
+ return 0;
1760
+}
15641761
15651762 /**
15661763 * skb_copy_expand - copy and expand sk_buff
....@@ -1944,8 +2141,6 @@
19442141 struct sk_buff *insp = NULL;
19452142
19462143 do {
1947
- BUG_ON(!list);
1948
-
19492144 if (list->len <= eat) {
19502145 /* Eaten as whole. */
19512146 eat -= list->len;
....@@ -1997,10 +2192,12 @@
19972192 skb_frag_unref(skb, i);
19982193 eat -= size;
19992194 } else {
2000
- skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
2195
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[k];
2196
+
2197
+ *frag = skb_shinfo(skb)->frags[i];
20012198 if (eat) {
2002
- skb_shinfo(skb)->frags[k].page_offset += eat;
2003
- skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
2199
+ skb_frag_off_add(frag, eat);
2200
+ skb_frag_size_sub(frag, eat);
20042201 if (!i)
20052202 goto end;
20062203 eat = 0;
....@@ -2072,7 +2269,7 @@
20722269 copy = len;
20732270
20742271 skb_frag_foreach_page(f,
2075
- f->page_offset + offset - start,
2272
+ skb_frag_off(f) + offset - start,
20762273 copy, p, p_off, p_len, copied) {
20772274 vaddr = kmap_atomic(p);
20782275 memcpy(to + copied, vaddr + p_off, p_len);
....@@ -2248,7 +2445,7 @@
22482445 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
22492446
22502447 if (__splice_segment(skb_frag_page(f),
2251
- f->page_offset, skb_frag_size(f),
2448
+ skb_frag_off(f), skb_frag_size(f),
22522449 offset, len, spd, false, sk, pipe))
22532450 return true;
22542451 }
....@@ -2338,20 +2535,20 @@
23382535 for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
23392536 skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
23402537
2341
- if (offset < frag->size)
2538
+ if (offset < skb_frag_size(frag))
23422539 break;
23432540
2344
- offset -= frag->size;
2541
+ offset -= skb_frag_size(frag);
23452542 }
23462543
23472544 for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
23482545 skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
23492546
2350
- slen = min_t(size_t, len, frag->size - offset);
2547
+ slen = min_t(size_t, len, skb_frag_size(frag) - offset);
23512548
23522549 while (slen) {
2353
- ret = kernel_sendpage_locked(sk, frag->page.p,
2354
- frag->page_offset + offset,
2550
+ ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
2551
+ skb_frag_off(frag) + offset,
23552552 slen, MSG_DONTWAIT);
23562553 if (ret <= 0)
23572554 goto error;
....@@ -2385,19 +2582,6 @@
23852582 return orig_len == len ? ret : orig_len - len;
23862583 }
23872584 EXPORT_SYMBOL_GPL(skb_send_sock_locked);
2388
-
2389
-/* Send skb data on a socket. */
2390
-int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
2391
-{
2392
- int ret = 0;
2393
-
2394
- lock_sock(sk);
2395
- ret = skb_send_sock_locked(sk, skb, offset, len);
2396
- release_sock(sk);
2397
-
2398
- return ret;
2399
-}
2400
-EXPORT_SYMBOL_GPL(skb_send_sock);
24012585
24022586 /**
24032587 * skb_store_bits - store bits from kernel buffer to skb
....@@ -2446,7 +2630,7 @@
24462630 copy = len;
24472631
24482632 skb_frag_foreach_page(frag,
2449
- frag->page_offset + offset - start,
2633
+ skb_frag_off(frag) + offset - start,
24502634 copy, p, p_off, p_len, copied) {
24512635 vaddr = kmap_atomic(p);
24522636 memcpy(vaddr + p_off, from + copied, p_len);
....@@ -2501,7 +2685,8 @@
25012685 if (copy > 0) {
25022686 if (copy > len)
25032687 copy = len;
2504
- csum = ops->update(skb->data + offset, copy, csum);
2688
+ csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
2689
+ skb->data + offset, copy, csum);
25052690 if ((len -= copy) == 0)
25062691 return csum;
25072692 offset += copy;
....@@ -2525,12 +2710,16 @@
25252710 copy = len;
25262711
25272712 skb_frag_foreach_page(frag,
2528
- frag->page_offset + offset - start,
2713
+ skb_frag_off(frag) + offset - start,
25292714 copy, p, p_off, p_len, copied) {
25302715 vaddr = kmap_atomic(p);
2531
- csum2 = ops->update(vaddr + p_off, p_len, 0);
2716
+ csum2 = INDIRECT_CALL_1(ops->update,
2717
+ csum_partial_ext,
2718
+ vaddr + p_off, p_len, 0);
25322719 kunmap_atomic(vaddr);
2533
- csum = ops->combine(csum, csum2, pos, p_len);
2720
+ csum = INDIRECT_CALL_1(ops->combine,
2721
+ csum_block_add_ext, csum,
2722
+ csum2, pos, p_len);
25342723 pos += p_len;
25352724 }
25362725
....@@ -2553,7 +2742,8 @@
25532742 copy = len;
25542743 csum2 = __skb_checksum(frag_iter, offset - start,
25552744 copy, 0, ops);
2556
- csum = ops->combine(csum, csum2, pos, copy);
2745
+ csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
2746
+ csum, csum2, pos, copy);
25572747 if ((len -= copy) == 0)
25582748 return csum;
25592749 offset += copy;
....@@ -2582,19 +2772,20 @@
25822772 /* Both of above in one bottle. */
25832773
25842774 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
2585
- u8 *to, int len, __wsum csum)
2775
+ u8 *to, int len)
25862776 {
25872777 int start = skb_headlen(skb);
25882778 int i, copy = start - offset;
25892779 struct sk_buff *frag_iter;
25902780 int pos = 0;
2781
+ __wsum csum = 0;
25912782
25922783 /* Copy header. */
25932784 if (copy > 0) {
25942785 if (copy > len)
25952786 copy = len;
25962787 csum = csum_partial_copy_nocheck(skb->data + offset, to,
2597
- copy, csum);
2788
+ copy);
25982789 if ((len -= copy) == 0)
25992790 return csum;
26002791 offset += copy;
....@@ -2619,12 +2810,12 @@
26192810 copy = len;
26202811
26212812 skb_frag_foreach_page(frag,
2622
- frag->page_offset + offset - start,
2813
+ skb_frag_off(frag) + offset - start,
26232814 copy, p, p_off, p_len, copied) {
26242815 vaddr = kmap_atomic(p);
26252816 csum2 = csum_partial_copy_nocheck(vaddr + p_off,
26262817 to + copied,
2627
- p_len, 0);
2818
+ p_len);
26282819 kunmap_atomic(vaddr);
26292820 csum = csum_block_add(csum, csum2, pos);
26302821 pos += p_len;
....@@ -2650,7 +2841,7 @@
26502841 copy = len;
26512842 csum2 = skb_copy_and_csum_bits(frag_iter,
26522843 offset - start,
2653
- to, copy, 0);
2844
+ to, copy);
26542845 csum = csum_block_add(csum, csum2, pos);
26552846 if ((len -= copy) == 0)
26562847 return csum;
....@@ -2664,6 +2855,65 @@
26642855 return csum;
26652856 }
26662857 EXPORT_SYMBOL(skb_copy_and_csum_bits);
2858
+
2859
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
2860
+{
2861
+ __sum16 sum;
2862
+
2863
+ sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
2864
+ /* See comments in __skb_checksum_complete(). */
2865
+ if (likely(!sum)) {
2866
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
2867
+ !skb->csum_complete_sw)
2868
+ netdev_rx_csum_fault(skb->dev, skb);
2869
+ }
2870
+ if (!skb_shared(skb))
2871
+ skb->csum_valid = !sum;
2872
+ return sum;
2873
+}
2874
+EXPORT_SYMBOL(__skb_checksum_complete_head);
2875
+
2876
+/* This function assumes skb->csum already holds pseudo header's checksum,
2877
+ * which has been changed from the hardware checksum, for example, by
2878
+ * __skb_checksum_validate_complete(). And, the original skb->csum must
2879
+ * have been validated unsuccessfully for CHECKSUM_COMPLETE case.
2880
+ *
2881
+ * It returns non-zero if the recomputed checksum is still invalid, otherwise
2882
+ * zero. The new checksum is stored back into skb->csum unless the skb is
2883
+ * shared.
2884
+ */
2885
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
2886
+{
2887
+ __wsum csum;
2888
+ __sum16 sum;
2889
+
2890
+ csum = skb_checksum(skb, 0, skb->len, 0);
2891
+
2892
+ sum = csum_fold(csum_add(skb->csum, csum));
2893
+ /* This check is inverted, because we already knew the hardware
2894
+ * checksum is invalid before calling this function. So, if the
2895
+ * re-computed checksum is valid instead, then we have a mismatch
2896
+ * between the original skb->csum and skb_checksum(). This means either
2897
+ * the original hardware checksum is incorrect or we screw up skb->csum
2898
+ * when moving skb->data around.
2899
+ */
2900
+ if (likely(!sum)) {
2901
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
2902
+ !skb->csum_complete_sw)
2903
+ netdev_rx_csum_fault(skb->dev, skb);
2904
+ }
2905
+
2906
+ if (!skb_shared(skb)) {
2907
+ /* Save full packet checksum */
2908
+ skb->csum = csum;
2909
+ skb->ip_summed = CHECKSUM_COMPLETE;
2910
+ skb->csum_complete_sw = 1;
2911
+ skb->csum_valid = !sum;
2912
+ }
2913
+
2914
+ return sum;
2915
+}
2916
+EXPORT_SYMBOL(__skb_checksum_complete);
26672917
26682918 static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
26692919 {
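The comment block above spells out the contract of __skb_checksum_complete(): skb->csum already holds the pseudo-header sum and the hardware verdict could not be trusted. Protocol receive paths normally reach it through the skb_checksum_complete() wrapper; a hedged sketch:

/* Illustrative sketch: software-verify a datagram whose hardware checksum
 * could not be trusted.
 */
static int rx_verify(struct sk_buff *skb)
{
        if (skb_checksum_complete(skb))
                return -EINVAL;         /* still bad after recompute */
        return 0;
}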
....@@ -2779,11 +3029,15 @@
27793029 skb_zerocopy_clone(to, from, GFP_ATOMIC);
27803030
27813031 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
3032
+ int size;
3033
+
27823034 if (!len)
27833035 break;
27843036 skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
2785
- skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
2786
- len -= skb_shinfo(to)->frags[j].size;
3037
+ size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]),
3038
+ len);
3039
+ skb_frag_size_set(&skb_shinfo(to)->frags[j], size);
3040
+ len -= size;
27873041 skb_frag_ref(to, j);
27883042 j++;
27893043 }
....@@ -2810,7 +3064,7 @@
28103064 csum = 0;
28113065 if (csstart != skb->len)
28123066 csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
2813
- skb->len - csstart, 0);
3067
+ skb->len - csstart);
28143068
28153069 if (skb->ip_summed == CHECKSUM_PARTIAL) {
28163070 long csstuff = csstart + skb->csum_offset;
....@@ -2985,28 +3239,6 @@
29853239 }
29863240 EXPORT_SYMBOL(skb_append);
29873241
2988
-/**
2989
- * skb_insert - insert a buffer
2990
- * @old: buffer to insert before
2991
- * @newsk: buffer to insert
2992
- * @list: list to use
2993
- *
2994
- * Place a packet before a given packet in a list. The list locks are
2995
- * taken and this function is atomic with respect to other list locked
2996
- * calls.
2997
- *
2998
- * A buffer cannot be placed on two lists at the same time.
2999
- */
3000
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
3001
-{
3002
- unsigned long flags;
3003
-
3004
- spin_lock_irqsave(&list->lock, flags);
3005
- __skb_insert(newsk, old->prev, old, list);
3006
- spin_unlock_irqrestore(&list->lock, flags);
3007
-}
3008
-EXPORT_SYMBOL(skb_insert);
3009
-
30103242 static inline void skb_split_inside_header(struct sk_buff *skb,
30113243 struct sk_buff* skb1,
30123244 const u32 len, const int pos)
....@@ -3056,7 +3288,7 @@
30563288 * 2. Split is accurately. We make this.
30573289 */
30583290 skb_frag_ref(skb, i);
3059
- skb_shinfo(skb1)->frags[0].page_offset += len - pos;
3291
+ skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos);
30603292 skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
30613293 skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
30623294 skb_shinfo(skb)->nr_frags++;
....@@ -3095,19 +3327,7 @@
30953327 */
30963328 static int skb_prepare_for_shift(struct sk_buff *skb)
30973329 {
3098
- int ret = 0;
3099
-
3100
- if (skb_cloned(skb)) {
3101
- /* Save and restore truesize: pskb_expand_head() may reallocate
3102
- * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
3103
- * cannot change truesize at this point.
3104
- */
3105
- unsigned int save_truesize = skb->truesize;
3106
-
3107
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3108
- skb->truesize = save_truesize;
3109
- }
3110
- return ret;
3330
+ return skb_unclone_keeptruesize(skb, GFP_ATOMIC);
31113331 }
31123332
31133333 /**
....@@ -3131,7 +3351,7 @@
31313351 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
31323352 {
31333353 int from, to, merge, todo;
3134
- struct skb_frag_struct *fragfrom, *fragto;
3354
+ skb_frag_t *fragfrom, *fragto;
31353355
31363356 BUG_ON(shiftlen > skb->len);
31373357
....@@ -3150,7 +3370,7 @@
31503370 */
31513371 if (!to ||
31523372 !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
3153
- fragfrom->page_offset)) {
3373
+ skb_frag_off(fragfrom))) {
31543374 merge = -1;
31553375 } else {
31563376 merge = to - 1;
....@@ -3167,7 +3387,7 @@
31673387
31683388 skb_frag_size_add(fragto, shiftlen);
31693389 skb_frag_size_sub(fragfrom, shiftlen);
3170
- fragfrom->page_offset += shiftlen;
3390
+ skb_frag_off_add(fragfrom, shiftlen);
31713391
31723392 goto onlymerged;
31733393 }
....@@ -3198,11 +3418,11 @@
31983418
31993419 } else {
32003420 __skb_frag_ref(fragfrom);
3201
- fragto->page = fragfrom->page;
3202
- fragto->page_offset = fragfrom->page_offset;
3421
+ skb_frag_page_copy(fragto, fragfrom);
3422
+ skb_frag_off_copy(fragto, fragfrom);
32033423 skb_frag_size_set(fragto, todo);
32043424
3205
- fragfrom->page_offset += todo;
3425
+ skb_frag_off_add(fragfrom, todo);
32063426 skb_frag_size_sub(fragfrom, todo);
32073427 todo = 0;
32083428
....@@ -3327,7 +3547,7 @@
33273547 if (!st->frag_data)
33283548 st->frag_data = kmap_atomic(skb_frag_page(frag));
33293549
3330
- *data = (u8 *) st->frag_data + frag->page_offset +
3550
+ *data = (u8 *) st->frag_data + skb_frag_off(frag) +
33313551 (abs_offset - st->stepped_offset);
33323552
33333553 return block_limit - abs_offset;
....@@ -3417,64 +3637,6 @@
34173637 }
34183638 EXPORT_SYMBOL(skb_find_text);
34193639
3420
-/**
3421
- * skb_append_datato_frags - append the user data to a skb
3422
- * @sk: sock structure
3423
- * @skb: skb structure to be appended with user data.
3424
- * @getfrag: call back function to be used for getting the user data
3425
- * @from: pointer to user message iov
3426
- * @length: length of the iov message
3427
- *
3428
- * Description: This procedure append the user data in the fragment part
3429
- * of the skb if any page alloc fails user this procedure returns -ENOMEM
3430
- */
3431
-int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
3432
- int (*getfrag)(void *from, char *to, int offset,
3433
- int len, int odd, struct sk_buff *skb),
3434
- void *from, int length)
3435
-{
3436
- int frg_cnt = skb_shinfo(skb)->nr_frags;
3437
- int copy;
3438
- int offset = 0;
3439
- int ret;
3440
- struct page_frag *pfrag = &current->task_frag;
3441
-
3442
- do {
3443
- /* Return error if we don't have space for new frag */
3444
- if (frg_cnt >= MAX_SKB_FRAGS)
3445
- return -EMSGSIZE;
3446
-
3447
- if (!sk_page_frag_refill(sk, pfrag))
3448
- return -ENOMEM;
3449
-
3450
- /* copy the user data to page */
3451
- copy = min_t(int, length, pfrag->size - pfrag->offset);
3452
-
3453
- ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
3454
- offset, copy, 0, skb);
3455
- if (ret < 0)
3456
- return -EFAULT;
3457
-
3458
- /* copy was successful so update the size parameters */
3459
- skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
3460
- copy);
3461
- frg_cnt++;
3462
- pfrag->offset += copy;
3463
- get_page(pfrag->page);
3464
-
3465
- skb->truesize += copy;
3466
- refcount_add(copy, &sk->sk_wmem_alloc);
3467
- skb->len += copy;
3468
- skb->data_len += copy;
3469
- offset += copy;
3470
- length -= copy;
3471
-
3472
- } while (length > 0);
3473
-
3474
- return 0;
3475
-}
3476
-EXPORT_SYMBOL(skb_append_datato_frags);
3477
-
34783640 int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
34793641 int offset, size_t size)
34803642 {
....@@ -3521,11 +3683,122 @@
35213683 struct page *page;
35223684
35233685 page = virt_to_head_page(frag_skb->head);
3524
- head_frag.page.p = page;
3525
- head_frag.page_offset = frag_skb->data -
3526
- (unsigned char *)page_address(page);
3527
- head_frag.size = skb_headlen(frag_skb);
3686
+ __skb_frag_set_page(&head_frag, page);
3687
+ skb_frag_off_set(&head_frag, frag_skb->data -
3688
+ (unsigned char *)page_address(page));
3689
+ skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
35283690 return head_frag;
3691
+}
3692
+
3693
+struct sk_buff *skb_segment_list(struct sk_buff *skb,
3694
+ netdev_features_t features,
3695
+ unsigned int offset)
3696
+{
3697
+ struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
3698
+ unsigned int tnl_hlen = skb_tnl_header_len(skb);
3699
+ unsigned int delta_truesize = 0;
3700
+ unsigned int delta_len = 0;
3701
+ struct sk_buff *tail = NULL;
3702
+ struct sk_buff *nskb, *tmp;
3703
+ int len_diff, err;
3704
+
3705
+ skb_push(skb, -skb_network_offset(skb) + offset);
3706
+
3707
+ skb_shinfo(skb)->frag_list = NULL;
3708
+
3709
+ do {
3710
+ nskb = list_skb;
3711
+ list_skb = list_skb->next;
3712
+
3713
+ err = 0;
3714
+ delta_truesize += nskb->truesize;
3715
+ if (skb_shared(nskb)) {
3716
+ tmp = skb_clone(nskb, GFP_ATOMIC);
3717
+ if (tmp) {
3718
+ consume_skb(nskb);
3719
+ nskb = tmp;
3720
+ err = skb_unclone(nskb, GFP_ATOMIC);
3721
+ } else {
3722
+ err = -ENOMEM;
3723
+ }
3724
+ }
3725
+
3726
+ if (!tail)
3727
+ skb->next = nskb;
3728
+ else
3729
+ tail->next = nskb;
3730
+
3731
+ if (unlikely(err)) {
3732
+ nskb->next = list_skb;
3733
+ goto err_linearize;
3734
+ }
3735
+
3736
+ tail = nskb;
3737
+
3738
+ delta_len += nskb->len;
3739
+
3740
+ skb_push(nskb, -skb_network_offset(nskb) + offset);
3741
+
3742
+ skb_release_head_state(nskb);
3743
+ len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
3744
+ __copy_skb_header(nskb, skb);
3745
+
3746
+ skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
3747
+ nskb->transport_header += len_diff;
3748
+ skb_copy_from_linear_data_offset(skb, -tnl_hlen,
3749
+ nskb->data - tnl_hlen,
3750
+ offset + tnl_hlen);
3751
+
3752
+ if (skb_needs_linearize(nskb, features) &&
3753
+ __skb_linearize(nskb))
3754
+ goto err_linearize;
3755
+
3756
+ } while (list_skb);
3757
+
3758
+ skb->truesize = skb->truesize - delta_truesize;
3759
+ skb->data_len = skb->data_len - delta_len;
3760
+ skb->len = skb->len - delta_len;
3761
+
3762
+ skb_gso_reset(skb);
3763
+
3764
+ skb->prev = tail;
3765
+
3766
+ if (skb_needs_linearize(skb, features) &&
3767
+ __skb_linearize(skb))
3768
+ goto err_linearize;
3769
+
3770
+ skb_get(skb);
3771
+
3772
+ return skb;
3773
+
3774
+err_linearize:
3775
+ kfree_skb_list(skb->next);
3776
+ skb->next = NULL;
3777
+ return ERR_PTR(-ENOMEM);
3778
+}
3779
+EXPORT_SYMBOL_GPL(skb_segment_list);
3780
+
3781
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
3782
+{
3783
+ if (unlikely(p->len + skb->len >= 65536))
3784
+ return -E2BIG;
3785
+
3786
+ if (NAPI_GRO_CB(p)->last == p)
3787
+ skb_shinfo(p)->frag_list = skb;
3788
+ else
3789
+ NAPI_GRO_CB(p)->last->next = skb;
3790
+
3791
+ skb_pull(skb, skb_gro_offset(skb));
3792
+
3793
+ NAPI_GRO_CB(p)->last = skb;
3794
+ NAPI_GRO_CB(p)->count++;
3795
+ p->data_len += skb->len;
3796
+ p->truesize += skb->truesize;
3797
+ p->len += skb->len;
3798
+
3799
+ NAPI_GRO_CB(skb)->same_flow = 1;
3800
+
3801
+ return 0;
35293802 }
35303803
35313804 /**
....@@ -3558,29 +3831,30 @@
35583831 int err = -ENOMEM;
35593832 int i = 0;
35603833 int pos;
3561
- int dummy;
35623834
3563
- if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
3564
- (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
3565
- /* gso_size is untrusted, and we have a frag_list with a linear
3566
- * non head_frag head.
3567
- *
3568
- * (we assume checking the first list_skb member suffices;
3569
- * i.e if either of the list_skb members have non head_frag
3570
- * head, then the first one has too).
3571
- *
3572
- * If head_skb's headlen does not fit requested gso_size, it
3573
- * means that the frag_list members do NOT terminate on exact
3574
- * gso_size boundaries. Hence we cannot perform skb_frag_t page
3575
- * sharing. Therefore we must fallback to copying the frag_list
3576
- * skbs; we do so by disabling SG.
3577
- */
3578
- if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
3579
- features &= ~NETIF_F_SG;
3835
+ if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
3836
+ mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
3837
+ struct sk_buff *check_skb;
3838
+
3839
+ for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
3840
+ if (skb_headlen(check_skb) && !check_skb->head_frag) {
3841
+ /* gso_size is untrusted, and we have a frag_list with
3842
+ * a linear non head_frag item.
3843
+ *
3844
+ * If head_skb's headlen does not fit requested gso_size,
3845
+ * it means that the frag_list members do NOT terminate
3846
+ * on exact gso_size boundaries. Hence we cannot perform
3847
+ * skb_frag_t page sharing. Therefore we must fallback to
3848
+ * copying the frag_list skbs; we do so by disabling SG.
3849
+ */
3850
+ features &= ~NETIF_F_SG;
3851
+ break;
3852
+ }
3853
+ }
35803854 }
35813855
35823856 __skb_push(head_skb, doffset);
3583
- proto = skb_network_protocol(head_skb, &dummy);
3857
+ proto = skb_network_protocol(head_skb, NULL);
35843858 if (unlikely(!proto))
35853859 return ERR_PTR(-EINVAL);
35863860
....@@ -3726,14 +4000,20 @@
37264000 goto perform_csum_check;
37274001
37284002 if (!sg) {
3729
- if (!nskb->remcsum_offload)
3730
- nskb->ip_summed = CHECKSUM_NONE;
3731
- SKB_GSO_CB(nskb)->csum =
3732
- skb_copy_and_csum_bits(head_skb, offset,
3733
- skb_put(nskb, len),
3734
- len, 0);
3735
- SKB_GSO_CB(nskb)->csum_start =
3736
- skb_headroom(nskb) + doffset;
4003
+ if (!csum) {
4004
+ if (!nskb->remcsum_offload)
4005
+ nskb->ip_summed = CHECKSUM_NONE;
4006
+ SKB_GSO_CB(nskb)->csum =
4007
+ skb_copy_and_csum_bits(head_skb, offset,
4008
+ skb_put(nskb,
4009
+ len),
4010
+ len);
4011
+ SKB_GSO_CB(nskb)->csum_start =
4012
+ skb_headroom(nskb) + doffset;
4013
+ } else {
4014
+ if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len))
4015
+ goto err;
4016
+ }
37374017 continue;
37384018 }
37394019
....@@ -3786,7 +4066,7 @@
37864066 size = skb_frag_size(nskb_frag);
37874067
37884068 if (pos < offset) {
3789
- nskb_frag->page_offset += offset - pos;
4069
+ skb_frag_off_add(nskb_frag, offset - pos);
37904070 skb_frag_size_sub(nskb_frag, offset - pos);
37914071 }
37924072
....@@ -3907,7 +4187,7 @@
39074187 *--frag = *--frag2;
39084188 } while (--i);
39094189
3910
- frag->page_offset += offset;
4190
+ skb_frag_off_add(frag, offset);
39114191 skb_frag_size_sub(frag, offset);
39124192
39134193 /* all fragments truesize : remove (head size + sk_buff) */
....@@ -3936,8 +4216,8 @@
39364216
39374217 pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
39384218
3939
- frag->page.p = page;
3940
- frag->page_offset = first_offset;
4219
+ __skb_frag_set_page(frag, page);
4220
+ skb_frag_off_set(frag, first_offset);
39414221 skb_frag_size_set(frag, first_size);
39424222
39434223 memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
....@@ -3953,7 +4233,7 @@
39534233 if (offset > headlen) {
39544234 unsigned int eat = offset - headlen;
39554235
3956
- skbinfo->frags[0].page_offset += eat;
4236
+ skb_frag_off_add(&skbinfo->frags[0], eat);
39574237 skb_frag_size_sub(&skbinfo->frags[0], eat);
39584238 skb->data_len -= eat;
39594239 skb->len -= eat;
....@@ -3983,7 +4263,64 @@
39834263 NAPI_GRO_CB(skb)->same_flow = 1;
39844264 return 0;
39854265 }
3986
-EXPORT_SYMBOL_GPL(skb_gro_receive);
4266
+
4267
+#ifdef CONFIG_SKB_EXTENSIONS
4268
+#define SKB_EXT_ALIGN_VALUE 8
4269
+#define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)
4270
+
4271
+static const u8 skb_ext_type_len[] = {
4272
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
4273
+ [SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
4274
+#endif
4275
+#ifdef CONFIG_XFRM
4276
+ [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
4277
+#endif
4278
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4279
+ [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
4280
+#endif
4281
+#if IS_ENABLED(CONFIG_MPTCP)
4282
+ [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
4283
+#endif
4284
+#if IS_ENABLED(CONFIG_KCOV)
4285
+ [SKB_EXT_KCOV_HANDLE] = SKB_EXT_CHUNKSIZEOF(u64),
4286
+#endif
4287
+};
4288
+
4289
+static __always_inline unsigned int skb_ext_total_length(void)
4290
+{
4291
+ return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
4292
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
4293
+ skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
4294
+#endif
4295
+#ifdef CONFIG_XFRM
4296
+ skb_ext_type_len[SKB_EXT_SEC_PATH] +
4297
+#endif
4298
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
4299
+ skb_ext_type_len[TC_SKB_EXT] +
4300
+#endif
4301
+#if IS_ENABLED(CONFIG_MPTCP)
4302
+ skb_ext_type_len[SKB_EXT_MPTCP] +
4303
+#endif
4304
+#if IS_ENABLED(CONFIG_KCOV)
4305
+ skb_ext_type_len[SKB_EXT_KCOV_HANDLE] +
4306
+#endif
4307
+ 0;
4308
+}
4309
+
4310
+static void skb_extensions_init(void)
4311
+{
4312
+ BUILD_BUG_ON(SKB_EXT_NUM >= 8);
4313
+ BUILD_BUG_ON(skb_ext_total_length() > 255);
4314
+
4315
+ skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
4316
+ SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
4317
+ 0,
4318
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
4319
+ NULL);
4320
+}
4321
+#else
4322
+static void skb_extensions_init(void) {}
4323
+#endif
39874324
39884325 void __init skb_init(void)
39894326 {
....@@ -3999,6 +4336,7 @@
39994336 0,
40004337 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
40014338 NULL);
4339
+ skb_extensions_init();
40024340 }
40034341
40044342 static int
....@@ -4037,7 +4375,7 @@
40374375 if (copy > len)
40384376 copy = len;
40394377 sg_set_page(&sg[elt], skb_frag_page(frag), copy,
4040
- frag->page_offset+offset-start);
4378
+ skb_frag_off(frag) + offset - start);
40414379 elt++;
40424380 if (!(len -= copy))
40434381 return elt;
....@@ -4154,7 +4492,7 @@
41544492 * at the moment even if they are anonymous).
41554493 */
41564494 if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
4157
- __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
4495
+ !__pskb_pull_tail(skb, __skb_pagelen(skb)))
41584496 return -ENOMEM;
41594497
41604498 /* Easy case. Most of packets will go this way. */
....@@ -4258,7 +4596,7 @@
42584596 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
42594597 {
42604598 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
4261
- (unsigned int)sk->sk_rcvbuf)
4599
+ (unsigned int)READ_ONCE(sk->sk_rcvbuf))
42624600 return -ENOMEM;
42634601
42644602 skb_orphan(skb);
....@@ -4377,7 +4715,7 @@
43774715 {
43784716 bool ret;
43794717
4380
- if (likely(sysctl_tstamp_allow_data || tsonly))
4718
+ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
43814719 return true;
43824720
43834721 read_lock_bh(&sk->sk_callback_lock);
....@@ -4433,7 +4771,7 @@
44334771 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
44344772 sk->sk_protocol == IPPROTO_TCP &&
44354773 sk->sk_type == SOCK_STREAM) {
4436
- skb = tcp_get_timestamping_opt_stats(sk);
4774
+ skb = tcp_get_timestamping_opt_stats(sk, orig_skb);
44374775 opt_stats = true;
44384776 } else
44394777 #endif
....@@ -4550,9 +4888,9 @@
45504888 typeof(IPPROTO_IP) proto,
45514889 unsigned int off)
45524890 {
4553
- switch (proto) {
4554
- int err;
4891
+ int err;
45554892
4893
+ switch (proto) {
45564894 case IPPROTO_TCP:
45574895 err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
45584896 off + MAX_TCP_HDR_LEN);
....@@ -4595,7 +4933,7 @@
45954933 if (err < 0)
45964934 goto out;
45974935
4598
- if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
4936
+ if (ip_is_fragment(ip_hdr(skb)))
45994937 fragment = true;
46004938
46014939 off = ip_hdrlen(skb);
....@@ -4962,13 +5300,13 @@
49625300 skb->skb_iif = 0;
49635301 skb->ignore_df = 0;
49645302 skb_dst_drop(skb);
4965
- secpath_reset(skb);
4966
- nf_reset(skb);
5303
+ skb_ext_reset(skb);
5304
+ nf_reset_ct(skb);
49675305 nf_reset_trace(skb);
49685306
49695307 #ifdef CONFIG_NET_SWITCHDEV
49705308 skb->offload_fwd_mark = 0;
4971
- skb->offload_mr_fwd_mark = 0;
5309
+ skb->offload_l3_fwd_mark = 0;
49725310 #endif
49735311
49745312 if (!xnet)
....@@ -5060,6 +5398,8 @@
50605398 * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
50615399 *
50625400 * This is a helper to do that correctly considering GSO_BY_FRAGS.
5401
+ *
5402
+ * @skb: GSO skb
50635403 *
50645404 * @seg_len: The segmented length (from skb_gso_*_seglen). In the
50655405 * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
....@@ -5246,7 +5586,7 @@
52465586 int err;
52475587
52485588 if (likely(skb_vlan_tag_present(skb))) {
5249
- skb->vlan_tci = 0;
5589
+ __vlan_hwaccel_clear_tag(skb);
52505590 } else {
52515591 if (unlikely(!eth_type_vlan(skb->protocol)))
52525592 return 0;
....@@ -5298,6 +5638,252 @@
52985638 return 0;
52995639 }
53005640 EXPORT_SYMBOL(skb_vlan_push);
5641
+
5642
+/**
5643
+ * skb_eth_pop() - Drop the Ethernet header at the head of a packet
5644
+ *
5645
+ * @skb: Socket buffer to modify
5646
+ *
5647
+ * Drop the Ethernet header of @skb.
5648
+ *
5649
+ * Expects that skb->data points to the mac header and that no VLAN tags are
5650
+ * present.
5651
+ *
5652
+ * Returns 0 on success, -errno otherwise.
5653
+ */
5654
+int skb_eth_pop(struct sk_buff *skb)
5655
+{
5656
+ if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) ||
5657
+ skb_network_offset(skb) < ETH_HLEN)
5658
+ return -EPROTO;
5659
+
5660
+ skb_pull_rcsum(skb, ETH_HLEN);
5661
+ skb_reset_mac_header(skb);
5662
+ skb_reset_mac_len(skb);
5663
+
5664
+ return 0;
5665
+}
5666
+EXPORT_SYMBOL(skb_eth_pop);
5667
+
5668
+/**
5669
+ * skb_eth_push() - Add a new Ethernet header at the head of a packet
5670
+ *
5671
+ * @skb: Socket buffer to modify
5672
+ * @dst: Destination MAC address of the new header
5673
+ * @src: Source MAC address of the new header
5674
+ *
5675
+ * Prepend @skb with a new Ethernet header.
5676
+ *
5677
+ * Expects that skb->data points to the mac header, which must be empty.
5678
+ *
5679
+ * Returns 0 on success, -errno otherwise.
5680
+ */
5681
+int skb_eth_push(struct sk_buff *skb, const unsigned char *dst,
5682
+ const unsigned char *src)
5683
+{
5684
+ struct ethhdr *eth;
5685
+ int err;
5686
+
5687
+ if (skb_network_offset(skb) || skb_vlan_tag_present(skb))
5688
+ return -EPROTO;
5689
+
5690
+ err = skb_cow_head(skb, sizeof(*eth));
5691
+ if (err < 0)
5692
+ return err;
5693
+
5694
+ skb_push(skb, sizeof(*eth));
5695
+ skb_reset_mac_header(skb);
5696
+ skb_reset_mac_len(skb);
5697
+
5698
+ eth = eth_hdr(skb);
5699
+ ether_addr_copy(eth->h_dest, dst);
5700
+ ether_addr_copy(eth->h_source, src);
5701
+ eth->h_proto = skb->protocol;
5702
+
5703
+ skb_postpush_rcsum(skb, eth, sizeof(*eth));
5704
+
5705
+ return 0;
5706
+}
5707
+EXPORT_SYMBOL(skb_eth_push);
5708
+
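Together, skb_eth_pop() and skb_eth_push() let L3 tunnelling or forwarding code strip and re-add the Ethernet header around a routed packet. A minimal sketch; rewrite_eth() and its MAC-address source are assumptions:

/* Illustrative sketch: replace the Ethernet header of a forwarded frame. */
static int rewrite_eth(struct sk_buff *skb, const unsigned char *dst,
                       const unsigned char *src)
{
        int err = skb_eth_pop(skb);             /* drop the old header */

        if (err)
                return err;
        return skb_eth_push(skb, dst, src);     /* prepend the new one */
}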
5709
+/* Update the ethertype of hdr and the skb csum value if required. */
5710
+static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
5711
+ __be16 ethertype)
5712
+{
5713
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
5714
+ __be16 diff[] = { ~hdr->h_proto, ethertype };
5715
+
5716
+ skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
5717
+ }
5718
+
5719
+ hdr->h_proto = ethertype;
5720
+}
5721
+
5722
+/**
5723
+ * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
5724
+ * the packet
5725
+ *
5726
+ * @skb: buffer
5727
+ * @mpls_lse: MPLS label stack entry to push
5728
+ * @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
5729
+ * @mac_len: length of the MAC header
5730
+ * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
5731
+ * ethernet
5732
+ *
5733
+ * Expects skb->data at mac header.
5734
+ *
5735
+ * Returns 0 on success, -errno otherwise.
5736
+ */
5737
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
5738
+ int mac_len, bool ethernet)
5739
+{
5740
+ struct mpls_shim_hdr *lse;
5741
+ int err;
5742
+
5743
+ if (unlikely(!eth_p_mpls(mpls_proto)))
5744
+ return -EINVAL;
5745
+
5746
+ /* Networking stack does not allow simultaneous Tunnel and MPLS GSO. */
5747
+ if (skb->encapsulation)
5748
+ return -EINVAL;
5749
+
5750
+ err = skb_cow_head(skb, MPLS_HLEN);
5751
+ if (unlikely(err))
5752
+ return err;
5753
+
5754
+ if (!skb->inner_protocol) {
5755
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
5756
+ skb_set_inner_protocol(skb, skb->protocol);
5757
+ }
5758
+
5759
+ skb_push(skb, MPLS_HLEN);
5760
+ memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
5761
+ mac_len);
5762
+ skb_reset_mac_header(skb);
5763
+ skb_set_network_header(skb, mac_len);
5764
+ skb_reset_mac_len(skb);
5765
+
5766
+ lse = mpls_hdr(skb);
5767
+ lse->label_stack_entry = mpls_lse;
5768
+ skb_postpush_rcsum(skb, lse, MPLS_HLEN);
5769
+
5770
+ if (ethernet && mac_len >= ETH_HLEN)
5771
+ skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
5772
+ skb->protocol = mpls_proto;
5773
+
5774
+ return 0;
5775
+}
5776
+EXPORT_SYMBOL_GPL(skb_mpls_push);
5777
+
5778
+/**
5779
+ * skb_mpls_pop() - pop the outermost MPLS header
5780
+ *
5781
+ * @skb: buffer
5782
+ * @next_proto: ethertype of header after popped MPLS header
5783
+ * @mac_len: length of the MAC header
5784
+ * @ethernet: flag to indicate if the packet is ethernet
5785
+ *
5786
+ * Expects skb->data at mac header.
5787
+ *
5788
+ * Returns 0 on success, -errno otherwise.
5789
+ */
5790
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
5791
+ bool ethernet)
5792
+{
5793
+ int err;
5794
+
5795
+ if (unlikely(!eth_p_mpls(skb->protocol)))
5796
+ return 0;
5797
+
5798
+ err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
5799
+ if (unlikely(err))
5800
+ return err;
5801
+
5802
+ skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
5803
+ memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
5804
+ mac_len);
5805
+
5806
+ __skb_pull(skb, MPLS_HLEN);
5807
+ skb_reset_mac_header(skb);
5808
+ skb_set_network_header(skb, mac_len);
5809
+
5810
+ if (ethernet && mac_len >= ETH_HLEN) {
5811
+ struct ethhdr *hdr;
5812
+
5813
+ /* use mpls_hdr() to get ethertype to account for VLANs. */
5814
+ hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
5815
+ skb_mod_eth_type(skb, hdr, next_proto);
5816
+ }
5817
+ skb->protocol = next_proto;
5818
+
5819
+ return 0;
5820
+}
5821
+EXPORT_SYMBOL_GPL(skb_mpls_pop);
5822
+
5823
+/**
5824
+ * skb_mpls_update_lse() - modify outermost MPLS header and update csum
5825
+ *
5826
+ * @skb: buffer
5827
+ * @mpls_lse: new MPLS label stack entry to update to
5828
+ *
5829
+ * Expects skb->data at mac header.
5830
+ *
5831
+ * Returns 0 on success, -errno otherwise.
5832
+ */
5833
+int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse)
5834
+{
5835
+ int err;
5836
+
5837
+ if (unlikely(!eth_p_mpls(skb->protocol)))
5838
+ return -EINVAL;
5839
+
5840
+ err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
5841
+ if (unlikely(err))
5842
+ return err;
5843
+
5844
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
5845
+ __be32 diff[] = { ~mpls_hdr(skb)->label_stack_entry, mpls_lse };
5846
+
5847
+ skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
5848
+ }
5849
+
5850
+ mpls_hdr(skb)->label_stack_entry = mpls_lse;
5851
+
5852
+ return 0;
5853
+}
5854
+EXPORT_SYMBOL_GPL(skb_mpls_update_lse);
5855
+
5856
+/**
5857
+ * skb_mpls_dec_ttl() - decrement the TTL of the outermost MPLS header
5858
+ *
5859
+ * @skb: buffer
5860
+ *
5861
+ * Expects skb->data at mac header.
5862
+ *
5863
+ * Returns 0 on success, -errno otherwise.
5864
+ */
5865
+int skb_mpls_dec_ttl(struct sk_buff *skb)
5866
+{
5867
+ u32 lse;
5868
+ u8 ttl;
5869
+
5870
+ if (unlikely(!eth_p_mpls(skb->protocol)))
5871
+ return -EINVAL;
5872
+
5873
+ if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
5874
+ return -ENOMEM;
5875
+
5876
+ lse = be32_to_cpu(mpls_hdr(skb)->label_stack_entry);
5877
+ ttl = (lse & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
5878
+ if (!--ttl)
5879
+ return -EINVAL;
5880
+
5881
+ lse &= ~MPLS_LS_TTL_MASK;
5882
+ lse |= ttl << MPLS_LS_TTL_SHIFT;
5883
+
5884
+ return skb_mpls_update_lse(skb, cpu_to_be32(lse));
5885
+}
5886
+EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
53015887
53025888 /**
53035889 * alloc_skb_with_frags - allocate skb with page frags
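The MPLS helpers above are the primitives that act_mpls/openvswitch-style datapaths build on: push a label stack entry in front of the network header, then decrement its TTL per hop with skb_mpls_dec_ttl(). A hedged sketch; the composing function and the fixed TTL/bottom-of-stack choices are illustrative, though the MPLS_LS_* shifts and ETH_P_MPLS_UC are standard uapi definitions:

#include <linux/if_ether.h>
#include <linux/mpls.h>
#include <net/mpls.h>

/* Illustrative sketch: push a unicast MPLS header carrying 'label',
 * TTL 64, bottom-of-stack set.
 */
static int push_mpls_label(struct sk_buff *skb, u32 label)
{
        u32 lse = (label << MPLS_LS_LABEL_SHIFT) |
                  (1 << MPLS_LS_S_SHIFT) |
                  (64 << MPLS_LS_TTL_SHIFT);

        return skb_mpls_push(skb, cpu_to_be32(lse), htons(ETH_P_MPLS_UC),
                             skb->mac_len, true /* ethernet */);
}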
....@@ -5421,11 +6007,7 @@
54216007 skb->head = data;
54226008 skb->data = data;
54236009 skb->head_frag = 0;
5424
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
5425
- skb->end = size;
5426
-#else
5427
- skb->end = skb->head + size;
5428
-#endif
6010
+ skb_set_end_offset(skb, size);
54296011 skb_set_tail_pointer(skb, skb_headlen(skb));
54306012 skb_headers_offset_update(skb, 0);
54316013 skb->cloned = 0;
....@@ -5517,8 +6099,7 @@
55176099 size = SKB_WITH_OVERHEAD(ksize(data));
55186100
55196101 memcpy((struct skb_shared_info *)(data + size),
5520
- skb_shinfo(skb), offsetof(struct skb_shared_info,
5521
- frags[skb_shinfo(skb)->nr_frags]));
6102
+ skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
55226103 if (skb_orphan_frags(skb, gfp_mask)) {
55236104 kfree(data);
55246105 return -ENOMEM;
....@@ -5539,7 +6120,7 @@
55396120 * where splitting is expensive.
55406121 * 2. Split is accurately. We make this.
55416122 */
5542
- shinfo->frags[0].page_offset += off - pos;
6123
+ skb_frag_off_add(&shinfo->frags[0], off - pos);
55436124 skb_frag_size_sub(&shinfo->frags[0], off - pos);
55446125 }
55456126 skb_frag_ref(skb, i);
....@@ -5564,11 +6145,7 @@
55646145 skb->head = data;
55656146 skb->head_frag = 0;
55666147 skb->data = data;
5567
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
5568
- skb->end = size;
5569
-#else
5570
- skb->end = skb->head + size;
5571
-#endif
6148
+ skb_set_end_offset(skb, size);
55726149 skb_reset_tail_pointer(skb);
55736150 skb_headers_offset_update(skb, 0);
55746151 skb->cloned = 0;
....@@ -5642,4 +6219,181 @@
56426219 */
56436220 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
56446221 }
5645
-EXPORT_SYMBOL_GPL(skb_condense);
6222
+
6223
+#ifdef CONFIG_SKB_EXTENSIONS
6224
+static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
6225
+{
6226
+ return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
6227
+}
6228
+
6229
+/**
6230
+ * __skb_ext_alloc - allocate a new skb extensions storage
6231
+ *
6232
+ * @flags: See kmalloc().
6233
+ *
6234
+ * Returns the newly allocated pointer. The pointer can later attached to a
6235
+ * skb via __skb_ext_set().
6236
+ * Note: caller must handle the skb_ext as an opaque data.
6237
+ */
6238
+struct skb_ext *__skb_ext_alloc(gfp_t flags)
6239
+{
6240
+ struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags);
6241
+
6242
+ if (new) {
6243
+ memset(new->offset, 0, sizeof(new->offset));
6244
+ refcount_set(&new->refcnt, 1);
6245
+ }
6246
+
6247
+ return new;
6248
+}
6249
+
6250
+static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
6251
+ unsigned int old_active)
6252
+{
6253
+ struct skb_ext *new;
6254
+
6255
+ if (refcount_read(&old->refcnt) == 1)
6256
+ return old;
6257
+
6258
+ new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
6259
+ if (!new)
6260
+ return NULL;
6261
+
6262
+ memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
6263
+ refcount_set(&new->refcnt, 1);
6264
+
6265
+#ifdef CONFIG_XFRM
6266
+ if (old_active & (1 << SKB_EXT_SEC_PATH)) {
6267
+ struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH);
6268
+ unsigned int i;
6269
+
6270
+ for (i = 0; i < sp->len; i++)
6271
+ xfrm_state_hold(sp->xvec[i]);
6272
+ }
6273
+#endif
6274
+ __skb_ext_put(old);
6275
+ return new;
6276
+}
6277
+
6278
+/**
6279
+ * __skb_ext_set - attach the specified extension storage to this skb
6280
+ * @skb: buffer
6281
+ * @id: extension id
6282
+ * @ext: extension storage previously allocated via __skb_ext_alloc()
6283
+ *
6284
+ * Existing extensions, if any, are cleared.
6285
+ *
6286
+ * Returns the pointer to the extension.
6287
+ */
6288
+void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
6289
+ struct skb_ext *ext)
6290
+{
6291
+ unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);
6292
+
6293
+ skb_ext_put(skb);
6294
+ newlen = newoff + skb_ext_type_len[id];
6295
+ ext->chunks = newlen;
6296
+ ext->offset[id] = newoff;
6297
+ skb->extensions = ext;
6298
+ skb->active_extensions = 1 << id;
6299
+ return skb_ext_get_ptr(ext, id);
6300
+}
6301
+
6302
+/**
6303
+ * skb_ext_add - allocate space for given extension, COW if needed
6304
+ * @skb: buffer
6305
+ * @id: extension to allocate space for
6306
+ *
6307
+ * Allocates enough space for the given extension.
6308
+ * If the extension is already present, a pointer to that extension
6309
+ * is returned.
6310
+ *
6311
+ * If the skb was cloned, COW applies and the returned memory can be
6312
+ * modified without changing the extension space of clones buffers.
6313
+ *
6314
+ * Returns pointer to the extension or NULL on allocation failure.
6315
+ */
6316
+void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
6317
+{
6318
+ struct skb_ext *new, *old = NULL;
6319
+ unsigned int newlen, newoff;
6320
+
6321
+ if (skb->active_extensions) {
6322
+ old = skb->extensions;
6323
+
6324
+ new = skb_ext_maybe_cow(old, skb->active_extensions);
6325
+ if (!new)
6326
+ return NULL;
6327
+
6328
+ if (__skb_ext_exist(new, id))
6329
+ goto set_active;
6330
+
6331
+ newoff = new->chunks;
6332
+ } else {
6333
+ newoff = SKB_EXT_CHUNKSIZEOF(*new);
6334
+
6335
+ new = __skb_ext_alloc(GFP_ATOMIC);
6336
+ if (!new)
6337
+ return NULL;
6338
+ }
6339
+
6340
+ newlen = newoff + skb_ext_type_len[id];
6341
+ new->chunks = newlen;
6342
+ new->offset[id] = newoff;
6343
+set_active:
6344
+ skb->extensions = new;
6345
+ skb->active_extensions |= 1 << id;
6346
+ return skb_ext_get_ptr(new, id);
6347
+}
6348
+EXPORT_SYMBOL(skb_ext_add);
6349
+
6350
+#ifdef CONFIG_XFRM
6351
+static void skb_ext_put_sp(struct sec_path *sp)
6352
+{
6353
+ unsigned int i;
6354
+
6355
+ for (i = 0; i < sp->len; i++)
6356
+ xfrm_state_put(sp->xvec[i]);
6357
+}
6358
+#endif
6359
+
6360
+void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
6361
+{
6362
+ struct skb_ext *ext = skb->extensions;
6363
+
6364
+ skb->active_extensions &= ~(1 << id);
6365
+ if (skb->active_extensions == 0) {
6366
+ skb->extensions = NULL;
6367
+ __skb_ext_put(ext);
6368
+#ifdef CONFIG_XFRM
6369
+ } else if (id == SKB_EXT_SEC_PATH &&
6370
+ refcount_read(&ext->refcnt) == 1) {
6371
+ struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH);
6372
+
6373
+ skb_ext_put_sp(sp);
6374
+ sp->len = 0;
6375
+#endif
6376
+ }
6377
+}
6378
+EXPORT_SYMBOL(__skb_ext_del);
6379
+
6380
+void __skb_ext_put(struct skb_ext *ext)
6381
+{
6382
+ /* If this is last clone, nothing can increment
6383
+ * it after check passes. Avoids one atomic op.
6384
+ */
6385
+ if (refcount_read(&ext->refcnt) == 1)
6386
+ goto free_now;
6387
+
6388
+ if (!refcount_dec_and_test(&ext->refcnt))
6389
+ return;
6390
+free_now:
6391
+#ifdef CONFIG_XFRM
6392
+ if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
6393
+ skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
6394
+#endif
6395
+
6396
+ kmem_cache_free(skbuff_ext_cache, ext);
6397
+}
6398
+EXPORT_SYMBOL(__skb_ext_put);
6399
+#endif /* CONFIG_SKB_EXTENSIONS */
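
Taken as a whole, the extension API replaces the old dedicated sk_buff pointers (secpath, nf_bridge) with a single refcounted, copy-on-write blob. A minimal usage sketch, assuming CONFIG_NET_TC_SKB_EXT so that TC_SKB_EXT and struct tc_skb_ext (with its 'chain' member) are available:

#include <linux/skbuff.h>

/* Illustrative sketch: record a TC chain index on the skb, then drop it. */
static int record_chain(struct sk_buff *skb, u32 chain)
{
        struct tc_skb_ext *ext = skb_ext_add(skb, TC_SKB_EXT);

        if (!ext)
                return -ENOMEM;         /* allocation or COW failed */

        ext->chain = chain;
        return 0;
}

static void clear_chain(struct sk_buff *skb)
{
        skb_ext_del(skb, TC_SKB_EXT);   /* no-op if the extension is absent */
}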