From b22da3d8526a935aa31e086e63f60ff3246cb61c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Sat, 09 Dec 2023 07:24:11 +0000 Subject: [PATCH] add stmac read mac form eeprom --- kernel/include/linux/skbuff.h | 819 +++++++++++++++++++++++++++++++++++++++++++++------------- 1 files changed, 633 insertions(+), 186 deletions(-) diff --git a/kernel/include/linux/skbuff.h b/kernel/include/linux/skbuff.h index 93de9f6..1bcdb0f 100644 --- a/kernel/include/linux/skbuff.h +++ b/kernel/include/linux/skbuff.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Definitions for the 'struct sk_buff' memory handlers. * * Authors: * Alan Cox, <gw4pts@gw4pts.ampr.org> * Florian La Roche, <rzsfl@rz.uni-sb.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #ifndef _LINUX_SKBUFF_H @@ -18,6 +14,7 @@ #include <linux/compiler.h> #include <linux/time.h> #include <linux/bug.h> +#include <linux/bvec.h> #include <linux/cache.h> #include <linux/rbtree.h> #include <linux/socket.h> @@ -40,6 +37,11 @@ #include <linux/in6.h> #include <linux/if_packet.h> #include <net/flow.h> +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <linux/netfilter/nf_conntrack_common.h> +#endif +#include <linux/android_kabi.h> +#include <linux/android_vendor.h> /* The interface for checksum offload between the stack and networking drivers * is as follows... @@ -47,8 +49,8 @@ * A. IP checksum related features * * Drivers advertise checksum offload capabilities in the features of a device. - * From the stack's point of view these are capabilities offered by the driver, - * a driver typically only advertises features that it is capable of offloading + * From the stack's point of view these are capabilities offered by the driver. + * A driver typically only advertises features that it is capable of offloading * to its device. * * The checksum related features are: @@ -63,7 +65,7 @@ * TCP or UDP packets over IPv4. These are specifically * unencapsulated packets of the form IPv4|TCP or * IPv4|UDP where the Protocol field in the IPv4 header - * is TCP or UDP. The IPv4 header may contain IP options + * is TCP or UDP. The IPv4 header may contain IP options. * This feature cannot be set in features for a device * with NETIF_F_HW_CSUM also set. This feature is being * DEPRECATED (see below). @@ -71,7 +73,7 @@ * NETIF_F_IPV6_CSUM - Driver (device) is only able to checksum plain * TCP or UDP packets over IPv6. These are specifically * unencapsulated packets of the form IPv6|TCP or - * IPv4|UDP where the Next Header field in the IPv6 + * IPv6|UDP where the Next Header field in the IPv6 * header is either TCP or UDP. IPv6 extension headers * are not supported with this feature. This feature * cannot be set in features for a device with @@ -79,13 +81,13 @@ * DEPRECATED (see below). * * NETIF_F_RXCSUM - Driver (device) performs receive checksum offload. - * This flag is used only used to disable the RX checksum + * This flag is only used to disable the RX checksum * feature for a device. The stack will accept receive * checksum indication in packets received on a device * regardless of whether NETIF_F_RXCSUM is set. * * B. Checksumming of received packets by device. Indication of checksum - * verification is in set skb->ip_summed. Possible values are: + * verification is set in skb->ip_summed. 
Possible values are: * * CHECKSUM_NONE: * @@ -115,16 +117,16 @@ * the packet minus one that have been verified as CHECKSUM_UNNECESSARY. * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet * and a device is able to verify the checksums for UDP (possibly zero), - * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to + * GRE (checksum flag is set) and TCP, skb->csum_level would be set to * two. If the device were only able to verify the UDP checksum and not - * GRE, either because it doesn't support GRE checksum of because GRE + * GRE, either because it doesn't support GRE checksum or because GRE * checksum is bad, skb->csum_level would be set to zero (TCP checksum is * not considered in this case). * * CHECKSUM_COMPLETE: * * This is the most generic way. The device supplied checksum of the _whole_ - * packet as seen by netif_rx() and fills out in skb->csum. Meaning, the + * packet as seen by netif_rx() and fills in skb->csum. This means the * hardware doesn't need to parse L3/L4 headers to implement this. * * Notes: @@ -153,8 +155,8 @@ * from skb->csum_start up to the end, and to record/write the checksum at * offset skb->csum_start + skb->csum_offset. A driver may verify that the * csum_start and csum_offset values are valid values given the length and - * offset of the packet, however they should not attempt to validate that the - * checksum refers to a legitimate transport layer checksum-- it is the + * offset of the packet, but it should not attempt to validate that the + * checksum refers to a legitimate transport layer checksum -- it is the * purview of the stack to validate that csum_start and csum_offset are set * correctly. * @@ -178,18 +180,18 @@ * * CHECKSUM_UNNECESSARY: * - * This has the same meaning on as CHECKSUM_NONE for checksum offload on + * This has the same meaning as CHECKSUM_NONE for checksum offload on * output. * * CHECKSUM_COMPLETE: * Not used in checksum output. If a driver observes a packet with this value - * set in skbuff, if should treat as CHECKSUM_NONE being set. + * set in skbuff, it should treat the packet as if CHECKSUM_NONE were set. * * D. Non-IP checksum (CRC) offloads * * NETIF_F_SCTP_CRC - This feature indicates that a device is capable of * offloading the SCTP CRC in a packet. To perform this offload the stack - * will set set csum_start and csum_offset accordingly, set ip_summed to + * will set csum_start and csum_offset accordingly, set ip_summed to * CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication in * the skbuff that the CHECKSUM_PARTIAL refers to CRC32c. * A driver that supports both IP checksum offload and SCTP CRC32c offload @@ -200,10 +202,10 @@ * NETIF_F_FCOE_CRC - This feature indicates that a device is capable of * offloading the FCOE CRC in a packet. To perform this offload the stack * will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset - * accordingly. Note the there is no indication in the skbuff that the - * CHECKSUM_PARTIAL refers to an FCOE checksum, a driver that supports + * accordingly. Note that there is no indication in the skbuff that the + * CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports * both IP checksum offload and FCOE CRC offload must verify which offload - * is configured for a packet presumably by inspecting packet headers. + * is configured for a packet, presumably by inspecting packet headers. * * E. Checksumming on output with GSO. * @@ -211,9 +213,9 @@ * is implied by the SKB_GSO_* flags in gso_type. 
Most obviously, if the * gso_type is SKB_GSO_TCPV4 or SKB_GSO_TCPV6, TCP checksum offload as * part of the GSO operation is implied. If a checksum is being offloaded - * with GSO then ip_summed is CHECKSUM_PARTIAL, csum_start and csum_offset - * are set to refer to the outermost checksum being offload (two offloaded - * checksums are possible with UDP encapsulation). + * with GSO then ip_summed is CHECKSUM_PARTIAL, and both csum_start and + * csum_offset are set to refer to the outermost checksum being offloaded + * (two offloaded checksums are possible with UDP encapsulation). */ /* Don't change this without changing skb_csum_unnecessary! */ @@ -238,21 +240,18 @@ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +struct ahash_request; struct net_device; struct scatterlist; struct pipe_inode_info; struct iov_iter; struct napi_struct; +struct bpf_prog; +union bpf_attr; +struct skb_ext; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) -struct nf_conntrack { - atomic_t use; -}; -#endif -#include <linux/android_kabi.h> - +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { - refcount_t use; enum { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, @@ -278,6 +277,18 @@ char neigh_header[8]; }; }; +#endif + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +/* Chain in tc_skb_ext will be used to share the tc chain with + * ovs recirc_id. It will be set to the current chain by tc + * and read by ovs to recirc_id. + */ +struct tc_skb_ext { + __u32 chain; + __u16 mru; +}; +#endif struct sk_buff_head { /* These two members must be first. */ @@ -310,41 +321,51 @@ */ #define GSO_BY_FRAGS 0xFFFF -typedef struct skb_frag_struct skb_frag_t; +typedef struct bio_vec skb_frag_t; -struct skb_frag_struct { - struct { - struct page *p; - } page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; - __u32 size; -#else - __u16 page_offset; - __u16 size; -#endif -}; - +/** + * skb_frag_size() - Returns the size of a skb fragment + * @frag: skb fragment + */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { - return frag->size; + return frag->bv_len; } +/** + * skb_frag_size_set() - Sets the size of a skb fragment + * @frag: skb fragment + * @size: size of fragment + */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { - frag->size = size; + frag->bv_len = size; } +/** + * skb_frag_size_add() - Increments the size of a skb fragment by @delta + * @frag: skb fragment + * @delta: value to add + */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { - frag->size += delta; + frag->bv_len += delta; } +/** + * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta + * @frag: skb fragment + * @delta: value to subtract + */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { - frag->size -= delta; + frag->bv_len -= delta; } +/** + * skb_frag_must_loop - Test if %p is a high memory page + * @p: fragment's page + */ static inline bool skb_frag_must_loop(struct page *p) { #if defined(CONFIG_HIGHMEM) @@ -358,7 +379,7 @@ * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on - * @f_off: offset from start of f->page.p + * @f_off: offset from start of f->bv_page * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, @@ -479,10 +500,11 @@ } void sock_zerocopy_put(struct ubuf_info *uarg); -void sock_zerocopy_put_abort(struct ubuf_info *uarg); +void 
sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); +int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len); int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg); @@ -511,6 +533,8 @@ /* Intermediate layers must ensure that destructor_arg * remains valid until skb destructor */ void * destructor_arg; + + ANDROID_OEM_DATA_ARRAY(1, 3); /* must be last field, see pskb_expand_head() */ skb_frag_t frags[MAX_SKB_FRAGS]; @@ -575,6 +599,8 @@ SKB_GSO_UDP = 1 << 16, SKB_GSO_UDP_L4 = 1 << 17, + + SKB_GSO_FRAGLIST = 1 << 18, }; #if BITS_PER_LONG > 32 @@ -587,14 +613,20 @@ typedef unsigned char *sk_buff_data_t; #endif -/** +/** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list * @tstamp: Time we arrived/left + * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point + * for retransmit timer * @rbnode: RB tree node, alternative to next/prev for netem/tcp + * @list: queue head * @sk: Socket we are owned by + * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in + * fragmentation management * @dev: Device we arrived on/are leaving by + * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) * @sp: the security path, used for xfrm @@ -613,10 +645,15 @@ * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs + * @inner_protocol_type: whether the inner protocol is + * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO + * @remcsum_offload: remote checksum offload is enabled + * @offload_fwd_mark: Packet was L2-forwarded in hardware + * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress - * @tc_redirected: packet was redirected by a tc action - * @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect + * @redirected: packet was redirected by packet classifier + * @from_ingress: packet was redirected from the ingress path * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -629,8 +666,10 @@ * @tc_index: Traffic control index * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices - * @xmit_more: More SKBs are pending for this queue + * @head_frag: skb was allocated from page fragments, + * not allocated by kmalloc() or vmalloc(). 
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves + * @active_extensions: active extensions (skb_ext_id types) * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -639,15 +678,29 @@ * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS + * @encapsulation: indicates the inner headers in the skbuff are valid + * @encap_hdr_csum: software checksum is needed + * @csum_valid: checksum is already valid * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL + * @csum_complete_sw: checksum was completed by software + * @csum_level: indicates the number of consecutive checksums found in + * the packet minus one that have been verified as + * CHECKSUM_UNNECESSARY (max 3) + * @scm_io_uring: SKB holds io_uring registered files * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB - * @napi_id: id of the NAPI struct this skb came from + * @napi_id: id of the NAPI struct this skb came from + * @sender_cpu: (aka @napi_id) source CPU in XPS * @secmark: security marking * @mark: Generic packet mark + * @reserved_tailroom: (aka @mark) number of bytes of free space available + * at the tail of an sk_buff + * @vlan_present: VLAN tag is present * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) + * @inner_ipproto: (aka @inner_protocol) stores ipproto when + * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO; * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) * @inner_mac_header: Link layer header (encapsulation) @@ -660,6 +713,7 @@ * @data: Data head pointer * @truesize: Buffer size * @users: User count - see {datagram,tcp}.c + * @extensions: allocated extensions, valid if active_extensions is nonzero */ struct sk_buff { @@ -689,7 +743,7 @@ union { ktime_t tstamp; - u64 skb_mstamp; + u64 skb_mstamp_ns; /* earliest departure time */ }; /* * This is the control buffer. 
It is free to use for every @@ -707,13 +761,9 @@ struct list_head tcp_tsorted_anchor; }; -#ifdef CONFIG_XFRM - struct sec_path *sp; -#endif #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) unsigned long _nfct; #endif - struct nf_bridge_info *nf_bridge; unsigned int len, data_len; __u16 mac_len, @@ -732,15 +782,18 @@ #endif #define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) + /* private: */ __u8 __cloned_offset[0]; + /* public: */ __u8 cloned:1, nohdr:1, fclone:2, peeked:1, head_frag:1, - xmit_more:1, pfmemalloc:1; - +#ifdef CONFIG_SKB_EXTENSIONS + __u8 active_extensions; +#endif /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() */ @@ -756,7 +809,9 @@ #endif #define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) + /* private: */ __u8 __pkt_type_offset[0]; + /* public: */ __u8 pkt_type:3; __u8 ignore_df:1; __u8 nf_trace:1; @@ -773,6 +828,16 @@ __u8 encap_hdr_csum:1; __u8 csum_valid:1; +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_VLAN_PRESENT_BIT 7 +#else +#define PKT_VLAN_PRESENT_BIT 0 +#endif +#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) + /* private: */ + __u8 __pkt_vlan_present_offset[0]; + /* public: */ + __u8 vlan_present:1; __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 csum_not_inet:1; @@ -780,19 +845,21 @@ #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif - __u8 ipvs_property:1; + __u8 ipvs_property:1; __u8 inner_protocol_type:1; __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; - __u8 offload_mr_fwd_mark:1; + __u8 offload_l3_fwd_mark:1; #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; __u8 tc_at_ingress:1; - __u8 tc_redirected:1; - __u8 tc_from_ingress:1; +#endif +#ifdef CONFIG_NET_REDIRECT + __u8 redirected:1; + __u8 from_ingress:1; #endif #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; @@ -847,7 +914,21 @@ __u32 headers_end[0]; /* public: */ - ANDROID_KABI_RESERVE(1); + /* Android KABI preservation. + * + * "open coded" version of ANDROID_KABI_USE() to pack more + * fields/variables into the space that we have. + * + * scm_io_uring is from 04df9719df18 ("io_uring/af_unix: defer + * registered files gc to io_uring release") + */ + _ANDROID_KABI_REPLACE(_ANDROID_KABI_RESERVE(1), + struct { + __u8 scm_io_uring:1; + __u8 android_kabi_reserved1_padding1; + __u16 android_kabi_reserved1_padding2; + __u32 android_kabi_reserved1_padding3; + }); ANDROID_KABI_RESERVE(2); /* These elements must be at the end, see alloc_skb() for details. 
*/ @@ -857,6 +938,11 @@ *data; unsigned int truesize; refcount_t users; + +#ifdef CONFIG_SKB_EXTENSIONS + /* only useable after checking ->active_extensions != 0 */ + struct skb_ext *extensions; +#endif }; #ifdef __KERNEL__ @@ -868,7 +954,10 @@ #define SKB_ALLOC_RX 0x02 #define SKB_ALLOC_NAPI 0x04 -/* Returns true if the skb was allocated from PFMEMALLOC reserves */ +/** + * skb_pfmemalloc - Test if the skb was allocated from PFMEMALLOC reserves + * @skb: buffer + */ static inline bool skb_pfmemalloc(const struct sk_buff *skb) { return unlikely(skb->pfmemalloc); @@ -881,7 +970,6 @@ #define SKB_DST_NOREF 1UL #define SKB_DST_PTRMASK ~(SKB_DST_NOREF) -#define SKB_NFCT_PTRMASK ~(7UL) /** * skb_dst - returns skb dst_entry * @skb: buffer @@ -890,7 +978,7 @@ */ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { - /* If refdst was not refcounted, check we still are in a + /* If refdst was not refcounted, check we still are in a * rcu_read_lock section */ WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) && @@ -937,6 +1025,10 @@ return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); } +/** + * skb_rtable - Returns the skb &rtable + * @skb: buffer + */ static inline struct rtable *skb_rtable(const struct sk_buff *skb) { return (struct rtable *)skb_dst(skb); @@ -951,6 +1043,10 @@ return ptype <= PACKET_OTHERHOST; } +/** + * skb_napi_id - Returns the skb's NAPI id + * @skb: buffer + */ static inline unsigned int skb_napi_id(const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL @@ -960,7 +1056,12 @@ #endif } -/* decrement the reference count and return true if we can free the skb */ +/** + * skb_unref - decrement the skb's reference count + * @skb: buffer + * + * Returns true if we can free the skb. + */ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) @@ -976,8 +1077,18 @@ void skb_release_head_state(struct sk_buff *skb); void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); +void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); + +#ifdef CONFIG_TRACEPOINTS void consume_skb(struct sk_buff *skb); +#else +static inline void consume_skb(struct sk_buff *skb) +{ + return kfree_skb(skb); +} +#endif + void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); extern struct kmem_cache *skbuff_head_cache; @@ -990,6 +1101,16 @@ int node); struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); +struct sk_buff *build_skb_around(struct sk_buff *skb, + void *data, unsigned int frag_size); + +/** + * alloc_skb - allocate a network buffer + * @size: size to allocate + * @priority: allocation mask + * + * This function is a convenient wrapper around __alloc_skb(). + */ static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { @@ -1001,6 +1122,7 @@ int max_page_order, int *errcode, gfp_t gfp_mask); +struct sk_buff *alloc_skb_for_msg(struct sk_buff *first); /* Layout of fast clones : [skb1][skb2][fclone_ref] */ struct sk_buff_fclones { @@ -1032,6 +1154,13 @@ fclones->skb2.sk == sk; } +/** + * alloc_skb_fclone - allocate a network buffer from fclone cache + * @size: size to allocate + * @priority: allocation mask + * + * This function is a convenient wrapper around __alloc_skb(). 
+ */ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { @@ -1080,11 +1209,6 @@ return __skb_pad(skb, pad, true); } #define dev_kfree_skb(a) consume_skb(a) - -int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int getfrag(void *from, char *to, int offset, - int len, int odd, struct sk_buff *skb), - void *from, int length); int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size); @@ -1193,7 +1317,12 @@ const struct flow_dissector_key *key, unsigned int key_count); -bool __skb_flow_dissect(const struct sk_buff *skb, +struct bpf_flow_dissector; +bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, + __be16 proto, int nhoff, int hlen, unsigned int flags); + +bool __skb_flow_dissect(const struct net *net, + const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, void *data, __be16 proto, int nhoff, int hlen, @@ -1203,8 +1332,8 @@ struct flow_dissector *flow_dissector, void *target_container, unsigned int flags) { - return __skb_flow_dissect(skb, flow_dissector, target_container, - NULL, 0, 0, 0, flags); + return __skb_flow_dissect(NULL, skb, flow_dissector, + target_container, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, @@ -1212,25 +1341,44 @@ unsigned int flags) { memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_dissector, flow, - NULL, 0, 0, 0, flags); + return __skb_flow_dissect(NULL, skb, &flow_keys_dissector, + flow, NULL, 0, 0, 0, flags); } static inline bool -skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb, +skb_flow_dissect_flow_keys_basic(const struct net *net, + const struct sk_buff *skb, struct flow_keys_basic *flow, void *data, __be16 proto, int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow, + return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, data, proto, nhoff, hlen, flags); } +void skb_flow_dissect_meta(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container); + +/* Gets a skb connection tracking info, ctinfo map should be a + * map of mapsize to translate enum ip_conntrack_info states + * to user states. 
+ */ +void +skb_flow_dissect_ct(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, + u16 *ctinfo_map, + size_t mapsize); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); + +void skb_flow_dissect_hash(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container); static inline __u32 skb_get_hash(struct sk_buff *skb) { @@ -1267,6 +1415,14 @@ to->l4_hash = from->l4_hash; }; +static inline void skb_copy_decrypted(struct sk_buff *to, + const struct sk_buff *from) +{ +#ifdef CONFIG_TLS_DEVICE + to->decrypted = from->decrypted; +#endif +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@ -1277,6 +1433,11 @@ { return skb->end; } + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = offset; +} #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@ -1286,6 +1447,11 @@ static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end - skb->head; +} + +static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) +{ + skb->end = skb->head + offset; } #endif @@ -1304,10 +1470,14 @@ return is_zcopy ? skb_uarg(skb) : NULL; } -static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) +static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, + bool *have_ref) { if (skb && uarg && !skb_zcopy(skb)) { - sock_zerocopy_get(uarg); + if (unlikely(have_ref && *have_ref)) + *have_ref = false; + else + sock_zerocopy_get(uarg); skb_shinfo(skb)->destructor_arg = uarg; skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; } @@ -1354,7 +1524,7 @@ struct ubuf_info *uarg = skb_zcopy(skb); if (uarg) { - sock_zerocopy_put_abort(uarg); + sock_zerocopy_put_abort(uarg, false); skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; } } @@ -1502,6 +1672,22 @@ return 0; } +/* This variant of skb_unclone() makes sure skb->truesize + * and skb_end_offset() are not changed, whenever a new skb->head is needed. + * + * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X)) + * when various debugging features are in place. + */ +int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri); +static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(gfpflags_allow_blocking(pri)); + + if (skb_cloned(skb)) + return __skb_unclone_keeptruesize(skb, pri); + return 0; +} + /** * skb_header_cloned - is the header a clone * @skb: buffer to check @@ -1642,6 +1828,17 @@ } /** + * __skb_peek - peek at the head of a non-empty &sk_buff_head + * @list_: list to peek at + * + * Like skb_peek(), but the caller knows that the list is not empty. + */ +static inline struct sk_buff *__skb_peek(const struct sk_buff_head *list_) +{ + return list_->next; +} + +/** * skb_peek_next - peek skb following the given one from a queue * @skb: skb to start from * @list_: list to peek at @@ -1755,8 +1952,6 @@ * The "__skb_xxxx()" functions are the non-atomic ones that * can only be called with interrupts disabled. */ -void skb_insert(struct sk_buff *old, struct sk_buff *newsk, - struct sk_buff_head *list); static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) @@ -1886,12 +2081,12 @@ * * A buffer cannot be placed on two lists at the same time. 
*/ -void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_after(list, (struct sk_buff *)list, newsk); } +void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); /** * __skb_queue_tail - queue a buffer at the list tail @@ -1903,12 +2098,12 @@ * * A buffer cannot be placed on two lists at the same time. */ -void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { __skb_queue_before(list, (struct sk_buff *)list, newsk); } +void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); /* * remove sk_buff from list. _Must_ be called atomically, and with @@ -1935,7 +2130,6 @@ * so must be used with appropriate locks held only. The head item is * returned or %NULL if the list is empty. */ -struct sk_buff *skb_dequeue(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek(list); @@ -1943,6 +2137,7 @@ __skb_unlink(skb, list); return skb; } +struct sk_buff *skb_dequeue(struct sk_buff_head *list); /** * __skb_dequeue_tail - remove from the tail of the queue @@ -1952,7 +2147,6 @@ * so must be used with appropriate locks held only. The tail item is * returned or %NULL if the list is empty. */ -struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek_tail(list); @@ -1960,6 +2154,7 @@ __skb_unlink(skb, list); return skb; } +struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline bool skb_is_nonlinear(const struct sk_buff *skb) @@ -2009,8 +2204,8 @@ * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). */ - frag->page.p = page; - frag->page_offset = off; + frag->bv_page = page; + frag->bv_offset = off; skb_frag_size_set(frag, size); page = compound_head(page); @@ -2045,8 +2240,6 @@ void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize); -#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) -#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) #define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) #ifdef NET_SKBUFF_DATA_USES_OFFSET @@ -2083,6 +2276,14 @@ } #endif /* NET_SKBUFF_DATA_USES_OFFSET */ + +static inline void skb_assert_len(struct sk_buff *skb) +{ +#ifdef CONFIG_DEBUG_NET + if (WARN_ONCE(!skb->len, "%s\n", __func__)) + DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false); +#endif /* CONFIG_DEBUG_NET */ +} /* * Add data to an sk_buff @@ -2181,12 +2382,12 @@ return unlikely(len > skb->len) ? 
NULL : __pskb_pull(skb, len); } -static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) +static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) { if (likely(len <= skb_headlen(skb))) - return 1; + return true; if (unlikely(len > skb->len)) - return 0; + return false; return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; } @@ -2410,6 +2611,11 @@ return skb->mac_header != (typeof(skb->mac_header))~0U; } +static inline void skb_unset_mac_header(struct sk_buff *skb) +{ + skb->mac_header = (typeof(skb->mac_header))~0U; +} + static inline void skb_reset_mac_header(struct sk_buff *skb) { skb->mac_header = skb->data - skb->head; @@ -2426,18 +2632,16 @@ skb->mac_header = skb->network_header; } -static inline void skb_probe_transport_header(struct sk_buff *skb, - const int offset_hint) +static inline void skb_probe_transport_header(struct sk_buff *skb) { struct flow_keys_basic keys; if (skb_transport_header_was_set(skb)) return; - if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) + if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); - else if (offset_hint >= 0) - skb_set_transport_header(skb, offset_hint); } static inline void skb_mac_header_rebuild(struct sk_buff *skb) @@ -2531,7 +2735,7 @@ * * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS) * to reduce average number of cache lines per packet. - * get_rps_cpus() for example only access one 64 bytes aligned block : + * get_rps_cpu() for example only access one 64 bytes aligned block : * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) */ #ifndef NET_SKB_PAD @@ -2542,10 +2746,8 @@ static inline void __skb_set_length(struct sk_buff *skb, unsigned int len) { - if (unlikely(skb_is_nonlinear(skb))) { - WARN_ON(1); + if (WARN_ON(skb_is_nonlinear(skb))) return; - } skb->len = len; skb_set_tail_pointer(skb, len); } @@ -2653,13 +2855,13 @@ * the list and one reference dropped. This function does not take the * list lock and the caller must hold the relevant locks to use it. 
*/ -void skb_queue_purge(struct sk_buff_head *list); static inline void __skb_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; while ((skb = __skb_dequeue(list)) != NULL) kfree_skb(skb); } +void skb_queue_purge(struct sk_buff_head *list); unsigned int skb_rbtree_purge(struct rb_root *root); @@ -2801,7 +3003,38 @@ */ static inline unsigned int skb_frag_off(const skb_frag_t *frag) { - return frag->page_offset; + return frag->bv_offset; +} + +/** + * skb_frag_off_add() - Increments the offset of a skb fragment by @delta + * @frag: skb fragment + * @delta: value to add + */ +static inline void skb_frag_off_add(skb_frag_t *frag, int delta) +{ + frag->bv_offset += delta; +} + +/** + * skb_frag_off_set() - Sets the offset of a skb fragment + * @frag: skb fragment + * @offset: offset of fragment + */ +static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) +{ + frag->bv_offset = offset; +} + +/** + * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment + * @fragto: skb fragment where offset is set + * @fragfrom: skb fragment offset is copied from + */ +static inline void skb_frag_off_copy(skb_frag_t *fragto, + const skb_frag_t *fragfrom) +{ + fragto->bv_offset = fragfrom->bv_offset; } /** @@ -2812,7 +3045,7 @@ */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->page.p; + return frag->bv_page; } /** @@ -2870,7 +3103,7 @@ */ static inline void *skb_frag_address(const skb_frag_t *frag) { - return page_address(skb_frag_page(frag)) + frag->page_offset; + return page_address(skb_frag_page(frag)) + skb_frag_off(frag); } /** @@ -2886,7 +3119,18 @@ if (unlikely(!ptr)) return NULL; - return ptr + frag->page_offset; + return ptr + skb_frag_off(frag); +} + +/** + * skb_frag_page_copy() - sets the page in a fragment from another fragment + * @fragto: skb fragment where page is set + * @fragfrom: skb fragment page is copied from + */ +static inline void skb_frag_page_copy(skb_frag_t *fragto, + const skb_frag_t *fragfrom) +{ + fragto->bv_page = fragfrom->bv_page; } /** @@ -2898,7 +3142,7 @@ */ static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) { - frag->page.p = page; + frag->bv_page = page; } /** @@ -2934,7 +3178,7 @@ enum dma_data_direction dir) { return dma_map_page(dev, skb_frag_page(frag), - frag->page_offset + offset, size, dir); + skb_frag_off(frag) + offset, size, dir); } static inline struct sk_buff *pskb_copy(struct sk_buff *skb, @@ -3037,7 +3281,7 @@ } /** - * skb_put_padto - increase size and pad an skbuff up to a minimal size + * __skb_put_padto - increase size and pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * @free_on_error: free buffer on error @@ -3102,10 +3346,10 @@ if (skb_zcopy(skb)) return false; if (i) { - const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; return page == skb_frag_page(frag) && - off == frag->page_offset + skb_frag_size(frag); + off == skb_frag_off(frag) + skb_frag_size(frag); } return false; } @@ -3324,24 +3568,21 @@ for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) -int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, +int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, + int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), 
- int *peeked, int *off, int *err, + int *off, int *err, struct sk_buff **last); -struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), - int *peeked, int *off, int *err, +struct sk_buff *__skb_try_recv_datagram(struct sock *sk, + struct sk_buff_head *queue, + unsigned int flags, int *off, int *err, struct sk_buff **last); -struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, - void (*destructor)(struct sock *sk, - struct sk_buff *skb), - int *peeked, int *off, int *err); +struct sk_buff *__skb_recv_datagram(struct sock *sk, + struct sk_buff_head *sk_queue, + unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, @@ -3355,6 +3596,9 @@ } int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg); +int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len, + struct ahash_request *hash); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); @@ -3369,13 +3613,12 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, - int len, __wsum csum); + int len); int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len); -int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, @@ -3386,11 +3629,22 @@ bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); +struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, + unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); +int skb_eth_pop(struct sk_buff *skb); +int skb_eth_push(struct sk_buff *skb, const unsigned char *dst, + const unsigned char *src); +int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, + int mac_len, bool ethernet); +int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, + bool ethernet); +int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse); +int skb_mpls_dec_ttl(struct sk_buff *skb); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, gfp_t gfp); @@ -3494,22 +3748,43 @@ /** * skb_get_timestamp - get timestamp from a skb * @skb: skb to get stamp from - * @stamp: pointer to struct timeval to store stamp in + * @stamp: pointer to struct __kernel_old_timeval to store stamp in * * Timestamps are stored in the skb as offsets to a base timestamp. 
* This function converts the offset back to a struct timeval and stores * it in stamp. */ static inline void skb_get_timestamp(const struct sk_buff *skb, - struct timeval *stamp) + struct __kernel_old_timeval *stamp) { - *stamp = ktime_to_timeval(skb->tstamp); + *stamp = ns_to_kernel_old_timeval(skb->tstamp); +} + +static inline void skb_get_new_timestamp(const struct sk_buff *skb, + struct __kernel_sock_timeval *stamp) +{ + struct timespec64 ts = ktime_to_timespec64(skb->tstamp); + + stamp->tv_sec = ts.tv_sec; + stamp->tv_usec = ts.tv_nsec / 1000; } static inline void skb_get_timestampns(const struct sk_buff *skb, - struct timespec *stamp) + struct __kernel_old_timespec *stamp) { - *stamp = ktime_to_timespec(skb->tstamp); + struct timespec64 ts = ktime_to_timespec64(skb->tstamp); + + stamp->tv_sec = ts.tv_sec; + stamp->tv_nsec = ts.tv_nsec; +} + +static inline void skb_get_new_timestampns(const struct sk_buff *skb, + struct __kernel_timespec *stamp) +{ + struct timespec64 ts = ktime_to_timespec64(skb->tstamp); + + stamp->tv_sec = ts.tv_sec; + stamp->tv_nsec = ts.tv_nsec; } static inline void __net_timestamp(struct sk_buff *skb) @@ -3551,13 +3826,19 @@ #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) case 32: diffs |= __it_diff(a, b, 64); + fallthrough; case 24: diffs |= __it_diff(a, b, 64); + fallthrough; case 16: diffs |= __it_diff(a, b, 64); + fallthrough; case 8: diffs |= __it_diff(a, b, 64); break; case 28: diffs |= __it_diff(a, b, 64); + fallthrough; case 20: diffs |= __it_diff(a, b, 64); + fallthrough; case 12: diffs |= __it_diff(a, b, 64); + fallthrough; case 4: diffs |= __it_diff(a, b, 32); break; } @@ -3618,7 +3899,7 @@ * must call this function to return the skb back to the stack with a * timestamp. 
* - * @skb: clone of the the original outgoing packet + * @skb: clone of the original outgoing packet * @hwtstamps: hardware time stamps * */ @@ -3721,6 +4002,14 @@ skb->csum_level++; } else if (skb->ip_summed == CHECKSUM_NONE) { skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = 0; + } +} + +static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + skb->ip_summed = CHECKSUM_NONE; skb->csum_level = 0; } } @@ -3840,18 +4129,16 @@ return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid); } -static inline void __skb_checksum_convert(struct sk_buff *skb, - __sum16 check, __wsum pseudo) +static inline void __skb_checksum_convert(struct sk_buff *skb, __wsum pseudo) { skb->csum = ~pseudo; skb->ip_summed = CHECKSUM_COMPLETE; } -#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \ +#define skb_checksum_try_convert(skb, proto, compute_pseudo) \ do { \ if (__skb_checksum_convert_check(skb)) \ - __skb_checksum_convert(skb, check, \ - compute_pseudo(skb, proto)); \ + __skb_checksum_convert(skb, compute_pseudo(skb, proto)); \ } while (0) static inline void skb_remcsum_adjust_partial(struct sk_buff *skb, void *ptr, @@ -3891,47 +4178,151 @@ static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) - return (void *)(skb->_nfct & SKB_NFCT_PTRMASK); + return (void *)(skb->_nfct & NFCT_PTRMASK); #else return NULL; #endif } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) -void nf_conntrack_destroy(struct nf_conntrack *nfct); -static inline void nf_conntrack_put(struct nf_conntrack *nfct) +static inline unsigned long skb_get_nfct(const struct sk_buff *skb) { - if (nfct && atomic_dec_and_test(&nfct->use)) - nf_conntrack_destroy(nfct); -} -static inline void nf_conntrack_get(struct nf_conntrack *nfct) -{ - if (nfct) - atomic_inc(&nfct->use); -} +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + return skb->_nfct; +#else + return 0UL; #endif +} + +static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + skb->_nfct = nfct; +#endif +} + +#ifdef CONFIG_SKB_EXTENSIONS +enum skb_ext_id { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) + SKB_EXT_BRIDGE_NF, +#endif +#ifdef CONFIG_XFRM + SKB_EXT_SEC_PATH, +#endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + TC_SKB_EXT, +#endif +#if IS_ENABLED(CONFIG_MPTCP) + SKB_EXT_MPTCP, +#endif +#if IS_ENABLED(CONFIG_KCOV) + SKB_EXT_KCOV_HANDLE, +#endif + SKB_EXT_NUM, /* must be last */ +}; + +/** + * struct skb_ext - sk_buff extensions + * @refcnt: 1 on allocation, deallocated on 0 + * @offset: offset to add to @data to obtain extension address + * @chunks: size currently allocated, stored in SKB_EXT_ALIGN_SHIFT units + * @data: start of extension data, variable sized + * + * Note: offsets/lengths are stored in chunks of 8 bytes, this allows + * to use 'u8' types while allowing up to 2kb worth of extension data. 
+ */ +struct skb_ext { + refcount_t refcnt; + u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */ + u8 chunks; /* same */ + char data[] __aligned(8); +}; + +struct skb_ext *__skb_ext_alloc(gfp_t flags); +void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, + struct skb_ext *ext); +void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id); +void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id); +void __skb_ext_put(struct skb_ext *ext); + +static inline void skb_ext_put(struct sk_buff *skb) { - if (nf_bridge && refcount_dec_and_test(&nf_bridge->use)) - kfree(nf_bridge); + if (skb->active_extensions) + __skb_ext_put(skb->extensions); } -static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) + +static inline void __skb_ext_copy(struct sk_buff *dst, + const struct sk_buff *src) { - if (nf_bridge) - refcount_inc(&nf_bridge->use); + dst->active_extensions = src->active_extensions; + + if (src->active_extensions) { + struct skb_ext *ext = src->extensions; + + refcount_inc(&ext->refcnt); + dst->extensions = ext; + } } -#endif /* CONFIG_BRIDGE_NETFILTER */ -static inline void nf_reset(struct sk_buff *skb) + +static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src) +{ + skb_ext_put(dst); + __skb_ext_copy(dst, src); +} + +static inline bool __skb_ext_exist(const struct skb_ext *ext, enum skb_ext_id i) +{ + return !!ext->offset[i]; +} + +static inline bool skb_ext_exist(const struct sk_buff *skb, enum skb_ext_id id) +{ + return skb->active_extensions & (1 << id); +} + +static inline void skb_ext_del(struct sk_buff *skb, enum skb_ext_id id) +{ + if (skb_ext_exist(skb, id)) + __skb_ext_del(skb, id); +} + +static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id) +{ + if (skb_ext_exist(skb, id)) { + struct skb_ext *ext = skb->extensions; + + return (void *)ext + (ext->offset[id] << 3); + } + + return NULL; +} + +static inline void skb_ext_reset(struct sk_buff *skb) +{ + if (unlikely(skb->active_extensions)) { + __skb_ext_put(skb->extensions); + skb->active_extensions = 0; + } +} + +static inline bool skb_has_extensions(struct sk_buff *skb) +{ + return unlikely(skb->active_extensions); +} +#else +static inline void skb_ext_put(struct sk_buff *skb) {} +static inline void skb_ext_reset(struct sk_buff *skb) {} +static inline void skb_ext_del(struct sk_buff *skb, int unused) {} +static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {} +static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {} +static inline bool skb_has_extensions(struct sk_buff *skb) { return false; } +#endif /* CONFIG_SKB_EXTENSIONS */ + +static inline void nf_reset_ct(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb_nfct(skb)); skb->_nfct = 0; #endif -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - nf_bridge_put(skb->nf_bridge); -#endif - skb->nf_bridge = NULL; } static inline void nf_reset_trace(struct sk_buff *skb) @@ -3948,17 +4339,13 @@ #endif } -/* Note: This doesn't put any conntrack and bridge info in dst. */ +/* Note: This doesn't put any conntrack info in dst. 
*/ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) dst->_nfct = src->_nfct; nf_conntrack_get(skb_nfct(src)); -#endif -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - dst->nf_bridge = src->nf_bridge; - nf_bridge_get(src->nf_bridge); #endif #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) if (copy) @@ -3970,9 +4357,6 @@ { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb_nfct(dst)); -#endif -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - nf_bridge_put(dst->nf_bridge); #endif __nf_copy(dst, src, true); } @@ -3995,12 +4379,19 @@ { } #endif +static inline int secpath_exists(const struct sk_buff *skb) +{ +#ifdef CONFIG_XFRM + return skb_ext_exist(skb, SKB_EXT_SEC_PATH); +#else + return 0; +#endif +} + static inline bool skb_irq_freeable(const struct sk_buff *skb) { return !skb->destructor && -#if IS_ENABLED(CONFIG_XFRM) - !skb->sp && -#endif + !secpath_exists(skb) && !skb_nfct(skb) && !skb->_skb_refdst && !skb_has_frag_list(skb); @@ -4046,10 +4437,10 @@ return skb->dst_pending_confirm != 0; } -static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +static inline struct sec_path *skb_sec_path(const struct sk_buff *skb) { #ifdef CONFIG_XFRM - return skb->sp; + return skb_ext_find(skb, SKB_EXT_SEC_PATH); #else return NULL; #endif @@ -4070,8 +4461,8 @@ __wsum csum; __u16 csum_start; }; -#define SKB_SGO_CB_OFFSET 32 -#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_SGO_CB_OFFSET)) +#define SKB_GSO_CB_OFFSET 32 +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET)) static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) { @@ -4232,7 +4623,7 @@ /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. - * See Documentation/networking/checksum-offloads.txt for + * See Documentation/networking/checksum-offloads.rst for * explanation of how this works. * Fill in outer checksum adjustment (e.g. with sum of outer * pseudo-header) before calling. @@ -4254,5 +4645,61 @@ return csum_partial(l4_hdr, csum_start - l4_hdr, partial); } +static inline bool skb_is_redirected(const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_REDIRECT + return skb->redirected; +#else + return false; +#endif +} + +static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) +{ +#ifdef CONFIG_NET_REDIRECT + skb->redirected = 1; + skb->from_ingress = from_ingress; + if (skb->from_ingress) + skb->tstamp = 0; +#endif +} + +static inline void skb_reset_redirect(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_REDIRECT + skb->redirected = 0; +#endif +} + +#if IS_ENABLED(CONFIG_KCOV) && IS_ENABLED(CONFIG_SKB_EXTENSIONS) +static inline void skb_set_kcov_handle(struct sk_buff *skb, + const u64 kcov_handle) +{ + /* Do not allocate skb extensions only to set kcov_handle to zero + * (as it is zero by default). However, if the extensions are + * already allocated, update kcov_handle anyway since + * skb_set_kcov_handle can be called to zero a previously set + * value. + */ + if (skb_has_extensions(skb) || kcov_handle) { + u64 *kcov_handle_ptr = skb_ext_add(skb, SKB_EXT_KCOV_HANDLE); + + if (kcov_handle_ptr) + *kcov_handle_ptr = kcov_handle; + } +} + +static inline u64 skb_get_kcov_handle(struct sk_buff *skb) +{ + u64 *kcov_handle = skb_ext_find(skb, SKB_EXT_KCOV_HANDLE); + + return kcov_handle ? 
*kcov_handle : 0; +} +#else +static inline void skb_set_kcov_handle(struct sk_buff *skb, + const u64 kcov_handle) { } +static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { return 0; } +#endif /* CONFIG_KCOV && CONFIG_SKB_EXTENSIONS */ + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- Gitblit v1.6.2
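The hunks above convert skb_frag_t from struct skb_frag_struct to struct bio_vec and route all fragment access through the skb_frag_off()/skb_frag_size()/skb_frag_page() helpers. Below is a minimal sketch of how an out-of-tree transmit path might walk paged fragments against the new layout; my_tx_map_frags and dma_dev are hypothetical names, and unwinding of already-mapped fragments on error is omitted.

#include <linux/skbuff.h>
#include <linux/dma-mapping.h>

/* Sketch only: map each paged fragment of an skb for transmit DMA,
 * using the accessor helpers instead of the removed frag->page.p,
 * frag->page_offset and frag->size fields.
 */
static int my_tx_map_frags(struct device *dma_dev, struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];
		dma_addr_t addr;

		/* skb_frag_dma_map() resolves the backing page and offset */
		addr = skb_frag_dma_map(dma_dev, frag, 0,
					skb_frag_size(frag), DMA_TO_DEVICE);
		if (dma_mapping_error(dma_dev, addr))
			return -ENOMEM; /* a real driver would unmap earlier frags */

		/* hand (addr, skb_frag_size(frag)) to the hardware ring here */
	}

	return 0;
}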
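The patch also introduces the skb extension mechanism (CONFIG_SKB_EXTENSIONS) and, with CONFIG_NET_TC_SKB_EXT, the tc_skb_ext used to hand the tc chain to Open vSwitch as a recirc_id. A hedged sketch of the setter side follows, assuming both options are enabled; my_record_tc_chain is a hypothetical helper, and the memset is defensive since skb_ext_add() is not guaranteed to return zeroed memory.

#include <linux/skbuff.h>
#include <linux/string.h>

/* Sketch only: record the current tc chain in the TC_SKB_EXT extension
 * so a later reader (e.g. ovs) can pick it up with skb_ext_find().
 */
static int my_record_tc_chain(struct sk_buff *skb, u32 chain)
{
	struct tc_skb_ext *ext = skb_ext_add(skb, TC_SKB_EXT);

	if (!ext)
		return -ENOMEM;

	memset(ext, 0, sizeof(*ext));
	ext->chain = chain;

	return 0;
}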
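Finally, skb->sp is removed and the xfrm security path becomes the SKB_EXT_SEC_PATH extension, so callers should only reach it through skb_sec_path() or secpath_exists(). The check below is a sketch that assumes the struct sec_path layout from net/xfrm.h (its len field is not part of this patch); my_skb_is_ipsec is a hypothetical name.

#include <linux/skbuff.h>
#include <net/xfrm.h>

/* Sketch only: detect an IPsec-processed skb without touching the
 * removed skb->sp pointer.
 */
static bool my_skb_is_ipsec(const struct sk_buff *skb)
{
	const struct sec_path *sp = skb_sec_path(skb);

	return sp && sp->len > 0;
}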