.. | .. |
| 1 | +/* SPDX-License-Identifier: GPL-2.0-or-later */ |
1 | 2 | /* |
2 | 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
3 | 4 | * operating system. INET is implemented using the BSD Socket |
.. | .. |
30 | 31 | * respective headers and ipv4/v6, etc now |
31 | 32 | * use private slabcaches for its socks |
32 | 33 | * Pedro Hortas : New flags field for socket options |
33 | | - * |
34 | | - * |
35 | | - * This program is free software; you can redistribute it and/or |
36 | | - * modify it under the terms of the GNU General Public License |
37 | | - * as published by the Free Software Foundation; either version |
38 | | - * 2 of the License, or (at your option) any later version. |
39 | 34 | */ |
40 | 35 | #ifndef _SOCK_H |
41 | 36 | #define _SOCK_H |
.. | .. |
64 | 59 | #include <linux/filter.h> |
65 | 60 | #include <linux/rculist_nulls.h> |
66 | 61 | #include <linux/poll.h> |
| 62 | +#include <linux/sockptr.h> |
67 | 63 | |
68 | 64 | #include <linux/atomic.h> |
69 | 65 | #include <linux/refcount.h> |
.. | .. |
71 | 67 | #include <net/checksum.h> |
72 | 68 | #include <net/tcp_states.h> |
73 | 69 | #include <linux/net_tstamp.h> |
74 | | -#include <net/smc.h> |
75 | 70 | #include <net/l3mdev.h> |
76 | 71 | #include <linux/android_kabi.h> |
| 72 | +#include <linux/android_vendor.h> |
77 | 73 | |
78 | 74 | /* |
79 | 75 | * This structure really needs to be cleaned up. |
.. | .. |
124 | 120 | * struct sock_common - minimal network layer representation of sockets |
125 | 121 | * @skc_daddr: Foreign IPv4 addr |
126 | 122 | * @skc_rcv_saddr: Bound local IPv4 addr |
| 123 | + * @skc_addrpair: 8-byte-aligned __u64 union of @skc_daddr & @skc_rcv_saddr |
127 | 124 | * @skc_hash: hash value used with various protocol lookup tables |
128 | 125 | * @skc_u16hashes: two u16 hash values used by UDP lookup tables |
129 | 126 | * @skc_dport: placeholder for inet_dport/tw_dport |
130 | 127 | * @skc_num: placeholder for inet_num/tw_num |
| 128 | + * @skc_portpair: __u32 union of @skc_dport & @skc_num |
131 | 129 | * @skc_family: network address family |
132 | 130 | * @skc_state: Connection state |
133 | 131 | * @skc_reuse: %SO_REUSEADDR setting |
134 | 132 | * @skc_reuseport: %SO_REUSEPORT setting |
| 133 | + * @skc_ipv6only: socket is IPV6 only |
| 134 | + * @skc_net_refcnt: socket is using net ref counting |
135 | 135 | * @skc_bound_dev_if: bound device index if != 0 |
136 | 136 | * @skc_bind_node: bind hash linkage for various protocol lookup tables |
137 | 137 | * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol |
138 | 138 | * @skc_prot: protocol handlers inside a network family |
139 | 139 | * @skc_net: reference to the network namespace of this socket |
| 140 | + * @skc_v6_daddr: IPV6 destination address |
| 141 | + * @skc_v6_rcv_saddr: IPV6 source address |
| 142 | + * @skc_cookie: socket's cookie value |
140 | 143 | * @skc_node: main hash linkage for various protocol lookup tables |
141 | 144 | * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol |
142 | 145 | * @skc_tx_queue_mapping: tx queue number for this connection |
.. | .. |
144 | 147 | * @skc_flags: place holder for sk_flags |
145 | 148 | * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, |
146 | 149 | * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings |
| 150 | + * @skc_listener: connection request listener socket (aka rsk_listener) |
| 151 | + * [union with @skc_flags] |
| 152 | + * @skc_tw_dr: (aka tw_dr) ptr to &struct inet_timewait_death_row |
| 153 | + * [union with @skc_flags] |
147 | 154 | * @skc_incoming_cpu: record/match cpu processing incoming packets |
| 155 | + * @skc_rcv_wnd: (aka rsk_rcv_wnd) TCP receive window size (possibly scaled) |
| 156 | + * [union with @skc_incoming_cpu] |
| 157 | + * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number |
| 158 | + * [union with @skc_incoming_cpu] |
148 | 159 | * @skc_refcnt: reference count |
149 | 160 | * |
150 | 161 | * This is the minimal network layer representation of sockets, the header |
151 | 162 | * for struct sock and struct inet_timewait_sock. |
152 | 163 | */ |
153 | 164 | struct sock_common { |
154 | | - /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned |
155 | | - * address on 64bit arches : cf INET_MATCH() |
156 | | - */ |
157 | 165 | union { |
158 | 166 | __addrpair skc_addrpair; |
159 | 167 | struct { |
.. | .. |
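The alignment comment the old code kept inline (skc_daddr and skc_rcv_saddr grouped on an 8-byte-aligned address, cf. INET_MATCH()) now lives in the @skc_addrpair kernel-doc above. A standalone model of why that layout matters, with illustrative names rather than the kernel's:

```c
#include <stdbool.h>
#include <stdint.h>

/* Model of the __addrpair trick: two adjacent, 8-byte-aligned 32-bit
 * addresses can be matched with one 64-bit compare instead of two. */
struct addr_common {
	union {
		uint64_t addrpair;              /* both addresses as one word */
		struct {
			uint32_t daddr;         /* foreign IPv4 address */
			uint32_t rcv_saddr;     /* bound local IPv4 address */
		};
	};
};

static bool addrs_match(const struct addr_common *c, uint64_t cookie)
{
	return c->addrpair == cookie;           /* one compare covers both fields */
}
```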
237 | 245 | /* public: */ |
238 | 246 | }; |
239 | 247 | |
| 248 | +struct bpf_local_storage; |
| 249 | + |
240 | 250 | /** |
241 | 251 | * struct sock - network layer representation of sockets |
242 | 252 | * @__sk_common: shared layout with inet_timewait_sock |
.. | .. |
250 | 260 | * @sk_dst_cache: destination cache |
251 | 261 | * @sk_dst_pending_confirm: need to confirm neighbour |
252 | 262 | * @sk_policy: flow policy |
| 263 | + * @sk_rx_skb_cache: cache copy of recently accessed RX skb |
253 | 264 | * @sk_receive_queue: incoming packets |
254 | 265 | * @sk_wmem_alloc: transmit queue bytes committed |
255 | 266 | * @sk_tsq_flags: TCP Small Queues flags |
.. | .. |
270 | 281 | * @sk_no_check_rx: allow zero checksum in RX packets |
271 | 282 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) |
272 | 283 | * @sk_route_nocaps: forbidden route capabilities (e.g. %NETIF_F_GSO_MASK) |
| 284 | + * @sk_route_forced_caps: static, forced route capabilities |
| 285 | + * (set in tcp_init_sock()) |
273 | 286 | * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) |
274 | 287 | * @sk_gso_max_size: Maximum GSO segment size to build |
275 | 288 | * @sk_gso_max_segs: Maximum number of GSO segments |
.. | .. |
308 | 321 | * @sk_frag: cached page frag |
309 | 322 | * @sk_peek_off: current peek_offset value |
310 | 323 | * @sk_send_head: front of stuff to transmit |
| 324 | + * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head] |
| 325 | + * @sk_tx_skb_cache: cache copy of recently accessed TX skb |
311 | 326 | * @sk_security: used by security modules |
312 | 327 | * @sk_mark: generic packet mark |
313 | 328 | * @sk_cgrp_data: cgroup data for this cgroup |
.. | .. |
318 | 333 | * @sk_write_space: callback to indicate there is buffer sending space available |
319 | 334 | * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) |
320 | 335 | * @sk_backlog_rcv: callback to process the backlog |
| 336 | + * @sk_validate_xmit_skb: ptr to an optional validate function |
321 | 337 | * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 |
322 | 338 | * @sk_reuseport_cb: reuseport group container |
| 339 | + * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage |
323 | 340 | * @sk_rcu: used during RCU grace period |
324 | 341 | * @sk_clockid: clockid used by time-based scheduling (SO_TXTIME) |
325 | 342 | * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME |
| 343 | + * @sk_txtime_report_errors: set report errors mode for SO_TXTIME |
326 | 344 | * @sk_txtime_unused: unused txtime flags |
327 | 345 | */ |
328 | 346 | struct sock { |
.. | .. |
369 | 387 | atomic_t sk_drops; |
370 | 388 | int sk_rcvlowat; |
371 | 389 | struct sk_buff_head sk_error_queue; |
| 390 | + struct sk_buff *sk_rx_skb_cache; |
372 | 391 | struct sk_buff_head sk_receive_queue; |
373 | 392 | /* |
374 | 393 | * The backlog queue is special, it is always used with |
.. | .. |
397 | 416 | struct sk_filter __rcu *sk_filter; |
398 | 417 | union { |
399 | 418 | struct socket_wq __rcu *sk_wq; |
| 419 | + /* private: */ |
400 | 420 | struct socket_wq *sk_wq_raw; |
| 421 | + /* public: */ |
401 | 422 | }; |
402 | 423 | #ifdef CONFIG_XFRM |
403 | 424 | struct xfrm_policy __rcu *sk_policy[2]; |
404 | 425 | #endif |
405 | | - struct dst_entry *sk_rx_dst; |
| 426 | + struct dst_entry __rcu *sk_rx_dst; |
406 | 427 | struct dst_entry __rcu *sk_dst_cache; |
407 | 428 | atomic_t sk_omem_alloc; |
408 | 429 | int sk_sndbuf; |
.. | .. |
415 | 436 | struct sk_buff *sk_send_head; |
416 | 437 | struct rb_root tcp_rtx_queue; |
417 | 438 | }; |
| 439 | + struct sk_buff *sk_tx_skb_cache; |
418 | 440 | struct sk_buff_head sk_write_queue; |
419 | 441 | __s32 sk_peek_off; |
420 | 442 | int sk_write_pending; |
.. | .. |
424 | 446 | struct timer_list sk_timer; |
425 | 447 | __u32 sk_priority; |
426 | 448 | __u32 sk_mark; |
427 | | - u32 sk_pacing_rate; /* bytes per second */ |
428 | | - u32 sk_max_pacing_rate; |
| 449 | + unsigned long sk_pacing_rate; /* bytes per second */ |
| 450 | + unsigned long sk_max_pacing_rate; |
429 | 451 | struct page_frag sk_frag; |
430 | 452 | netdev_features_t sk_route_caps; |
431 | 453 | netdev_features_t sk_route_nocaps; |
.. | .. |
439 | 461 | * Because of non atomicity rules, all |
440 | 462 | * changes are protected by socket lock. |
441 | 463 | */ |
442 | | - unsigned int __sk_flags_offset[0]; |
443 | | -#ifdef __BIG_ENDIAN_BITFIELD |
444 | | -#define SK_FL_PROTO_SHIFT 16 |
445 | | -#define SK_FL_PROTO_MASK 0x00ff0000 |
446 | | - |
447 | | -#define SK_FL_TYPE_SHIFT 0 |
448 | | -#define SK_FL_TYPE_MASK 0x0000ffff |
449 | | -#else |
450 | | -#define SK_FL_PROTO_SHIFT 8 |
451 | | -#define SK_FL_PROTO_MASK 0x0000ff00 |
452 | | - |
453 | | -#define SK_FL_TYPE_SHIFT 16 |
454 | | -#define SK_FL_TYPE_MASK 0xffff0000 |
455 | | -#endif |
456 | | - |
457 | | - unsigned int sk_padding : 1, |
| 464 | + u8 sk_padding : 1, |
458 | 465 | sk_kern_sock : 1, |
459 | 466 | sk_no_check_tx : 1, |
460 | 467 | sk_no_check_rx : 1, |
461 | | - sk_userlocks : 4, |
462 | | - sk_protocol : 8, |
463 | | - sk_type : 16; |
464 | | -#define SK_PROTOCOL_MAX U8_MAX |
465 | | - u16 sk_gso_max_segs; |
| 468 | + sk_userlocks : 4; |
466 | 469 | u8 sk_pacing_shift; |
| 470 | + u16 sk_type; |
| 471 | + u16 sk_protocol; |
| 472 | + u16 sk_gso_max_segs; |
467 | 473 | unsigned long sk_lingertime; |
468 | 474 | struct proto *sk_prot_creator; |
469 | 475 | rwlock_t sk_callback_lock; |
.. | .. |
472 | 478 | u32 sk_ack_backlog; |
473 | 479 | u32 sk_max_ack_backlog; |
474 | 480 | kuid_t sk_uid; |
475 | | -#if IS_ENABLED(CONFIG_DEBUG_SPINLOCK) || IS_ENABLED(CONFIG_DEBUG_LOCK_ALLOC) || IS_ENABLED(CONFIG_PREEMPT_RT) |
| 481 | +#if IS_ENABLED(CONFIG_DEBUG_SPINLOCK) || IS_ENABLED(CONFIG_DEBUG_LOCK_ALLOC) |
476 | 482 | spinlock_t sk_peer_lock; |
477 | 483 | #else |
478 | 484 | /* sk_peer_lock is in the ANDROID_KABI_RESERVE(1) field below */ |
.. | .. |
515 | 521 | #endif |
516 | 522 | void (*sk_destruct)(struct sock *sk); |
517 | 523 | struct sock_reuseport __rcu *sk_reuseport_cb; |
| 524 | +#ifdef CONFIG_BPF_SYSCALL |
| 525 | + struct bpf_local_storage __rcu *sk_bpf_storage; |
| 526 | +#endif |
518 | 527 | struct rcu_head sk_rcu; |
519 | 528 | |
520 | | -#if IS_ENABLED(CONFIG_DEBUG_SPINLOCK) || IS_ENABLED(CONFIG_DEBUG_LOCK_ALLOC) || IS_ENABLED(CONFIG_PREEMPT_RT) |
| 529 | +#if IS_ENABLED(CONFIG_DEBUG_SPINLOCK) || IS_ENABLED(CONFIG_DEBUG_LOCK_ALLOC) |
521 | 530 | ANDROID_KABI_RESERVE(1); |
522 | 531 | #else |
523 | 532 | ANDROID_KABI_USE(1, spinlock_t sk_peer_lock); |
.. | .. |
529 | 538 | ANDROID_KABI_RESERVE(6); |
530 | 539 | ANDROID_KABI_RESERVE(7); |
531 | 540 | ANDROID_KABI_RESERVE(8); |
| 541 | + |
| 542 | + ANDROID_OEM_DATA(1); |
532 | 543 | }; |
533 | 544 | |
534 | 545 | enum sk_pacing { |
.. | .. |
537 | 548 | SK_PACING_FQ = 2, |
538 | 549 | }; |
539 | 550 | |
| 551 | +/* flag bits in sk_user_data |
| 552 | + * |
| 553 | + * - SK_USER_DATA_NOCOPY: Pointer stored in sk_user_data might |
| 554 | + * not be suitable for copying when cloning the socket. For instance, |
| 555 | + * it can point to a reference counted object. sk_user_data bottom |
| 556 | + * bit is set if pointer must not be copied. |
| 557 | + * |
| 558 | + * - SK_USER_DATA_BPF: Mark whether sk_user_data field is |
| 559 | + * managed/owned by a BPF reuseport array. This bit should be set |
| 560 | + * when sk_user_data's sk is added to the bpf's reuseport_array. |
| 561 | + * |
| 562 | + * - SK_USER_DATA_PSOCK: Mark whether pointer stored in |
| 563 | + * sk_user_data points to psock type. This bit should be set |
| 564 | + * when sk_user_data is assigned to a psock object. |
| 565 | + */ |
| 566 | +#define SK_USER_DATA_NOCOPY 1UL |
| 567 | +#define SK_USER_DATA_BPF 2UL |
| 568 | +#define SK_USER_DATA_PSOCK 4UL |
| 569 | +#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY | SK_USER_DATA_BPF |\ |
| 570 | + SK_USER_DATA_PSOCK) |
| 571 | + |
| 572 | +/** |
| 573 | + * sk_user_data_is_nocopy - Test if sk_user_data pointer must not be copied |
| 574 | + * @sk: socket |
| 575 | + */ |
| 576 | +static inline bool sk_user_data_is_nocopy(const struct sock *sk) |
| 577 | +{ |
| 578 | + return ((uintptr_t)sk->sk_user_data & SK_USER_DATA_NOCOPY); |
| 579 | +} |
| 580 | + |
540 | 581 | #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) |
541 | 582 | |
542 | | -#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk))) |
543 | | -#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr) |
| 583 | +/** |
| 584 | + * __rcu_dereference_sk_user_data_with_flags - return the pointer only |
| 585 | + * if all of the bits in @flags are set in sk_user_data; otherwise |
| 586 | + * return NULL |
| 587 | + * |
| 588 | + * @sk: socket |
| 589 | + * @flags: flag bits |
| 590 | + */ |
| 591 | +static inline void * |
| 592 | +__rcu_dereference_sk_user_data_with_flags(const struct sock *sk, |
| 593 | + uintptr_t flags) |
| 594 | +{ |
| 595 | + uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk)); |
| 596 | + |
| 597 | + WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); |
| 598 | + |
| 599 | + if ((sk_user_data & flags) == flags) |
| 600 | + return (void *)(sk_user_data & SK_USER_DATA_PTRMASK); |
| 601 | + return NULL; |
| 602 | +} |
| 603 | + |
| 604 | +#define rcu_dereference_sk_user_data(sk) \ |
| 605 | + __rcu_dereference_sk_user_data_with_flags(sk, 0) |
| 606 | +#define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \ |
| 607 | +({ \ |
| 608 | + uintptr_t __tmp1 = (uintptr_t)(ptr), \ |
| 609 | + __tmp2 = (uintptr_t)(flags); \ |
| 610 | + WARN_ON_ONCE(__tmp1 & ~SK_USER_DATA_PTRMASK); \ |
| 611 | + WARN_ON_ONCE(__tmp2 & SK_USER_DATA_PTRMASK); \ |
| 612 | + rcu_assign_pointer(__sk_user_data((sk)), \ |
| 613 | + __tmp1 | __tmp2); \ |
| 614 | +}) |
| 615 | +#define rcu_assign_sk_user_data(sk, ptr) \ |
| 616 | + __rcu_assign_sk_user_data_with_flags(sk, ptr, 0) |
544 | 617 | |
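Since any pointer stored in sk_user_data is at least word aligned, its low bits are free to carry the three flags, and the helpers above pack and unpack them. A standalone model of the tagged-pointer scheme (names are illustrative, not the kernel's):

```c
#include <stdint.h>
#include <stdio.h>

#define UD_NOCOPY  1UL
#define UD_BPF     2UL
#define UD_PSOCK   4UL
#define UD_PTRMASK (~(UD_NOCOPY | UD_BPF | UD_PSOCK))

static void *ud_pack(void *ptr, uintptr_t flags)
{
	return (void *)((uintptr_t)ptr | flags);    /* stash flags in low bits */
}

/* Mirrors __rcu_dereference_sk_user_data_with_flags(): the pointer is
 * only handed back when every requested flag bit is present. */
static void *ud_unpack(void *tagged, uintptr_t flags)
{
	uintptr_t v = (uintptr_t)tagged;

	if ((v & flags) == flags)
		return (void *)(v & UD_PTRMASK);
	return NULL;
}

int main(void)
{
	static long object;                         /* suitably aligned payload */
	void *t = ud_pack(&object, UD_BPF);

	printf("%d %d\n", ud_unpack(t, UD_BPF) == (void *)&object,  /* 1 */
	       ud_unpack(t, UD_PSOCK) == NULL);                     /* 1 */
	return 0;
}
```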
545 | 618 | /* |
546 | 619 | * SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK |
.. | .. |
820 | 893 | SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */ |
821 | 894 | SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */ |
822 | 895 | SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ |
823 | | - SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ |
824 | 896 | SOCK_MEMALLOC, /* VM depends on this socket for swapping */ |
825 | 897 | SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */ |
826 | 898 | SOCK_FASYNC, /* fasync() active */ |
.. | .. |
835 | 907 | SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ |
836 | 908 | SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ |
837 | 909 | SOCK_TXTIME, |
| 910 | + SOCK_XDP, /* XDP is attached */ |
| 911 | + SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ |
838 | 912 | }; |
839 | 913 | |
840 | 914 | #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) |
.. | .. |
852 | 926 | static inline void sock_reset_flag(struct sock *sk, enum sock_flags flag) |
853 | 927 | { |
854 | 928 | __clear_bit(flag, &sk->sk_flags); |
| 929 | +} |
| 930 | + |
| 931 | +static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit, |
| 932 | + int valbool) |
| 933 | +{ |
| 934 | + if (valbool) |
| 935 | + sock_set_flag(sk, bit); |
| 936 | + else |
| 937 | + sock_reset_flag(sk, bit); |
855 | 938 | } |
856 | 939 | |
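sock_valbool_flag() folds the set/clear decision that option handlers previously open-coded. A hedged sketch of a typical caller, modeled on the SO_BROADCAST and SO_KEEPALIVE cases in sock_setsockopt() (the option list here is abridged):

```c
/* Many boolean socket options reduce to a single sock_valbool_flag()
 * call once the option value has been normalized to 0/1. */
static void example_set_bool_opt(struct sock *sk, int optname, int valbool)
{
	switch (optname) {
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_KEEPALIVE:
		if (sk->sk_prot->keepalive)
			sk->sk_prot->keepalive(sk, valbool);
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;
	}
}
```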
857 | 940 | static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) |
.. | .. |
885 | 968 | |
886 | 969 | static inline void sk_acceptq_removed(struct sock *sk) |
887 | 970 | { |
888 | | - sk->sk_ack_backlog--; |
| 971 | + WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog - 1); |
889 | 972 | } |
890 | 973 | |
891 | 974 | static inline void sk_acceptq_added(struct sock *sk) |
892 | 975 | { |
893 | | - sk->sk_ack_backlog++; |
| 976 | + WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1); |
894 | 977 | } |
895 | 978 | |
896 | 979 | static inline bool sk_acceptq_is_full(const struct sock *sk) |
897 | 980 | { |
898 | | - return sk->sk_ack_backlog > sk->sk_max_ack_backlog; |
| 981 | + return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog); |
899 | 982 | } |
900 | 983 | |
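The plain increments and comparison above became annotated accesses because sk_ack_backlog is written with the listener locked but sampled locklessly elsewhere (diag and proc reporting, for instance). A minimal sketch of the split these annotations support (function names are illustrative):

```c
/* Writer runs under the listener's lock; readers take no lock at all.
 * WRITE_ONCE()/READ_ONCE() stop the compiler from tearing, fusing, or
 * re-reading the shared variable, which keeps such racy reads benign. */
static inline void example_backlog_inc(struct sock *sk)
{
	WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1);
}

static inline u32 example_backlog_sample(const struct sock *sk)
{
	return READ_ONCE(sk->sk_ack_backlog);   /* lockless reader side */
}
```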
901 | 984 | /* |
.. | .. |
903 | 986 | */ |
904 | 987 | static inline int sk_stream_min_wspace(const struct sock *sk) |
905 | 988 | { |
906 | | - return sk->sk_wmem_queued >> 1; |
| 989 | + return READ_ONCE(sk->sk_wmem_queued) >> 1; |
907 | 990 | } |
908 | 991 | |
909 | 992 | static inline int sk_stream_wspace(const struct sock *sk) |
910 | 993 | { |
911 | | - return sk->sk_sndbuf - sk->sk_wmem_queued; |
| 994 | + return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued); |
| 995 | +} |
| 996 | + |
| 997 | +static inline void sk_wmem_queued_add(struct sock *sk, int val) |
| 998 | +{ |
| 999 | + WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val); |
912 | 1000 | } |
913 | 1001 | |
914 | 1002 | void sk_stream_write_space(struct sock *sk); |
.. | .. |
993 | 1081 | static inline void sock_rps_record_flow(const struct sock *sk) |
994 | 1082 | { |
995 | 1083 | #ifdef CONFIG_RPS |
996 | | - if (static_key_false(&rfs_needed)) { |
| 1084 | + if (static_branch_unlikely(&rfs_needed)) { |
997 | 1085 | /* Reading sk->sk_rxhash might incur an expensive cache line |
998 | 1086 | * miss. |
999 | 1087 | * |
.. | .. |
1104 | 1192 | void (*destroy)(struct sock *sk); |
1105 | 1193 | void (*shutdown)(struct sock *sk, int how); |
1106 | 1194 | int (*setsockopt)(struct sock *sk, int level, |
1107 | | - int optname, char __user *optval, |
| 1195 | + int optname, sockptr_t optval, |
1108 | 1196 | unsigned int optlen); |
1109 | 1197 | int (*getsockopt)(struct sock *sk, int level, |
1110 | 1198 | int optname, char __user *optval, |
1111 | 1199 | int __user *option); |
1112 | 1200 | void (*keepalive)(struct sock *sk, int valbool); |
1113 | 1201 | #ifdef CONFIG_COMPAT |
1114 | | - int (*compat_setsockopt)(struct sock *sk, |
1115 | | - int level, |
1116 | | - int optname, char __user *optval, |
1117 | | - unsigned int optlen); |
1118 | | - int (*compat_getsockopt)(struct sock *sk, |
1119 | | - int level, |
1120 | | - int optname, char __user *optval, |
1121 | | - int __user *option); |
1122 | 1202 | int (*compat_ioctl)(struct sock *sk, |
1123 | 1203 | unsigned int cmd, unsigned long arg); |
1124 | 1204 | #endif |
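The sockptr_t signature lets one handler accept both user and kernel buffers, which is what allowed the compat_setsockopt/compat_getsockopt hooks to be dropped. A hedged sketch of a proto .setsockopt implementation under the new signature (the option handling itself is invented for illustration):

```c
/* copy_from_sockptr() from <linux/sockptr.h> does the right copy for
 * either a __user or a kernel pointer wrapped in sockptr_t. */
static int example_setsockopt(struct sock *sk, int level, int optname,
			      sockptr_t optval, unsigned int optlen)
{
	int val;

	if (optlen < sizeof(val))
		return -EINVAL;
	if (copy_from_sockptr(&val, optval, sizeof(val)))
		return -EFAULT;
	/* ... apply val under lock_sock(sk) as the protocol requires ... */
	return 0;
}
```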
.. | .. |
1130 | 1210 | int (*sendpage)(struct sock *sk, struct page *page, |
1131 | 1211 | int offset, size_t size, int flags); |
1132 | 1212 | int (*bind)(struct sock *sk, |
1133 | | - struct sockaddr *uaddr, int addr_len); |
| 1213 | + struct sockaddr *addr, int addr_len); |
| 1214 | + int (*bind_add)(struct sock *sk, |
| 1215 | + struct sockaddr *addr, int addr_len); |
1134 | 1216 | |
1135 | 1217 | int (*backlog_rcv) (struct sock *sk, |
1136 | 1218 | struct sk_buff *skb); |
.. | .. |
1148 | 1230 | unsigned int inuse_idx; |
1149 | 1231 | #endif |
1150 | 1232 | |
1151 | | - bool (*stream_memory_free)(const struct sock *sk); |
| 1233 | + bool (*stream_memory_free)(const struct sock *sk, int wake); |
1152 | 1234 | bool (*stream_memory_read)(const struct sock *sk); |
1153 | 1235 | /* Memory pressure */ |
1154 | 1236 | void (*enter_memory_pressure)(struct sock *sk); |
.. | .. |
1230 | 1312 | #define sk_refcnt_debug_release(sk) do { } while (0) |
1231 | 1313 | #endif /* SOCK_REFCNT_DEBUG */ |
1232 | 1314 | |
1233 | | -static inline bool sk_stream_memory_free(const struct sock *sk) |
| 1315 | +static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) |
1234 | 1316 | { |
1235 | | - if (sk->sk_wmem_queued >= sk->sk_sndbuf) |
| 1317 | + if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) |
1236 | 1318 | return false; |
1237 | 1319 | |
1238 | 1320 | return sk->sk_prot->stream_memory_free ? |
1239 | | - sk->sk_prot->stream_memory_free(sk) : true; |
| 1321 | + sk->sk_prot->stream_memory_free(sk, wake) : true; |
| 1322 | +} |
| 1323 | + |
| 1324 | +static inline bool sk_stream_memory_free(const struct sock *sk) |
| 1325 | +{ |
| 1326 | + return __sk_stream_memory_free(sk, 0); |
| 1327 | +} |
| 1328 | + |
| 1329 | +static inline bool __sk_stream_is_writeable(const struct sock *sk, int wake) |
| 1330 | +{ |
| 1331 | + return sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && |
| 1332 | + __sk_stream_memory_free(sk, wake); |
1240 | 1333 | } |
1241 | 1334 | |
1242 | 1335 | static inline bool sk_stream_is_writeable(const struct sock *sk) |
1243 | 1336 | { |
1244 | | - return sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && |
1245 | | - sk_stream_memory_free(sk); |
| 1337 | + return __sk_stream_is_writeable(sk, 0); |
1246 | 1338 | } |
1247 | 1339 | |
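The new wake argument lets the wakeup path ask a stricter question than the fast path. A paraphrase of how TCP's stream_memory_free hook uses it (a sketch from memory, not the verbatim upstream body):

```c
/* With wake == 1 the unsent byte count is doubled before comparing
 * against TCP_NOTSENT_LOWAT, which effectively halves the threshold:
 * writers are only woken once a meaningful share of the budget has
 * actually drained, avoiding wakeups that cannot make progress. */
static inline bool example_tcp_stream_memory_free(const struct sock *sk,
						  int wake)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u32 notsent = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);

	return (notsent << wake) < tcp_notsent_lowat(tp);
}
```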
1248 | 1340 | static inline int sk_under_cgroup_hierarchy(struct sock *sk, |
.. | .. |
1399 | 1491 | /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ |
1400 | 1492 | static inline long sk_prot_mem_limits(const struct sock *sk, int index) |
1401 | 1493 | { |
1402 | | - long val = sk->sk_prot->sysctl_mem[index]; |
| 1494 | + long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]); |
1403 | 1495 | |
1404 | 1496 | #if PAGE_SIZE > SK_MEM_QUANTUM |
1405 | 1497 | val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; |
.. | .. |
1422 | 1514 | |
1423 | 1515 | static inline bool sk_wmem_schedule(struct sock *sk, int size) |
1424 | 1516 | { |
| 1517 | + int delta; |
| 1518 | + |
1425 | 1519 | if (!sk_has_account(sk)) |
1426 | 1520 | return true; |
1427 | | - return size <= sk->sk_forward_alloc || |
1428 | | - __sk_mem_schedule(sk, size, SK_MEM_SEND); |
| 1521 | + delta = size - sk->sk_forward_alloc; |
| 1522 | + return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND); |
1429 | 1523 | } |
1430 | 1524 | |
1431 | 1525 | static inline bool |
1432 | 1526 | sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) |
1433 | 1527 | { |
| 1528 | + int delta; |
| 1529 | + |
1434 | 1530 | if (!sk_has_account(sk)) |
1435 | 1531 | return true; |
1436 | | - return size<= sk->sk_forward_alloc || |
1437 | | - __sk_mem_schedule(sk, size, SK_MEM_RECV) || |
| 1532 | + delta = size - sk->sk_forward_alloc; |
| 1533 | + return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) || |
1438 | 1534 | skb_pfmemalloc(skb); |
1439 | 1535 | } |
1440 | 1536 | |
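The refactor changes what gets requested from the shared memory accounting: the old code handed __sk_mem_schedule() the full size even when most of it was already covered by sk_forward_alloc, the new code charges only the shortfall. A standalone worked example of the arithmetic:

```c
#include <stdio.h>

int main(void)
{
	int forward_alloc = 3000;       /* bytes already reserved for this socket */
	int size = 4096;                /* bytes the incoming request needs */
	int delta = size - forward_alloc;

	if (delta <= 0)
		printf("request fits, no __sk_mem_schedule() call\n");
	else    /* the old code would have requested all 4096 bytes */
		printf("charge only the %d-byte shortfall\n", delta);
	return 0;
}
```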
.. | .. |
1478 | 1574 | __sk_mem_reclaim(sk, 1 << 20); |
1479 | 1575 | } |
1480 | 1576 | |
| 1577 | +DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); |
1481 | 1578 | static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) |
1482 | 1579 | { |
1483 | | - sock_set_flag(sk, SOCK_QUEUE_SHRUNK); |
1484 | | - sk->sk_wmem_queued -= skb->truesize; |
| 1580 | + sk_wmem_queued_add(sk, -skb->truesize); |
1485 | 1581 | sk_mem_uncharge(sk, skb->truesize); |
| 1582 | + if (static_branch_unlikely(&tcp_tx_skb_cache_key) && |
| 1583 | + !sk->sk_tx_skb_cache && !skb_cloned(skb)) { |
| 1584 | + skb_ext_reset(skb); |
| 1585 | + skb_zcopy_clear(skb, true); |
| 1586 | + sk->sk_tx_skb_cache = skb; |
| 1587 | + return; |
| 1588 | + } |
1486 | 1589 | __kfree_skb(skb); |
1487 | 1590 | } |
1488 | 1591 | |
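sk_tx_skb_cache, together with the matching RX slot used in sk_eat_skb() further down, implements a one-slot recycling cache: when exactly one skb cycles through alloc/free, the free+alloc pair collapses into two pointer moves. A standalone model of the pattern (names are illustrative):

```c
#include <stdio.h>
#include <stdlib.h>

struct one_slot_cache { void *slot; };

static void cache_put(struct one_slot_cache *c, void *buf)
{
	if (!c->slot)
		c->slot = buf;          /* park the buffer instead of freeing */
	else
		free(buf);              /* slot occupied: fall back to free() */
}

static void *cache_get(struct one_slot_cache *c, size_t size)
{
	void *buf = c->slot;

	if (buf) {
		c->slot = NULL;         /* reuse the parked buffer */
		return buf;
	}
	return malloc(size);            /* cold path: real allocation */
}

int main(void)
{
	struct one_slot_cache c = { 0 };
	void *a = cache_get(&c, 2048);

	cache_put(&c, a);
	printf("%d\n", cache_get(&c, 2048) == a);   /* 1: buffer was reused */
	return 0;
}
```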
.. | .. |
1492 | 1595 | sk->sk_lock.owned = 0; |
1493 | 1596 | |
1494 | 1597 | /* The sk_lock has mutex_unlock() semantics: */ |
1495 | | - mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); |
| 1598 | + mutex_release(&sk->sk_lock.dep_map, _RET_IP_); |
1496 | 1599 | } |
1497 | 1600 | } |
1498 | 1601 | |
.. | .. |
1615 | 1718 | void sock_efree(struct sk_buff *skb); |
1616 | 1719 | #ifdef CONFIG_INET |
1617 | 1720 | void sock_edemux(struct sk_buff *skb); |
| 1721 | +void sock_pfree(struct sk_buff *skb); |
1618 | 1722 | #else |
1619 | 1723 | #define sock_edemux sock_efree |
1620 | 1724 | #endif |
1621 | 1725 | |
1622 | 1726 | int sock_setsockopt(struct socket *sock, int level, int op, |
1623 | | - char __user *optval, unsigned int optlen); |
| 1727 | + sockptr_t optval, unsigned int optlen); |
1624 | 1728 | |
1625 | 1729 | int sock_getsockopt(struct socket *sock, int level, int op, |
1626 | 1730 | char __user *optval, int __user *optlen); |
| 1731 | +int sock_gettstamp(struct socket *sock, void __user *userstamp, |
| 1732 | + bool timeval, bool time32); |
1627 | 1733 | struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, |
1628 | 1734 | int noblock, int *errcode); |
1629 | 1735 | struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, |
.. | .. |
1663 | 1769 | int sock_no_ioctl(struct socket *, unsigned int, unsigned long); |
1664 | 1770 | int sock_no_listen(struct socket *, int); |
1665 | 1771 | int sock_no_shutdown(struct socket *, int); |
1666 | | -int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *); |
1667 | | -int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int); |
1668 | 1772 | int sock_no_sendmsg(struct socket *, struct msghdr *, size_t); |
1669 | 1773 | int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len); |
1670 | 1774 | int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int); |
.. | .. |
1684 | 1788 | int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, |
1685 | 1789 | int flags); |
1686 | 1790 | int sock_common_setsockopt(struct socket *sock, int level, int optname, |
1687 | | - char __user *optval, unsigned int optlen); |
1688 | | -int compat_sock_common_getsockopt(struct socket *sock, int level, |
1689 | | - int optname, char __user *optval, int __user *optlen); |
1690 | | -int compat_sock_common_setsockopt(struct socket *sock, int level, |
1691 | | - int optname, char __user *optval, unsigned int optlen); |
| 1791 | + sockptr_t optval, unsigned int optlen); |
1692 | 1792 | |
1693 | 1793 | void sk_common_release(struct sock *sk); |
1694 | 1794 | |
.. | .. |
1827 | 1927 | { |
1828 | 1928 | WARN_ON(parent->sk); |
1829 | 1929 | write_lock_bh(&sk->sk_callback_lock); |
1830 | | - rcu_assign_pointer(sk->sk_wq, parent->wq); |
| 1930 | + rcu_assign_pointer(sk->sk_wq, &parent->wq); |
1831 | 1931 | parent->sk = sk; |
1832 | 1932 | sk_set_socket(sk, parent); |
1833 | 1933 | sk->sk_uid = SOCK_INODE(parent)->i_uid; |
.. | .. |
1856 | 1956 | WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); |
1857 | 1957 | } |
1858 | 1958 | |
1859 | | -static inline void sk_rethink_txhash(struct sock *sk) |
| 1959 | +static inline bool sk_rethink_txhash(struct sock *sk) |
1860 | 1960 | { |
1861 | | - if (sk->sk_txhash) |
| 1961 | + if (sk->sk_txhash) { |
1862 | 1962 | sk_set_txhash(sk); |
| 1963 | + return true; |
| 1964 | + } |
| 1965 | + return false; |
1863 | 1966 | } |
1864 | 1967 | |
1865 | 1968 | static inline struct dst_entry * |
.. | .. |
1882 | 1985 | return dst; |
1883 | 1986 | } |
1884 | 1987 | |
1885 | | -static inline void dst_negative_advice(struct sock *sk) |
| 1988 | +static inline void __dst_negative_advice(struct sock *sk) |
1886 | 1989 | { |
1887 | 1990 | struct dst_entry *ndst, *dst = __sk_dst_get(sk); |
1888 | | - |
1889 | | - sk_rethink_txhash(sk); |
1890 | 1991 | |
1891 | 1992 | if (dst && dst->ops->negative_advice) { |
1892 | 1993 | ndst = dst->ops->negative_advice(dst); |
.. | .. |
1897 | 1998 | sk->sk_dst_pending_confirm = 0; |
1898 | 1999 | } |
1899 | 2000 | } |
| 2001 | +} |
| 2002 | + |
| 2003 | +static inline void dst_negative_advice(struct sock *sk) |
| 2004 | +{ |
| 2005 | + sk_rethink_txhash(sk); |
| 2006 | + __dst_negative_advice(sk); |
1900 | 2007 | } |
1901 | 2008 | |
1902 | 2009 | static inline void |
.. | .. |
1941 | 2048 | |
1942 | 2049 | static inline void sk_dst_confirm(struct sock *sk) |
1943 | 2050 | { |
1944 | | - if (!sk->sk_dst_pending_confirm) |
1945 | | - sk->sk_dst_pending_confirm = 1; |
| 2051 | + if (!READ_ONCE(sk->sk_dst_pending_confirm)) |
| 2052 | + WRITE_ONCE(sk->sk_dst_pending_confirm, 1); |
1946 | 2053 | } |
1947 | 2054 | |
1948 | 2055 | static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n) |
.. | .. |
1952 | 2059 | unsigned long now = jiffies; |
1953 | 2060 | |
1954 | 2061 | /* avoid dirtying neighbour */ |
1955 | | - if (n->confirmed != now) |
1956 | | - n->confirmed = now; |
1957 | | - if (sk && sk->sk_dst_pending_confirm) |
1958 | | - sk->sk_dst_pending_confirm = 0; |
| 2062 | + if (READ_ONCE(n->confirmed) != now) |
| 2063 | + WRITE_ONCE(n->confirmed, now); |
| 2064 | + if (sk && READ_ONCE(sk->sk_dst_pending_confirm)) |
| 2065 | + WRITE_ONCE(sk->sk_dst_pending_confirm, 0); |
1959 | 2066 | } |
1960 | 2067 | } |
1961 | 2068 | |
.. | .. |
2020 | 2127 | skb->len += copy; |
2021 | 2128 | skb->data_len += copy; |
2022 | 2129 | skb->truesize += copy; |
2023 | | - sk->sk_wmem_queued += copy; |
| 2130 | + sk_wmem_queued_add(sk, copy); |
2024 | 2131 | sk_mem_charge(sk, copy); |
2025 | 2132 | return 0; |
2026 | 2133 | } |
.. | .. |
2029 | 2136 | * sk_wmem_alloc_get - returns write allocations |
2030 | 2137 | * @sk: socket |
2031 | 2138 | * |
2032 | | - * Returns sk_wmem_alloc minus initial offset of one |
| 2139 | + * Return: sk_wmem_alloc minus initial offset of one |
2033 | 2140 | */ |
2034 | 2141 | static inline int sk_wmem_alloc_get(const struct sock *sk) |
2035 | 2142 | { |
.. | .. |
2040 | 2147 | * sk_rmem_alloc_get - returns read allocations |
2041 | 2148 | * @sk: socket |
2042 | 2149 | * |
2043 | | - * Returns sk_rmem_alloc |
| 2150 | + * Return: sk_rmem_alloc |
2044 | 2151 | */ |
2045 | 2152 | static inline int sk_rmem_alloc_get(const struct sock *sk) |
2046 | 2153 | { |
.. | .. |
2051 | 2158 | * sk_has_allocations - check if allocations are outstanding |
2052 | 2159 | * @sk: socket |
2053 | 2160 | * |
2054 | | - * Returns true if socket has write or read allocations |
| 2161 | + * Return: true if socket has write or read allocations |
2055 | 2162 | */ |
2056 | 2163 | static inline bool sk_has_allocations(const struct sock *sk) |
2057 | 2164 | { |
.. | .. |
2062 | 2169 | * skwq_has_sleeper - check if there are any waiting processes |
2063 | 2170 | * @wq: struct socket_wq |
2064 | 2171 | * |
2065 | | - * Returns true if socket_wq has waiting processes |
| 2172 | + * Return: true if socket_wq has waiting processes |
2066 | 2173 | * |
2067 | 2174 | * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory |
2068 | 2175 | * barrier call. They were added due to the race found within the tcp code. |
.. | .. |
2101 | 2208 | * @p: poll_table |
2102 | 2209 | * |
2103 | 2210 | * See the comments in the wq_has_sleeper function. |
2104 | | - * |
2105 | | - * Do not derive sock from filp->private_data here. An SMC socket establishes |
2106 | | - * an internal TCP socket that is used in the fallback case. All socket |
2107 | | - * operations on the SMC socket are then forwarded to the TCP socket. In case of |
2108 | | - * poll, the filp->private_data pointer references the SMC socket because the |
2109 | | - * TCP socket has no file assigned. |
2110 | 2211 | */ |
2111 | 2212 | static inline void sock_poll_wait(struct file *filp, struct socket *sock, |
2112 | 2213 | poll_table *p) |
2113 | 2214 | { |
2114 | 2215 | if (!poll_does_not_wait(p)) { |
2115 | | - poll_wait(filp, &sock->wq->wait, p); |
| 2216 | + poll_wait(filp, &sock->wq.wait, p); |
2116 | 2217 | /* We need to be sure we are in sync with the |
2117 | 2218 | * socket flags modification. |
2118 | 2219 | * |
.. | .. |
2152 | 2253 | sk_mem_charge(sk, skb->truesize); |
2153 | 2254 | } |
2154 | 2255 | |
| 2256 | +static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk) |
| 2257 | +{ |
| 2258 | + if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { |
| 2259 | + skb_orphan(skb); |
| 2260 | + skb->destructor = sock_efree; |
| 2261 | + skb->sk = sk; |
| 2262 | + return true; |
| 2263 | + } |
| 2264 | + return false; |
| 2265 | +} |
| 2266 | + |
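A hedged sketch of how a caller might use skb_set_owner_sk_safe(); the point of the refcount_inc_not_zero() test is that ownership is only taken when the socket is provably still alive (the caller here is made up):

```c
/* Attach skb to sk only if a reference could be taken; otherwise the
 * socket is already on its way to being freed and the skb is dropped. */
static int example_own_and_queue(struct sk_buff *skb, struct sock *sk)
{
	if (!skb_set_owner_sk_safe(skb, sk)) {
		kfree_skb(skb);
		return -ENOENT;
	}
	/* ... queue skb for sk; sock_efree() later drops the reference ... */
	return 0;
}
```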
2155 | 2267 | void sk_reset_timer(struct sock *sk, struct timer_list *timer, |
2156 | 2268 | unsigned long expires); |
2157 | 2269 | |
2158 | 2270 | void sk_stop_timer(struct sock *sk, struct timer_list *timer); |
| 2271 | + |
| 2272 | +void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer); |
2159 | 2273 | |
2160 | 2274 | int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, |
2161 | 2275 | struct sk_buff *skb, unsigned int flags, |
.. | .. |
2174 | 2288 | static inline int sock_error(struct sock *sk) |
2175 | 2289 | { |
2176 | 2290 | int err; |
2177 | | - if (likely(!sk->sk_err)) |
| 2291 | + |
| 2292 | + /* Avoid an atomic operation for the common case. |
| 2293 | + * This is racy since another cpu/thread can change sk_err under us. |
| 2294 | + */ |
| 2295 | + if (likely(data_race(!sk->sk_err))) |
2178 | 2296 | return 0; |
| 2297 | + |
2179 | 2298 | err = xchg(&sk->sk_err, 0); |
2180 | 2299 | return -err; |
2181 | 2300 | } |
.. | .. |
2235 | 2354 | |
2236 | 2355 | static inline void sk_stream_moderate_sndbuf(struct sock *sk) |
2237 | 2356 | { |
2238 | | - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) { |
2239 | | - sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); |
2240 | | - sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF); |
2241 | | - } |
| 2357 | + u32 val; |
| 2358 | + |
| 2359 | + if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) |
| 2360 | + return; |
| 2361 | + |
| 2362 | + val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1); |
| 2363 | + |
| 2364 | + WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF)); |
2242 | 2365 | } |
2243 | 2366 | |
2244 | 2367 | struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, |
.. | .. |
2249 | 2372 | * @sk: socket |
2250 | 2373 | * |
2251 | 2374 | * Use the per task page_frag instead of the per socket one for |
2252 | | - * optimization when we know that we're in the normal context and owns |
| 2375 | + * optimization when we know that we're in process context and own |
2253 | 2376 | * everything that's associated with %current. |
2254 | 2377 | * |
2255 | | - * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest |
2256 | | - * inside other socket operations and end up recursing into sk_page_frag() |
2257 | | - * while it's already in use. |
| 2378 | + * Both direct reclaim and page faults can nest inside other |
| 2379 | + * socket operations and end up recursing into sk_page_frag() |
| 2380 | + * while it's already in use: explicitly avoid task page_frag |
| 2381 | + * usage if the caller is potentially doing any of them. |
| 2382 | + * This assumes that page fault handlers use the GFP_NOFS flags. |
| 2383 | + * |
| 2384 | + * Return: a per task page_frag if context allows that, |
| 2385 | + * otherwise a per socket one. |
2258 | 2386 | */ |
2259 | 2387 | static inline struct page_frag *sk_page_frag(struct sock *sk) |
2260 | 2388 | { |
2261 | | - if (gfpflags_normal_context(sk->sk_allocation)) |
| 2389 | + if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) == |
| 2390 | + (__GFP_DIRECT_RECLAIM | __GFP_FS)) |
2262 | 2391 | return &current->task_frag; |
2263 | 2392 | |
2264 | 2393 | return &sk->sk_frag; |
.. | .. |
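The open-coded mask test replaces gfpflags_normal_context(): the per-task frag is only safe when the allocation context allows direct reclaim and FS recursion and is not dipping into emergency reserves. How the common masks come out under the new test (a summary, not code from the header):

```c
/* Decision table for sk_page_frag() under the rewritten condition:
 *
 *   GFP_KERNEL                 -> &current->task_frag  (reclaim + FS allowed)
 *   GFP_ATOMIC                 -> &sk->sk_frag         (no __GFP_DIRECT_RECLAIM)
 *   GFP_NOFS                   -> &sk->sk_frag         (page-fault recursion risk)
 *   anything | __GFP_MEMALLOC  -> &sk->sk_frag         (emergency reserves)
 */
```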
2266 | 2395 | |
2267 | 2396 | bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); |
2268 | 2397 | |
2269 | | -int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg, |
2270 | | - int sg_start, int *sg_curr, unsigned int *sg_size, |
2271 | | - int first_coalesce); |
2272 | | - |
2273 | 2398 | /* |
2274 | 2399 | * Default write policy as shown to user space via poll/select/SIGIO |
2275 | 2400 | */ |
2276 | 2401 | static inline bool sock_writeable(const struct sock *sk) |
2277 | 2402 | { |
2278 | | - return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1); |
| 2403 | + return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1); |
2279 | 2404 | } |
2280 | 2405 | |
2281 | 2406 | static inline gfp_t gfp_any(void) |
.. | .. |
2295 | 2420 | |
2296 | 2421 | static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len) |
2297 | 2422 | { |
2298 | | - return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1; |
| 2423 | + int v = waitall ? len : min_t(int, READ_ONCE(sk->sk_rcvlowat), len); |
| 2424 | + |
| 2425 | + return v ?: 1; |
2299 | 2426 | } |
2300 | 2427 | |
2301 | 2428 | /* Alas, with timeout socket operations are not restartable. |
.. | .. |
2314 | 2441 | * using skb->cb[] would keep using it directly and utilize its |
2315 | 2442 | * alignment guarantee. |
2316 | 2443 | */ |
2317 | | -#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \ |
| 2444 | +#define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \ |
2318 | 2445 | sizeof(struct sock_skb_cb))) |
2319 | 2446 | |
2320 | 2447 | #define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \ |
.. | .. |
2418 | 2545 | void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); |
2419 | 2546 | |
2420 | 2547 | /** |
2421 | | - * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped |
| 2548 | + * _sock_tx_timestamp - checks whether the outgoing packet is to be time stamped |
2422 | 2549 | * @sk: socket sending this packet |
2423 | 2550 | * @tsflags: timestamping flags to use |
2424 | 2551 | * @tx_flags: completed with instructions for time stamping |
| 2552 | + * @tskey: filled in with next sk_tskey (not for TCP, which uses seqno) |
2425 | 2553 | * |
2426 | 2554 | * Note: callers should take care of initial ``*tx_flags`` value (usually 0) |
2427 | 2555 | */ |
2428 | | -static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags, |
2429 | | - __u8 *tx_flags) |
| 2556 | +static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags, |
| 2557 | + __u8 *tx_flags, __u32 *tskey) |
2430 | 2558 | { |
2431 | | - if (unlikely(tsflags)) |
| 2559 | + if (unlikely(tsflags)) { |
2432 | 2560 | __sock_tx_timestamp(tsflags, tx_flags); |
| 2561 | + if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey && |
| 2562 | + tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) |
| 2563 | + *tskey = sk->sk_tskey++; |
| 2564 | + } |
2433 | 2565 | if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) |
2434 | 2566 | *tx_flags |= SKBTX_WIFI_STATUS; |
2435 | 2567 | } |
2436 | 2568 | |
| 2569 | +static inline void sock_tx_timestamp(struct sock *sk, __u16 tsflags, |
| 2570 | + __u8 *tx_flags) |
| 2571 | +{ |
| 2572 | + _sock_tx_timestamp(sk, tsflags, tx_flags, NULL); |
| 2573 | +} |
| 2574 | + |
| 2575 | +static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) |
| 2576 | +{ |
| 2577 | + _sock_tx_timestamp(skb->sk, tsflags, &skb_shinfo(skb)->tx_flags, |
| 2578 | + &skb_shinfo(skb)->tskey); |
| 2579 | +} |
| 2580 | + |
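The split into _sock_tx_timestamp() and two wrappers separates callers that track their own timestamp keys from those that want the socket's counter. A hedged sketch of the difference (callers and surrounding code elided):

```c
static void example_tx_meta(struct sock *sk, struct sk_buff *skb, __u16 tsflags)
{
	__u8 flags = 0;

	/* TCP-style caller: tskey is derived from sequence numbers, so
	 * the NULL-tskey wrapper only fills in the tx_flags bits. */
	sock_tx_timestamp(sk, tsflags, &flags);

	/* Datagram-style caller with skb->sk already set: additionally
	 * stamps skb_shinfo(skb)->tskey from sk_tskey when OPT_ID plus
	 * a TX record flag are requested. */
	skb_setup_tx_timestamp(skb, tsflags);
}
```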
| 2581 | +DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); |
2437 | 2582 | /** |
2438 | 2583 | * sk_eat_skb - Release a skb if it is no longer needed |
2439 | 2584 | * @sk: socket to eat this skb from |
.. | .. |
2445 | 2590 | static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) |
2446 | 2591 | { |
2447 | 2592 | __skb_unlink(skb, &sk->sk_receive_queue); |
| 2593 | + if (static_branch_unlikely(&tcp_rx_skb_cache_key) && |
| 2594 | + !sk->sk_rx_skb_cache) { |
| 2595 | + sk->sk_rx_skb_cache = skb; |
| 2596 | + skb_orphan(skb); |
| 2597 | + return; |
| 2598 | + } |
2448 | 2599 | __kfree_skb(skb); |
2449 | 2600 | } |
2450 | 2601 | |
.. | .. |
2460 | 2611 | write_pnet(&sk->sk_net, net); |
2461 | 2612 | } |
2462 | 2613 | |
2463 | | -static inline struct sock *skb_steal_sock(struct sk_buff *skb) |
| 2614 | +static inline bool |
| 2615 | +skb_sk_is_prefetched(struct sk_buff *skb) |
2464 | 2616 | { |
2465 | | - if (skb->sk) { |
2466 | | - struct sock *sk = skb->sk; |
2467 | | - |
2468 | | - skb->destructor = NULL; |
2469 | | - skb->sk = NULL; |
2470 | | - return sk; |
2471 | | - } |
2472 | | - return NULL; |
| 2617 | +#ifdef CONFIG_INET |
| 2618 | + return skb->destructor == sock_pfree; |
| 2619 | +#else |
| 2620 | + return false; |
| 2621 | +#endif /* CONFIG_INET */ |
2473 | 2622 | } |
2474 | 2623 | |
2475 | 2624 | /* This helper checks if a socket is a full socket, |
.. | .. |
2480 | 2629 | return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV); |
2481 | 2630 | } |
2482 | 2631 | |
| 2632 | +static inline bool |
| 2633 | +sk_is_refcounted(struct sock *sk) |
| 2634 | +{ |
| 2635 | + /* Only full sockets have sk->sk_flags. */ |
| 2636 | + return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE); |
| 2637 | +} |
| 2638 | + |
| 2639 | +/** |
| 2640 | + * skb_steal_sock - steal a socket from an sk_buff |
| 2641 | + * @skb: sk_buff to steal the socket from |
| 2642 | + * @refcounted: is set to true if the socket is reference-counted |
| 2643 | + */ |
| 2644 | +static inline struct sock * |
| 2645 | +skb_steal_sock(struct sk_buff *skb, bool *refcounted) |
| 2646 | +{ |
| 2647 | + if (skb->sk) { |
| 2648 | + struct sock *sk = skb->sk; |
| 2649 | + |
| 2650 | + *refcounted = true; |
| 2651 | + if (skb_sk_is_prefetched(skb)) |
| 2652 | + *refcounted = sk_is_refcounted(sk); |
| 2653 | + skb->destructor = NULL; |
| 2654 | + skb->sk = NULL; |
| 2655 | + return sk; |
| 2656 | + } |
| 2657 | + *refcounted = false; |
| 2658 | + return NULL; |
| 2659 | +} |
| 2660 | + |
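A hedged sketch of the receive-path pattern the new out-parameter enables: the caller always learns whether it owes a sock_put() (the function name is illustrative):

```c
static void example_rcv(struct sk_buff *skb)
{
	bool refcounted;
	struct sock *sk = skb_steal_sock(skb, &refcounted);

	if (!sk)
		return;         /* no socket attached: normal lookup path */
	/* ... deliver skb to sk ... */
	if (refcounted)
		sock_put(sk);   /* only prefetched RCU-freed sockets skip this */
}
```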
2483 | 2661 | /* Checks if this SKB belongs to an HW offloaded socket |
2484 | 2662 | * and whether any SW fallbacks are required based on dev. |
| 2663 | + * Check decrypted mark in case skb_orphan() cleared socket. |
2485 | 2664 | */ |
2486 | 2665 | static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, |
2487 | 2666 | struct net_device *dev) |
.. | .. |
2489 | 2668 | #ifdef CONFIG_SOCK_VALIDATE_XMIT |
2490 | 2669 | struct sock *sk = skb->sk; |
2491 | 2670 | |
2492 | | - if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) |
| 2671 | + if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) { |
2493 | 2672 | skb = sk->sk_validate_xmit_skb(sk, dev, skb); |
| 2673 | +#ifdef CONFIG_TLS_DEVICE |
| 2674 | + } else if (unlikely(skb->decrypted)) { |
| 2675 | + pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n"); |
| 2676 | + kfree_skb(skb); |
| 2677 | + skb = NULL; |
| 2678 | +#endif |
| 2679 | + } |
2494 | 2680 | #endif |
2495 | 2681 | |
2496 | 2682 | return skb; |
.. | .. |
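For reference, the callback's contract: return the skb unchanged if the device can still handle it, hand back a replacement (e.g. a software-fallback re-encryption), or free it and return NULL to drop. A hedged stub, with the two helper names invented for illustration (the in-tree implementation lives in TLS device offload):

```c
static struct sk_buff *example_validate_xmit(struct sock *sk,
					     struct net_device *dev,
					     struct sk_buff *skb)
{
	if (likely(example_offload_netdev(sk) == dev))
		return skb;                     /* offload path still valid */
	return example_sw_fallback(sk, skb);    /* rerouted: redo in software */
}
```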
2504 | 2690 | return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); |
2505 | 2691 | } |
2506 | 2692 | |
2507 | | -void sock_enable_timestamp(struct sock *sk, int flag); |
2508 | | -int sock_get_timestamp(struct sock *, struct timeval __user *); |
2509 | | -int sock_get_timestampns(struct sock *, struct timespec __user *); |
| 2693 | +void sock_enable_timestamp(struct sock *sk, enum sock_flags flag); |
2510 | 2694 | int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, |
2511 | 2695 | int type); |
2512 | 2696 | |
.. | .. |
2536 | 2720 | extern __u32 sysctl_wmem_default; |
2537 | 2721 | extern __u32 sysctl_rmem_default; |
2538 | 2722 | |
| 2723 | +#define SKB_FRAG_PAGE_ORDER get_order(32768) |
| 2724 | +DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); |
| 2725 | + |
2539 | 2726 | static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) |
2540 | 2727 | { |
2541 | 2728 | /* Does this proto have per netns sysctl_wmem ? */ |
2542 | 2729 | if (proto->sysctl_wmem_offset) |
2543 | | - return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); |
| 2730 | + return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset)); |
2544 | 2731 | |
2545 | | - return *proto->sysctl_wmem; |
| 2732 | + return READ_ONCE(*proto->sysctl_wmem); |
2546 | 2733 | } |
2547 | 2734 | |
2548 | 2735 | static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) |
2549 | 2736 | { |
2550 | 2737 | /* Does this proto have per netns sysctl_rmem ? */ |
2551 | 2738 | if (proto->sysctl_rmem_offset) |
2552 | | - return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); |
| 2739 | + return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset)); |
2553 | 2740 | |
2554 | | - return *proto->sysctl_rmem; |
| 2741 | + return READ_ONCE(*proto->sysctl_rmem); |
2555 | 2742 | } |
2556 | 2743 | |
2557 | 2744 | /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) |
.. | .. |
2560 | 2747 | */ |
2561 | 2748 | static inline void sk_pacing_shift_update(struct sock *sk, int val) |
2562 | 2749 | { |
2563 | | - if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val) |
| 2750 | + if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val) |
2564 | 2751 | return; |
2565 | | - sk->sk_pacing_shift = val; |
| 2752 | + WRITE_ONCE(sk->sk_pacing_shift, val); |
2566 | 2753 | } |
2567 | 2754 | |
2568 | 2755 | /* if a socket is bound to a device, check that the given device |
.. | .. |
2584 | 2771 | return false; |
2585 | 2772 | } |
2586 | 2773 | |
| 2774 | +void sock_def_readable(struct sock *sk); |
| 2775 | + |
| 2776 | +int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); |
| 2777 | +void sock_enable_timestamps(struct sock *sk); |
| 2778 | +void sock_no_linger(struct sock *sk); |
| 2779 | +void sock_set_keepalive(struct sock *sk); |
| 2780 | +void sock_set_priority(struct sock *sk, u32 priority); |
| 2781 | +void sock_set_rcvbuf(struct sock *sk, int val); |
| 2782 | +void sock_set_mark(struct sock *sk, u32 val); |
| 2783 | +void sock_set_reuseaddr(struct sock *sk); |
| 2784 | +void sock_set_reuseport(struct sock *sk); |
| 2785 | +void sock_set_sndtimeo(struct sock *sk, s64 secs); |
| 2786 | + |
| 2787 | +int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); |
| 2788 | + |
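These helpers give in-kernel users direct setters for common options, so they no longer need to drive the setsockopt paths with fabricated user pointers. A short usage sketch under that assumption:

```c
/* Tune a kernel-created socket directly through the new helpers. */
static void example_tune_kernel_socket(struct socket *sock)
{
	struct sock *sk = sock->sk;

	sock_set_reuseaddr(sk);
	sock_set_keepalive(sk);
	sock_set_rcvbuf(sk, 1 << 20);   /* bytes */
	sock_set_sndtimeo(sk, 5);       /* seconds */
}
```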
2587 | 2789 | #endif /* _SOCK_H */ |