From 244b2c5ca8b14627e4a17755e5922221e121c771 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:15:07 +0000
Subject: [PATCH] change system file
---
kernel/net/ipv4/af_inet.c | 203 ++++++++++++++++++++++++++++++--------------------
1 files changed, 123 insertions(+), 80 deletions(-)
diff --git a/kernel/net/ipv4/af_inet.c b/kernel/net/ipv4/af_inet.c
index d8c2224..769902f 100644
--- a/kernel/net/ipv4/af_inet.c
+++ b/kernel/net/ipv4/af_inet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -58,11 +59,6 @@
* Some other random speedups.
* Cyrus Durgin : Cleaned up file for kmod hacks.
* Andi Kleen : Fix inet_stream_connect TCP race.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "IPv4: " fmt
@@ -120,6 +116,7 @@
#include <linux/mroute.h>
#endif
#include <net/l3mdev.h>
+#include <net/compat.h>
#include <trace/events/sock.h>
@@ -136,6 +133,10 @@
struct inet_sock *inet = inet_sk(sk);
__skb_queue_purge(&sk->sk_receive_queue);
+ if (sk->sk_rx_skb_cache) {
+ __kfree_skb(sk->sk_rx_skb_cache);
+ sk->sk_rx_skb_cache = NULL;
+ }
__skb_queue_purge(&sk->sk_error_queue);
sk_mem_reclaim(sk);
@@ -156,8 +157,8 @@
WARN_ON(sk->sk_forward_alloc);
kfree(rcu_dereference_protected(inet->inet_opt, 1));
- dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
- dst_release(sk->sk_rx_dst);
+ dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
+ dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1));
sk_refcnt_debug_dec(sk);
}
EXPORT_SYMBOL(inet_sock_destruct);
@@ -208,6 +209,7 @@
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto out;
+ WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
@@ -218,7 +220,7 @@
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
@@ -231,7 +233,6 @@
goto out;
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
}
- sk->sk_max_ack_backlog = backlog;
err = 0;
out:
@@ -337,7 +338,7 @@
inet->hdrincl = 1;
}
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
@@ -410,6 +411,9 @@
if (sk) {
long timeout;
+ if (!sk->sk_kern_sock)
+ BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk);
+
/* Applications forget to leave groups before exiting */
ip_mc_drop_socket(sk);
@@ -450,12 +454,12 @@
if (err)
return err;
- return __inet_bind(sk, uaddr, addr_len, false, true);
+ return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
}
EXPORT_SYMBOL(inet_bind);
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
- bool force_bind_address_no_port, bool with_lock)
+ u32 flags)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct inet_sock *inet = inet_sk(sk);
@@ -494,8 +498,12 @@
goto out;
snum = ntohs(addr->sin_port);
+ err = -EPERM;
+ if (snum && inet_is_local_unbindable_port(net, snum))
+ goto out;
+
err = -EACCES;
- if (snum && snum < inet_prot_sock(net) &&
+ if (snum && inet_port_requires_bind_service(net, snum) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
goto out;
@@ -506,7 +514,7 @@
* would be illegal to use them (multicast/broadcast) in
* which case the sending device address is used.
*/
- if (with_lock)
+ if (flags & BIND_WITH_LOCK)
lock_sock(sk);
/* Check these errors (active socket, double bind). */
@@ -520,16 +528,18 @@
/* Make sure we are allowed to bind here. */
if (snum || !(inet->bind_address_no_port ||
- force_bind_address_no_port)) {
+ (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
if (sk->sk_prot->get_port(sk, snum)) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
err = -EADDRINUSE;
goto out_release_sock;
}
- err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
- if (err) {
- inet->inet_saddr = inet->inet_rcv_saddr = 0;
- goto out_release_sock;
+ if (!(flags & BIND_FROM_BPF)) {
+ err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
+ if (err) {
+ inet->inet_saddr = inet->inet_rcv_saddr = 0;
+ goto out_release_sock;
+ }
}
}
@@ -543,7 +553,7 @@
sk_dst_reset(sk);
err = 0;
out_release_sock:
- if (with_lock)
+ if (flags & BIND_WITH_LOCK)
release_sock(sk);
out:
return err;
@@ -566,7 +576,7 @@
return err;
}
- if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+ if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
return -EAGAIN;
return sk->sk_prot->connect(sk, uaddr, addr_len);
}
@@ -753,12 +763,11 @@
}
EXPORT_SYMBOL(inet_accept);
-
/*
* This does both peername and sockname.
*/
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
- int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -779,23 +788,38 @@
sin->sin_port = inet->inet_sport;
sin->sin_addr.s_addr = addr;
}
+ if (cgroup_bpf_enabled)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+ peer ? BPF_CGROUP_INET4_GETPEERNAME :
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ NULL);
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
return sizeof(*sin);
}
EXPORT_SYMBOL(inet_getname);
+int inet_send_prepare(struct sock *sk)
+{
+ sock_rps_record_flow(sk);
+
+ /* We may need to bind the socket. */
+ if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind &&
+ inet_autobind(sk))
+ return -EAGAIN;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(inet_send_prepare);
+
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
- sock_rps_record_flow(sk);
-
- /* We may need to bind the socket. */
- if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
- inet_autobind(sk))
+ if (unlikely(inet_send_prepare(sk)))
return -EAGAIN;
- return sk->sk_prot->sendmsg(sk, msg, size);
+ return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg,
+ sk, msg, size);
}
EXPORT_SYMBOL(inet_sendmsg);
@@ -804,11 +828,7 @@
{
struct sock *sk = sock->sk;
- sock_rps_record_flow(sk);
-
- /* We may need to bind the socket. */
- if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
- inet_autobind(sk))
+ if (unlikely(inet_send_prepare(sk)))
return -EAGAIN;
if (sk->sk_prot->sendpage)
@@ -817,6 +837,8 @@
}
EXPORT_SYMBOL(inet_sendpage);
+INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *,
+ size_t, int, int, int *));
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -827,8 +849,9 @@
if (likely(!(flags & MSG_ERRQUEUE)))
sock_rps_record_flow(sk);
- err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
+ sk, msg, size, flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
@@ -863,9 +886,9 @@
err = -ENOTCONN;
/* Hack to wake up other listeners, who can poll for
EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
- /* fall through */
+ fallthrough;
default:
- sk->sk_shutdown |= how;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how);
if (sk->sk_prot->shutdown)
sk->sk_prot->shutdown(sk, how);
break;
@@ -877,7 +900,7 @@
case TCP_LISTEN:
if (!(how & RCV_SHUTDOWN))
break;
- /* fall through */
+ fallthrough;
case TCP_SYN_SENT:
err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
@@ -911,12 +934,6 @@
struct rtentry rt;
switch (cmd) {
- case SIOCGSTAMP:
- err = sock_get_timestamp(sk, (struct timeval __user *)arg);
- break;
- case SIOCGSTAMPNS:
- err = sock_get_timestampns(sk, (struct timespec __user *)arg);
- break;
case SIOCADDRT:
case SIOCDELRT:
if (copy_from_user(&rt, p, sizeof(struct rtentry)))
@@ -965,17 +982,42 @@
EXPORT_SYMBOL(inet_ioctl);
#ifdef CONFIG_COMPAT
+static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
+ struct compat_rtentry __user *ur)
+{
+ compat_uptr_t rtdev;
+ struct rtentry rt;
+
+ if (copy_from_user(&rt.rt_dst, &ur->rt_dst,
+ 3 * sizeof(struct sockaddr)) ||
+ get_user(rt.rt_flags, &ur->rt_flags) ||
+ get_user(rt.rt_metric, &ur->rt_metric) ||
+ get_user(rt.rt_mtu, &ur->rt_mtu) ||
+ get_user(rt.rt_window, &ur->rt_window) ||
+ get_user(rt.rt_irtt, &ur->rt_irtt) ||
+ get_user(rtdev, &ur->rt_dev))
+ return -EFAULT;
+
+ rt.rt_dev = compat_ptr(rtdev);
+ return ip_rt_ioctl(sock_net(sk), cmd, &rt);
+}
+
static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
+ void __user *argp = compat_ptr(arg);
struct sock *sk = sock->sk;
- int err = -ENOIOCTLCMD;
- if (sk->sk_prot->compat_ioctl)
- err = sk->sk_prot->compat_ioctl(sk, cmd, arg);
-
- return err;
+ switch (cmd) {
+ case SIOCADDRT:
+ case SIOCDELRT:
+ return inet_compat_routing_ioctl(sk, cmd, argp);
+ default:
+ if (!sk->sk_prot->compat_ioctl)
+ return -ENOIOCTLCMD;
+ return sk->sk_prot->compat_ioctl(sk, cmd, arg);
+ }
}
-#endif
+#endif /* CONFIG_COMPAT */
const struct proto_ops inet_stream_ops = {
.family = PF_INET,
@@ -988,6 +1030,7 @@
.getname = inet_getname,
.poll = tcp_poll,
.ioctl = inet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = inet_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
@@ -1004,8 +1047,6 @@
.sendpage_locked = tcp_sendpage_locked,
.peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
.set_rcvlowat = tcp_set_rcvlowat,
@@ -1023,6 +1064,7 @@
.getname = inet_getname,
.poll = udp_poll,
.ioctl = inet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
@@ -1033,8 +1075,6 @@
.sendpage = inet_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
};
@@ -1055,6 +1095,7 @@
.getname = inet_getname,
.poll = datagram_poll,
.ioctl = inet_ioctl,
+ .gettstamp = sock_gettstamp,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
@@ -1064,8 +1105,6 @@
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_sock_common_setsockopt,
- .compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
};
@@ -1209,7 +1248,7 @@
if (new_saddr == old_saddr)
return 0;
- if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
__func__, &old_saddr, &new_saddr);
}
@@ -1264,7 +1303,7 @@
* Other protocols have to map its equivalent state to TCP_SYN_SENT.
* DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
*/
- if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
sk->sk_state != TCP_SYN_SENT ||
(sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
(err = inet_sk_reselect_saddr(sk)) != 0)
@@ -1388,6 +1427,15 @@
}
EXPORT_SYMBOL(inet_gso_segment);
+static struct sk_buff *ipip_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4))
+ return ERR_PTR(-EINVAL);
+
+ return inet_gso_segment(skb, features);
+}
+
struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
{
const struct net_offload *ops;
@@ -1497,7 +1545,8 @@
skb_gro_pull(skb, sizeof(*iph));
skb_set_transport_header(skb, skb_gro_offset(skb));
- pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
+ ops->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
@@ -1584,7 +1633,9 @@
* because any hdr with option will have been flushed in
* inet_gro_receive().
*/
- err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
+ err = INDIRECT_CALL_2(ops->callbacks.gro_complete,
+ tcp4_gro_complete, udp4_gro_complete,
+ skb, nhoff + sizeof(*iph));
out_unlock:
rcu_read_unlock();
@@ -1678,12 +1729,7 @@
};
#endif
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol tcp_protocol = {
- .early_demux = tcp_v4_early_demux,
- .early_demux_handler = tcp_v4_early_demux,
+static const struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
@@ -1691,12 +1737,7 @@
.icmp_strict_tag_validation = 1,
};
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol udp_protocol = {
- .early_demux = udp_v4_early_demux,
- .early_demux_handler = udp_v4_early_demux,
+static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
@@ -1772,6 +1813,10 @@
free_percpu(net->mib.net_statistics);
free_percpu(net->mib.ip_statistics);
free_percpu(net->mib.tcp_statistics);
+#ifdef CONFIG_MPTCP
+ /* allocated on demand, see mptcp_init_sock() */
+ free_percpu(net->mib.mptcp_statistics);
+#endif
}
static __net_initdata struct pernet_operations ipv4_mib_ops = {
@@ -1810,6 +1855,7 @@
net->ipv4.sysctl_ip_early_demux = 1;
net->ipv4.sysctl_udp_early_demux = 1;
net->ipv4.sysctl_tcp_early_demux = 1;
+ net->ipv4.sysctl_nexthop_compat_mode = 1;
#ifdef CONFIG_SYSCTL
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif
@@ -1824,13 +1870,8 @@
return 0;
}
-static __net_exit void inet_exit_net(struct net *net)
-{
-}
-
static __net_initdata struct pernet_operations af_inet_ops = {
.init = inet_init_net,
- .exit = inet_exit_net,
};
static int __init init_inet_pernet_ops(void)
@@ -1855,7 +1896,7 @@
static const struct net_offload ipip_offload = {
.callbacks = {
- .gso_segment = inet_gso_segment,
+ .gso_segment = ipip_gso_segment,
.gro_receive = ipip_gro_receive,
.gro_complete = ipip_gro_complete,
},
@@ -1894,7 +1935,7 @@
{
struct inet_protosw *q;
struct list_head *r;
- int rc = -EINVAL;
+ int rc;
sock_skb_cb_check_size(sizeof(struct inet_skb_parm));
@@ -1971,6 +2012,8 @@
/* Add UDP-Lite (RFC 3828) */
udplite4_register();
+ raw_init();
+
ping_init();
/*
--
Gitblit v1.6.2