hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/kcm/kcmsock.c
....@@ -1,11 +1,8 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Kernel Connection Multiplexor
34 *
45 * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
5
- *
6
- * This program is free software; you can redistribute it and/or modify
7
- * it under the terms of the GNU General Public License version 2
8
- * as published by the Free Software Foundation.
96 */
107
118 #include <linux/bpf.h>
....@@ -164,7 +161,8 @@
164161 /* Buffer limit is okay now, add to ready list */
165162 list_add_tail(&kcm->wait_rx_list,
166163 &kcm->mux->kcm_rx_waiters);
167
- kcm->rx_wait = true;
164
+ /* paired with lockless reads in kcm_rfree() */
165
+ WRITE_ONCE(kcm->rx_wait, true);
168166 }
169167
170168 static void kcm_rfree(struct sk_buff *skb)
....@@ -180,7 +178,7 @@
180178 /* For reading rx_wait and rx_psock without holding lock */
181179 smp_mb__after_atomic();
182180
183
- if (!kcm->rx_wait && !kcm->rx_psock &&
181
+ if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) &&
184182 sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
185183 spin_lock_bh(&mux->rx_lock);
186184 kcm_rcv_ready(kcm);
....@@ -223,7 +221,7 @@
223221 struct sk_buff *skb;
224222 struct kcm_sock *kcm;
225223
226
- while ((skb = __skb_dequeue(head))) {
224
+ while ((skb = skb_dequeue(head))) {
227225 /* Reset destructor to avoid calling kcm_rcv_ready */
228226 skb->destructor = sock_rfree;
229227 skb_orphan(skb);
....@@ -239,7 +237,8 @@
239237 if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
240238 /* Should mean socket buffer full */
241239 list_del(&kcm->wait_rx_list);
242
- kcm->rx_wait = false;
240
+ /* paired with lockless reads in kcm_rfree() */
241
+ WRITE_ONCE(kcm->rx_wait, false);
243242
244243 /* Commit rx_wait to read in kcm_rfree */
245244 smp_wmb();
....@@ -282,10 +281,12 @@
282281 kcm = list_first_entry(&mux->kcm_rx_waiters,
283282 struct kcm_sock, wait_rx_list);
284283 list_del(&kcm->wait_rx_list);
285
- kcm->rx_wait = false;
284
+ /* paired with lockless reads in kcm_rfree() */
285
+ WRITE_ONCE(kcm->rx_wait, false);
286286
287287 psock->rx_kcm = kcm;
288
- kcm->rx_psock = psock;
288
+ /* paired with lockless reads in kcm_rfree() */
289
+ WRITE_ONCE(kcm->rx_psock, psock);
289290
290291 spin_unlock_bh(&mux->rx_lock);
291292
....@@ -312,7 +313,8 @@
312313 spin_lock_bh(&mux->rx_lock);
313314
314315 psock->rx_kcm = NULL;
315
- kcm->rx_psock = NULL;
316
+ /* paired with lockless reads in kcm_rfree() */
317
+ WRITE_ONCE(kcm->rx_psock, NULL);
316318
317319 /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
318320 * kcm_rfree
....@@ -383,9 +385,7 @@
383385 struct bpf_prog *prog = psock->bpf_prog;
384386 int res;
385387
386
- preempt_disable();
387
- res = BPF_PROG_RUN(prog, skb);
388
- preempt_enable();
388
+ res = bpf_prog_run_pin_on_cpu(prog, skb);
389389 return res;
390390 }
391391
....@@ -642,15 +642,15 @@
642642 frag_offset = 0;
643643 do_frag:
644644 frag = &skb_shinfo(skb)->frags[fragidx];
645
- if (WARN_ON(!frag->size)) {
645
+ if (WARN_ON(!skb_frag_size(frag))) {
646646 ret = -EINVAL;
647647 goto out;
648648 }
649649
650650 ret = kernel_sendpage(psock->sk->sk_socket,
651
- frag->page.p,
652
- frag->page_offset + frag_offset,
653
- frag->size - frag_offset,
651
+ skb_frag_page(frag),
652
+ skb_frag_off(frag) + frag_offset,
653
+ skb_frag_size(frag) - frag_offset,
654654 MSG_DONTWAIT);
655655 if (ret <= 0) {
656656 if (ret == -EAGAIN) {
....@@ -685,7 +685,7 @@
685685 sent += ret;
686686 frag_offset += ret;
687687 KCM_STATS_ADD(psock->stats.tx_bytes, ret);
688
- if (frag_offset < frag->size) {
688
+ if (frag_offset < skb_frag_size(frag)) {
689689 /* Not finished with this frag */
690690 goto do_frag;
691691 }
....@@ -1064,15 +1064,18 @@
10641064 out_error:
10651065 kcm_push(kcm);
10661066
1067
- if (copied && sock->type == SOCK_SEQPACKET) {
1067
+ if (sock->type == SOCK_SEQPACKET) {
10681068 /* Wrote some bytes before encountering an
10691069 * error, return partial success.
10701070 */
1071
- goto partial_message;
1072
- }
1073
-
1074
- if (head != kcm->seq_skb)
1071
+ if (copied)
1072
+ goto partial_message;
1073
+ if (head != kcm->seq_skb)
1074
+ kfree_skb(head);
1075
+ } else {
10751076 kfree_skb(head);
1077
+ kcm->seq_skb = NULL;
1078
+ }
10761079
10771080 err = sk_stream_error(sk, msg->msg_flags, err);
10781081
....@@ -1084,53 +1087,18 @@
10841087 return err;
10851088 }
10861089
1087
-static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
1088
- long timeo, int *err)
1089
-{
1090
- struct sk_buff *skb;
1091
-
1092
- while (!(skb = skb_peek(&sk->sk_receive_queue))) {
1093
- if (sk->sk_err) {
1094
- *err = sock_error(sk);
1095
- return NULL;
1096
- }
1097
-
1098
- if (sock_flag(sk, SOCK_DONE))
1099
- return NULL;
1100
-
1101
- if ((flags & MSG_DONTWAIT) || !timeo) {
1102
- *err = -EAGAIN;
1103
- return NULL;
1104
- }
1105
-
1106
- sk_wait_data(sk, &timeo, NULL);
1107
-
1108
- /* Handle signals */
1109
- if (signal_pending(current)) {
1110
- *err = sock_intr_errno(timeo);
1111
- return NULL;
1112
- }
1113
- }
1114
-
1115
- return skb;
1116
-}
1117
-
11181090 static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
11191091 size_t len, int flags)
11201092 {
1093
+ int noblock = flags & MSG_DONTWAIT;
11211094 struct sock *sk = sock->sk;
11221095 struct kcm_sock *kcm = kcm_sk(sk);
11231096 int err = 0;
1124
- long timeo;
11251097 struct strp_msg *stm;
11261098 int copied = 0;
11271099 struct sk_buff *skb;
11281100
1129
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1130
-
1131
- lock_sock(sk);
1132
-
1133
- skb = kcm_wait_data(sk, flags, timeo, &err);
1101
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
11341102 if (!skb)
11351103 goto out;
11361104
....@@ -1161,14 +1129,11 @@
11611129 /* Finished with message */
11621130 msg->msg_flags |= MSG_EOR;
11631131 KCM_STATS_INCR(kcm->stats.rx_msgs);
1164
- skb_unlink(skb, &sk->sk_receive_queue);
1165
- kfree_skb(skb);
11661132 }
11671133 }
11681134
11691135 out:
1170
- release_sock(sk);
1171
-
1136
+ skb_free_datagram(sk, skb);
11721137 return copied ? : err;
11731138 }
11741139
....@@ -1176,9 +1141,9 @@
11761141 struct pipe_inode_info *pipe, size_t len,
11771142 unsigned int flags)
11781143 {
1144
+ int noblock = flags & MSG_DONTWAIT;
11791145 struct sock *sk = sock->sk;
11801146 struct kcm_sock *kcm = kcm_sk(sk);
1181
- long timeo;
11821147 struct strp_msg *stm;
11831148 int err = 0;
11841149 ssize_t copied;
....@@ -1186,11 +1151,7 @@
11861151
11871152 /* Only support splice for SOCK_SEQPACKET */
11881153
1189
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1190
-
1191
- lock_sock(sk);
1192
-
1193
- skb = kcm_wait_data(sk, flags, timeo, &err);
1154
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
11941155 if (!skb)
11951156 goto err_out;
11961157
....@@ -1218,13 +1179,11 @@
12181179 * finish reading the message.
12191180 */
12201181
1221
- release_sock(sk);
1222
-
1182
+ skb_free_datagram(sk, skb);
12231183 return copied;
12241184
12251185 err_out:
1226
- release_sock(sk);
1227
-
1186
+ skb_free_datagram(sk, skb);
12281187 return err;
12291188 }
12301189
....@@ -1244,7 +1203,8 @@
12441203 if (!kcm->rx_psock) {
12451204 if (kcm->rx_wait) {
12461205 list_del(&kcm->wait_rx_list);
1247
- kcm->rx_wait = false;
1206
+ /* paired with lockless reads in kcm_rfree() */
1207
+ WRITE_ONCE(kcm->rx_wait, false);
12481208 }
12491209
12501210 requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
....@@ -1270,7 +1230,7 @@
12701230 }
12711231
12721232 static int kcm_setsockopt(struct socket *sock, int level, int optname,
1273
- char __user *optval, unsigned int optlen)
1233
+ sockptr_t optval, unsigned int optlen)
12741234 {
12751235 struct kcm_sock *kcm = kcm_sk(sock->sk);
12761236 int val, valbool;
....@@ -1282,8 +1242,8 @@
12821242 if (optlen < sizeof(int))
12831243 return -EINVAL;
12841244
1285
- if (get_user(val, (int __user *)optval))
1286
- return -EINVAL;
1245
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
1246
+ return -EFAULT;
12871247
12881248 valbool = val ? 1 : 0;
12891249
....@@ -1416,12 +1376,6 @@
14161376 psock->sk = csk;
14171377 psock->bpf_prog = prog;
14181378
1419
- err = strp_init(&psock->strp, csk, &cb);
1420
- if (err) {
1421
- kmem_cache_free(kcm_psockp, psock);
1422
- goto out;
1423
- }
1424
-
14251379 write_lock_bh(&csk->sk_callback_lock);
14261380
14271381 /* Check if sk_user_data is already set by KCM or someone else.
....@@ -1429,10 +1383,15 @@
14291383 */
14301384 if (csk->sk_user_data) {
14311385 write_unlock_bh(&csk->sk_callback_lock);
1432
- strp_stop(&psock->strp);
1433
- strp_done(&psock->strp);
14341386 kmem_cache_free(kcm_psockp, psock);
14351387 err = -EALREADY;
1388
+ goto out;
1389
+ }
1390
+
1391
+ err = strp_init(&psock->strp, csk, &cb);
1392
+ if (err) {
1393
+ write_unlock_bh(&csk->sk_callback_lock);
1394
+ kmem_cache_free(kcm_psockp, psock);
14361395 goto out;
14371396 }
14381397
....@@ -1798,7 +1757,8 @@
17981757
17991758 if (kcm->rx_wait) {
18001759 list_del(&kcm->wait_rx_list);
1801
- kcm->rx_wait = false;
1760
+ /* paired with lockless reads in kcm_rfree() */
1761
+ WRITE_ONCE(kcm->rx_wait, false);
18021762 }
18031763 /* Move any pending receive messages to other kcm sockets */
18041764 requeue_rx_msgs(mux, &sk->sk_receive_queue);
....@@ -1843,10 +1803,10 @@
18431803 kcm = kcm_sk(sk);
18441804 mux = kcm->mux;
18451805
1806
+ lock_sock(sk);
18461807 sock_orphan(sk);
18471808 kfree_skb(kcm->seq_skb);
18481809
1849
- lock_sock(sk);
18501810 /* Purge queue under lock to avoid race condition with tx_work trying
18511811 * to act when queue is nonempty. If tx_work runs after this point
18521812 * it will just return.
....@@ -2025,6 +1985,8 @@
20251985 * that all multiplexors and psocks have been destroyed.
20261986 */
20271987 WARN_ON(!list_empty(&knet->mux_list));
1988
+
1989
+ mutex_destroy(&knet->mutex);
20281990 }
20291991
20301992 static struct pernet_operations kcm_net_ops = {
....@@ -2040,13 +2002,13 @@
20402002
20412003 kcm_muxp = kmem_cache_create("kcm_mux_cache",
20422004 sizeof(struct kcm_mux), 0,
2043
- SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2005
+ SLAB_HWCACHE_ALIGN, NULL);
20442006 if (!kcm_muxp)
20452007 goto fail;
20462008
20472009 kcm_psockp = kmem_cache_create("kcm_psock_cache",
20482010 sizeof(struct kcm_psock), 0,
2049
- SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2011
+ SLAB_HWCACHE_ALIGN, NULL);
20502012 if (!kcm_psockp)
20512013 goto fail;
20522014