  .. |   .. |
     |    1 | +// SPDX-License-Identifier: GPL-2.0-only
   1 |    2 | /*
   2 |    3 | * Kernel Connection Multiplexor
   3 |    4 | *
   4 |    5 | * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
   5 |      | - *
   6 |      | - * This program is free software; you can redistribute it and/or modify
   7 |      | - * it under the terms of the GNU General Public License version 2
   8 |      | - * as published by the Free Software Foundation.
   9 |    6 | */
  10 |    7 |
  11 |    8 | #include <linux/bpf.h>
  .. |   .. |
 164 |  161 | /* Buffer limit is okay now, add to ready list */
 165 |  162 | list_add_tail(&kcm->wait_rx_list,
 166 |  163 | &kcm->mux->kcm_rx_waiters);
 167 |      | - kcm->rx_wait = true;
     |  164 | + /* paired with lockless reads in kcm_rfree() */
     |  165 | + WRITE_ONCE(kcm->rx_wait, true);
 168 |  166 | }
 169 |  167 |
 170 |  168 | static void kcm_rfree(struct sk_buff *skb)
  .. |   .. |
 180 |  178 | /* For reading rx_wait and rx_psock without holding lock */
 181 |  179 | smp_mb__after_atomic();
 182 |  180 |
 183 |      | - if (!kcm->rx_wait && !kcm->rx_psock &&
     |  181 | + if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) &&
 184 |  182 | sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
 185 |  183 | spin_lock_bh(&mux->rx_lock);
 186 |  184 | kcm_rcv_ready(kcm);
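This hunk, together with the `WRITE_ONCE()` hunks elsewhere in the patch, annotates every store to `kcm->rx_wait` and `kcm->rx_psock` that `kcm_rfree()` may read without holding `mux->rx_lock`. A minimal sketch of the pattern, using a hypothetical `struct foo` rather than the KCM structures:

```c
#include <linux/compiler.h>	/* READ_ONCE() / WRITE_ONCE() */
#include <linux/spinlock.h>
#include <linux/types.h>

struct foo {
	spinlock_t lock;
	bool ready;		/* written under lock, read locklessly */
};

static void foo_set_ready(struct foo *f, bool val)
{
	spin_lock_bh(&f->lock);
	/* paired with the lockless READ_ONCE() in foo_is_ready() */
	WRITE_ONCE(f->ready, val);
	spin_unlock_bh(&f->lock);
}

static bool foo_is_ready(struct foo *f)
{
	/* lockless read; the annotation documents the intentional data race */
	return READ_ONCE(f->ready);
}
```

The annotations only prevent load/store tearing and compiler reordering; the memory barriers already present in these hunks (`smp_mb__after_atomic()`, `smp_wmb()`) still provide the ordering.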
  .. |   .. |
 223 |  221 | struct sk_buff *skb;
 224 |  222 | struct kcm_sock *kcm;
 225 |  223 |
 226 |      | - while ((skb = __skb_dequeue(head))) {
     |  224 | + while ((skb = skb_dequeue(head))) {
 227 |  225 | /* Reset destructor to avoid calling kcm_rcv_ready */
 228 |  226 | skb->destructor = sock_rfree;
 229 |  227 | skb_orphan(skb);
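The requeue loop above moves from `__skb_dequeue()` to `skb_dequeue()`: the double-underscore variant assumes the caller already serializes access to the queue, while `skb_dequeue()` takes the queue's own lock for each removal. A rough sketch of the difference (hypothetical helpers, not part of the patch):

```c
#include <linux/skbuff.h>
#include <linux/spinlock.h>

/* Locked form: safe when other contexts can touch the queue. */
static struct sk_buff *pop_locked(struct sk_buff_head *q)
{
	return skb_dequeue(q);		/* takes q->lock internally */
}

/* Unlocked form: only valid while the caller holds q->lock itself. */
static struct sk_buff *pop_unlocked(struct sk_buff_head *q)
{
	struct sk_buff *skb;
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	skb = __skb_dequeue(q);
	spin_unlock_irqrestore(&q->lock, flags);
	return skb;
}
```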
  .. |   .. |
 239 |  237 | if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
 240 |  238 | /* Should mean socket buffer full */
 241 |  239 | list_del(&kcm->wait_rx_list);
 242 |      | - kcm->rx_wait = false;
     |  240 | + /* paired with lockless reads in kcm_rfree() */
     |  241 | + WRITE_ONCE(kcm->rx_wait, false);
 243 |  242 |
 244 |  243 | /* Commit rx_wait to read in kcm_free */
 245 |  244 | smp_wmb();
  .. |   .. |
 282 |  281 | kcm = list_first_entry(&mux->kcm_rx_waiters,
 283 |  282 | struct kcm_sock, wait_rx_list);
 284 |  283 | list_del(&kcm->wait_rx_list);
 285 |      | - kcm->rx_wait = false;
     |  284 | + /* paired with lockless reads in kcm_rfree() */
     |  285 | + WRITE_ONCE(kcm->rx_wait, false);
 286 |  286 |
 287 |  287 | psock->rx_kcm = kcm;
 288 |      | - kcm->rx_psock = psock;
     |  288 | + /* paired with lockless reads in kcm_rfree() */
     |  289 | + WRITE_ONCE(kcm->rx_psock, psock);
 289 |  290 |
 290 |  291 | spin_unlock_bh(&mux->rx_lock);
 291 |  292 |
  .. |   .. |
 312 |  313 | spin_lock_bh(&mux->rx_lock);
 313 |  314 |
 314 |  315 | psock->rx_kcm = NULL;
 315 |      | - kcm->rx_psock = NULL;
     |  316 | + /* paired with lockless reads in kcm_rfree() */
     |  317 | + WRITE_ONCE(kcm->rx_psock, NULL);
 316 |  318 |
 317 |  319 | /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
 318 |  320 | * kcm_rfree
  .. |   .. |
 383 |  385 | struct bpf_prog *prog = psock->bpf_prog;
 384 |  386 | int res;
 385 |  387 |
 386 |      | - preempt_disable();
 387 |      | - res = BPF_PROG_RUN(prog, skb);
 388 |      | - preempt_enable();
     |  388 | + res = bpf_prog_run_pin_on_cpu(prog, skb);
 389 |  389 | return res;
 390 |  390 | }
 391 |  391 |
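Here the open-coded `preempt_disable()` / `BPF_PROG_RUN()` / `preempt_enable()` sequence becomes `bpf_prog_run_pin_on_cpu()`, which keeps the program on one CPU (by disabling migration) without having to disable preemption outright. A minimal sketch of the call shape, assuming a hypothetical wrapper around the parser hook:

```c
#include <linux/filter.h>	/* bpf_prog_run_pin_on_cpu() */
#include <linux/skbuff.h>

/* Hypothetical wrapper: run a socket-filter style program against an skb
 * while staying on the current CPU, as the hunk above does.
 */
static int run_parser_prog(const struct bpf_prog *prog, struct sk_buff *skb)
{
	return bpf_prog_run_pin_on_cpu(prog, skb);
}
```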
  .. |   .. |
 642 |  642 | frag_offset = 0;
 643 |  643 | do_frag:
 644 |  644 | frag = &skb_shinfo(skb)->frags[fragidx];
 645 |      | - if (WARN_ON(!frag->size)) {
     |  645 | + if (WARN_ON(!skb_frag_size(frag))) {
 646 |  646 | ret = -EINVAL;
 647 |  647 | goto out;
 648 |  648 | }
 649 |  649 |
 650 |  650 | ret = kernel_sendpage(psock->sk->sk_socket,
 651 |      | - frag->page.p,
 652 |      | - frag->page_offset + frag_offset,
 653 |      | - frag->size - frag_offset,
     |  651 | + skb_frag_page(frag),
     |  652 | + skb_frag_off(frag) + frag_offset,
     |  653 | + skb_frag_size(frag) - frag_offset,
 654 |  654 | MSG_DONTWAIT);
 655 |  655 | if (ret <= 0) {
 656 |  656 | if (ret == -EAGAIN) {
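The transmit loop stops poking at `frag->page.p`, `frag->page_offset` and `frag->size` directly and uses the `skb_frag_page()`, `skb_frag_off()` and `skb_frag_size()` accessors instead, so callers keep working if the layout of `skb_frag_t` changes. A small sketch of walking frags with only the accessors (hypothetical helper, not part of the patch):

```c
#include <linux/printk.h>
#include <linux/skbuff.h>

/* Hypothetical helper: report each page fragment of an skb without
 * touching skb_frag_t members directly.
 */
static unsigned int sum_frag_bytes(struct sk_buff *skb)
{
	unsigned int total = 0;
	int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		pr_debug("frag %d: page %p off %u len %u\n", i,
			 skb_frag_page(frag), skb_frag_off(frag),
			 skb_frag_size(frag));
		total += skb_frag_size(frag);
	}
	return total;
}
```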
  .. |   .. |
 685 |  685 | sent += ret;
 686 |  686 | frag_offset += ret;
 687 |  687 | KCM_STATS_ADD(psock->stats.tx_bytes, ret);
 688 |      | - if (frag_offset < frag->size) {
     |  688 | + if (frag_offset < skb_frag_size(frag)) {
 689 |  689 | /* Not finished with this frag */
 690 |  690 | goto do_frag;
 691 |  691 | }
  .. |   .. |
1064 | 1064 | out_error:
1065 | 1065 | kcm_push(kcm);
1066 | 1066 |
1067 |      | - if (copied && sock->type == SOCK_SEQPACKET) {
     | 1067 | + if (sock->type == SOCK_SEQPACKET) {
1068 | 1068 | /* Wrote some bytes before encountering an
1069 | 1069 | * error, return partial success.
1070 | 1070 | */
1071 |      | - goto partial_message;
1072 |      | - }
1073 |      | -
1074 |      | - if (head != kcm->seq_skb)
     | 1071 | + if (copied)
     | 1072 | + goto partial_message;
     | 1073 | + if (head != kcm->seq_skb)
     | 1074 | + kfree_skb(head);
     | 1075 | + } else {
1075 | 1076 | kfree_skb(head);
     | 1077 | + kcm->seq_skb = NULL;
     | 1078 | + }
1076 | 1079 |
1077 | 1080 | err = sk_stream_error(sk, msg->msg_flags, err);
1078 | 1081 |
  .. |   .. |
1084 | 1087 | return err;
1085 | 1088 | }
1086 | 1089 |
1087 |      | -static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
1088 |      | - long timeo, int *err)
1089 |      | -{
1090 |      | - struct sk_buff *skb;
1091 |      | -
1092 |      | - while (!(skb = skb_peek(&sk->sk_receive_queue))) {
1093 |      | - if (sk->sk_err) {
1094 |      | - *err = sock_error(sk);
1095 |      | - return NULL;
1096 |      | - }
1097 |      | -
1098 |      | - if (sock_flag(sk, SOCK_DONE))
1099 |      | - return NULL;
1100 |      | -
1101 |      | - if ((flags & MSG_DONTWAIT) || !timeo) {
1102 |      | - *err = -EAGAIN;
1103 |      | - return NULL;
1104 |      | - }
1105 |      | -
1106 |      | - sk_wait_data(sk, &timeo, NULL);
1107 |      | -
1108 |      | - /* Handle signals */
1109 |      | - if (signal_pending(current)) {
1110 |      | - *err = sock_intr_errno(timeo);
1111 |      | - return NULL;
1112 |      | - }
1113 |      | - }
1114 |      | -
1115 |      | - return skb;
1116 |      | -}
1117 |      | -
1118 | 1090 | static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
1119 | 1091 | size_t len, int flags)
1120 | 1092 | {
     | 1093 | + int noblock = flags & MSG_DONTWAIT;
1121 | 1094 | struct sock *sk = sock->sk;
1122 | 1095 | struct kcm_sock *kcm = kcm_sk(sk);
1123 | 1096 | int err = 0;
1124 |      | - long timeo;
1125 | 1097 | struct strp_msg *stm;
1126 | 1098 | int copied = 0;
1127 | 1099 | struct sk_buff *skb;
1128 | 1100 |
1129 |      | - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1130 |      | -
1131 |      | - lock_sock(sk);
1132 |      | -
1133 |      | - skb = kcm_wait_data(sk, flags, timeo, &err);
     | 1101 | + skb = skb_recv_datagram(sk, flags, noblock, &err);
1134 | 1102 | if (!skb)
1135 | 1103 | goto out;
1136 | 1104 |
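`kcm_recvmsg()` (and `kcm_splice_read()` below) drop the hand-rolled `kcm_wait_data()` wait loop in favour of `skb_recv_datagram()`, which already handles blocking, `MSG_DONTWAIT`, socket errors and pending signals, with `skb_free_datagram()` releasing the skb afterwards. A minimal sketch of that receive shape, using the four-argument `skb_recv_datagram()` form seen in this hunk (newer kernels fold the `noblock` flag into `flags`):

```c
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <net/sock.h>

/* Hypothetical receive helper showing the datagram-style pattern. */
static int recv_one_msg(struct sock *sk, int flags)
{
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err = 0;

	/* Blocks unless noblock; returns NULL with err set on failure. */
	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		return err;

	/* ... copy the message payload out of skb here ... */

	skb_free_datagram(sk, skb);
	return 0;
}
```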
  .. |   .. |
1161 | 1129 | /* Finished with message */
1162 | 1130 | msg->msg_flags |= MSG_EOR;
1163 | 1131 | KCM_STATS_INCR(kcm->stats.rx_msgs);
1164 |      | - skb_unlink(skb, &sk->sk_receive_queue);
1165 |      | - kfree_skb(skb);
1166 | 1132 | }
1167 | 1133 | }
1168 | 1134 |
1169 | 1135 | out:
1170 |      | - release_sock(sk);
1171 |      | -
     | 1136 | + skb_free_datagram(sk, skb);
1172 | 1137 | return copied ? : err;
1173 | 1138 | }
1174 | 1139 |
  .. |   .. |
1176 | 1141 | struct pipe_inode_info *pipe, size_t len,
1177 | 1142 | unsigned int flags)
1178 | 1143 | {
     | 1144 | + int noblock = flags & MSG_DONTWAIT;
1179 | 1145 | struct sock *sk = sock->sk;
1180 | 1146 | struct kcm_sock *kcm = kcm_sk(sk);
1181 |      | - long timeo;
1182 | 1147 | struct strp_msg *stm;
1183 | 1148 | int err = 0;
1184 | 1149 | ssize_t copied;
  .. |   .. |
1186 | 1151 |
1187 | 1152 | /* Only support splice for SOCKSEQPACKET */
1188 | 1153 |
1189 |      | - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1190 |      | -
1191 |      | - lock_sock(sk);
1192 |      | -
1193 |      | - skb = kcm_wait_data(sk, flags, timeo, &err);
     | 1154 | + skb = skb_recv_datagram(sk, flags, noblock, &err);
1194 | 1155 | if (!skb)
1195 | 1156 | goto err_out;
1196 | 1157 |
  .. |   .. |
1218 | 1179 | * finish reading the message.
1219 | 1180 | */
1220 | 1181 |
1221 |      | - release_sock(sk);
1222 |      | -
     | 1182 | + skb_free_datagram(sk, skb);
1223 | 1183 | return copied;
1224 | 1184 |
1225 | 1185 | err_out:
1226 |      | - release_sock(sk);
1227 |      | -
     | 1186 | + skb_free_datagram(sk, skb);
1228 | 1187 | return err;
1229 | 1188 | }
1230 | 1189 |
  .. |   .. |
1244 | 1203 | if (!kcm->rx_psock) {
1245 | 1204 | if (kcm->rx_wait) {
1246 | 1205 | list_del(&kcm->wait_rx_list);
1247 |      | - kcm->rx_wait = false;
     | 1206 | + /* paired with lockless reads in kcm_rfree() */
     | 1207 | + WRITE_ONCE(kcm->rx_wait, false);
1248 | 1208 | }
1249 | 1209 |
1250 | 1210 | requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
  .. |   .. |
1270 | 1230 | }
1271 | 1231 |
1272 | 1232 | static int kcm_setsockopt(struct socket *sock, int level, int optname,
1273 |      | - char __user *optval, unsigned int optlen)
     | 1233 | + sockptr_t optval, unsigned int optlen)
1274 | 1234 | {
1275 | 1235 | struct kcm_sock *kcm = kcm_sk(sock->sk);
1276 | 1236 | int val, valbool;
  .. |   .. |
1282 | 1242 | if (optlen < sizeof(int))
1283 | 1243 | return -EINVAL;
1284 | 1244 |
1285 |      | - if (get_user(val, (int __user *)optval))
1286 |      | - return -EINVAL;
     | 1245 | + if (copy_from_sockptr(&val, optval, sizeof(int)))
     | 1246 | + return -EFAULT;
1287 | 1247 |
1288 | 1248 | valbool = val ? 1 : 0;
1289 | 1249 |
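`kcm_setsockopt()` moves to the `sockptr_t` interface, so the same code path serves both user and kernel callers, and a failed copy now returns `-EFAULT` rather than `-EINVAL`. A minimal sketch of reading an int option through a sockptr (hypothetical helper, assuming only what the hunk shows):

```c
#include <linux/errno.h>
#include <linux/sockptr.h>

/* Hypothetical option parser: accepts either a user or a kernel pointer
 * wrapped in sockptr_t, as the converted setsockopt path does.
 */
static int read_int_opt(sockptr_t optval, unsigned int optlen, int *out)
{
	int val;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (copy_from_sockptr(&val, optval, sizeof(int)))
		return -EFAULT;

	*out = val;
	return 0;
}
```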
  .. |   .. |
1416 | 1376 | psock->sk = csk;
1417 | 1377 | psock->bpf_prog = prog;
1418 | 1378 |
1419 |      | - err = strp_init(&psock->strp, csk, &cb);
1420 |      | - if (err) {
1421 |      | - kmem_cache_free(kcm_psockp, psock);
1422 |      | - goto out;
1423 |      | - }
1424 |      | -
1425 | 1379 | write_lock_bh(&csk->sk_callback_lock);
1426 | 1380 |
1427 | 1381 | /* Check if sk_user_data is aready by KCM or someone else.
  .. |   .. |
1429 | 1383 | */
1430 | 1384 | if (csk->sk_user_data) {
1431 | 1385 | write_unlock_bh(&csk->sk_callback_lock);
1432 |      | - strp_stop(&psock->strp);
1433 |      | - strp_done(&psock->strp);
1434 | 1386 | kmem_cache_free(kcm_psockp, psock);
1435 | 1387 | err = -EALREADY;
     | 1388 | + goto out;
     | 1389 | + }
     | 1390 | +
     | 1391 | + err = strp_init(&psock->strp, csk, &cb);
     | 1392 | + if (err) {
     | 1393 | + write_unlock_bh(&csk->sk_callback_lock);
     | 1394 | + kmem_cache_free(kcm_psockp, psock);
1436 | 1395 | goto out;
1437 | 1396 | }
1438 | 1397 |
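The attach path now takes `csk->sk_callback_lock` and checks `sk_user_data` before calling `strp_init()`, so a socket that is already claimed (or a failed init) no longer requires tearing down a strparser with `strp_stop()`/`strp_done()` that was never going to be used. A rough sketch of the resulting order, with a hypothetical claim of `sk_user_data` standing in for the rest of the attach logic:

```c
#include <linux/errno.h>
#include <net/sock.h>
#include <net/strparser.h>

/* Hypothetical attach helper: claim the socket first, initialize the
 * strparser only once the claim is known to succeed.
 */
static int attach_parser(struct sock *csk, struct strparser *strp,
			 const struct strp_callbacks *cb)
{
	int err;

	write_lock_bh(&csk->sk_callback_lock);
	if (csk->sk_user_data) {
		write_unlock_bh(&csk->sk_callback_lock);
		return -EALREADY;	/* someone else owns this socket */
	}

	err = strp_init(strp, csk, cb);
	if (err) {
		write_unlock_bh(&csk->sk_callback_lock);
		return err;
	}

	csk->sk_user_data = strp;	/* illustrative claim only */
	write_unlock_bh(&csk->sk_callback_lock);
	return 0;
}
```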
  .. |   .. |
1798 | 1757 |
1799 | 1758 | if (kcm->rx_wait) {
1800 | 1759 | list_del(&kcm->wait_rx_list);
1801 |      | - kcm->rx_wait = false;
     | 1760 | + /* paired with lockless reads in kcm_rfree() */
     | 1761 | + WRITE_ONCE(kcm->rx_wait, false);
1802 | 1762 | }
1803 | 1763 | /* Move any pending receive messages to other kcm sockets */
1804 | 1764 | requeue_rx_msgs(mux, &sk->sk_receive_queue);
  .. |   .. |
1843 | 1803 | kcm = kcm_sk(sk);
1844 | 1804 | mux = kcm->mux;
1845 | 1805 |
     | 1806 | + lock_sock(sk);
1846 | 1807 | sock_orphan(sk);
1847 | 1808 | kfree_skb(kcm->seq_skb);
1848 | 1809 |
1849 |      | - lock_sock(sk);
1850 | 1810 | /* Purge queue under lock to avoid race condition with tx_work trying
1851 | 1811 | * to act when queue is nonempty. If tx_work runs after this point
1852 | 1812 | * it will just return.
  .. |   .. |
2025 | 1985 | * that all multiplexors and psocks have been destroyed.
2026 | 1986 | */
2027 | 1987 | WARN_ON(!list_empty(&knet->mux_list));
     | 1988 | +
     | 1989 | + mutex_destroy(&knet->mutex);
2028 | 1990 | }
2029 | 1991 |
2030 | 1992 | static struct pernet_operations kcm_net_ops = {
  .. |   .. |
2040 | 2002 |
2041 | 2003 | kcm_muxp = kmem_cache_create("kcm_mux_cache",
2042 | 2004 | sizeof(struct kcm_mux), 0,
2043 |      | - SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
     | 2005 | + SLAB_HWCACHE_ALIGN, NULL);
2044 | 2006 | if (!kcm_muxp)
2045 | 2007 | goto fail;
2046 | 2008 |
2047 | 2009 | kcm_psockp = kmem_cache_create("kcm_psock_cache",
2048 | 2010 | sizeof(struct kcm_psock), 0,
2049 |      | - SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
     | 2011 | + SLAB_HWCACHE_ALIGN, NULL);
2050 | 2012 | if (!kcm_psockp)
2051 | 2013 | goto fail;
2052 | 2014 |
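The init path drops `SLAB_PANIC` from both `kmem_cache_create()` calls; an allocation failure is now handled by the existing `goto fail` unwinding rather than panicking the machine. A small sketch of that pattern with a hypothetical cache:

```c
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>

static struct kmem_cache *example_cachep;

static int __init example_init(void)
{
	/* No SLAB_PANIC: report the failure instead of halting the kernel. */
	example_cachep = kmem_cache_create("example_cache", 64, 0,
					   SLAB_HWCACHE_ALIGN, NULL);
	if (!example_cachep)
		return -ENOMEM;

	return 0;
}
```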
---|