| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Kernel Connection Multiplexor |
|---|
| 3 | 4 | * |
|---|
| 4 | 5 | * Copyright (c) 2016 Tom Herbert <tom@herbertland.com> |
|---|
| 5 | | - * |
|---|
| 6 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 7 | | - * it under the terms of the GNU General Public License version 2 |
|---|
| 8 | | - * as published by the Free Software Foundation. |
|---|
| 9 | 6 | */ |
|---|
| 10 | 7 | |
|---|
| 11 | 8 | #include <linux/bpf.h> |
|---|
| .. | .. |
|---|
| 164 | 161 | /* Buffer limit is okay now, add to ready list */ |
|---|
| 165 | 162 | list_add_tail(&kcm->wait_rx_list, |
|---|
| 166 | 163 | &kcm->mux->kcm_rx_waiters); |
|---|
| 167 | | - kcm->rx_wait = true; |
|---|
| 164 | + /* paired with lockless reads in kcm_rfree() */ |
|---|
| 165 | + WRITE_ONCE(kcm->rx_wait, true); |
|---|
| 168 | 166 | } |
|---|
| 169 | 167 | |
|---|
| 170 | 168 | static void kcm_rfree(struct sk_buff *skb) |
|---|
| .. | .. |
|---|
| 180 | 178 | /* For reading rx_wait and rx_psock without holding lock */ |
|---|
| 181 | 179 | smp_mb__after_atomic(); |
|---|
| 182 | 180 | |
|---|
| 183 | | - if (!kcm->rx_wait && !kcm->rx_psock && |
|---|
| 181 | + if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) && |
|---|
| 184 | 182 | sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) { |
|---|
| 185 | 183 | spin_lock_bh(&mux->rx_lock); |
|---|
| 186 | 184 | kcm_rcv_ready(kcm); |
|---|
| .. | .. |
|---|
| 223 | 221 | struct sk_buff *skb; |
|---|
| 224 | 222 | struct kcm_sock *kcm; |
|---|
| 225 | 223 | |
|---|
| 226 | | - while ((skb = __skb_dequeue(head))) { |
|---|
| 224 | + while ((skb = skb_dequeue(head))) { |
|---|
| 227 | 225 | /* Reset destructor to avoid calling kcm_rcv_ready */ |
|---|
| 228 | 226 | skb->destructor = sock_rfree; |
|---|
| 229 | 227 | skb_orphan(skb); |
|---|
| .. | .. |
|---|
| 239 | 237 | if (kcm_queue_rcv_skb(&kcm->sk, skb)) { |
|---|
| 240 | 238 | /* Should mean socket buffer full */ |
|---|
| 241 | 239 | list_del(&kcm->wait_rx_list); |
|---|
| 242 | | - kcm->rx_wait = false; |
|---|
| 240 | + /* paired with lockless reads in kcm_rfree() */ |
|---|
| 241 | + WRITE_ONCE(kcm->rx_wait, false); |
|---|
| 243 | 242 | |
|---|
| 244 | 243 | /* Commit rx_wait to read in kcm_rfree */ |
|---|
| 245 | 244 | smp_wmb(); |
|---|
| .. | .. |
|---|
| 282 | 281 | kcm = list_first_entry(&mux->kcm_rx_waiters, |
|---|
| 283 | 282 | struct kcm_sock, wait_rx_list); |
|---|
| 284 | 283 | list_del(&kcm->wait_rx_list); |
|---|
| 285 | | - kcm->rx_wait = false; |
|---|
| 284 | + /* paired with lockless reads in kcm_rfree() */ |
|---|
| 285 | + WRITE_ONCE(kcm->rx_wait, false); |
|---|
| 286 | 286 | |
|---|
| 287 | 287 | psock->rx_kcm = kcm; |
|---|
| 288 | | - kcm->rx_psock = psock; |
|---|
| 288 | + /* paired with lockless reads in kcm_rfree() */ |
|---|
| 289 | + WRITE_ONCE(kcm->rx_psock, psock); |
|---|
| 289 | 290 | |
|---|
| 290 | 291 | spin_unlock_bh(&mux->rx_lock); |
|---|
| 291 | 292 | |
|---|
| .. | .. |
|---|
| 312 | 313 | spin_lock_bh(&mux->rx_lock); |
|---|
| 313 | 314 | |
|---|
| 314 | 315 | psock->rx_kcm = NULL; |
|---|
| 315 | | - kcm->rx_psock = NULL; |
|---|
| 316 | + /* paired with lockless reads in kcm_rfree() */ |
|---|
| 317 | + WRITE_ONCE(kcm->rx_psock, NULL); |
|---|
| 316 | 318 | |
|---|
| 317 | 319 | /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with |
|---|
| 318 | 320 | * kcm_rfree |
|---|
| .. | .. |
|---|
| 383 | 385 | struct bpf_prog *prog = psock->bpf_prog; |
|---|
| 384 | 386 | int res; |
|---|
| 385 | 387 | |
|---|
| 386 | | - preempt_disable(); |
|---|
| 387 | | - res = BPF_PROG_RUN(prog, skb); |
|---|
| 388 | | - preempt_enable(); |
|---|
| 388 | + res = bpf_prog_run_pin_on_cpu(prog, skb); |
|---|
| 389 | 389 | return res; |
|---|
| 390 | 390 | } |
|---|
| 391 | 391 | |
|---|
| .. | .. |
|---|
| 642 | 642 | frag_offset = 0; |
|---|
| 643 | 643 | do_frag: |
|---|
| 644 | 644 | frag = &skb_shinfo(skb)->frags[fragidx]; |
|---|
| 645 | | - if (WARN_ON(!frag->size)) { |
|---|
| 645 | + if (WARN_ON(!skb_frag_size(frag))) { |
|---|
| 646 | 646 | ret = -EINVAL; |
|---|
| 647 | 647 | goto out; |
|---|
| 648 | 648 | } |
|---|
| 649 | 649 | |
|---|
| 650 | 650 | ret = kernel_sendpage(psock->sk->sk_socket, |
|---|
| 651 | | - frag->page.p, |
|---|
| 652 | | - frag->page_offset + frag_offset, |
|---|
| 653 | | - frag->size - frag_offset, |
|---|
| 651 | + skb_frag_page(frag), |
|---|
| 652 | + skb_frag_off(frag) + frag_offset, |
|---|
| 653 | + skb_frag_size(frag) - frag_offset, |
|---|
| 654 | 654 | MSG_DONTWAIT); |
|---|
| 655 | 655 | if (ret <= 0) { |
|---|
| 656 | 656 | if (ret == -EAGAIN) { |
|---|
| .. | .. |
|---|
| 685 | 685 | sent += ret; |
|---|
| 686 | 686 | frag_offset += ret; |
|---|
| 687 | 687 | KCM_STATS_ADD(psock->stats.tx_bytes, ret); |
|---|
| 688 | | - if (frag_offset < frag->size) { |
|---|
| 688 | + if (frag_offset < skb_frag_size(frag)) { |
|---|
| 689 | 689 | /* Not finished with this frag */ |
|---|
| 690 | 690 | goto do_frag; |
|---|
| 691 | 691 | } |
|---|
| .. | .. |
|---|
| 1064 | 1064 | out_error: |
|---|
| 1065 | 1065 | kcm_push(kcm); |
|---|
| 1066 | 1066 | |
|---|
| 1067 | | - if (copied && sock->type == SOCK_SEQPACKET) { |
|---|
| 1067 | + if (sock->type == SOCK_SEQPACKET) { |
|---|
| 1068 | 1068 | /* Wrote some bytes before encountering an |
|---|
| 1069 | 1069 | * error, return partial success. |
|---|
| 1070 | 1070 | */ |
|---|
| 1071 | | - goto partial_message; |
|---|
| 1072 | | - } |
|---|
| 1073 | | - |
|---|
| 1074 | | - if (head != kcm->seq_skb) |
|---|
| 1071 | + if (copied) |
|---|
| 1072 | + goto partial_message; |
|---|
| 1073 | + if (head != kcm->seq_skb) |
|---|
| 1074 | + kfree_skb(head); |
|---|
| 1075 | + } else { |
|---|
| 1075 | 1076 | kfree_skb(head); |
|---|
| 1077 | + kcm->seq_skb = NULL; |
|---|
| 1078 | + } |
|---|
| 1076 | 1079 | |
|---|
| 1077 | 1080 | err = sk_stream_error(sk, msg->msg_flags, err); |
|---|
| 1078 | 1081 | |
|---|
| .. | .. |
|---|
| 1084 | 1087 | return err; |
|---|
| 1085 | 1088 | } |
|---|
| 1086 | 1089 | |
|---|
| 1087 | | -static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, |
|---|
| 1088 | | - long timeo, int *err) |
|---|
| 1089 | | -{ |
|---|
| 1090 | | - struct sk_buff *skb; |
|---|
| 1091 | | - |
|---|
| 1092 | | - while (!(skb = skb_peek(&sk->sk_receive_queue))) { |
|---|
| 1093 | | - if (sk->sk_err) { |
|---|
| 1094 | | - *err = sock_error(sk); |
|---|
| 1095 | | - return NULL; |
|---|
| 1096 | | - } |
|---|
| 1097 | | - |
|---|
| 1098 | | - if (sock_flag(sk, SOCK_DONE)) |
|---|
| 1099 | | - return NULL; |
|---|
| 1100 | | - |
|---|
| 1101 | | - if ((flags & MSG_DONTWAIT) || !timeo) { |
|---|
| 1102 | | - *err = -EAGAIN; |
|---|
| 1103 | | - return NULL; |
|---|
| 1104 | | - } |
|---|
| 1105 | | - |
|---|
| 1106 | | - sk_wait_data(sk, &timeo, NULL); |
|---|
| 1107 | | - |
|---|
| 1108 | | - /* Handle signals */ |
|---|
| 1109 | | - if (signal_pending(current)) { |
|---|
| 1110 | | - *err = sock_intr_errno(timeo); |
|---|
| 1111 | | - return NULL; |
|---|
| 1112 | | - } |
|---|
| 1113 | | - } |
|---|
| 1114 | | - |
|---|
| 1115 | | - return skb; |
|---|
| 1116 | | -} |
|---|
| 1117 | | - |
|---|
| 1118 | 1090 | static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, |
|---|
| 1119 | 1091 | size_t len, int flags) |
|---|
| 1120 | 1092 | { |
|---|
| 1093 | + int noblock = flags & MSG_DONTWAIT; |
|---|
| 1121 | 1094 | struct sock *sk = sock->sk; |
|---|
| 1122 | 1095 | struct kcm_sock *kcm = kcm_sk(sk); |
|---|
| 1123 | 1096 | int err = 0; |
|---|
| 1124 | | - long timeo; |
|---|
| 1125 | 1097 | struct strp_msg *stm; |
|---|
| 1126 | 1098 | int copied = 0; |
|---|
| 1127 | 1099 | struct sk_buff *skb; |
|---|
| 1128 | 1100 | |
|---|
| 1129 | | - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); |
|---|
| 1130 | | - |
|---|
| 1131 | | - lock_sock(sk); |
|---|
| 1132 | | - |
|---|
| 1133 | | - skb = kcm_wait_data(sk, flags, timeo, &err); |
|---|
| 1101 | + skb = skb_recv_datagram(sk, flags, noblock, &err); |
|---|
| 1134 | 1102 | if (!skb) |
|---|
| 1135 | 1103 | goto out; |
|---|
| 1136 | 1104 | |
|---|
| .. | .. |
|---|
| 1161 | 1129 | /* Finished with message */ |
|---|
| 1162 | 1130 | msg->msg_flags |= MSG_EOR; |
|---|
| 1163 | 1131 | KCM_STATS_INCR(kcm->stats.rx_msgs); |
|---|
| 1164 | | - skb_unlink(skb, &sk->sk_receive_queue); |
|---|
| 1165 | | - kfree_skb(skb); |
|---|
| 1166 | 1132 | } |
|---|
| 1167 | 1133 | } |
|---|
| 1168 | 1134 | |
|---|
| 1169 | 1135 | out: |
|---|
| 1170 | | - release_sock(sk); |
|---|
| 1171 | | - |
|---|
| 1136 | + skb_free_datagram(sk, skb); |
|---|
| 1172 | 1137 | return copied ? : err; |
|---|
| 1173 | 1138 | } |
|---|
| 1174 | 1139 | |
|---|
| .. | .. |
|---|
| 1176 | 1141 | struct pipe_inode_info *pipe, size_t len, |
|---|
| 1177 | 1142 | unsigned int flags) |
|---|
| 1178 | 1143 | { |
|---|
| 1144 | + int noblock = flags & MSG_DONTWAIT; |
|---|
| 1179 | 1145 | struct sock *sk = sock->sk; |
|---|
| 1180 | 1146 | struct kcm_sock *kcm = kcm_sk(sk); |
|---|
| 1181 | | - long timeo; |
|---|
| 1182 | 1147 | struct strp_msg *stm; |
|---|
| 1183 | 1148 | int err = 0; |
|---|
| 1184 | 1149 | ssize_t copied; |
|---|
| .. | .. |
|---|
| 1186 | 1151 | |
|---|
| 1187 | 1152 | /* Only support splice for SOCK_SEQPACKET */ |
|---|
| 1188 | 1153 | |
|---|
| 1189 | | - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); |
|---|
| 1190 | | - |
|---|
| 1191 | | - lock_sock(sk); |
|---|
| 1192 | | - |
|---|
| 1193 | | - skb = kcm_wait_data(sk, flags, timeo, &err); |
|---|
| 1154 | + skb = skb_recv_datagram(sk, flags, noblock, &err); |
|---|
| 1194 | 1155 | if (!skb) |
|---|
| 1195 | 1156 | goto err_out; |
|---|
| 1196 | 1157 | |
|---|
| .. | .. |
|---|
| 1218 | 1179 | * finish reading the message. |
|---|
| 1219 | 1180 | */ |
|---|
| 1220 | 1181 | |
|---|
| 1221 | | - release_sock(sk); |
|---|
| 1222 | | - |
|---|
| 1182 | + skb_free_datagram(sk, skb); |
|---|
| 1223 | 1183 | return copied; |
|---|
| 1224 | 1184 | |
|---|
| 1225 | 1185 | err_out: |
|---|
| 1226 | | - release_sock(sk); |
|---|
| 1227 | | - |
|---|
| 1186 | + skb_free_datagram(sk, skb); |
|---|
| 1228 | 1187 | return err; |
|---|
| 1229 | 1188 | } |
|---|
| 1230 | 1189 | |
|---|
| .. | .. |
|---|
| 1244 | 1203 | if (!kcm->rx_psock) { |
|---|
| 1245 | 1204 | if (kcm->rx_wait) { |
|---|
| 1246 | 1205 | list_del(&kcm->wait_rx_list); |
|---|
| 1247 | | - kcm->rx_wait = false; |
|---|
| 1206 | + /* paired with lockless reads in kcm_rfree() */ |
|---|
| 1207 | + WRITE_ONCE(kcm->rx_wait, false); |
|---|
| 1248 | 1208 | } |
|---|
| 1249 | 1209 | |
|---|
| 1250 | 1210 | requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue); |
|---|
| .. | .. |
|---|
| 1270 | 1230 | } |
|---|
| 1271 | 1231 | |
|---|
| 1272 | 1232 | static int kcm_setsockopt(struct socket *sock, int level, int optname, |
|---|
| 1273 | | - char __user *optval, unsigned int optlen) |
|---|
| 1233 | + sockptr_t optval, unsigned int optlen) |
|---|
| 1274 | 1234 | { |
|---|
| 1275 | 1235 | struct kcm_sock *kcm = kcm_sk(sock->sk); |
|---|
| 1276 | 1236 | int val, valbool; |
|---|
| .. | .. |
|---|
| 1282 | 1242 | if (optlen < sizeof(int)) |
|---|
| 1283 | 1243 | return -EINVAL; |
|---|
| 1284 | 1244 | |
|---|
| 1285 | | - if (get_user(val, (int __user *)optval)) |
|---|
| 1286 | | - return -EINVAL; |
|---|
| 1245 | + if (copy_from_sockptr(&val, optval, sizeof(int))) |
|---|
| 1246 | + return -EFAULT; |
|---|
| 1287 | 1247 | |
|---|
| 1288 | 1248 | valbool = val ? 1 : 0; |
|---|
| 1289 | 1249 | |
|---|
| .. | .. |
|---|
| 1416 | 1376 | psock->sk = csk; |
|---|
| 1417 | 1377 | psock->bpf_prog = prog; |
|---|
| 1418 | 1378 | |
|---|
| 1419 | | - err = strp_init(&psock->strp, csk, &cb); |
|---|
| 1420 | | - if (err) { |
|---|
| 1421 | | - kmem_cache_free(kcm_psockp, psock); |
|---|
| 1422 | | - goto out; |
|---|
| 1423 | | - } |
|---|
| 1424 | | - |
|---|
| 1425 | 1379 | write_lock_bh(&csk->sk_callback_lock); |
|---|
| 1426 | 1380 | |
|---|
| 1427 | 1381 | /* Check if sk_user_data is already in use by KCM or someone else. |
|---|
| .. | .. |
|---|
| 1429 | 1383 | */ |
|---|
| 1430 | 1384 | if (csk->sk_user_data) { |
|---|
| 1431 | 1385 | write_unlock_bh(&csk->sk_callback_lock); |
|---|
| 1432 | | - strp_stop(&psock->strp); |
|---|
| 1433 | | - strp_done(&psock->strp); |
|---|
| 1434 | 1386 | kmem_cache_free(kcm_psockp, psock); |
|---|
| 1435 | 1387 | err = -EALREADY; |
|---|
| 1388 | + goto out; |
|---|
| 1389 | + } |
|---|
| 1390 | + |
|---|
| 1391 | + err = strp_init(&psock->strp, csk, &cb); |
|---|
| 1392 | + if (err) { |
|---|
| 1393 | + write_unlock_bh(&csk->sk_callback_lock); |
|---|
| 1394 | + kmem_cache_free(kcm_psockp, psock); |
|---|
| 1436 | 1395 | goto out; |
|---|
| 1437 | 1396 | } |
|---|
| 1438 | 1397 | |
|---|
| .. | .. |
|---|
| 1798 | 1757 | |
|---|
| 1799 | 1758 | if (kcm->rx_wait) { |
|---|
| 1800 | 1759 | list_del(&kcm->wait_rx_list); |
|---|
| 1801 | | - kcm->rx_wait = false; |
|---|
| 1760 | + /* paired with lockless reads in kcm_rfree() */ |
|---|
| 1761 | + WRITE_ONCE(kcm->rx_wait, false); |
|---|
| 1802 | 1762 | } |
|---|
| 1803 | 1763 | /* Move any pending receive messages to other kcm sockets */ |
|---|
| 1804 | 1764 | requeue_rx_msgs(mux, &sk->sk_receive_queue); |
|---|
| .. | .. |
|---|
| 1843 | 1803 | kcm = kcm_sk(sk); |
|---|
| 1844 | 1804 | mux = kcm->mux; |
|---|
| 1845 | 1805 | |
|---|
| 1806 | + lock_sock(sk); |
|---|
| 1846 | 1807 | sock_orphan(sk); |
|---|
| 1847 | 1808 | kfree_skb(kcm->seq_skb); |
|---|
| 1848 | 1809 | |
|---|
| 1849 | | - lock_sock(sk); |
|---|
| 1850 | 1810 | /* Purge queue under lock to avoid race condition with tx_work trying |
|---|
| 1851 | 1811 | * to act when queue is nonempty. If tx_work runs after this point |
|---|
| 1852 | 1812 | * it will just return. |
|---|
| .. | .. |
|---|
| 2025 | 1985 | * that all multiplexors and psocks have been destroyed. |
|---|
| 2026 | 1986 | */ |
|---|
| 2027 | 1987 | WARN_ON(!list_empty(&knet->mux_list)); |
|---|
| 1988 | + |
|---|
| 1989 | + mutex_destroy(&knet->mutex); |
|---|
| 2028 | 1990 | } |
|---|
| 2029 | 1991 | |
|---|
| 2030 | 1992 | static struct pernet_operations kcm_net_ops = { |
|---|
| .. | .. |
|---|
| 2040 | 2002 | |
|---|
| 2041 | 2003 | kcm_muxp = kmem_cache_create("kcm_mux_cache", |
|---|
| 2042 | 2004 | sizeof(struct kcm_mux), 0, |
|---|
| 2043 | | - SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
|---|
| 2005 | + SLAB_HWCACHE_ALIGN, NULL); |
|---|
| 2044 | 2006 | if (!kcm_muxp) |
|---|
| 2045 | 2007 | goto fail; |
|---|
| 2046 | 2008 | |
|---|
| 2047 | 2009 | kcm_psockp = kmem_cache_create("kcm_psock_cache", |
|---|
| 2048 | 2010 | sizeof(struct kcm_psock), 0, |
|---|
| 2049 | | - SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
|---|
| 2011 | + SLAB_HWCACHE_ALIGN, NULL); |
|---|
| 2050 | 2012 | if (!kcm_psockp) |
|---|
| 2051 | 2013 | goto fail; |
|---|
| 2052 | 2014 | |
|---|