.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /****************************************************************************** |
---|
2 | 3 | ******************************************************************************* |
---|
3 | 4 | ** |
---|
4 | 5 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
---|
5 | 6 | ** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. |
---|
6 | 7 | ** |
---|
7 | | -** This copyrighted material is made available to anyone wishing to use, |
---|
8 | | -** modify, copy, or redistribute it subject to the terms and conditions |
---|
9 | | -** of the GNU General Public License v.2. |
---|
10 | 8 | ** |
---|
11 | 9 | ******************************************************************************* |
---|
12 | 10 | ******************************************************************************/ |
---|
.. | .. |
---|
65 | 63 | |
---|
66 | 64 | /* Number of messages to send before rescheduling */ |
---|
67 | 65 | #define MAX_SEND_MSG_COUNT 25 |
---|
68 | | - |
---|
69 | | -struct cbuf { |
---|
70 | | - unsigned int base; |
---|
71 | | - unsigned int len; |
---|
72 | | - unsigned int mask; |
---|
73 | | -}; |
---|
74 | | - |
---|
75 | | -static void cbuf_add(struct cbuf *cb, int n) |
---|
76 | | -{ |
---|
77 | | - cb->len += n; |
---|
78 | | -} |
---|
79 | | - |
---|
80 | | -static int cbuf_data(struct cbuf *cb) |
---|
81 | | -{ |
---|
82 | | - return ((cb->base + cb->len) & cb->mask); |
---|
83 | | -} |
---|
84 | | - |
---|
85 | | -static void cbuf_init(struct cbuf *cb, int size) |
---|
86 | | -{ |
---|
87 | | - cb->base = cb->len = 0; |
---|
88 | | - cb->mask = size-1; |
---|
89 | | -} |
---|
90 | | - |
---|
91 | | -static void cbuf_eat(struct cbuf *cb, int n) |
---|
92 | | -{ |
---|
93 | | - cb->len -= n; |
---|
94 | | - cb->base += n; |
---|
95 | | - cb->base &= cb->mask; |
---|
96 | | -} |
---|
97 | | - |
---|
98 | | -static bool cbuf_empty(struct cbuf *cb) |
---|
99 | | -{ |
---|
100 | | - return cb->len == 0; |
---|
101 | | -} |
---|
| 66 | +#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000) |
---|
102 | 67 | |
---|
103 | 68 | struct connection { |
---|
104 | 69 | struct socket *sock; /* NULL if not connected */ |
---|
.. | .. |
---|
112 | 77 | #define CF_CLOSE 6 |
---|
113 | 78 | #define CF_APP_LIMITED 7 |
---|
114 | 79 | #define CF_CLOSING 8 |
---|
| 80 | +#define CF_SHUTDOWN 9 |
---|
115 | 81 | struct list_head writequeue; /* List of outgoing writequeue_entries */ |
---|
116 | 82 | spinlock_t writequeue_lock; |
---|
117 | 83 | int (*rx_action) (struct connection *); /* What to do when active */ |
---|
118 | 84 | void (*connect_action) (struct connection *); /* What to do to connect */ |
---|
119 | | - struct page *rx_page; |
---|
120 | | - struct cbuf cb; |
---|
| 85 | + void (*shutdown_action)(struct connection *con); /* What to do to shutdown */ |
---|
121 | 86 | int retries; |
---|
122 | 87 | #define MAX_CONNECT_RETRIES 3 |
---|
123 | 88 | struct hlist_node list; |
---|
124 | 89 | struct connection *othercon; |
---|
125 | 90 | struct work_struct rwork; /* Receive workqueue */ |
---|
126 | 91 | struct work_struct swork; /* Send workqueue */ |
---|
| 92 | + wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */ |
---|
| 93 | + unsigned char *rx_buf; |
---|
| 94 | + int rx_buflen; |
---|
| 95 | + int rx_leftover; |
---|
| 96 | + struct rcu_head rcu; |
---|
127 | 97 | }; |
---|
128 | 98 | #define sock2con(x) ((struct connection *)(x)->sk_user_data) |
---|
129 | 99 | |
---|
.. | .. |
---|
165 | 135 | static struct workqueue_struct *send_workqueue; |
---|
166 | 136 | |
---|
167 | 137 | static struct hlist_head connection_hash[CONN_HASH_SIZE]; |
---|
168 | | -static DEFINE_MUTEX(connections_lock); |
---|
169 | | -static struct kmem_cache *con_cache; |
---|
| 138 | +static DEFINE_SPINLOCK(connections_lock); |
---|
| 139 | +DEFINE_STATIC_SRCU(connections_srcu); |
---|
170 | 140 | |
---|
171 | 141 | static void process_recv_sockets(struct work_struct *work); |
---|
172 | 142 | static void process_send_sockets(struct work_struct *work); |
---|
.. | .. |
---|
182 | 152 | |
---|
183 | 153 | static struct connection *__find_con(int nodeid) |
---|
184 | 154 | { |
---|
185 | | - int r; |
---|
| 155 | + int r, idx; |
---|
186 | 156 | struct connection *con; |
---|
187 | 157 | |
---|
188 | 158 | r = nodeid_hash(nodeid); |
---|
189 | 159 | |
---|
190 | | - hlist_for_each_entry(con, &connection_hash[r], list) { |
---|
191 | | - if (con->nodeid == nodeid) |
---|
| 160 | + idx = srcu_read_lock(&connections_srcu); |
---|
| 161 | + hlist_for_each_entry_rcu(con, &connection_hash[r], list) { |
---|
| 162 | + if (con->nodeid == nodeid) { |
---|
| 163 | + srcu_read_unlock(&connections_srcu, idx); |
---|
192 | 164 | return con; |
---|
| 165 | + } |
---|
193 | 166 | } |
---|
| 167 | + srcu_read_unlock(&connections_srcu, idx); |
---|
| 168 | + |
---|
194 | 169 | return NULL; |
---|
195 | 170 | } |
---|
196 | 171 | |
---|
.. | .. |
---|
198 | 173 | * If 'allocation' is zero then we don't attempt to create a new |
---|
199 | 174 | * connection structure for this node. |
---|
200 | 175 | */ |
---|
201 | | -static struct connection *__nodeid2con(int nodeid, gfp_t alloc) |
---|
| 176 | +static struct connection *nodeid2con(int nodeid, gfp_t alloc) |
---|
202 | 177 | { |
---|
203 | | - struct connection *con = NULL; |
---|
| 178 | + struct connection *con, *tmp; |
---|
204 | 179 | int r; |
---|
205 | 180 | |
---|
206 | 181 | con = __find_con(nodeid); |
---|
207 | 182 | if (con || !alloc) |
---|
208 | 183 | return con; |
---|
209 | 184 | |
---|
210 | | - con = kmem_cache_zalloc(con_cache, alloc); |
---|
| 185 | + con = kzalloc(sizeof(*con), alloc); |
---|
211 | 186 | if (!con) |
---|
212 | 187 | return NULL; |
---|
213 | 188 | |
---|
214 | | - r = nodeid_hash(nodeid); |
---|
215 | | - hlist_add_head(&con->list, &connection_hash[r]); |
---|
| 189 | + con->rx_buflen = dlm_config.ci_buffer_size; |
---|
| 190 | + con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS); |
---|
| 191 | + if (!con->rx_buf) { |
---|
| 192 | + kfree(con); |
---|
| 193 | + return NULL; |
---|
| 194 | + } |
---|
216 | 195 | |
---|
217 | 196 | con->nodeid = nodeid; |
---|
218 | 197 | mutex_init(&con->sock_mutex); |
---|
.. | .. |
---|
220 | 199 | spin_lock_init(&con->writequeue_lock); |
---|
221 | 200 | INIT_WORK(&con->swork, process_send_sockets); |
---|
222 | 201 | INIT_WORK(&con->rwork, process_recv_sockets); |
---|
| 202 | + init_waitqueue_head(&con->shutdown_wait); |
---|
223 | 203 | |
---|
224 | 204 | /* Setup action pointers for child sockets */ |
---|
225 | 205 | if (con->nodeid) { |
---|
.. | .. |
---|
230 | 210 | con->rx_action = zerocon->rx_action; |
---|
231 | 211 | } |
---|
232 | 212 | |
---|
| 213 | + r = nodeid_hash(nodeid); |
---|
| 214 | + |
---|
| 215 | + spin_lock(&connections_lock); |
---|
| 216 | + /* Because multiple workqueues/threads calls this function it can |
---|
| 217 | + * race on multiple cpu's. Instead of locking hot path __find_con() |
---|
| 218 | + * we just check in rare cases of recently added nodes again |
---|
| 219 | + * under protection of connections_lock. If this is the case we |
---|
| 220 | + * abort our connection creation and return the existing connection. |
---|
| 221 | + */ |
---|
| 222 | + tmp = __find_con(nodeid); |
---|
| 223 | + if (tmp) { |
---|
| 224 | + spin_unlock(&connections_lock); |
---|
| 225 | + kfree(con->rx_buf); |
---|
| 226 | + kfree(con); |
---|
| 227 | + return tmp; |
---|
| 228 | + } |
---|
| 229 | + |
---|
| 230 | + hlist_add_head_rcu(&con->list, &connection_hash[r]); |
---|
| 231 | + spin_unlock(&connections_lock); |
---|
| 232 | + |
---|
233 | 233 | return con; |
---|
234 | 234 | } |
---|
235 | 235 | |
---|
236 | 236 | /* Loop round all connections */ |
---|
237 | 237 | static void foreach_conn(void (*conn_func)(struct connection *c)) |
---|
238 | 238 | { |
---|
239 | | - int i; |
---|
240 | | - struct hlist_node *n; |
---|
| 239 | + int i, idx; |
---|
241 | 240 | struct connection *con; |
---|
242 | 241 | |
---|
| 242 | + idx = srcu_read_lock(&connections_srcu); |
---|
243 | 243 | for (i = 0; i < CONN_HASH_SIZE; i++) { |
---|
244 | | - hlist_for_each_entry_safe(con, n, &connection_hash[i], list) |
---|
| 244 | + hlist_for_each_entry_rcu(con, &connection_hash[i], list) |
---|
245 | 245 | conn_func(con); |
---|
246 | 246 | } |
---|
247 | | -} |
---|
248 | | - |
---|
249 | | -static struct connection *nodeid2con(int nodeid, gfp_t allocation) |
---|
250 | | -{ |
---|
251 | | - struct connection *con; |
---|
252 | | - |
---|
253 | | - mutex_lock(&connections_lock); |
---|
254 | | - con = __nodeid2con(nodeid, allocation); |
---|
255 | | - mutex_unlock(&connections_lock); |
---|
256 | | - |
---|
257 | | - return con; |
---|
| 247 | + srcu_read_unlock(&connections_srcu, idx); |
---|
258 | 248 | } |
---|
259 | 249 | |
---|
260 | 250 | static struct dlm_node_addr *find_node_addr(int nodeid) |
---|
.. | .. |
---|
481 | 471 | static void lowcomms_error_report(struct sock *sk) |
---|
482 | 472 | { |
---|
483 | 473 | struct connection *con; |
---|
484 | | - struct sockaddr_storage saddr; |
---|
485 | 474 | void (*orig_report)(struct sock *) = NULL; |
---|
| 475 | + struct inet_sock *inet; |
---|
486 | 476 | |
---|
487 | 477 | read_lock_bh(&sk->sk_callback_lock); |
---|
488 | 478 | con = sock2con(sk); |
---|
.. | .. |
---|
490 | 480 | goto out; |
---|
491 | 481 | |
---|
492 | 482 | orig_report = listen_sock.sk_error_report; |
---|
493 | | - if (con->sock == NULL || |
---|
494 | | - kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) { |
---|
495 | | - printk_ratelimited(KERN_ERR "dlm: node %d: socket error " |
---|
496 | | - "sending to node %d, port %d, " |
---|
497 | | - "sk_err=%d/%d\n", dlm_our_nodeid(), |
---|
498 | | - con->nodeid, dlm_config.ci_tcp_port, |
---|
499 | | - sk->sk_err, sk->sk_err_soft); |
---|
500 | | - } else if (saddr.ss_family == AF_INET) { |
---|
501 | | - struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr; |
---|
502 | 483 | |
---|
| 484 | + inet = inet_sk(sk); |
---|
| 485 | + switch (sk->sk_family) { |
---|
| 486 | + case AF_INET: |
---|
503 | 487 | printk_ratelimited(KERN_ERR "dlm: node %d: socket error " |
---|
504 | | - "sending to node %d at %pI4, port %d, " |
---|
| 488 | + "sending to node %d at %pI4, dport %d, " |
---|
505 | 489 | "sk_err=%d/%d\n", dlm_our_nodeid(), |
---|
506 | | - con->nodeid, &sin4->sin_addr.s_addr, |
---|
507 | | - dlm_config.ci_tcp_port, sk->sk_err, |
---|
| 490 | + con->nodeid, &inet->inet_daddr, |
---|
| 491 | + ntohs(inet->inet_dport), sk->sk_err, |
---|
508 | 492 | sk->sk_err_soft); |
---|
509 | | - } else { |
---|
510 | | - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr; |
---|
511 | | - |
---|
| 493 | + break; |
---|
| 494 | +#if IS_ENABLED(CONFIG_IPV6) |
---|
| 495 | + case AF_INET6: |
---|
512 | 496 | printk_ratelimited(KERN_ERR "dlm: node %d: socket error " |
---|
513 | | - "sending to node %d at %u.%u.%u.%u, " |
---|
514 | | - "port %d, sk_err=%d/%d\n", dlm_our_nodeid(), |
---|
515 | | - con->nodeid, sin6->sin6_addr.s6_addr32[0], |
---|
516 | | - sin6->sin6_addr.s6_addr32[1], |
---|
517 | | - sin6->sin6_addr.s6_addr32[2], |
---|
518 | | - sin6->sin6_addr.s6_addr32[3], |
---|
519 | | - dlm_config.ci_tcp_port, sk->sk_err, |
---|
| 497 | + "sending to node %d at %pI6c, " |
---|
| 498 | + "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(), |
---|
| 499 | + con->nodeid, &sk->sk_v6_daddr, |
---|
| 500 | + ntohs(inet->inet_dport), sk->sk_err, |
---|
520 | 501 | sk->sk_err_soft); |
---|
| 502 | + break; |
---|
| 503 | +#endif |
---|
| 504 | + default: |
---|
| 505 | + printk_ratelimited(KERN_ERR "dlm: node %d: socket error " |
---|
| 506 | + "invalid socket family %d set, " |
---|
| 507 | + "sk_err=%d/%d\n", dlm_our_nodeid(), |
---|
| 508 | + sk->sk_family, sk->sk_err, sk->sk_err_soft); |
---|
| 509 | + goto out; |
---|
521 | 510 | } |
---|
522 | 511 | out: |
---|
523 | 512 | read_unlock_bh(&sk->sk_callback_lock); |
---|
.. | .. |
---|
611 | 600 | /* Will only re-enter once. */ |
---|
612 | 601 | close_connection(con->othercon, false, tx, rx); |
---|
613 | 602 | } |
---|
614 | | - if (con->rx_page) { |
---|
615 | | - __free_page(con->rx_page); |
---|
616 | | - con->rx_page = NULL; |
---|
617 | | - } |
---|
618 | 603 | |
---|
| 604 | + con->rx_leftover = 0; |
---|
619 | 605 | con->retries = 0; |
---|
620 | 606 | mutex_unlock(&con->sock_mutex); |
---|
621 | 607 | clear_bit(CF_CLOSING, &con->flags); |
---|
622 | 608 | } |
---|
623 | 609 | |
---|
| 610 | +static void shutdown_connection(struct connection *con) |
---|
| 611 | +{ |
---|
| 612 | + int ret; |
---|
| 613 | + |
---|
| 614 | + flush_work(&con->swork); |
---|
| 615 | + |
---|
| 616 | + mutex_lock(&con->sock_mutex); |
---|
| 617 | + /* nothing to shutdown */ |
---|
| 618 | + if (!con->sock) { |
---|
| 619 | + mutex_unlock(&con->sock_mutex); |
---|
| 620 | + return; |
---|
| 621 | + } |
---|
| 622 | + |
---|
| 623 | + set_bit(CF_SHUTDOWN, &con->flags); |
---|
| 624 | + ret = kernel_sock_shutdown(con->sock, SHUT_WR); |
---|
| 625 | + mutex_unlock(&con->sock_mutex); |
---|
| 626 | + if (ret) { |
---|
| 627 | + log_print("Connection %p failed to shutdown: %d will force close", |
---|
| 628 | + con, ret); |
---|
| 629 | + goto force_close; |
---|
| 630 | + } else { |
---|
| 631 | + ret = wait_event_timeout(con->shutdown_wait, |
---|
| 632 | + !test_bit(CF_SHUTDOWN, &con->flags), |
---|
| 633 | + DLM_SHUTDOWN_WAIT_TIMEOUT); |
---|
| 634 | + if (ret == 0) { |
---|
| 635 | + log_print("Connection %p shutdown timed out, will force close", |
---|
| 636 | + con); |
---|
| 637 | + goto force_close; |
---|
| 638 | + } |
---|
| 639 | + } |
---|
| 640 | + |
---|
| 641 | + return; |
---|
| 642 | + |
---|
| 643 | +force_close: |
---|
| 644 | + clear_bit(CF_SHUTDOWN, &con->flags); |
---|
| 645 | + close_connection(con, false, true, true); |
---|
| 646 | +} |
---|
| 647 | + |
---|
| 648 | +static void dlm_tcp_shutdown(struct connection *con) |
---|
| 649 | +{ |
---|
| 650 | + if (con->othercon) |
---|
| 651 | + shutdown_connection(con->othercon); |
---|
| 652 | + shutdown_connection(con); |
---|
| 653 | +} |
---|
| 654 | + |
---|
| 655 | +static int con_realloc_receive_buf(struct connection *con, int newlen) |
---|
| 656 | +{ |
---|
| 657 | + unsigned char *newbuf; |
---|
| 658 | + |
---|
| 659 | + newbuf = kmalloc(newlen, GFP_NOFS); |
---|
| 660 | + if (!newbuf) |
---|
| 661 | + return -ENOMEM; |
---|
| 662 | + |
---|
| 663 | + /* copy any leftover from last receive */ |
---|
| 664 | + if (con->rx_leftover) |
---|
| 665 | + memmove(newbuf, con->rx_buf, con->rx_leftover); |
---|
| 666 | + |
---|
| 667 | + /* swap to new buffer space */ |
---|
| 668 | + kfree(con->rx_buf); |
---|
| 669 | + con->rx_buflen = newlen; |
---|
| 670 | + con->rx_buf = newbuf; |
---|
| 671 | + |
---|
| 672 | + return 0; |
---|
| 673 | +} |
---|
| 674 | + |
---|
624 | 675 | /* Data received from remote end */ |
---|
625 | 676 | static int receive_from_sock(struct connection *con) |
---|
626 | 677 | { |
---|
627 | | - int ret = 0; |
---|
628 | | - struct msghdr msg = {}; |
---|
629 | | - struct kvec iov[2]; |
---|
630 | | - unsigned len; |
---|
631 | | - int r; |
---|
632 | 678 | int call_again_soon = 0; |
---|
633 | | - int nvec; |
---|
| 679 | + struct msghdr msg; |
---|
| 680 | + struct kvec iov; |
---|
| 681 | + int ret, buflen; |
---|
634 | 682 | |
---|
635 | 683 | mutex_lock(&con->sock_mutex); |
---|
636 | 684 | |
---|
.. | .. |
---|
638 | 686 | ret = -EAGAIN; |
---|
639 | 687 | goto out_close; |
---|
640 | 688 | } |
---|
| 689 | + |
---|
641 | 690 | if (con->nodeid == 0) { |
---|
642 | 691 | ret = -EINVAL; |
---|
643 | 692 | goto out_close; |
---|
644 | 693 | } |
---|
645 | 694 | |
---|
646 | | - if (con->rx_page == NULL) { |
---|
647 | | - /* |
---|
648 | | - * This doesn't need to be atomic, but I think it should |
---|
649 | | - * improve performance if it is. |
---|
650 | | - */ |
---|
651 | | - con->rx_page = alloc_page(GFP_ATOMIC); |
---|
652 | | - if (con->rx_page == NULL) |
---|
| 695 | + /* realloc if we get new buffer size to read out */ |
---|
| 696 | + buflen = dlm_config.ci_buffer_size; |
---|
| 697 | + if (con->rx_buflen != buflen && con->rx_leftover <= buflen) { |
---|
| 698 | + ret = con_realloc_receive_buf(con, buflen); |
---|
| 699 | + if (ret < 0) |
---|
653 | 700 | goto out_resched; |
---|
654 | | - cbuf_init(&con->cb, PAGE_SIZE); |
---|
655 | 701 | } |
---|
656 | 702 | |
---|
657 | | - /* |
---|
658 | | - * iov[0] is the bit of the circular buffer between the current end |
---|
659 | | - * point (cb.base + cb.len) and the end of the buffer. |
---|
| 703 | + /* calculate new buffer parameter regarding last receive and |
---|
| 704 | + * possible leftover bytes |
---|
660 | 705 | */ |
---|
661 | | - iov[0].iov_len = con->cb.base - cbuf_data(&con->cb); |
---|
662 | | - iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb); |
---|
663 | | - iov[1].iov_len = 0; |
---|
664 | | - nvec = 1; |
---|
| 706 | + iov.iov_base = con->rx_buf + con->rx_leftover; |
---|
| 707 | + iov.iov_len = con->rx_buflen - con->rx_leftover; |
---|
665 | 708 | |
---|
666 | | - /* |
---|
667 | | - * iov[1] is the bit of the circular buffer between the start of the |
---|
668 | | - * buffer and the start of the currently used section (cb.base) |
---|
669 | | - */ |
---|
670 | | - if (cbuf_data(&con->cb) >= con->cb.base) { |
---|
671 | | - iov[0].iov_len = PAGE_SIZE - cbuf_data(&con->cb); |
---|
672 | | - iov[1].iov_len = con->cb.base; |
---|
673 | | - iov[1].iov_base = page_address(con->rx_page); |
---|
674 | | - nvec = 2; |
---|
675 | | - } |
---|
676 | | - len = iov[0].iov_len + iov[1].iov_len; |
---|
677 | | - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nvec, len); |
---|
678 | | - |
---|
679 | | - r = ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT | MSG_NOSIGNAL); |
---|
| 709 | + memset(&msg, 0, sizeof(msg)); |
---|
| 710 | + msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; |
---|
| 711 | + ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, |
---|
| 712 | + msg.msg_flags); |
---|
680 | 713 | if (ret <= 0) |
---|
681 | 714 | goto out_close; |
---|
682 | | - else if (ret == len) |
---|
| 715 | + else if (ret == iov.iov_len) |
---|
683 | 716 | call_again_soon = 1; |
---|
684 | 717 | |
---|
685 | | - cbuf_add(&con->cb, ret); |
---|
686 | | - ret = dlm_process_incoming_buffer(con->nodeid, |
---|
687 | | - page_address(con->rx_page), |
---|
688 | | - con->cb.base, con->cb.len, |
---|
689 | | - PAGE_SIZE); |
---|
690 | | - if (ret == -EBADMSG) { |
---|
691 | | - log_print("lowcomms: addr=%p, base=%u, len=%u, read=%d", |
---|
692 | | - page_address(con->rx_page), con->cb.base, |
---|
693 | | - con->cb.len, r); |
---|
694 | | - } |
---|
| 718 | + /* new buflen according readed bytes and leftover from last receive */ |
---|
| 719 | + buflen = ret + con->rx_leftover; |
---|
| 720 | + ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen); |
---|
695 | 721 | if (ret < 0) |
---|
696 | 722 | goto out_close; |
---|
697 | | - cbuf_eat(&con->cb, ret); |
---|
698 | 723 | |
---|
699 | | - if (cbuf_empty(&con->cb) && !call_again_soon) { |
---|
700 | | - __free_page(con->rx_page); |
---|
701 | | - con->rx_page = NULL; |
---|
| 724 | + /* calculate leftover bytes from process and put it into begin of |
---|
| 725 | + * the receive buffer, so next receive we have the full message |
---|
| 726 | + * at the start address of the receive buffer. |
---|
| 727 | + */ |
---|
| 728 | + con->rx_leftover = buflen - ret; |
---|
| 729 | + if (con->rx_leftover) { |
---|
| 730 | + memmove(con->rx_buf, con->rx_buf + ret, |
---|
| 731 | + con->rx_leftover); |
---|
| 732 | + call_again_soon = true; |
---|
702 | 733 | } |
---|
703 | 734 | |
---|
704 | 735 | if (call_again_soon) |
---|
705 | 736 | goto out_resched; |
---|
| 737 | + |
---|
706 | 738 | mutex_unlock(&con->sock_mutex); |
---|
707 | 739 | return 0; |
---|
708 | 740 | |
---|
.. | .. |
---|
715 | 747 | out_close: |
---|
716 | 748 | mutex_unlock(&con->sock_mutex); |
---|
717 | 749 | if (ret != -EAGAIN) { |
---|
718 | | - close_connection(con, true, true, false); |
---|
719 | 750 | /* Reconnect when there is something to send */ |
---|
| 751 | + close_connection(con, false, true, false); |
---|
| 752 | + if (ret == 0) { |
---|
| 753 | + log_print("connection %p got EOF from %d", |
---|
| 754 | + con, con->nodeid); |
---|
| 755 | + /* handling for tcp shutdown */ |
---|
| 756 | + clear_bit(CF_SHUTDOWN, &con->flags); |
---|
| 757 | + wake_up(&con->shutdown_wait); |
---|
| 758 | + /* signal to breaking receive worker */ |
---|
| 759 | + ret = -1; |
---|
| 760 | + } |
---|
720 | 761 | } |
---|
721 | | - /* Don't return success if we really got EOF */ |
---|
722 | | - if (ret == 0) |
---|
723 | | - ret = -EAGAIN; |
---|
724 | | - |
---|
725 | 762 | return ret; |
---|
726 | 763 | } |
---|
727 | 764 | |
---|
728 | 765 | /* Listening socket is busy, accept a connection */ |
---|
729 | | -static int tcp_accept_from_sock(struct connection *con) |
---|
| 766 | +static int accept_from_sock(struct connection *con) |
---|
730 | 767 | { |
---|
731 | 768 | int result; |
---|
732 | 769 | struct sockaddr_storage peeraddr; |
---|
.. | .. |
---|
735 | 772 | int nodeid; |
---|
736 | 773 | struct connection *newcon; |
---|
737 | 774 | struct connection *addcon; |
---|
| 775 | + unsigned int mark; |
---|
738 | 776 | |
---|
739 | | - mutex_lock(&connections_lock); |
---|
740 | 777 | if (!dlm_allow_conn) { |
---|
741 | | - mutex_unlock(&connections_lock); |
---|
742 | 778 | return -1; |
---|
743 | 779 | } |
---|
744 | | - mutex_unlock(&connections_lock); |
---|
745 | 780 | |
---|
746 | 781 | mutex_lock_nested(&con->sock_mutex, 0); |
---|
747 | 782 | |
---|
.. | .. |
---|
774 | 809 | return -1; |
---|
775 | 810 | } |
---|
776 | 811 | |
---|
| 812 | + dlm_comm_mark(nodeid, &mark); |
---|
| 813 | + sock_set_mark(newsock->sk, mark); |
---|
| 814 | + |
---|
777 | 815 | log_print("got connection from %d", nodeid); |
---|
778 | 816 | |
---|
779 | 817 | /* Check to see if we already have a connection to this node. This |
---|
.. | .. |
---|
791 | 829 | struct connection *othercon = newcon->othercon; |
---|
792 | 830 | |
---|
793 | 831 | if (!othercon) { |
---|
794 | | - othercon = kmem_cache_zalloc(con_cache, GFP_NOFS); |
---|
| 832 | + othercon = kzalloc(sizeof(*othercon), GFP_NOFS); |
---|
795 | 833 | if (!othercon) { |
---|
796 | 834 | log_print("failed to allocate incoming socket"); |
---|
797 | 835 | mutex_unlock(&newcon->sock_mutex); |
---|
798 | 836 | result = -ENOMEM; |
---|
799 | 837 | goto accept_err; |
---|
800 | 838 | } |
---|
| 839 | + |
---|
| 840 | + othercon->rx_buflen = dlm_config.ci_buffer_size; |
---|
| 841 | + othercon->rx_buf = kmalloc(othercon->rx_buflen, GFP_NOFS); |
---|
| 842 | + if (!othercon->rx_buf) { |
---|
| 843 | + mutex_unlock(&newcon->sock_mutex); |
---|
| 844 | + kfree(othercon); |
---|
| 845 | + log_print("failed to allocate incoming socket receive buffer"); |
---|
| 846 | + result = -ENOMEM; |
---|
| 847 | + goto accept_err; |
---|
| 848 | + } |
---|
| 849 | + |
---|
801 | 850 | othercon->nodeid = nodeid; |
---|
802 | 851 | othercon->rx_action = receive_from_sock; |
---|
803 | 852 | mutex_init(&othercon->sock_mutex); |
---|
.. | .. |
---|
805 | 854 | spin_lock_init(&othercon->writequeue_lock); |
---|
806 | 855 | INIT_WORK(&othercon->swork, process_send_sockets); |
---|
807 | 856 | INIT_WORK(&othercon->rwork, process_recv_sockets); |
---|
| 857 | + init_waitqueue_head(&othercon->shutdown_wait); |
---|
808 | 858 | set_bit(CF_IS_OTHERCON, &othercon->flags); |
---|
| 859 | + } else { |
---|
| 860 | + /* close other sock con if we have something new */ |
---|
| 861 | + close_connection(othercon, false, true, false); |
---|
809 | 862 | } |
---|
| 863 | + |
---|
810 | 864 | mutex_lock_nested(&othercon->sock_mutex, 2); |
---|
811 | | - if (!othercon->sock) { |
---|
812 | | - newcon->othercon = othercon; |
---|
813 | | - add_sock(newsock, othercon); |
---|
814 | | - addcon = othercon; |
---|
815 | | - mutex_unlock(&othercon->sock_mutex); |
---|
816 | | - } |
---|
817 | | - else { |
---|
818 | | - printk("Extra connection from node %d attempted\n", nodeid); |
---|
819 | | - result = -EAGAIN; |
---|
820 | | - mutex_unlock(&othercon->sock_mutex); |
---|
821 | | - mutex_unlock(&newcon->sock_mutex); |
---|
822 | | - goto accept_err; |
---|
823 | | - } |
---|
| 865 | + newcon->othercon = othercon; |
---|
| 866 | + add_sock(newsock, othercon); |
---|
| 867 | + addcon = othercon; |
---|
| 868 | + mutex_unlock(&othercon->sock_mutex); |
---|
824 | 869 | } |
---|
825 | 870 | else { |
---|
826 | 871 | newcon->rx_action = receive_from_sock; |
---|
.. | .. |
---|
854 | 899 | return result; |
---|
855 | 900 | } |
---|
856 | 901 | |
---|
857 | | -static int sctp_accept_from_sock(struct connection *con) |
---|
858 | | -{ |
---|
859 | | - /* Check that the new node is in the lockspace */ |
---|
860 | | - struct sctp_prim prim; |
---|
861 | | - int nodeid; |
---|
862 | | - int prim_len, ret; |
---|
863 | | - int addr_len; |
---|
864 | | - struct connection *newcon; |
---|
865 | | - struct connection *addcon; |
---|
866 | | - struct socket *newsock; |
---|
867 | | - |
---|
868 | | - mutex_lock(&connections_lock); |
---|
869 | | - if (!dlm_allow_conn) { |
---|
870 | | - mutex_unlock(&connections_lock); |
---|
871 | | - return -1; |
---|
872 | | - } |
---|
873 | | - mutex_unlock(&connections_lock); |
---|
874 | | - |
---|
875 | | - mutex_lock_nested(&con->sock_mutex, 0); |
---|
876 | | - |
---|
877 | | - ret = kernel_accept(con->sock, &newsock, O_NONBLOCK); |
---|
878 | | - if (ret < 0) |
---|
879 | | - goto accept_err; |
---|
880 | | - |
---|
881 | | - memset(&prim, 0, sizeof(struct sctp_prim)); |
---|
882 | | - prim_len = sizeof(struct sctp_prim); |
---|
883 | | - |
---|
884 | | - ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR, |
---|
885 | | - (char *)&prim, &prim_len); |
---|
886 | | - if (ret < 0) { |
---|
887 | | - log_print("getsockopt/sctp_primary_addr failed: %d", ret); |
---|
888 | | - goto accept_err; |
---|
889 | | - } |
---|
890 | | - |
---|
891 | | - make_sockaddr(&prim.ssp_addr, 0, &addr_len); |
---|
892 | | - ret = addr_to_nodeid(&prim.ssp_addr, &nodeid); |
---|
893 | | - if (ret) { |
---|
894 | | - unsigned char *b = (unsigned char *)&prim.ssp_addr; |
---|
895 | | - |
---|
896 | | - log_print("reject connect from unknown addr"); |
---|
897 | | - print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, |
---|
898 | | - b, sizeof(struct sockaddr_storage)); |
---|
899 | | - goto accept_err; |
---|
900 | | - } |
---|
901 | | - |
---|
902 | | - newcon = nodeid2con(nodeid, GFP_NOFS); |
---|
903 | | - if (!newcon) { |
---|
904 | | - ret = -ENOMEM; |
---|
905 | | - goto accept_err; |
---|
906 | | - } |
---|
907 | | - |
---|
908 | | - mutex_lock_nested(&newcon->sock_mutex, 1); |
---|
909 | | - |
---|
910 | | - if (newcon->sock) { |
---|
911 | | - struct connection *othercon = newcon->othercon; |
---|
912 | | - |
---|
913 | | - if (!othercon) { |
---|
914 | | - othercon = kmem_cache_zalloc(con_cache, GFP_NOFS); |
---|
915 | | - if (!othercon) { |
---|
916 | | - log_print("failed to allocate incoming socket"); |
---|
917 | | - mutex_unlock(&newcon->sock_mutex); |
---|
918 | | - ret = -ENOMEM; |
---|
919 | | - goto accept_err; |
---|
920 | | - } |
---|
921 | | - othercon->nodeid = nodeid; |
---|
922 | | - othercon->rx_action = receive_from_sock; |
---|
923 | | - mutex_init(&othercon->sock_mutex); |
---|
924 | | - INIT_LIST_HEAD(&othercon->writequeue); |
---|
925 | | - spin_lock_init(&othercon->writequeue_lock); |
---|
926 | | - INIT_WORK(&othercon->swork, process_send_sockets); |
---|
927 | | - INIT_WORK(&othercon->rwork, process_recv_sockets); |
---|
928 | | - set_bit(CF_IS_OTHERCON, &othercon->flags); |
---|
929 | | - } |
---|
930 | | - mutex_lock_nested(&othercon->sock_mutex, 2); |
---|
931 | | - if (!othercon->sock) { |
---|
932 | | - newcon->othercon = othercon; |
---|
933 | | - add_sock(newsock, othercon); |
---|
934 | | - addcon = othercon; |
---|
935 | | - mutex_unlock(&othercon->sock_mutex); |
---|
936 | | - } else { |
---|
937 | | - printk("Extra connection from node %d attempted\n", nodeid); |
---|
938 | | - ret = -EAGAIN; |
---|
939 | | - mutex_unlock(&othercon->sock_mutex); |
---|
940 | | - mutex_unlock(&newcon->sock_mutex); |
---|
941 | | - goto accept_err; |
---|
942 | | - } |
---|
943 | | - } else { |
---|
944 | | - newcon->rx_action = receive_from_sock; |
---|
945 | | - add_sock(newsock, newcon); |
---|
946 | | - addcon = newcon; |
---|
947 | | - } |
---|
948 | | - |
---|
949 | | - log_print("connected to %d", nodeid); |
---|
950 | | - |
---|
951 | | - mutex_unlock(&newcon->sock_mutex); |
---|
952 | | - |
---|
953 | | - /* |
---|
954 | | - * Add it to the active queue in case we got data |
---|
955 | | - * between processing the accept adding the socket |
---|
956 | | - * to the read_sockets list |
---|
957 | | - */ |
---|
958 | | - if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) |
---|
959 | | - queue_work(recv_workqueue, &addcon->rwork); |
---|
960 | | - mutex_unlock(&con->sock_mutex); |
---|
961 | | - |
---|
962 | | - return 0; |
---|
963 | | - |
---|
964 | | -accept_err: |
---|
965 | | - mutex_unlock(&con->sock_mutex); |
---|
966 | | - if (newsock) |
---|
967 | | - sock_release(newsock); |
---|
968 | | - if (ret != -EAGAIN) |
---|
969 | | - log_print("error accepting connection from node: %d", ret); |
---|
970 | | - |
---|
971 | | - return ret; |
---|
972 | | -} |
---|
973 | | - |
---|
974 | 902 | static void free_entry(struct writequeue_entry *e) |
---|
975 | 903 | { |
---|
976 | 904 | __free_page(e->page); |
---|
.. | .. |
---|
1001 | 929 | static int sctp_bind_addrs(struct connection *con, uint16_t port) |
---|
1002 | 930 | { |
---|
1003 | 931 | struct sockaddr_storage localaddr; |
---|
| 932 | + struct sockaddr *addr = (struct sockaddr *)&localaddr; |
---|
1004 | 933 | int i, addr_len, result = 0; |
---|
1005 | 934 | |
---|
1006 | 935 | for (i = 0; i < dlm_local_count; i++) { |
---|
.. | .. |
---|
1008 | 937 | make_sockaddr(&localaddr, port, &addr_len); |
---|
1009 | 938 | |
---|
1010 | 939 | if (!i) |
---|
1011 | | - result = kernel_bind(con->sock, |
---|
1012 | | - (struct sockaddr *)&localaddr, |
---|
1013 | | - addr_len); |
---|
| 940 | + result = kernel_bind(con->sock, addr, addr_len); |
---|
1014 | 941 | else |
---|
1015 | | - result = kernel_setsockopt(con->sock, SOL_SCTP, |
---|
1016 | | - SCTP_SOCKOPT_BINDX_ADD, |
---|
1017 | | - (char *)&localaddr, addr_len); |
---|
| 942 | + result = sock_bind_add(con->sock->sk, addr, addr_len); |
---|
1018 | 943 | |
---|
1019 | 944 | if (result < 0) { |
---|
1020 | 945 | log_print("Can't bind to %d addr number %d, %d.\n", |
---|
.. | .. |
---|
1033 | 958 | static void sctp_connect_to_sock(struct connection *con) |
---|
1034 | 959 | { |
---|
1035 | 960 | struct sockaddr_storage daddr; |
---|
1036 | | - int one = 1; |
---|
1037 | 961 | int result; |
---|
1038 | 962 | int addr_len; |
---|
1039 | 963 | struct socket *sock; |
---|
1040 | | - struct timeval tv = { .tv_sec = 5, .tv_usec = 0 }; |
---|
| 964 | + unsigned int mark; |
---|
1041 | 965 | |
---|
1042 | 966 | if (con->nodeid == 0) { |
---|
1043 | 967 | log_print("attempt to connect sock 0 foiled"); |
---|
1044 | 968 | return; |
---|
1045 | 969 | } |
---|
| 970 | + |
---|
| 971 | + dlm_comm_mark(con->nodeid, &mark); |
---|
1046 | 972 | |
---|
1047 | 973 | mutex_lock(&con->sock_mutex); |
---|
1048 | 974 | |
---|
.. | .. |
---|
1068 | 994 | if (result < 0) |
---|
1069 | 995 | goto socket_err; |
---|
1070 | 996 | |
---|
| 997 | + sock_set_mark(sock->sk, mark); |
---|
| 998 | + |
---|
1071 | 999 | con->rx_action = receive_from_sock; |
---|
1072 | 1000 | con->connect_action = sctp_connect_to_sock; |
---|
1073 | 1001 | add_sock(sock, con); |
---|
.. | .. |
---|
1081 | 1009 | log_print("connecting to %d", con->nodeid); |
---|
1082 | 1010 | |
---|
1083 | 1011 | /* Turn off Nagle's algorithm */ |
---|
1084 | | - kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, |
---|
1085 | | - sizeof(one)); |
---|
| 1012 | + sctp_sock_set_nodelay(sock->sk); |
---|
1086 | 1013 | |
---|
1087 | 1014 | /* |
---|
1088 | 1015 | * Make sock->ops->connect() function return in specified time, |
---|
1089 | 1016 | * since O_NONBLOCK argument in connect() function does not work here, |
---|
1090 | 1017 | * then, we should restore the default value of this attribute. |
---|
1091 | 1018 | */ |
---|
1092 | | - kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv, |
---|
1093 | | - sizeof(tv)); |
---|
| 1019 | + sock_set_sndtimeo(sock->sk, 5); |
---|
1094 | 1020 | result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len, |
---|
1095 | 1021 | 0); |
---|
1096 | | - memset(&tv, 0, sizeof(tv)); |
---|
1097 | | - kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv, |
---|
1098 | | - sizeof(tv)); |
---|
| 1022 | + sock_set_sndtimeo(sock->sk, 0); |
---|
1099 | 1023 | |
---|
1100 | 1024 | if (result == -EINPROGRESS) |
---|
1101 | 1025 | result = 0; |
---|
.. | .. |
---|
1134 | 1058 | struct sockaddr_storage saddr, src_addr; |
---|
1135 | 1059 | int addr_len; |
---|
1136 | 1060 | struct socket *sock = NULL; |
---|
1137 | | - int one = 1; |
---|
| 1061 | + unsigned int mark; |
---|
1138 | 1062 | int result; |
---|
1139 | 1063 | |
---|
1140 | 1064 | if (con->nodeid == 0) { |
---|
1141 | 1065 | log_print("attempt to connect sock 0 foiled"); |
---|
1142 | 1066 | return; |
---|
1143 | 1067 | } |
---|
| 1068 | + |
---|
| 1069 | + dlm_comm_mark(con->nodeid, &mark); |
---|
1144 | 1070 | |
---|
1145 | 1071 | mutex_lock(&con->sock_mutex); |
---|
1146 | 1072 | if (con->retries++ > MAX_CONNECT_RETRIES) |
---|
.. | .. |
---|
1156 | 1082 | if (result < 0) |
---|
1157 | 1083 | goto out_err; |
---|
1158 | 1084 | |
---|
| 1085 | + sock_set_mark(sock->sk, mark); |
---|
| 1086 | + |
---|
1159 | 1087 | memset(&saddr, 0, sizeof(saddr)); |
---|
1160 | 1088 | result = nodeid_to_addr(con->nodeid, &saddr, NULL, false); |
---|
1161 | 1089 | if (result < 0) { |
---|
.. | .. |
---|
1165 | 1093 | |
---|
1166 | 1094 | con->rx_action = receive_from_sock; |
---|
1167 | 1095 | con->connect_action = tcp_connect_to_sock; |
---|
| 1096 | + con->shutdown_action = dlm_tcp_shutdown; |
---|
1168 | 1097 | add_sock(sock, con); |
---|
1169 | 1098 | |
---|
1170 | 1099 | /* Bind to our cluster-known address connecting to avoid |
---|
.. | .. |
---|
1183 | 1112 | log_print("connecting to %d", con->nodeid); |
---|
1184 | 1113 | |
---|
1185 | 1114 | /* Turn off Nagle's algorithm */ |
---|
1186 | | - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, |
---|
1187 | | - sizeof(one)); |
---|
| 1115 | + tcp_sock_set_nodelay(sock->sk); |
---|
1188 | 1116 | |
---|
1189 | 1117 | result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, |
---|
1190 | 1118 | O_NONBLOCK); |
---|
.. | .. |
---|
1226 | 1154 | { |
---|
1227 | 1155 | struct socket *sock = NULL; |
---|
1228 | 1156 | int result = 0; |
---|
1229 | | - int one = 1; |
---|
1230 | 1157 | int addr_len; |
---|
1231 | 1158 | |
---|
1232 | 1159 | if (dlm_local_addr[0]->ss_family == AF_INET) |
---|
.. | .. |
---|
1242 | 1169 | goto create_out; |
---|
1243 | 1170 | } |
---|
1244 | 1171 | |
---|
| 1172 | + sock_set_mark(sock->sk, dlm_config.ci_mark); |
---|
| 1173 | + |
---|
1245 | 1174 | /* Turn off Nagle's algorithm */ |
---|
1246 | | - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, |
---|
1247 | | - sizeof(one)); |
---|
| 1175 | + tcp_sock_set_nodelay(sock->sk); |
---|
1248 | 1176 | |
---|
1249 | | - result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, |
---|
1250 | | - (char *)&one, sizeof(one)); |
---|
| 1177 | + sock_set_reuseaddr(sock->sk); |
---|
1251 | 1178 | |
---|
1252 | | - if (result < 0) { |
---|
1253 | | - log_print("Failed to set SO_REUSEADDR on socket: %d", result); |
---|
1254 | | - } |
---|
1255 | 1179 | write_lock_bh(&sock->sk->sk_callback_lock); |
---|
1256 | 1180 | sock->sk->sk_user_data = con; |
---|
1257 | 1181 | save_listen_callbacks(sock); |
---|
1258 | | - con->rx_action = tcp_accept_from_sock; |
---|
| 1182 | + con->rx_action = accept_from_sock; |
---|
1259 | 1183 | con->connect_action = tcp_connect_to_sock; |
---|
1260 | 1184 | write_unlock_bh(&sock->sk->sk_callback_lock); |
---|
1261 | 1185 | |
---|
.. | .. |
---|
1269 | 1193 | con->sock = NULL; |
---|
1270 | 1194 | goto create_out; |
---|
1271 | 1195 | } |
---|
1272 | | - result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, |
---|
1273 | | - (char *)&one, sizeof(one)); |
---|
1274 | | - if (result < 0) { |
---|
1275 | | - log_print("Set keepalive failed: %d", result); |
---|
1276 | | - } |
---|
| 1196 | + sock_set_keepalive(sock->sk); |
---|
1277 | 1197 | |
---|
1278 | 1198 | result = sock->ops->listen(sock, 5); |
---|
1279 | 1199 | if (result < 0) { |
---|
.. | .. |
---|
1305 | 1225 | } |
---|
1306 | 1226 | } |
---|
1307 | 1227 | |
---|
| 1228 | +static void deinit_local(void) |
---|
| 1229 | +{ |
---|
| 1230 | + int i; |
---|
| 1231 | + |
---|
| 1232 | + for (i = 0; i < dlm_local_count; i++) |
---|
| 1233 | + kfree(dlm_local_addr[i]); |
---|
| 1234 | +} |
---|
| 1235 | + |
---|
1308 | 1236 | /* Initialise SCTP socket and bind to all interfaces */ |
---|
1309 | 1237 | static int sctp_listen_for_all(void) |
---|
1310 | 1238 | { |
---|
1311 | 1239 | struct socket *sock = NULL; |
---|
1312 | 1240 | int result = -EINVAL; |
---|
1313 | 1241 | struct connection *con = nodeid2con(0, GFP_NOFS); |
---|
1314 | | - int bufsize = NEEDED_RMEM; |
---|
1315 | | - int one = 1; |
---|
1316 | 1242 | |
---|
1317 | 1243 | if (!con) |
---|
1318 | 1244 | return -ENOMEM; |
---|
.. | .. |
---|
1326 | 1252 | goto out; |
---|
1327 | 1253 | } |
---|
1328 | 1254 | |
---|
1329 | | - result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE, |
---|
1330 | | - (char *)&bufsize, sizeof(bufsize)); |
---|
1331 | | - if (result) |
---|
1332 | | - log_print("Error increasing buffer space on socket %d", result); |
---|
1333 | | - |
---|
1334 | | - result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, |
---|
1335 | | - sizeof(one)); |
---|
1336 | | - if (result < 0) |
---|
1337 | | - log_print("Could not set SCTP NODELAY error %d\n", result); |
---|
| 1255 | + sock_set_rcvbuf(sock->sk, NEEDED_RMEM); |
---|
| 1256 | + sock_set_mark(sock->sk, dlm_config.ci_mark); |
---|
| 1257 | + sctp_sock_set_nodelay(sock->sk); |
---|
1338 | 1258 | |
---|
1339 | 1259 | write_lock_bh(&sock->sk->sk_callback_lock); |
---|
1340 | 1260 | /* Init con struct */ |
---|
.. | .. |
---|
1342 | 1262 | save_listen_callbacks(sock); |
---|
1343 | 1263 | con->sock = sock; |
---|
1344 | 1264 | con->sock->sk->sk_data_ready = lowcomms_data_ready; |
---|
1345 | | - con->rx_action = sctp_accept_from_sock; |
---|
| 1265 | + con->rx_action = accept_from_sock; |
---|
1346 | 1266 | con->connect_action = sctp_connect_to_sock; |
---|
1347 | 1267 | |
---|
1348 | 1268 | write_unlock_bh(&sock->sk->sk_callback_lock); |
---|
.. | .. |
---|
1545 | 1465 | |
---|
1546 | 1466 | send_error: |
---|
1547 | 1467 | mutex_unlock(&con->sock_mutex); |
---|
1548 | | - close_connection(con, true, false, true); |
---|
| 1468 | + close_connection(con, false, false, true); |
---|
1549 | 1469 | /* Requeue the send work. When the work daemon runs again, it will try |
---|
1550 | 1470 | a new connection, then call this function again. */ |
---|
1551 | 1471 | queue_work(send_workqueue, &con->swork); |
---|
.. | .. |
---|
1621 | 1541 | send_to_sock(con); |
---|
1622 | 1542 | } |
---|
1623 | 1543 | |
---|
1624 | | - |
---|
1625 | | -/* Discard all entries on the write queues */ |
---|
1626 | | -static void clean_writequeues(void) |
---|
1627 | | -{ |
---|
1628 | | - foreach_conn(clean_one_writequeue); |
---|
1629 | | -} |
---|
1630 | | - |
---|
1631 | 1544 | static void work_stop(void) |
---|
1632 | 1545 | { |
---|
1633 | 1546 | if (recv_workqueue) |
---|
.. | .. |
---|
1677 | 1590 | _stop_conn(con, true); |
---|
1678 | 1591 | } |
---|
1679 | 1592 | |
---|
| 1593 | +static void shutdown_conn(struct connection *con) |
---|
| 1594 | +{ |
---|
| 1595 | + if (con->shutdown_action) |
---|
| 1596 | + con->shutdown_action(con); |
---|
| 1597 | +} |
---|
| 1598 | + |
---|
| 1599 | +static void connection_release(struct rcu_head *rcu) |
---|
| 1600 | +{ |
---|
| 1601 | + struct connection *con = container_of(rcu, struct connection, rcu); |
---|
| 1602 | + |
---|
| 1603 | + kfree(con->rx_buf); |
---|
| 1604 | + kfree(con); |
---|
| 1605 | +} |
---|
| 1606 | + |
---|
/*
 * Tear down and free one connection (and its paired othercon, if any).
 *
 * Ordering matters: the socket is closed first, then the connection is
 * unlinked from the hash under connections_lock with hlist_del_rcu() so
 * concurrent RCU walkers stop finding it, and only then are the structs
 * handed to call_rcu() so the actual kfree happens after a grace period
 * (see connection_release).
 */
static void free_conn(struct connection *con)
{
	close_connection(con, true, true, true);
	spin_lock(&connections_lock);
	hlist_del_rcu(&con->list);
	spin_unlock(&connections_lock);
	if (con->othercon) {
		/* drain queued writes before scheduling the free */
		clean_one_writequeue(con->othercon);
		call_rcu(&con->othercon->rcu, connection_release);
	}
	clean_one_writequeue(con);
	call_rcu(&con->rcu, connection_release);
}
---|
1688 | 1620 | |
---|
1689 | 1621 | static void work_flush(void) |
---|
1690 | 1622 | { |
---|
1691 | | - int ok; |
---|
| 1623 | + int ok, idx; |
---|
1692 | 1624 | int i; |
---|
1693 | | - struct hlist_node *n; |
---|
1694 | 1625 | struct connection *con; |
---|
1695 | 1626 | |
---|
1696 | | - if (recv_workqueue) |
---|
1697 | | - flush_workqueue(recv_workqueue); |
---|
1698 | | - if (send_workqueue) |
---|
1699 | | - flush_workqueue(send_workqueue); |
---|
1700 | 1627 | do { |
---|
1701 | 1628 | ok = 1; |
---|
1702 | 1629 | foreach_conn(stop_conn); |
---|
.. | .. |
---|
1704 | 1631 | flush_workqueue(recv_workqueue); |
---|
1705 | 1632 | if (send_workqueue) |
---|
1706 | 1633 | flush_workqueue(send_workqueue); |
---|
| 1634 | + idx = srcu_read_lock(&connections_srcu); |
---|
1707 | 1635 | for (i = 0; i < CONN_HASH_SIZE && ok; i++) { |
---|
1708 | | - hlist_for_each_entry_safe(con, n, |
---|
1709 | | - &connection_hash[i], list) { |
---|
| 1636 | + hlist_for_each_entry_rcu(con, &connection_hash[i], |
---|
| 1637 | + list) { |
---|
1710 | 1638 | ok &= test_bit(CF_READ_PENDING, &con->flags); |
---|
1711 | 1639 | ok &= test_bit(CF_WRITE_PENDING, &con->flags); |
---|
1712 | 1640 | if (con->othercon) { |
---|
.. | .. |
---|
1717 | 1645 | } |
---|
1718 | 1646 | } |
---|
1719 | 1647 | } |
---|
| 1648 | + srcu_read_unlock(&connections_srcu, idx); |
---|
1720 | 1649 | } while (!ok); |
---|
1721 | 1650 | } |
---|
1722 | 1651 | |
---|
.. | .. |
---|
1725 | 1654 | /* Set all the flags to prevent any |
---|
1726 | 1655 | socket activity. |
---|
1727 | 1656 | */ |
---|
1728 | | - mutex_lock(&connections_lock); |
---|
1729 | 1657 | dlm_allow_conn = 0; |
---|
1730 | | - mutex_unlock(&connections_lock); |
---|
| 1658 | + |
---|
| 1659 | + if (recv_workqueue) |
---|
| 1660 | + flush_workqueue(recv_workqueue); |
---|
| 1661 | + if (send_workqueue) |
---|
| 1662 | + flush_workqueue(send_workqueue); |
---|
| 1663 | + |
---|
| 1664 | + foreach_conn(shutdown_conn); |
---|
1731 | 1665 | work_flush(); |
---|
1732 | | - clean_writequeues(); |
---|
1733 | 1666 | foreach_conn(free_conn); |
---|
1734 | 1667 | work_stop(); |
---|
1735 | | - |
---|
1736 | | - kmem_cache_destroy(con_cache); |
---|
| 1668 | + deinit_local(); |
---|
1737 | 1669 | } |
---|
1738 | 1670 | |
---|
1739 | 1671 | int dlm_lowcomms_start(void) |
---|
.. | .. |
---|
1752 | 1684 | goto fail; |
---|
1753 | 1685 | } |
---|
1754 | 1686 | |
---|
1755 | | - error = -ENOMEM; |
---|
1756 | | - con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection), |
---|
1757 | | - __alignof__(struct connection), 0, |
---|
1758 | | - NULL); |
---|
1759 | | - if (!con_cache) |
---|
1760 | | - goto fail; |
---|
1761 | | - |
---|
1762 | 1687 | error = work_start(); |
---|
1763 | 1688 | if (error) |
---|
1764 | | - goto fail_destroy; |
---|
| 1689 | + goto fail; |
---|
1765 | 1690 | |
---|
1766 | 1691 | dlm_allow_conn = 1; |
---|
1767 | 1692 | |
---|
.. | .. |
---|
1778 | 1703 | fail_unlisten: |
---|
1779 | 1704 | dlm_allow_conn = 0; |
---|
1780 | 1705 | con = nodeid2con(0,0); |
---|
1781 | | - if (con) { |
---|
1782 | | - close_connection(con, false, true, true); |
---|
1783 | | - kmem_cache_free(con_cache, con); |
---|
1784 | | - } |
---|
1785 | | -fail_destroy: |
---|
1786 | | - kmem_cache_destroy(con_cache); |
---|
| 1706 | + if (con) |
---|
| 1707 | + free_conn(con); |
---|
1787 | 1708 | fail: |
---|
1788 | 1709 | return error; |
---|
1789 | 1710 | } |
---|