| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /****************************************************************************** |
|---|
| 2 | 3 | ******************************************************************************* |
|---|
| 3 | 4 | ** |
|---|
| 4 | 5 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
|---|
| 5 | 6 | ** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. |
|---|
| 6 | 7 | ** |
|---|
| 7 | | -** This copyrighted material is made available to anyone wishing to use, |
|---|
| 8 | | -** modify, copy, or redistribute it subject to the terms and conditions |
|---|
| 9 | | -** of the GNU General Public License v.2. |
|---|
| 10 | 8 | ** |
|---|
| 11 | 9 | ******************************************************************************* |
|---|
| 12 | 10 | ******************************************************************************/ |
|---|
| .. | .. |
|---|
| 65 | 63 | |
|---|
| 66 | 64 | /* Number of messages to send before rescheduling */ |
|---|
| 67 | 65 | #define MAX_SEND_MSG_COUNT 25 |
|---|
| 68 | | - |
|---|
| 69 | | -struct cbuf { |
|---|
| 70 | | - unsigned int base; |
|---|
| 71 | | - unsigned int len; |
|---|
| 72 | | - unsigned int mask; |
|---|
| 73 | | -}; |
|---|
| 74 | | - |
|---|
| 75 | | -static void cbuf_add(struct cbuf *cb, int n) |
|---|
| 76 | | -{ |
|---|
| 77 | | - cb->len += n; |
|---|
| 78 | | -} |
|---|
| 79 | | - |
|---|
| 80 | | -static int cbuf_data(struct cbuf *cb) |
|---|
| 81 | | -{ |
|---|
| 82 | | - return ((cb->base + cb->len) & cb->mask); |
|---|
| 83 | | -} |
|---|
| 84 | | - |
|---|
| 85 | | -static void cbuf_init(struct cbuf *cb, int size) |
|---|
| 86 | | -{ |
|---|
| 87 | | - cb->base = cb->len = 0; |
|---|
| 88 | | - cb->mask = size-1; |
|---|
| 89 | | -} |
|---|
| 90 | | - |
|---|
| 91 | | -static void cbuf_eat(struct cbuf *cb, int n) |
|---|
| 92 | | -{ |
|---|
| 93 | | - cb->len -= n; |
|---|
| 94 | | - cb->base += n; |
|---|
| 95 | | - cb->base &= cb->mask; |
|---|
| 96 | | -} |
|---|
| 97 | | - |
|---|
| 98 | | -static bool cbuf_empty(struct cbuf *cb) |
|---|
| 99 | | -{ |
|---|
| 100 | | - return cb->len == 0; |
|---|
| 101 | | -} |
|---|
| 66 | +#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000) |
|---|
| 102 | 67 | |
|---|
| 103 | 68 | struct connection { |
|---|
| 104 | 69 | struct socket *sock; /* NULL if not connected */ |
|---|
| .. | .. |
|---|
| 112 | 77 | #define CF_CLOSE 6 |
|---|
| 113 | 78 | #define CF_APP_LIMITED 7 |
|---|
| 114 | 79 | #define CF_CLOSING 8 |
|---|
| 80 | +#define CF_SHUTDOWN 9 |
|---|
| 115 | 81 | struct list_head writequeue; /* List of outgoing writequeue_entries */ |
|---|
| 116 | 82 | spinlock_t writequeue_lock; |
|---|
| 117 | 83 | int (*rx_action) (struct connection *); /* What to do when active */ |
|---|
| 118 | 84 | void (*connect_action) (struct connection *); /* What to do to connect */ |
|---|
| 119 | | - struct page *rx_page; |
|---|
| 120 | | - struct cbuf cb; |
|---|
| 85 | + void (*shutdown_action)(struct connection *con); /* What to do to shutdown */ |
|---|
| 121 | 86 | int retries; |
|---|
| 122 | 87 | #define MAX_CONNECT_RETRIES 3 |
|---|
| 123 | 88 | struct hlist_node list; |
|---|
| 124 | 89 | struct connection *othercon; |
|---|
| 125 | 90 | struct work_struct rwork; /* Receive workqueue */ |
|---|
| 126 | 91 | struct work_struct swork; /* Send workqueue */ |
|---|
| 92 | + wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */ |
|---|
| 93 | + unsigned char *rx_buf; |
|---|
| 94 | + int rx_buflen; |
|---|
| 95 | + int rx_leftover; |
|---|
| 96 | + struct rcu_head rcu; |
|---|
| 127 | 97 | }; |
|---|
| 128 | 98 | #define sock2con(x) ((struct connection *)(x)->sk_user_data) |
|---|
| 129 | 99 | |
|---|
| .. | .. |
|---|
| 165 | 135 | static struct workqueue_struct *send_workqueue; |
|---|
| 166 | 136 | |
|---|
| 167 | 137 | static struct hlist_head connection_hash[CONN_HASH_SIZE]; |
|---|
| 168 | | -static DEFINE_MUTEX(connections_lock); |
|---|
| 169 | | -static struct kmem_cache *con_cache; |
|---|
| 138 | +static DEFINE_SPINLOCK(connections_lock); |
|---|
| 139 | +DEFINE_STATIC_SRCU(connections_srcu); |
|---|
| 170 | 140 | |
|---|
| 171 | 141 | static void process_recv_sockets(struct work_struct *work); |
|---|
| 172 | 142 | static void process_send_sockets(struct work_struct *work); |
|---|
| .. | .. |
|---|
| 182 | 152 | |
|---|
| 183 | 153 | static struct connection *__find_con(int nodeid) |
|---|
| 184 | 154 | { |
|---|
| 185 | | - int r; |
|---|
| 155 | + int r, idx; |
|---|
| 186 | 156 | struct connection *con; |
|---|
| 187 | 157 | |
|---|
| 188 | 158 | r = nodeid_hash(nodeid); |
|---|
| 189 | 159 | |
|---|
| 190 | | - hlist_for_each_entry(con, &connection_hash[r], list) { |
|---|
| 191 | | - if (con->nodeid == nodeid) |
|---|
| 160 | + idx = srcu_read_lock(&connections_srcu); |
|---|
| 161 | + hlist_for_each_entry_rcu(con, &connection_hash[r], list) { |
|---|
| 162 | + if (con->nodeid == nodeid) { |
|---|
| 163 | + srcu_read_unlock(&connections_srcu, idx); |
|---|
| 192 | 164 | return con; |
|---|
| 165 | + } |
|---|
| 193 | 166 | } |
|---|
| 167 | + srcu_read_unlock(&connections_srcu, idx); |
|---|
| 168 | + |
|---|
| 194 | 169 | return NULL; |
|---|
| 195 | 170 | } |
|---|
| 196 | 171 | |
|---|
| .. | .. |
|---|
| 198 | 173 | * If 'allocation' is zero then we don't attempt to create a new |
|---|
| 199 | 174 | * connection structure for this node. |
|---|
| 200 | 175 | */ |
|---|
| 201 | | -static struct connection *__nodeid2con(int nodeid, gfp_t alloc) |
|---|
| 176 | +static struct connection *nodeid2con(int nodeid, gfp_t alloc) |
|---|
| 202 | 177 | { |
|---|
| 203 | | - struct connection *con = NULL; |
|---|
| 178 | + struct connection *con, *tmp; |
|---|
| 204 | 179 | int r; |
|---|
| 205 | 180 | |
|---|
| 206 | 181 | con = __find_con(nodeid); |
|---|
| 207 | 182 | if (con || !alloc) |
|---|
| 208 | 183 | return con; |
|---|
| 209 | 184 | |
|---|
| 210 | | - con = kmem_cache_zalloc(con_cache, alloc); |
|---|
| 185 | + con = kzalloc(sizeof(*con), alloc); |
|---|
| 211 | 186 | if (!con) |
|---|
| 212 | 187 | return NULL; |
|---|
| 213 | 188 | |
|---|
| 214 | | - r = nodeid_hash(nodeid); |
|---|
| 215 | | - hlist_add_head(&con->list, &connection_hash[r]); |
|---|
| 189 | + con->rx_buflen = dlm_config.ci_buffer_size; |
|---|
| 190 | + con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS); |
|---|
| 191 | + if (!con->rx_buf) { |
|---|
| 192 | + kfree(con); |
|---|
| 193 | + return NULL; |
|---|
| 194 | + } |
|---|
| 216 | 195 | |
|---|
| 217 | 196 | con->nodeid = nodeid; |
|---|
| 218 | 197 | mutex_init(&con->sock_mutex); |
|---|
| .. | .. |
|---|
| 220 | 199 | spin_lock_init(&con->writequeue_lock); |
|---|
| 221 | 200 | INIT_WORK(&con->swork, process_send_sockets); |
|---|
| 222 | 201 | INIT_WORK(&con->rwork, process_recv_sockets); |
|---|
| 202 | + init_waitqueue_head(&con->shutdown_wait); |
|---|
| 223 | 203 | |
|---|
| 224 | 204 | /* Setup action pointers for child sockets */ |
|---|
| 225 | 205 | if (con->nodeid) { |
|---|
| .. | .. |
|---|
| 230 | 210 | con->rx_action = zerocon->rx_action; |
|---|
| 231 | 211 | } |
|---|
| 232 | 212 | |
|---|
| 213 | + r = nodeid_hash(nodeid); |
|---|
| 214 | + |
|---|
| 215 | + spin_lock(&connections_lock); |
|---|
| 216 | + /* Because multiple workqueues/threads call this function, it can |
|---|
| 217 | + * race on multiple CPUs. Instead of locking the hot path __find_con(), |
|---|
| 218 | + * we just check again, in the rare case of a recently added node, |
|---|
| 219 | + * under protection of connections_lock. If this is the case, we |
|---|
| 220 | + * abort our connection creation and return the existing connection. |
|---|
| 221 | + */ |
|---|
| 222 | + tmp = __find_con(nodeid); |
|---|
| 223 | + if (tmp) { |
|---|
| 224 | + spin_unlock(&connections_lock); |
|---|
| 225 | + kfree(con->rx_buf); |
|---|
| 226 | + kfree(con); |
|---|
| 227 | + return tmp; |
|---|
| 228 | + } |
|---|
| 229 | + |
|---|
| 230 | + hlist_add_head_rcu(&con->list, &connection_hash[r]); |
|---|
| 231 | + spin_unlock(&connections_lock); |
|---|
| 232 | + |
|---|
| 233 | 233 | return con; |
|---|
| 234 | 234 | } |
|---|
| 235 | 235 | |
|---|
| 236 | 236 | /* Loop round all connections */ |
|---|
| 237 | 237 | static void foreach_conn(void (*conn_func)(struct connection *c)) |
|---|
| 238 | 238 | { |
|---|
| 239 | | - int i; |
|---|
| 240 | | - struct hlist_node *n; |
|---|
| 239 | + int i, idx; |
|---|
| 241 | 240 | struct connection *con; |
|---|
| 242 | 241 | |
|---|
| 242 | + idx = srcu_read_lock(&connections_srcu); |
|---|
| 243 | 243 | for (i = 0; i < CONN_HASH_SIZE; i++) { |
|---|
| 244 | | - hlist_for_each_entry_safe(con, n, &connection_hash[i], list) |
|---|
| 244 | + hlist_for_each_entry_rcu(con, &connection_hash[i], list) |
|---|
| 245 | 245 | conn_func(con); |
|---|
| 246 | 246 | } |
|---|
| 247 | | -} |
|---|
| 248 | | - |
|---|
| 249 | | -static struct connection *nodeid2con(int nodeid, gfp_t allocation) |
|---|
| 250 | | -{ |
|---|
| 251 | | - struct connection *con; |
|---|
| 252 | | - |
|---|
| 253 | | - mutex_lock(&connections_lock); |
|---|
| 254 | | - con = __nodeid2con(nodeid, allocation); |
|---|
| 255 | | - mutex_unlock(&connections_lock); |
|---|
| 256 | | - |
|---|
| 257 | | - return con; |
|---|
| 247 | + srcu_read_unlock(&connections_srcu, idx); |
|---|
| 258 | 248 | } |
|---|
| 259 | 249 | |
|---|
| 260 | 250 | static struct dlm_node_addr *find_node_addr(int nodeid) |
|---|
| .. | .. |
|---|
| 481 | 471 | static void lowcomms_error_report(struct sock *sk) |
|---|
| 482 | 472 | { |
|---|
| 483 | 473 | struct connection *con; |
|---|
| 484 | | - struct sockaddr_storage saddr; |
|---|
| 485 | 474 | void (*orig_report)(struct sock *) = NULL; |
|---|
| 475 | + struct inet_sock *inet; |
|---|
| 486 | 476 | |
|---|
| 487 | 477 | read_lock_bh(&sk->sk_callback_lock); |
|---|
| 488 | 478 | con = sock2con(sk); |
|---|
| .. | .. |
|---|
| 490 | 480 | goto out; |
|---|
| 491 | 481 | |
|---|
| 492 | 482 | orig_report = listen_sock.sk_error_report; |
|---|
| 493 | | - if (con->sock == NULL || |
|---|
| 494 | | - kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) { |
|---|
| 495 | | - printk_ratelimited(KERN_ERR "dlm: node %d: socket error " |
|---|
| 496 | | - "sending to node %d, port %d, " |
|---|
| 497 | | - "sk_err=%d/%d\n", dlm_our_nodeid(), |
|---|
| 498 | | - con->nodeid, dlm_config.ci_tcp_port, |
|---|
| 499 | | - sk->sk_err, sk->sk_err_soft); |
|---|
| 500 | | - } else if (saddr.ss_family == AF_INET) { |
|---|
| 501 | | - struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr; |
|---|
| 502 | 483 | |
|---|
| 484 | + inet = inet_sk(sk); |
|---|
| 485 | + switch (sk->sk_family) { |
|---|
| 486 | + case AF_INET: |
|---|
| 503 | 487 | printk_ratelimited(KERN_ERR "dlm: node %d: socket error " |
|---|
| 504 | | - "sending to node %d at %pI4, port %d, " |
|---|
| 488 | + "sending to node %d at %pI4, dport %d, " |
|---|
| 505 | 489 | "sk_err=%d/%d\n", dlm_our_nodeid(), |
|---|
| 506 | | - con->nodeid, &sin4->sin_addr.s_addr, |
|---|
| 507 | | - dlm_config.ci_tcp_port, sk->sk_err, |
|---|
| 490 | + con->nodeid, &inet->inet_daddr, |
|---|
| 491 | + ntohs(inet->inet_dport), sk->sk_err, |
|---|
| 508 | 492 | sk->sk_err_soft); |
|---|
| 509 | | - } else { |
|---|
| 510 | | - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr; |
|---|
| 511 | | - |
|---|
| 493 | + break; |
|---|
| 494 | +#if IS_ENABLED(CONFIG_IPV6) |
|---|
| 495 | + case AF_INET6: |
|---|
| 512 | 496 | printk_ratelimited(KERN_ERR "dlm: node %d: socket error " |
|---|
| 513 | | - "sending to node %d at %u.%u.%u.%u, " |
|---|
| 514 | | - "port %d, sk_err=%d/%d\n", dlm_our_nodeid(), |
|---|
| 515 | | - con->nodeid, sin6->sin6_addr.s6_addr32[0], |
|---|
| 516 | | - sin6->sin6_addr.s6_addr32[1], |
|---|
| 517 | | - sin6->sin6_addr.s6_addr32[2], |
|---|
| 518 | | - sin6->sin6_addr.s6_addr32[3], |
|---|
| 519 | | - dlm_config.ci_tcp_port, sk->sk_err, |
|---|
| 497 | + "sending to node %d at %pI6c, " |
|---|
| 498 | + "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(), |
|---|
| 499 | + con->nodeid, &sk->sk_v6_daddr, |
|---|
| 500 | + ntohs(inet->inet_dport), sk->sk_err, |
|---|
| 520 | 501 | sk->sk_err_soft); |
|---|
| 502 | + break; |
|---|
| 503 | +#endif |
|---|
| 504 | + default: |
|---|
| 505 | + printk_ratelimited(KERN_ERR "dlm: node %d: socket error " |
|---|
| 506 | + "invalid socket family %d set, " |
|---|
| 507 | + "sk_err=%d/%d\n", dlm_our_nodeid(), |
|---|
| 508 | + sk->sk_family, sk->sk_err, sk->sk_err_soft); |
|---|
| 509 | + goto out; |
|---|
| 521 | 510 | } |
|---|
| 522 | 511 | out: |
|---|
| 523 | 512 | read_unlock_bh(&sk->sk_callback_lock); |
|---|
| .. | .. |
|---|
| 611 | 600 | /* Will only re-enter once. */ |
|---|
| 612 | 601 | close_connection(con->othercon, false, tx, rx); |
|---|
| 613 | 602 | } |
|---|
| 614 | | - if (con->rx_page) { |
|---|
| 615 | | - __free_page(con->rx_page); |
|---|
| 616 | | - con->rx_page = NULL; |
|---|
| 617 | | - } |
|---|
| 618 | 603 | |
|---|
| 604 | + con->rx_leftover = 0; |
|---|
| 619 | 605 | con->retries = 0; |
|---|
| 620 | 606 | mutex_unlock(&con->sock_mutex); |
|---|
| 621 | 607 | clear_bit(CF_CLOSING, &con->flags); |
|---|
| 622 | 608 | } |
|---|
| 623 | 609 | |
|---|
| 610 | +static void shutdown_connection(struct connection *con) |
|---|
| 611 | +{ |
|---|
| 612 | + int ret; |
|---|
| 613 | + |
|---|
| 614 | + flush_work(&con->swork); |
|---|
| 615 | + |
|---|
| 616 | + mutex_lock(&con->sock_mutex); |
|---|
| 617 | + /* nothing to shutdown */ |
|---|
| 618 | + if (!con->sock) { |
|---|
| 619 | + mutex_unlock(&con->sock_mutex); |
|---|
| 620 | + return; |
|---|
| 621 | + } |
|---|
| 622 | + |
|---|
| 623 | + set_bit(CF_SHUTDOWN, &con->flags); |
|---|
| 624 | + ret = kernel_sock_shutdown(con->sock, SHUT_WR); |
|---|
| 625 | + mutex_unlock(&con->sock_mutex); |
|---|
| 626 | + if (ret) { |
|---|
| 627 | + log_print("Connection %p failed to shutdown: %d will force close", |
|---|
| 628 | + con, ret); |
|---|
| 629 | + goto force_close; |
|---|
| 630 | + } else { |
|---|
| 631 | + ret = wait_event_timeout(con->shutdown_wait, |
|---|
| 632 | + !test_bit(CF_SHUTDOWN, &con->flags), |
|---|
| 633 | + DLM_SHUTDOWN_WAIT_TIMEOUT); |
|---|
| 634 | + if (ret == 0) { |
|---|
| 635 | + log_print("Connection %p shutdown timed out, will force close", |
|---|
| 636 | + con); |
|---|
| 637 | + goto force_close; |
|---|
| 638 | + } |
|---|
| 639 | + } |
|---|
| 640 | + |
|---|
| 641 | + return; |
|---|
| 642 | + |
|---|
| 643 | +force_close: |
|---|
| 644 | + clear_bit(CF_SHUTDOWN, &con->flags); |
|---|
| 645 | + close_connection(con, false, true, true); |
|---|
| 646 | +} |
|---|
| 647 | + |
|---|
| 648 | +static void dlm_tcp_shutdown(struct connection *con) |
|---|
| 649 | +{ |
|---|
| 650 | + if (con->othercon) |
|---|
| 651 | + shutdown_connection(con->othercon); |
|---|
| 652 | + shutdown_connection(con); |
|---|
| 653 | +} |
|---|
| 654 | + |
|---|
| 655 | +static int con_realloc_receive_buf(struct connection *con, int newlen) |
|---|
| 656 | +{ |
|---|
| 657 | + unsigned char *newbuf; |
|---|
| 658 | + |
|---|
| 659 | + newbuf = kmalloc(newlen, GFP_NOFS); |
|---|
| 660 | + if (!newbuf) |
|---|
| 661 | + return -ENOMEM; |
|---|
| 662 | + |
|---|
| 663 | + /* copy any leftover from last receive */ |
|---|
| 664 | + if (con->rx_leftover) |
|---|
| 665 | + memmove(newbuf, con->rx_buf, con->rx_leftover); |
|---|
| 666 | + |
|---|
| 667 | + /* swap to new buffer space */ |
|---|
| 668 | + kfree(con->rx_buf); |
|---|
| 669 | + con->rx_buflen = newlen; |
|---|
| 670 | + con->rx_buf = newbuf; |
|---|
| 671 | + |
|---|
| 672 | + return 0; |
|---|
| 673 | +} |
|---|
| 674 | + |
|---|
| 624 | 675 | /* Data received from remote end */ |
|---|
| 625 | 676 | static int receive_from_sock(struct connection *con) |
|---|
| 626 | 677 | { |
|---|
| 627 | | - int ret = 0; |
|---|
| 628 | | - struct msghdr msg = {}; |
|---|
| 629 | | - struct kvec iov[2]; |
|---|
| 630 | | - unsigned len; |
|---|
| 631 | | - int r; |
|---|
| 632 | 678 | int call_again_soon = 0; |
|---|
| 633 | | - int nvec; |
|---|
| 679 | + struct msghdr msg; |
|---|
| 680 | + struct kvec iov; |
|---|
| 681 | + int ret, buflen; |
|---|
| 634 | 682 | |
|---|
| 635 | 683 | mutex_lock(&con->sock_mutex); |
|---|
| 636 | 684 | |
|---|
| .. | .. |
|---|
| 638 | 686 | ret = -EAGAIN; |
|---|
| 639 | 687 | goto out_close; |
|---|
| 640 | 688 | } |
|---|
| 689 | + |
|---|
| 641 | 690 | if (con->nodeid == 0) { |
|---|
| 642 | 691 | ret = -EINVAL; |
|---|
| 643 | 692 | goto out_close; |
|---|
| 644 | 693 | } |
|---|
| 645 | 694 | |
|---|
| 646 | | - if (con->rx_page == NULL) { |
|---|
| 647 | | - /* |
|---|
| 648 | | - * This doesn't need to be atomic, but I think it should |
|---|
| 649 | | - * improve performance if it is. |
|---|
| 650 | | - */ |
|---|
| 651 | | - con->rx_page = alloc_page(GFP_ATOMIC); |
|---|
| 652 | | - if (con->rx_page == NULL) |
|---|
| 695 | + /* reallocate the receive buffer if the configured buffer size changed */ |
|---|
| 696 | + buflen = dlm_config.ci_buffer_size; |
|---|
| 697 | + if (con->rx_buflen != buflen && con->rx_leftover <= buflen) { |
|---|
| 698 | + ret = con_realloc_receive_buf(con, buflen); |
|---|
| 699 | + if (ret < 0) |
|---|
| 653 | 700 | goto out_resched; |
|---|
| 654 | | - cbuf_init(&con->cb, PAGE_SIZE); |
|---|
| 655 | 701 | } |
|---|
| 656 | 702 | |
|---|
| 657 | | - /* |
|---|
| 658 | | - * iov[0] is the bit of the circular buffer between the current end |
|---|
| 659 | | - * point (cb.base + cb.len) and the end of the buffer. |
|---|
| 703 | + /* calculate the new buffer parameters, accounting for the last |
|---|
| 704 | + * receive and any leftover bytes |
|---|
| 660 | 705 | */ |
|---|
| 661 | | - iov[0].iov_len = con->cb.base - cbuf_data(&con->cb); |
|---|
| 662 | | - iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb); |
|---|
| 663 | | - iov[1].iov_len = 0; |
|---|
| 664 | | - nvec = 1; |
|---|
| 706 | + iov.iov_base = con->rx_buf + con->rx_leftover; |
|---|
| 707 | + iov.iov_len = con->rx_buflen - con->rx_leftover; |
|---|
| 665 | 708 | |
|---|
| 666 | | - /* |
|---|
| 667 | | - * iov[1] is the bit of the circular buffer between the start of the |
|---|
| 668 | | - * buffer and the start of the currently used section (cb.base) |
|---|
| 669 | | - */ |
|---|
| 670 | | - if (cbuf_data(&con->cb) >= con->cb.base) { |
|---|
| 671 | | - iov[0].iov_len = PAGE_SIZE - cbuf_data(&con->cb); |
|---|
| 672 | | - iov[1].iov_len = con->cb.base; |
|---|
| 673 | | - iov[1].iov_base = page_address(con->rx_page); |
|---|
| 674 | | - nvec = 2; |
|---|
| 675 | | - } |
|---|
| 676 | | - len = iov[0].iov_len + iov[1].iov_len; |
|---|
| 677 | | - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nvec, len); |
|---|
| 678 | | - |
|---|
| 679 | | - r = ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT | MSG_NOSIGNAL); |
|---|
| 709 | + memset(&msg, 0, sizeof(msg)); |
|---|
| 710 | + msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; |
|---|
| 711 | + ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, |
|---|
| 712 | + msg.msg_flags); |
|---|
| 680 | 713 | if (ret <= 0) |
|---|
| 681 | 714 | goto out_close; |
|---|
| 682 | | - else if (ret == len) |
|---|
| 715 | + else if (ret == iov.iov_len) |
|---|
| 683 | 716 | call_again_soon = 1; |
|---|
| 684 | 717 | |
|---|
| 685 | | - cbuf_add(&con->cb, ret); |
|---|
| 686 | | - ret = dlm_process_incoming_buffer(con->nodeid, |
|---|
| 687 | | - page_address(con->rx_page), |
|---|
| 688 | | - con->cb.base, con->cb.len, |
|---|
| 689 | | - PAGE_SIZE); |
|---|
| 690 | | - if (ret == -EBADMSG) { |
|---|
| 691 | | - log_print("lowcomms: addr=%p, base=%u, len=%u, read=%d", |
|---|
| 692 | | - page_address(con->rx_page), con->cb.base, |
|---|
| 693 | | - con->cb.len, r); |
|---|
| 694 | | - } |
|---|
| 718 | + /* new buflen is the bytes just read plus the leftover from the last receive */ |
|---|
| 719 | + buflen = ret + con->rx_leftover; |
|---|
| 720 | + ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen); |
|---|
| 695 | 721 | if (ret < 0) |
|---|
| 696 | 722 | goto out_close; |
|---|
| 697 | | - cbuf_eat(&con->cb, ret); |
|---|
| 698 | 723 | |
|---|
| 699 | | - if (cbuf_empty(&con->cb) && !call_again_soon) { |
|---|
| 700 | | - __free_page(con->rx_page); |
|---|
| 701 | | - con->rx_page = NULL; |
|---|
| 724 | + /* calculate the leftover bytes after processing and move them to the |
|---|
| 725 | + * beginning of the receive buffer, so that on the next receive the full |
|---|
| 726 | + * message is at the start address of the receive buffer. |
|---|
| 727 | + */ |
|---|
| 728 | + con->rx_leftover = buflen - ret; |
|---|
| 729 | + if (con->rx_leftover) { |
|---|
| 730 | + memmove(con->rx_buf, con->rx_buf + ret, |
|---|
| 731 | + con->rx_leftover); |
|---|
| 732 | + call_again_soon = true; |
|---|
| 702 | 733 | } |
|---|
| 703 | 734 | |
|---|
| 704 | 735 | if (call_again_soon) |
|---|
| 705 | 736 | goto out_resched; |
|---|
| 737 | + |
|---|
| 706 | 738 | mutex_unlock(&con->sock_mutex); |
|---|
| 707 | 739 | return 0; |
|---|
| 708 | 740 | |
|---|
| .. | .. |
|---|
| 715 | 747 | out_close: |
|---|
| 716 | 748 | mutex_unlock(&con->sock_mutex); |
|---|
| 717 | 749 | if (ret != -EAGAIN) { |
|---|
| 718 | | - close_connection(con, true, true, false); |
|---|
| 719 | 750 | /* Reconnect when there is something to send */ |
|---|
| 751 | + close_connection(con, false, true, false); |
|---|
| 752 | + if (ret == 0) { |
|---|
| 753 | + log_print("connection %p got EOF from %d", |
|---|
| 754 | + con, con->nodeid); |
|---|
| 755 | + /* handling for tcp shutdown */ |
|---|
| 756 | + clear_bit(CF_SHUTDOWN, &con->flags); |
|---|
| 757 | + wake_up(&con->shutdown_wait); |
|---|
| 758 | + /* signal the receive worker to break */ |
|---|
| 759 | + ret = -1; |
|---|
| 760 | + } |
|---|
| 720 | 761 | } |
|---|
| 721 | | - /* Don't return success if we really got EOF */ |
|---|
| 722 | | - if (ret == 0) |
|---|
| 723 | | - ret = -EAGAIN; |
|---|
| 724 | | - |
|---|
| 725 | 762 | return ret; |
|---|
| 726 | 763 | } |
|---|
| 727 | 764 | |
|---|
| 728 | 765 | /* Listening socket is busy, accept a connection */ |
|---|
| 729 | | -static int tcp_accept_from_sock(struct connection *con) |
|---|
| 766 | +static int accept_from_sock(struct connection *con) |
|---|
| 730 | 767 | { |
|---|
| 731 | 768 | int result; |
|---|
| 732 | 769 | struct sockaddr_storage peeraddr; |
|---|
| .. | .. |
|---|
| 735 | 772 | int nodeid; |
|---|
| 736 | 773 | struct connection *newcon; |
|---|
| 737 | 774 | struct connection *addcon; |
|---|
| 775 | + unsigned int mark; |
|---|
| 738 | 776 | |
|---|
| 739 | | - mutex_lock(&connections_lock); |
|---|
| 740 | 777 | if (!dlm_allow_conn) { |
|---|
| 741 | | - mutex_unlock(&connections_lock); |
|---|
| 742 | 778 | return -1; |
|---|
| 743 | 779 | } |
|---|
| 744 | | - mutex_unlock(&connections_lock); |
|---|
| 745 | 780 | |
|---|
| 746 | 781 | mutex_lock_nested(&con->sock_mutex, 0); |
|---|
| 747 | 782 | |
|---|
| .. | .. |
|---|
| 774 | 809 | return -1; |
|---|
| 775 | 810 | } |
|---|
| 776 | 811 | |
|---|
| 812 | + dlm_comm_mark(nodeid, &mark); |
|---|
| 813 | + sock_set_mark(newsock->sk, mark); |
|---|
| 814 | + |
|---|
| 777 | 815 | log_print("got connection from %d", nodeid); |
|---|
| 778 | 816 | |
|---|
| 779 | 817 | /* Check to see if we already have a connection to this node. This |
|---|
| .. | .. |
|---|
| 791 | 829 | struct connection *othercon = newcon->othercon; |
|---|
| 792 | 830 | |
|---|
| 793 | 831 | if (!othercon) { |
|---|
| 794 | | - othercon = kmem_cache_zalloc(con_cache, GFP_NOFS); |
|---|
| 832 | + othercon = kzalloc(sizeof(*othercon), GFP_NOFS); |
|---|
| 795 | 833 | if (!othercon) { |
|---|
| 796 | 834 | log_print("failed to allocate incoming socket"); |
|---|
| 797 | 835 | mutex_unlock(&newcon->sock_mutex); |
|---|
| 798 | 836 | result = -ENOMEM; |
|---|
| 799 | 837 | goto accept_err; |
|---|
| 800 | 838 | } |
|---|
| 839 | + |
|---|
| 840 | + othercon->rx_buflen = dlm_config.ci_buffer_size; |
|---|
| 841 | + othercon->rx_buf = kmalloc(othercon->rx_buflen, GFP_NOFS); |
|---|
| 842 | + if (!othercon->rx_buf) { |
|---|
| 843 | + mutex_unlock(&newcon->sock_mutex); |
|---|
| 844 | + kfree(othercon); |
|---|
| 845 | + log_print("failed to allocate incoming socket receive buffer"); |
|---|
| 846 | + result = -ENOMEM; |
|---|
| 847 | + goto accept_err; |
|---|
| 848 | + } |
|---|
| 849 | + |
|---|
| 801 | 850 | othercon->nodeid = nodeid; |
|---|
| 802 | 851 | othercon->rx_action = receive_from_sock; |
|---|
| 803 | 852 | mutex_init(&othercon->sock_mutex); |
|---|
| .. | .. |
|---|
| 805 | 854 | spin_lock_init(&othercon->writequeue_lock); |
|---|
| 806 | 855 | INIT_WORK(&othercon->swork, process_send_sockets); |
|---|
| 807 | 856 | INIT_WORK(&othercon->rwork, process_recv_sockets); |
|---|
| 857 | + init_waitqueue_head(&othercon->shutdown_wait); |
|---|
| 808 | 858 | set_bit(CF_IS_OTHERCON, &othercon->flags); |
|---|
| 859 | + } else { |
|---|
| 860 | + /* close other sock con if we have something new */ |
|---|
| 861 | + close_connection(othercon, false, true, false); |
|---|
| 809 | 862 | } |
|---|
| 863 | + |
|---|
| 810 | 864 | mutex_lock_nested(&othercon->sock_mutex, 2); |
|---|
| 811 | | - if (!othercon->sock) { |
|---|
| 812 | | - newcon->othercon = othercon; |
|---|
| 813 | | - add_sock(newsock, othercon); |
|---|
| 814 | | - addcon = othercon; |
|---|
| 815 | | - mutex_unlock(&othercon->sock_mutex); |
|---|
| 816 | | - } |
|---|
| 817 | | - else { |
|---|
| 818 | | - printk("Extra connection from node %d attempted\n", nodeid); |
|---|
| 819 | | - result = -EAGAIN; |
|---|
| 820 | | - mutex_unlock(&othercon->sock_mutex); |
|---|
| 821 | | - mutex_unlock(&newcon->sock_mutex); |
|---|
| 822 | | - goto accept_err; |
|---|
| 823 | | - } |
|---|
| 865 | + newcon->othercon = othercon; |
|---|
| 866 | + add_sock(newsock, othercon); |
|---|
| 867 | + addcon = othercon; |
|---|
| 868 | + mutex_unlock(&othercon->sock_mutex); |
|---|
| 824 | 869 | } |
|---|
| 825 | 870 | else { |
|---|
| 826 | 871 | newcon->rx_action = receive_from_sock; |
|---|
| .. | .. |
|---|
| 854 | 899 | return result; |
|---|
| 855 | 900 | } |
|---|
| 856 | 901 | |
|---|
| 857 | | -static int sctp_accept_from_sock(struct connection *con) |
|---|
| 858 | | -{ |
|---|
| 859 | | - /* Check that the new node is in the lockspace */ |
|---|
| 860 | | - struct sctp_prim prim; |
|---|
| 861 | | - int nodeid; |
|---|
| 862 | | - int prim_len, ret; |
|---|
| 863 | | - int addr_len; |
|---|
| 864 | | - struct connection *newcon; |
|---|
| 865 | | - struct connection *addcon; |
|---|
| 866 | | - struct socket *newsock; |
|---|
| 867 | | - |
|---|
| 868 | | - mutex_lock(&connections_lock); |
|---|
| 869 | | - if (!dlm_allow_conn) { |
|---|
| 870 | | - mutex_unlock(&connections_lock); |
|---|
| 871 | | - return -1; |
|---|
| 872 | | - } |
|---|
| 873 | | - mutex_unlock(&connections_lock); |
|---|
| 874 | | - |
|---|
| 875 | | - mutex_lock_nested(&con->sock_mutex, 0); |
|---|
| 876 | | - |
|---|
| 877 | | - ret = kernel_accept(con->sock, &newsock, O_NONBLOCK); |
|---|
| 878 | | - if (ret < 0) |
|---|
| 879 | | - goto accept_err; |
|---|
| 880 | | - |
|---|
| 881 | | - memset(&prim, 0, sizeof(struct sctp_prim)); |
|---|
| 882 | | - prim_len = sizeof(struct sctp_prim); |
|---|
| 883 | | - |
|---|
| 884 | | - ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR, |
|---|
| 885 | | - (char *)&prim, &prim_len); |
|---|
| 886 | | - if (ret < 0) { |
|---|
| 887 | | - log_print("getsockopt/sctp_primary_addr failed: %d", ret); |
|---|
| 888 | | - goto accept_err; |
|---|
| 889 | | - } |
|---|
| 890 | | - |
|---|
| 891 | | - make_sockaddr(&prim.ssp_addr, 0, &addr_len); |
|---|
| 892 | | - ret = addr_to_nodeid(&prim.ssp_addr, &nodeid); |
|---|
| 893 | | - if (ret) { |
|---|
| 894 | | - unsigned char *b = (unsigned char *)&prim.ssp_addr; |
|---|
| 895 | | - |
|---|
| 896 | | - log_print("reject connect from unknown addr"); |
|---|
| 897 | | - print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, |
|---|
| 898 | | - b, sizeof(struct sockaddr_storage)); |
|---|
| 899 | | - goto accept_err; |
|---|
| 900 | | - } |
|---|
| 901 | | - |
|---|
| 902 | | - newcon = nodeid2con(nodeid, GFP_NOFS); |
|---|
| 903 | | - if (!newcon) { |
|---|
| 904 | | - ret = -ENOMEM; |
|---|
| 905 | | - goto accept_err; |
|---|
| 906 | | - } |
|---|
| 907 | | - |
|---|
| 908 | | - mutex_lock_nested(&newcon->sock_mutex, 1); |
|---|
| 909 | | - |
|---|
| 910 | | - if (newcon->sock) { |
|---|
| 911 | | - struct connection *othercon = newcon->othercon; |
|---|
| 912 | | - |
|---|
| 913 | | - if (!othercon) { |
|---|
| 914 | | - othercon = kmem_cache_zalloc(con_cache, GFP_NOFS); |
|---|
| 915 | | - if (!othercon) { |
|---|
| 916 | | - log_print("failed to allocate incoming socket"); |
|---|
| 917 | | - mutex_unlock(&newcon->sock_mutex); |
|---|
| 918 | | - ret = -ENOMEM; |
|---|
| 919 | | - goto accept_err; |
|---|
| 920 | | - } |
|---|
| 921 | | - othercon->nodeid = nodeid; |
|---|
| 922 | | - othercon->rx_action = receive_from_sock; |
|---|
| 923 | | - mutex_init(&othercon->sock_mutex); |
|---|
| 924 | | - INIT_LIST_HEAD(&othercon->writequeue); |
|---|
| 925 | | - spin_lock_init(&othercon->writequeue_lock); |
|---|
| 926 | | - INIT_WORK(&othercon->swork, process_send_sockets); |
|---|
| 927 | | - INIT_WORK(&othercon->rwork, process_recv_sockets); |
|---|
| 928 | | - set_bit(CF_IS_OTHERCON, &othercon->flags); |
|---|
| 929 | | - } |
|---|
| 930 | | - mutex_lock_nested(&othercon->sock_mutex, 2); |
|---|
| 931 | | - if (!othercon->sock) { |
|---|
| 932 | | - newcon->othercon = othercon; |
|---|
| 933 | | - add_sock(newsock, othercon); |
|---|
| 934 | | - addcon = othercon; |
|---|
| 935 | | - mutex_unlock(&othercon->sock_mutex); |
|---|
| 936 | | - } else { |
|---|
| 937 | | - printk("Extra connection from node %d attempted\n", nodeid); |
|---|
| 938 | | - ret = -EAGAIN; |
|---|
| 939 | | - mutex_unlock(&othercon->sock_mutex); |
|---|
| 940 | | - mutex_unlock(&newcon->sock_mutex); |
|---|
| 941 | | - goto accept_err; |
|---|
| 942 | | - } |
|---|
| 943 | | - } else { |
|---|
| 944 | | - newcon->rx_action = receive_from_sock; |
|---|
| 945 | | - add_sock(newsock, newcon); |
|---|
| 946 | | - addcon = newcon; |
|---|
| 947 | | - } |
|---|
| 948 | | - |
|---|
| 949 | | - log_print("connected to %d", nodeid); |
|---|
| 950 | | - |
|---|
| 951 | | - mutex_unlock(&newcon->sock_mutex); |
|---|
| 952 | | - |
|---|
| 953 | | - /* |
|---|
| 954 | | - * Add it to the active queue in case we got data |
|---|
| 955 | | - * between processing the accept adding the socket |
|---|
| 956 | | - * to the read_sockets list |
|---|
| 957 | | - */ |
|---|
| 958 | | - if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) |
|---|
| 959 | | - queue_work(recv_workqueue, &addcon->rwork); |
|---|
| 960 | | - mutex_unlock(&con->sock_mutex); |
|---|
| 961 | | - |
|---|
| 962 | | - return 0; |
|---|
| 963 | | - |
|---|
| 964 | | -accept_err: |
|---|
| 965 | | - mutex_unlock(&con->sock_mutex); |
|---|
| 966 | | - if (newsock) |
|---|
| 967 | | - sock_release(newsock); |
|---|
| 968 | | - if (ret != -EAGAIN) |
|---|
| 969 | | - log_print("error accepting connection from node: %d", ret); |
|---|
| 970 | | - |
|---|
| 971 | | - return ret; |
|---|
| 972 | | -} |
|---|
| 973 | | - |
|---|
| 974 | 902 | static void free_entry(struct writequeue_entry *e) |
|---|
| 975 | 903 | { |
|---|
| 976 | 904 | __free_page(e->page); |
|---|
| .. | .. |
|---|
| 1001 | 929 | static int sctp_bind_addrs(struct connection *con, uint16_t port) |
|---|
| 1002 | 930 | { |
|---|
| 1003 | 931 | struct sockaddr_storage localaddr; |
|---|
| 932 | + struct sockaddr *addr = (struct sockaddr *)&localaddr; |
|---|
| 1004 | 933 | int i, addr_len, result = 0; |
|---|
| 1005 | 934 | |
|---|
| 1006 | 935 | for (i = 0; i < dlm_local_count; i++) { |
|---|
| .. | .. |
|---|
| 1008 | 937 | make_sockaddr(&localaddr, port, &addr_len); |
|---|
| 1009 | 938 | |
|---|
| 1010 | 939 | if (!i) |
|---|
| 1011 | | - result = kernel_bind(con->sock, |
|---|
| 1012 | | - (struct sockaddr *)&localaddr, |
|---|
| 1013 | | - addr_len); |
|---|
| 940 | + result = kernel_bind(con->sock, addr, addr_len); |
|---|
| 1014 | 941 | else |
|---|
| 1015 | | - result = kernel_setsockopt(con->sock, SOL_SCTP, |
|---|
| 1016 | | - SCTP_SOCKOPT_BINDX_ADD, |
|---|
| 1017 | | - (char *)&localaddr, addr_len); |
|---|
| 942 | + result = sock_bind_add(con->sock->sk, addr, addr_len); |
|---|
| 1018 | 943 | |
|---|
| 1019 | 944 | if (result < 0) { |
|---|
| 1020 | 945 | log_print("Can't bind to %d addr number %d, %d.\n", |
|---|
| .. | .. |
|---|
| 1033 | 958 | static void sctp_connect_to_sock(struct connection *con) |
|---|
| 1034 | 959 | { |
|---|
| 1035 | 960 | struct sockaddr_storage daddr; |
|---|
| 1036 | | - int one = 1; |
|---|
| 1037 | 961 | int result; |
|---|
| 1038 | 962 | int addr_len; |
|---|
| 1039 | 963 | struct socket *sock; |
|---|
| 1040 | | - struct timeval tv = { .tv_sec = 5, .tv_usec = 0 }; |
|---|
| 964 | + unsigned int mark; |
|---|
| 1041 | 965 | |
|---|
| 1042 | 966 | if (con->nodeid == 0) { |
|---|
| 1043 | 967 | log_print("attempt to connect sock 0 foiled"); |
|---|
| 1044 | 968 | return; |
|---|
| 1045 | 969 | } |
|---|
| 970 | + |
|---|
| 971 | + dlm_comm_mark(con->nodeid, &mark); |
|---|
| 1046 | 972 | |
|---|
| 1047 | 973 | mutex_lock(&con->sock_mutex); |
|---|
| 1048 | 974 | |
|---|
| .. | .. |
|---|
| 1068 | 994 | if (result < 0) |
|---|
| 1069 | 995 | goto socket_err; |
|---|
| 1070 | 996 | |
|---|
| 997 | + sock_set_mark(sock->sk, mark); |
|---|
| 998 | + |
|---|
| 1071 | 999 | con->rx_action = receive_from_sock; |
|---|
| 1072 | 1000 | con->connect_action = sctp_connect_to_sock; |
|---|
| 1073 | 1001 | add_sock(sock, con); |
|---|
| .. | .. |
|---|
| 1081 | 1009 | log_print("connecting to %d", con->nodeid); |
|---|
| 1082 | 1010 | |
|---|
| 1083 | 1011 | /* Turn off Nagle's algorithm */ |
|---|
| 1084 | | - kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, |
|---|
| 1085 | | - sizeof(one)); |
|---|
| 1012 | + sctp_sock_set_nodelay(sock->sk); |
|---|
| 1086 | 1013 | |
|---|
| 1087 | 1014 | /* |
|---|
| 1088 | 1015 | * Make sock->ops->connect() function return in specified time, |
|---|
| 1089 | 1016 | * since O_NONBLOCK argument in connect() function does not work here, |
|---|
| 1090 | 1017 | * then, we should restore the default value of this attribute. |
|---|
| 1091 | 1018 | */ |
|---|
| 1092 | | - kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv, |
|---|
| 1093 | | - sizeof(tv)); |
|---|
| 1019 | + sock_set_sndtimeo(sock->sk, 5); |
|---|
| 1094 | 1020 | result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len, |
|---|
| 1095 | 1021 | 0); |
|---|
| 1096 | | - memset(&tv, 0, sizeof(tv)); |
|---|
| 1097 | | - kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv, |
|---|
| 1098 | | - sizeof(tv)); |
|---|
| 1022 | + sock_set_sndtimeo(sock->sk, 0); |
|---|
| 1099 | 1023 | |
|---|
| 1100 | 1024 | if (result == -EINPROGRESS) |
|---|
| 1101 | 1025 | result = 0; |
|---|
| .. | .. |
|---|
| 1134 | 1058 | struct sockaddr_storage saddr, src_addr; |
|---|
| 1135 | 1059 | int addr_len; |
|---|
| 1136 | 1060 | struct socket *sock = NULL; |
|---|
| 1137 | | - int one = 1; |
|---|
| 1061 | + unsigned int mark; |
|---|
| 1138 | 1062 | int result; |
|---|
| 1139 | 1063 | |
|---|
| 1140 | 1064 | if (con->nodeid == 0) { |
|---|
| 1141 | 1065 | log_print("attempt to connect sock 0 foiled"); |
|---|
| 1142 | 1066 | return; |
|---|
| 1143 | 1067 | } |
|---|
| 1068 | + |
|---|
| 1069 | + dlm_comm_mark(con->nodeid, &mark); |
|---|
| 1144 | 1070 | |
|---|
| 1145 | 1071 | mutex_lock(&con->sock_mutex); |
|---|
| 1146 | 1072 | if (con->retries++ > MAX_CONNECT_RETRIES) |
|---|
| .. | .. |
|---|
| 1156 | 1082 | if (result < 0) |
|---|
| 1157 | 1083 | goto out_err; |
|---|
| 1158 | 1084 | |
|---|
| 1085 | + sock_set_mark(sock->sk, mark); |
|---|
| 1086 | + |
|---|
| 1159 | 1087 | memset(&saddr, 0, sizeof(saddr)); |
|---|
| 1160 | 1088 | result = nodeid_to_addr(con->nodeid, &saddr, NULL, false); |
|---|
| 1161 | 1089 | if (result < 0) { |
|---|
| .. | .. |
|---|
| 1165 | 1093 | |
|---|
| 1166 | 1094 | con->rx_action = receive_from_sock; |
|---|
| 1167 | 1095 | con->connect_action = tcp_connect_to_sock; |
|---|
| 1096 | + con->shutdown_action = dlm_tcp_shutdown; |
|---|
| 1168 | 1097 | add_sock(sock, con); |
|---|
| 1169 | 1098 | |
|---|
| 1170 | 1099 | /* Bind to our cluster-known address connecting to avoid |
|---|
| .. | .. |
|---|
| 1183 | 1112 | log_print("connecting to %d", con->nodeid); |
|---|
| 1184 | 1113 | |
|---|
| 1185 | 1114 | /* Turn off Nagle's algorithm */ |
|---|
| 1186 | | - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, |
|---|
| 1187 | | - sizeof(one)); |
|---|
| 1115 | + tcp_sock_set_nodelay(sock->sk); |
|---|
| 1188 | 1116 | |
|---|
| 1189 | 1117 | result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, |
|---|
| 1190 | 1118 | O_NONBLOCK); |
|---|
| .. | .. |
|---|
| 1226 | 1154 | { |
|---|
| 1227 | 1155 | struct socket *sock = NULL; |
|---|
| 1228 | 1156 | int result = 0; |
|---|
| 1229 | | - int one = 1; |
|---|
| 1230 | 1157 | int addr_len; |
|---|
| 1231 | 1158 | |
|---|
| 1232 | 1159 | if (dlm_local_addr[0]->ss_family == AF_INET) |
|---|
| .. | .. |
|---|
| 1242 | 1169 | goto create_out; |
|---|
| 1243 | 1170 | } |
|---|
| 1244 | 1171 | |
|---|
| 1172 | + sock_set_mark(sock->sk, dlm_config.ci_mark); |
|---|
| 1173 | + |
|---|
| 1245 | 1174 | /* Turn off Nagle's algorithm */ |
|---|
| 1246 | | - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, |
|---|
| 1247 | | - sizeof(one)); |
|---|
| 1175 | + tcp_sock_set_nodelay(sock->sk); |
|---|
| 1248 | 1176 | |
|---|
| 1249 | | - result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, |
|---|
| 1250 | | - (char *)&one, sizeof(one)); |
|---|
| 1177 | + sock_set_reuseaddr(sock->sk); |
|---|
| 1251 | 1178 | |
|---|
| 1252 | | - if (result < 0) { |
|---|
| 1253 | | - log_print("Failed to set SO_REUSEADDR on socket: %d", result); |
|---|
| 1254 | | - } |
|---|
| 1255 | 1179 | write_lock_bh(&sock->sk->sk_callback_lock); |
|---|
| 1256 | 1180 | sock->sk->sk_user_data = con; |
|---|
| 1257 | 1181 | save_listen_callbacks(sock); |
|---|
| 1258 | | - con->rx_action = tcp_accept_from_sock; |
|---|
| 1182 | + con->rx_action = accept_from_sock; |
|---|
| 1259 | 1183 | con->connect_action = tcp_connect_to_sock; |
|---|
| 1260 | 1184 | write_unlock_bh(&sock->sk->sk_callback_lock); |
|---|
| 1261 | 1185 | |
|---|
| .. | .. |
|---|
| 1269 | 1193 | con->sock = NULL; |
|---|
| 1270 | 1194 | goto create_out; |
|---|
| 1271 | 1195 | } |
|---|
| 1272 | | - result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, |
|---|
| 1273 | | - (char *)&one, sizeof(one)); |
|---|
| 1274 | | - if (result < 0) { |
|---|
| 1275 | | - log_print("Set keepalive failed: %d", result); |
|---|
| 1276 | | - } |
|---|
| 1196 | + sock_set_keepalive(sock->sk); |
|---|
| 1277 | 1197 | |
|---|
| 1278 | 1198 | result = sock->ops->listen(sock, 5); |
|---|
| 1279 | 1199 | if (result < 0) { |
|---|
| .. | .. |
|---|
| 1305 | 1225 | } |
|---|
| 1306 | 1226 | } |
|---|
| 1307 | 1227 | |
|---|
| 1228 | +static void deinit_local(void) |
|---|
| 1229 | +{ |
|---|
| 1230 | + int i; |
|---|
| 1231 | + |
|---|
| 1232 | + for (i = 0; i < dlm_local_count; i++) |
|---|
| 1233 | + kfree(dlm_local_addr[i]); |
|---|
| 1234 | +} |
|---|
| 1235 | + |
|---|
| 1308 | 1236 | /* Initialise SCTP socket and bind to all interfaces */ |
|---|
| 1309 | 1237 | static int sctp_listen_for_all(void) |
|---|
| 1310 | 1238 | { |
|---|
| 1311 | 1239 | struct socket *sock = NULL; |
|---|
| 1312 | 1240 | int result = -EINVAL; |
|---|
| 1313 | 1241 | struct connection *con = nodeid2con(0, GFP_NOFS); |
|---|
| 1314 | | - int bufsize = NEEDED_RMEM; |
|---|
| 1315 | | - int one = 1; |
|---|
| 1316 | 1242 | |
|---|
| 1317 | 1243 | if (!con) |
|---|
| 1318 | 1244 | return -ENOMEM; |
|---|
| .. | .. |
|---|
| 1326 | 1252 | goto out; |
|---|
| 1327 | 1253 | } |
|---|
| 1328 | 1254 | |
|---|
| 1329 | | - result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE, |
|---|
| 1330 | | - (char *)&bufsize, sizeof(bufsize)); |
|---|
| 1331 | | - if (result) |
|---|
| 1332 | | - log_print("Error increasing buffer space on socket %d", result); |
|---|
| 1333 | | - |
|---|
| 1334 | | - result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, |
|---|
| 1335 | | - sizeof(one)); |
|---|
| 1336 | | - if (result < 0) |
|---|
| 1337 | | - log_print("Could not set SCTP NODELAY error %d\n", result); |
|---|
| 1255 | + sock_set_rcvbuf(sock->sk, NEEDED_RMEM); |
|---|
| 1256 | + sock_set_mark(sock->sk, dlm_config.ci_mark); |
|---|
| 1257 | + sctp_sock_set_nodelay(sock->sk); |
|---|
| 1338 | 1258 | |
|---|
| 1339 | 1259 | write_lock_bh(&sock->sk->sk_callback_lock); |
|---|
| 1340 | 1260 | /* Init con struct */ |
|---|
| .. | .. |
|---|
| 1342 | 1262 | save_listen_callbacks(sock); |
|---|
| 1343 | 1263 | con->sock = sock; |
|---|
| 1344 | 1264 | con->sock->sk->sk_data_ready = lowcomms_data_ready; |
|---|
| 1345 | | - con->rx_action = sctp_accept_from_sock; |
|---|
| 1265 | + con->rx_action = accept_from_sock; |
|---|
| 1346 | 1266 | con->connect_action = sctp_connect_to_sock; |
|---|
| 1347 | 1267 | |
|---|
| 1348 | 1268 | write_unlock_bh(&sock->sk->sk_callback_lock); |
|---|
| .. | .. |
|---|
| 1545 | 1465 | |
|---|
| 1546 | 1466 | send_error: |
|---|
| 1547 | 1467 | mutex_unlock(&con->sock_mutex); |
|---|
| 1548 | | - close_connection(con, true, false, true); |
|---|
| 1468 | + close_connection(con, false, false, true); |
|---|
| 1549 | 1469 | /* Requeue the send work. When the work daemon runs again, it will try |
|---|
| 1550 | 1470 | a new connection, then call this function again. */ |
|---|
| 1551 | 1471 | queue_work(send_workqueue, &con->swork); |
|---|
| .. | .. |
|---|
| 1621 | 1541 | send_to_sock(con); |
|---|
| 1622 | 1542 | } |
|---|
| 1623 | 1543 | |
|---|
| 1624 | | - |
|---|
| 1625 | | -/* Discard all entries on the write queues */ |
|---|
| 1626 | | -static void clean_writequeues(void) |
|---|
| 1627 | | -{ |
|---|
| 1628 | | - foreach_conn(clean_one_writequeue); |
|---|
| 1629 | | -} |
|---|
| 1630 | | - |
|---|
| 1631 | 1544 | static void work_stop(void) |
|---|
| 1632 | 1545 | { |
|---|
| 1633 | 1546 | if (recv_workqueue) |
|---|
| .. | .. |
|---|
| 1677 | 1590 | _stop_conn(con, true); |
|---|
| 1678 | 1591 | } |
|---|
| 1679 | 1592 | |
|---|
| 1593 | +static void shutdown_conn(struct connection *con) |
|---|
| 1594 | +{ |
|---|
| 1595 | + if (con->shutdown_action) |
|---|
| 1596 | + con->shutdown_action(con); |
|---|
| 1597 | +} |
|---|
| 1598 | + |
|---|
| 1599 | +static void connection_release(struct rcu_head *rcu) |
|---|
| 1600 | +{ |
|---|
| 1601 | + struct connection *con = container_of(rcu, struct connection, rcu); |
|---|
| 1602 | + |
|---|
| 1603 | + kfree(con->rx_buf); |
|---|
| 1604 | + kfree(con); |
|---|
| 1605 | +} |
|---|
| 1606 | + |
|---|
| 1680 | 1607 | static void free_conn(struct connection *con) |
|---|
| 1681 | 1608 | { |
|---|
| 1682 | 1609 | close_connection(con, true, true, true); |
|---|
| 1683 | | - if (con->othercon) |
|---|
| 1684 | | - kmem_cache_free(con_cache, con->othercon); |
|---|
| 1685 | | - hlist_del(&con->list); |
|---|
| 1686 | | - kmem_cache_free(con_cache, con); |
|---|
| 1610 | + spin_lock(&connections_lock); |
|---|
| 1611 | + hlist_del_rcu(&con->list); |
|---|
| 1612 | + spin_unlock(&connections_lock); |
|---|
| 1613 | + if (con->othercon) { |
|---|
| 1614 | + clean_one_writequeue(con->othercon); |
|---|
| 1615 | + call_rcu(&con->othercon->rcu, connection_release); |
|---|
| 1616 | + } |
|---|
| 1617 | + clean_one_writequeue(con); |
|---|
| 1618 | + call_rcu(&con->rcu, connection_release); |
|---|
| 1687 | 1619 | } |
|---|
| 1688 | 1620 | |
|---|
| 1689 | 1621 | static void work_flush(void) |
|---|
| 1690 | 1622 | { |
|---|
| 1691 | | - int ok; |
|---|
| 1623 | + int ok, idx; |
|---|
| 1692 | 1624 | int i; |
|---|
| 1693 | | - struct hlist_node *n; |
|---|
| 1694 | 1625 | struct connection *con; |
|---|
| 1695 | 1626 | |
|---|
| 1696 | | - if (recv_workqueue) |
|---|
| 1697 | | - flush_workqueue(recv_workqueue); |
|---|
| 1698 | | - if (send_workqueue) |
|---|
| 1699 | | - flush_workqueue(send_workqueue); |
|---|
| 1700 | 1627 | do { |
|---|
| 1701 | 1628 | ok = 1; |
|---|
| 1702 | 1629 | foreach_conn(stop_conn); |
|---|
| .. | .. |
|---|
| 1704 | 1631 | flush_workqueue(recv_workqueue); |
|---|
| 1705 | 1632 | if (send_workqueue) |
|---|
| 1706 | 1633 | flush_workqueue(send_workqueue); |
|---|
| 1634 | + idx = srcu_read_lock(&connections_srcu); |
|---|
| 1707 | 1635 | for (i = 0; i < CONN_HASH_SIZE && ok; i++) { |
|---|
| 1708 | | - hlist_for_each_entry_safe(con, n, |
|---|
| 1709 | | - &connection_hash[i], list) { |
|---|
| 1636 | + hlist_for_each_entry_rcu(con, &connection_hash[i], |
|---|
| 1637 | + list) { |
|---|
| 1710 | 1638 | ok &= test_bit(CF_READ_PENDING, &con->flags); |
|---|
| 1711 | 1639 | ok &= test_bit(CF_WRITE_PENDING, &con->flags); |
|---|
| 1712 | 1640 | if (con->othercon) { |
|---|
| .. | .. |
|---|
| 1717 | 1645 | } |
|---|
| 1718 | 1646 | } |
|---|
| 1719 | 1647 | } |
|---|
| 1648 | + srcu_read_unlock(&connections_srcu, idx); |
|---|
| 1720 | 1649 | } while (!ok); |
|---|
| 1721 | 1650 | } |
|---|
| 1722 | 1651 | |
|---|
| .. | .. |
|---|
| 1725 | 1654 | /* Set all the flags to prevent any |
|---|
| 1726 | 1655 | socket activity. |
|---|
| 1727 | 1656 | */ |
|---|
| 1728 | | - mutex_lock(&connections_lock); |
|---|
| 1729 | 1657 | dlm_allow_conn = 0; |
|---|
| 1730 | | - mutex_unlock(&connections_lock); |
|---|
| 1658 | + |
|---|
| 1659 | + if (recv_workqueue) |
|---|
| 1660 | + flush_workqueue(recv_workqueue); |
|---|
| 1661 | + if (send_workqueue) |
|---|
| 1662 | + flush_workqueue(send_workqueue); |
|---|
| 1663 | + |
|---|
| 1664 | + foreach_conn(shutdown_conn); |
|---|
| 1731 | 1665 | work_flush(); |
|---|
| 1732 | | - clean_writequeues(); |
|---|
| 1733 | 1666 | foreach_conn(free_conn); |
|---|
| 1734 | 1667 | work_stop(); |
|---|
| 1735 | | - |
|---|
| 1736 | | - kmem_cache_destroy(con_cache); |
|---|
| 1668 | + deinit_local(); |
|---|
| 1737 | 1669 | } |
|---|
| 1738 | 1670 | |
|---|
| 1739 | 1671 | int dlm_lowcomms_start(void) |
|---|
| .. | .. |
|---|
| 1752 | 1684 | goto fail; |
|---|
| 1753 | 1685 | } |
|---|
| 1754 | 1686 | |
|---|
| 1755 | | - error = -ENOMEM; |
|---|
| 1756 | | - con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection), |
|---|
| 1757 | | - __alignof__(struct connection), 0, |
|---|
| 1758 | | - NULL); |
|---|
| 1759 | | - if (!con_cache) |
|---|
| 1760 | | - goto fail; |
|---|
| 1761 | | - |
|---|
| 1762 | 1687 | error = work_start(); |
|---|
| 1763 | 1688 | if (error) |
|---|
| 1764 | | - goto fail_destroy; |
|---|
| 1689 | + goto fail; |
|---|
| 1765 | 1690 | |
|---|
| 1766 | 1691 | dlm_allow_conn = 1; |
|---|
| 1767 | 1692 | |
|---|
| .. | .. |
|---|
| 1778 | 1703 | fail_unlisten: |
|---|
| 1779 | 1704 | dlm_allow_conn = 0; |
|---|
| 1780 | 1705 | con = nodeid2con(0,0); |
|---|
| 1781 | | - if (con) { |
|---|
| 1782 | | - close_connection(con, false, true, true); |
|---|
| 1783 | | - kmem_cache_free(con_cache, con); |
|---|
| 1784 | | - } |
|---|
| 1785 | | -fail_destroy: |
|---|
| 1786 | | - kmem_cache_destroy(con_cache); |
|---|
| 1706 | + if (con) |
|---|
| 1707 | + free_conn(con); |
|---|
| 1787 | 1708 | fail: |
|---|
| 1788 | 1709 | return error; |
|---|
| 1789 | 1710 | } |
|---|