From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/net/ceph/messenger.c | 354 ++++++++++++++++++++++++++-------------------------------- 1 files changed, 160 insertions(+), 194 deletions(-) diff --git a/kernel/net/ceph/messenger.c b/kernel/net/ceph/messenger.c index 21bd37e..af0f1fa 100644 --- a/kernel/net/ceph/messenger.c +++ b/kernel/net/ceph/messenger.c @@ -156,7 +156,6 @@ /* Slab caches for frequently-allocated structures */ static struct kmem_cache *ceph_msg_cache; -static struct kmem_cache *ceph_msg_data_cache; /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; @@ -187,30 +186,33 @@ static struct page *zero_page; /* used in certain error cases */ -const char *ceph_pr_addr(const struct sockaddr_storage *ss) +const char *ceph_pr_addr(const struct ceph_entity_addr *addr) { int i; char *s; - struct sockaddr_in *in4 = (struct sockaddr_in *) ss; - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; + struct sockaddr_storage ss = addr->in_addr; /* align */ + struct sockaddr_in *in4 = (struct sockaddr_in *)&ss; + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss; i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK; s = addr_str[i]; - switch (ss->ss_family) { + switch (ss.ss_family) { case AF_INET: - snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr, + snprintf(s, MAX_ADDR_STR_LEN, "(%d)%pI4:%hu", + le32_to_cpu(addr->type), &in4->sin_addr, ntohs(in4->sin_port)); break; case AF_INET6: - snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr, + snprintf(s, MAX_ADDR_STR_LEN, "(%d)[%pI6c]:%hu", + le32_to_cpu(addr->type), &in6->sin6_addr, ntohs(in6->sin6_port)); break; default: snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)", - ss->ss_family); + ss.ss_family); } return s; @@ -220,7 +222,7 @@ static void encode_my_addr(struct ceph_messenger *msgr) { memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr)); - ceph_encode_addr(&msgr->my_enc_addr); + ceph_encode_banner_addr(&msgr->my_enc_addr); } /* @@ -235,23 +237,11 @@ if (!ceph_msg_cache) return -ENOMEM; - BUG_ON(ceph_msg_data_cache); - ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0); - if (ceph_msg_data_cache) - return 0; - - kmem_cache_destroy(ceph_msg_cache); - ceph_msg_cache = NULL; - - return -ENOMEM; + return 0; } static void ceph_msgr_slab_exit(void) { - BUG_ON(!ceph_msg_data_cache); - kmem_cache_destroy(ceph_msg_data_cache); - ceph_msg_data_cache = NULL; - BUG_ON(!ceph_msg_cache); kmem_cache_destroy(ceph_msg_cache); ceph_msg_cache = NULL; @@ -422,7 +412,7 @@ switch (sk->sk_state) { case TCP_CLOSE: dout("%s TCP_CLOSE\n", __func__); - /* fall through */ + fallthrough; case TCP_CLOSE_WAIT: dout("%s TCP_CLOSE_WAIT\n", __func__); con_sock_state_closing(con); @@ -462,7 +452,7 @@ */ static int ceph_tcp_connect(struct ceph_connection *con) { - struct sockaddr_storage *paddr = &con->peer_addr.in_addr; + struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */ struct socket *sock; unsigned int noio_flag; int ret; @@ -471,7 +461,7 @@ /* sock_create_kern() allocates with GFP_KERNEL */ noio_flag = memalloc_noio_save(); - ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, + ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); memalloc_noio_restore(noio_flag); if (ret) @@ -484,31 +474,24 @@ set_sock_callbacks(sock, con); - dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); + dout("connect %s\n", ceph_pr_addr(&con->peer_addr)); con_sock_state_connecting(con); - ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), + ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss), O_NONBLOCK); if (ret == -EINPROGRESS) { dout("connect %s EINPROGRESS sk_state = %u\n", - ceph_pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr), sock->sk->sk_state); } else if (ret < 0) { pr_err("connect %s error %d\n", - ceph_pr_addr(&con->peer_addr.in_addr), ret); + ceph_pr_addr(&con->peer_addr), ret); sock_release(sock); return ret; } - if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) { - int optval = 1; - - ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)&optval, sizeof(optval)); - if (ret) - pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d", - ret); - } + if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) + tcp_sock_set_nodelay(sock->sk); con->sock = sock; return 0; @@ -526,7 +509,7 @@ if (!buf) msg.msg_flags |= MSG_TRUNC; - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; @@ -545,7 +528,7 @@ int r; BUG_ON(page_offset + length > PAGE_SIZE); - iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length); + iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length); r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; @@ -557,7 +540,7 @@ * shortly. */ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, - size_t kvlen, size_t len, int more) + size_t kvlen, size_t len, bool more) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; @@ -573,24 +556,15 @@ return r; } -static int __ceph_tcp_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, bool more) -{ - int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); - int ret; - - ret = kernel_sendpage(sock, page, offset, size, flags); - if (ret == -EAGAIN) - ret = 0; - - return ret; -} - +/* + * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST + */ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, bool more) + int offset, size_t size, int more) { - struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; - struct bio_vec bvec; + ssize_t (*sendpage)(struct socket *sock, struct page *page, + int offset, size_t size, int flags); + int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more; int ret; /* @@ -601,20 +575,12 @@ * coalescing neighboring slab objects into a single frag which * triggers one of hardened usercopy checks. */ - if (page_count(page) >= 1 && !PageSlab(page)) - return __ceph_tcp_sendpage(sock, page, offset, size, more); - - bvec.bv_page = page; - bvec.bv_offset = offset; - bvec.bv_len = size; - - if (more) - msg.msg_flags |= MSG_MORE; + if (sendpage_ok(page)) + sendpage = sock->ops->sendpage; else - msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ + sendpage = sock_no_sendpage; - iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size); - ret = sock_sendmsg(sock, &msg); + ret = sendpage(sock, page, offset, size, flags); if (ret == -EAGAIN) ret = 0; @@ -699,8 +665,7 @@ void ceph_con_close(struct ceph_connection *con) { mutex_lock(&con->mutex); - dout("con_close %p peer %s\n", con, - ceph_pr_addr(&con->peer_addr.in_addr)); + dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr)); con->state = CON_STATE_CLOSED; con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */ @@ -724,7 +689,7 @@ struct ceph_entity_addr *addr) { mutex_lock(&con->mutex); - dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); + dout("con_open %p %s\n", con, ceph_pr_addr(addr)); WARN_ON(con->state != CON_STATE_CLOSED); con->state = CON_STATE_PREOPEN; @@ -870,6 +835,7 @@ size_t bytes) { struct ceph_bio_iter *it = &cursor->bio_iter; + struct page *page = bio_iter_page(it->bio, it->iter); BUG_ON(bytes > cursor->resid); BUG_ON(bytes > bio_iter_len(it->bio, it->iter)); @@ -881,7 +847,8 @@ return false; /* no more data */ } - if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done)) + if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done && + page == bio_iter_page(it->bio, it->iter))) return false; /* more bytes to process in this segment */ if (!it->iter.bi_size) { @@ -929,6 +896,7 @@ size_t bytes) { struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs; + struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter); BUG_ON(bytes > cursor->resid); BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter)); @@ -940,7 +908,8 @@ return false; /* no more data */ } - if (!bytes || cursor->bvec_iter.bi_bvec_done) + if (!bytes || (cursor->bvec_iter.bi_bvec_done && + page == bvec_iter_page(bvecs, cursor->bvec_iter))) return false; /* more bytes to process in this segment */ BUG_ON(cursor->last_piece); @@ -1147,16 +1116,13 @@ static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length) { struct ceph_msg_data_cursor *cursor = &msg->cursor; - struct ceph_msg_data *data; BUG_ON(!length); BUG_ON(length > msg->data_length); - BUG_ON(list_empty(&msg->data)); + BUG_ON(!msg->num_data_items); - cursor->data_head = &msg->data; cursor->total_resid = length; - data = list_first_entry(&msg->data, struct ceph_msg_data, links); - cursor->data = data; + cursor->data = msg->data; __ceph_msg_data_cursor_init(cursor); } @@ -1237,8 +1203,7 @@ if (!cursor->resid && cursor->total_resid) { WARN_ON(!cursor->last_piece); - BUG_ON(list_is_last(&cursor->data->links, cursor->data_head)); - cursor->data = list_next_entry(cursor->data, links); + cursor->data++; __ceph_msg_data_cursor_init(cursor); new_piece = true; } @@ -1254,9 +1219,6 @@ static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { - BUG_ON(!msg); - BUG_ON(!data_len); - /* Initialize data cursor */ ceph_msg_data_cursor_init(msg, (size_t)data_len); @@ -1592,11 +1554,12 @@ struct ceph_msg *msg = con->out_msg; struct ceph_msg_data_cursor *cursor = &msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); + int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; u32 crc; dout("%s %p msg %p\n", __func__, con, msg); - if (list_empty(&msg->data)) + if (!msg->num_data_items) return -EINVAL; /* @@ -1612,7 +1575,6 @@ struct page *page; size_t page_offset; size_t length; - bool last_piece; int ret; if (!cursor->resid) { @@ -1620,10 +1582,11 @@ continue; } - page = ceph_msg_data_next(cursor, &page_offset, &length, - &last_piece); - ret = ceph_tcp_sendpage(con->sock, page, page_offset, - length, !last_piece); + page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); + if (length == cursor->total_resid) + more = MSG_MORE; + ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, + more); if (ret <= 0) { if (do_datacrc) msg->footer.data_crc = cpu_to_le32(crc); @@ -1653,13 +1616,16 @@ */ static int write_partial_skip(struct ceph_connection *con) { + int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; int ret; dout("%s %p %d left\n", __func__, con, con->out_skip); while (con->out_skip > 0) { size_t size = min(con->out_skip, (int) PAGE_SIZE); - ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true); + if (size == con->out_skip) + more = MSG_MORE; + ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more); if (ret <= 0) goto out; con->out_skip -= ret; @@ -1761,12 +1727,14 @@ ret = read_partial(con, end, size, &con->actual_peer_addr); if (ret <= 0) goto out; + ceph_decode_banner_addr(&con->actual_peer_addr); size = sizeof (con->peer_addr_for_me); end += size; ret = read_partial(con, end, size, &con->peer_addr_for_me); if (ret <= 0) goto out; + ceph_decode_banner_addr(&con->peer_addr_for_me); out: return ret; @@ -1817,21 +1785,22 @@ { if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { pr_err("connect to %s got bad banner\n", - ceph_pr_addr(&con->peer_addr.in_addr)); + ceph_pr_addr(&con->peer_addr)); con->error_msg = "protocol error, bad banner"; return -1; } return 0; } -static bool addr_is_blank(struct sockaddr_storage *ss) +static bool addr_is_blank(struct ceph_entity_addr *addr) { - struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; - struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; + struct sockaddr_storage ss = addr->in_addr; /* align */ + struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr; + struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr; - switch (ss->ss_family) { + switch (ss.ss_family) { case AF_INET: - return addr->s_addr == htonl(INADDR_ANY); + return addr4->s_addr == htonl(INADDR_ANY); case AF_INET6: return ipv6_addr_any(addr6); default: @@ -1839,25 +1808,25 @@ } } -static int addr_port(struct sockaddr_storage *ss) +static int addr_port(struct ceph_entity_addr *addr) { - switch (ss->ss_family) { + switch (get_unaligned(&addr->in_addr.ss_family)) { case AF_INET: - return ntohs(((struct sockaddr_in *)ss)->sin_port); + return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port)); case AF_INET6: - return ntohs(((struct sockaddr_in6 *)ss)->sin6_port); + return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port)); } return 0; } -static void addr_set_port(struct sockaddr_storage *ss, int p) +static void addr_set_port(struct ceph_entity_addr *addr, int p) { - switch (ss->ss_family) { + switch (get_unaligned(&addr->in_addr.ss_family)) { case AF_INET: - ((struct sockaddr_in *)ss)->sin_port = htons(p); + put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port); break; case AF_INET6: - ((struct sockaddr_in6 *)ss)->sin6_port = htons(p); + put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port); break; } } @@ -1865,21 +1834,18 @@ /* * Unlike other *_pton function semantics, zero indicates success. */ -static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, +static int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr, char delim, const char **ipend) { - struct sockaddr_in *in4 = (struct sockaddr_in *) ss; - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; + memset(&addr->in_addr, 0, sizeof(addr->in_addr)); - memset(ss, 0, sizeof(*ss)); - - if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) { - ss->ss_family = AF_INET; + if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) { + put_unaligned(AF_INET, &addr->in_addr.ss_family); return 0; } - if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) { - ss->ss_family = AF_INET6; + if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) { + put_unaligned(AF_INET6, &addr->in_addr.ss_family); return 0; } @@ -1891,7 +1857,7 @@ */ #ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER static int ceph_dns_resolve_name(const char *name, size_t namelen, - struct sockaddr_storage *ss, char delim, const char **ipend) + struct ceph_entity_addr *addr, char delim, const char **ipend) { const char *end, *delim_p; char *colon_p, *ip_addr = NULL; @@ -1918,9 +1884,10 @@ return -EINVAL; /* do dns_resolve upcall */ - ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL); + ip_len = dns_query(current->nsproxy->net_ns, + NULL, name, end - name, NULL, &ip_addr, NULL, false); if (ip_len > 0) - ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL); + ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL); else ret = -ESRCH; @@ -1929,13 +1896,13 @@ *ipend = end; pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name, - ret, ret ? "failed" : ceph_pr_addr(ss)); + ret, ret ? "failed" : ceph_pr_addr(addr)); return ret; } #else static inline int ceph_dns_resolve_name(const char *name, size_t namelen, - struct sockaddr_storage *ss, char delim, const char **ipend) + struct ceph_entity_addr *addr, char delim, const char **ipend) { return -EINVAL; } @@ -1946,13 +1913,13 @@ * then try to extract a hostname to resolve using userspace DNS upcall. */ static int ceph_parse_server_name(const char *name, size_t namelen, - struct sockaddr_storage *ss, char delim, const char **ipend) + struct ceph_entity_addr *addr, char delim, const char **ipend) { int ret; - ret = ceph_pton(name, namelen, ss, delim, ipend); + ret = ceph_pton(name, namelen, addr, delim, ipend); if (ret) - ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend); + ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend); return ret; } @@ -1971,7 +1938,6 @@ dout("parse_ips on '%.*s'\n", (int)(end-c), c); for (i = 0; i < max_count; i++) { const char *ipend; - struct sockaddr_storage *ss = &addr[i].in_addr; int port; char delim = ','; @@ -1980,7 +1946,7 @@ p++; } - ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend); + ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend); if (ret) goto bad; ret = -EINVAL; @@ -2011,9 +1977,10 @@ port = CEPH_MON_PORT; } - addr_set_port(ss, port); + addr_set_port(&addr[i], port); + addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY; - dout("parse_ips got %s\n", ceph_pr_addr(ss)); + dout("parse_ips got %s\n", ceph_pr_addr(&addr[i])); if (p == end) break; @@ -2030,10 +1997,8 @@ return 0; bad: - pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); return ret; } -EXPORT_SYMBOL(ceph_parse_ips); static int process_banner(struct ceph_connection *con) { @@ -2042,9 +2007,6 @@ if (verify_hello(con) < 0) return -1; - ceph_decode_addr(&con->actual_peer_addr); - ceph_decode_addr(&con->peer_addr_for_me); - /* * Make sure the other end is who we wanted. note that the other * end may not yet know their ip address, so if it's 0.0.0.0, give @@ -2052,13 +2014,13 @@ */ if (memcmp(&con->peer_addr, &con->actual_peer_addr, sizeof(con->peer_addr)) != 0 && - !(addr_is_blank(&con->actual_peer_addr.in_addr) && + !(addr_is_blank(&con->actual_peer_addr) && con->actual_peer_addr.nonce == con->peer_addr.nonce)) { - pr_warn("wrong peer, want %s/%d, got %s/%d\n", - ceph_pr_addr(&con->peer_addr.in_addr), - (int)le32_to_cpu(con->peer_addr.nonce), - ceph_pr_addr(&con->actual_peer_addr.in_addr), - (int)le32_to_cpu(con->actual_peer_addr.nonce)); + pr_warn("wrong peer, want %s/%u, got %s/%u\n", + ceph_pr_addr(&con->peer_addr), + le32_to_cpu(con->peer_addr.nonce), + ceph_pr_addr(&con->actual_peer_addr), + le32_to_cpu(con->actual_peer_addr.nonce)); con->error_msg = "wrong peer at address"; return -1; } @@ -2066,16 +2028,16 @@ /* * did we learn our address? */ - if (addr_is_blank(&con->msgr->inst.addr.in_addr)) { - int port = addr_port(&con->msgr->inst.addr.in_addr); + if (addr_is_blank(&con->msgr->inst.addr)) { + int port = addr_port(&con->msgr->inst.addr); memcpy(&con->msgr->inst.addr.in_addr, &con->peer_addr_for_me.in_addr, sizeof(con->peer_addr_for_me.in_addr)); - addr_set_port(&con->msgr->inst.addr.in_addr, port); + addr_set_port(&con->msgr->inst.addr, port); encode_my_addr(con->msgr); dout("process_banner learned my addr is %s\n", - ceph_pr_addr(&con->msgr->inst.addr.in_addr)); + ceph_pr_addr(&con->msgr->inst.addr)); } return 0; @@ -2126,7 +2088,7 @@ pr_err("%s%lld %s feature set mismatch," " my %llx < server's %llx, missing %llx\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr), sup_feat, server_feat, server_feat & ~sup_feat); con->error_msg = "missing required protocol features"; reset_connection(con); @@ -2136,7 +2098,7 @@ pr_err("%s%lld %s protocol version mismatch," " my %d != server's %d\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->out_connect.protocol_version), le32_to_cpu(con->in_reply.protocol_version)); con->error_msg = "protocol version mismatch"; @@ -2170,7 +2132,7 @@ le32_to_cpu(con->in_reply.connect_seq)); pr_err("%s%lld %s connection reset\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr)); + ceph_pr_addr(&con->peer_addr)); reset_connection(con); con_out_kvec_reset(con); ret = prepare_write_connect(con); @@ -2227,7 +2189,7 @@ pr_err("%s%lld %s protocol feature mismatch," " my required %llx > server's %llx, need %llx\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr), req_feat, server_feat, req_feat & ~server_feat); con->error_msg = "missing required protocol features"; reset_connection(con); @@ -2356,8 +2318,7 @@ u32 crc = 0; int ret; - BUG_ON(!msg); - if (list_empty(&msg->data)) + if (!msg->num_data_items) return -EIO; if (do_datacrc) @@ -2435,7 +2396,7 @@ if ((s64)seq - (s64)con->in_seq < 1) { pr_info("skipping %s%lld %s seq %lld expected %lld\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), + ceph_pr_addr(&con->peer_addr), seq, con->in_seq + 1); con->in_base_pos = -front_len - middle_len - data_len - sizeof_footer(con); @@ -2790,7 +2751,7 @@ switch (ret) { case -EBADMSG: con->error_msg = "bad crc/signature"; - /* fall through */ + fallthrough; case -EBADE: ret = -EIO; break; @@ -2850,13 +2811,13 @@ return -ENOENT; } + dout("%s %p %lu\n", __func__, con, delay); if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { dout("%s %p - already queued\n", __func__, con); con->ops->put(con); return -EBUSY; } - dout("%s %p %lu\n", __func__, con, delay); return 0; } @@ -3014,10 +2975,10 @@ static void con_fault(struct ceph_connection *con) { dout("fault %p state %lu to peer %s\n", - con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); + con, con->state, ceph_pr_addr(&con->peer_addr)); pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); + ceph_pr_addr(&con->peer_addr), con->error_msg); con->error_msg = NULL; WARN_ON(con->state != CON_STATE_CONNECTING && @@ -3066,6 +3027,12 @@ } +void ceph_messenger_reset_nonce(struct ceph_messenger *msgr) +{ + u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000; + msgr->inst.addr.nonce = cpu_to_le32(nonce); + encode_my_addr(msgr); +} /* * initialize a new messenger instance @@ -3271,49 +3238,37 @@ return false; } -static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) +static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg) { - struct ceph_msg_data *data; - - if (WARN_ON(!ceph_msg_data_type_valid(type))) - return NULL; - - data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS); - if (!data) - return NULL; - - data->type = type; - INIT_LIST_HEAD(&data->links); - - return data; + BUG_ON(msg->num_data_items >= msg->max_data_items); + return &msg->data[msg->num_data_items++]; } static void ceph_msg_data_destroy(struct ceph_msg_data *data) { - if (!data) - return; - - WARN_ON(!list_empty(&data->links)); - if (data->type == CEPH_MSG_DATA_PAGELIST) + if (data->type == CEPH_MSG_DATA_PAGES && data->own_pages) { + int num_pages = calc_pages_for(data->alignment, data->length); + ceph_release_page_vector(data->pages, num_pages); + } else if (data->type == CEPH_MSG_DATA_PAGELIST) { ceph_pagelist_release(data->pagelist); - kmem_cache_free(ceph_msg_data_cache, data); + } } void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, - size_t length, size_t alignment) + size_t length, size_t alignment, bool own_pages) { struct ceph_msg_data *data; BUG_ON(!pages); BUG_ON(!length); - data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_PAGES; data->pages = pages; data->length = length; data->alignment = alignment & ~PAGE_MASK; + data->own_pages = own_pages; - list_add_tail(&data->links, &msg->data); msg->data_length += length; } EXPORT_SYMBOL(ceph_msg_data_add_pages); @@ -3326,11 +3281,11 @@ BUG_ON(!pagelist); BUG_ON(!pagelist->length); - data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_PAGELIST; + refcount_inc(&pagelist->refcnt); data->pagelist = pagelist; - list_add_tail(&data->links, &msg->data); msg->data_length += pagelist->length; } EXPORT_SYMBOL(ceph_msg_data_add_pagelist); @@ -3341,12 +3296,11 @@ { struct ceph_msg_data *data; - data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_BIO; data->bio_pos = *bio_pos; data->bio_length = length; - list_add_tail(&data->links, &msg->data); msg->data_length += length; } EXPORT_SYMBOL(ceph_msg_data_add_bio); @@ -3357,11 +3311,10 @@ { struct ceph_msg_data *data; - data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_BVECS; data->bvec_pos = *bvec_pos; - list_add_tail(&data->links, &msg->data); msg->data_length += bvec_pos->iter.bi_size; } EXPORT_SYMBOL(ceph_msg_data_add_bvecs); @@ -3370,8 +3323,8 @@ * construct a new message with given type, size * the new msg has a ref count of 1. */ -struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, - bool can_fail) +struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, + gfp_t flags, bool can_fail) { struct ceph_msg *m; @@ -3385,7 +3338,6 @@ INIT_LIST_HEAD(&m->list_head); kref_init(&m->kref); - INIT_LIST_HEAD(&m->data); /* front */ if (front_len) { @@ -3399,6 +3351,15 @@ m->front.iov_base = NULL; } m->front_alloc_len = m->front.iov_len = front_len; + + if (max_data_items) { + m->data = kmalloc_array(max_data_items, sizeof(*m->data), + flags); + if (!m->data) + goto out2; + + m->max_data_items = max_data_items; + } dout("ceph_msg_new %p front %d\n", m, front_len); return m; @@ -3415,6 +3376,13 @@ front_len); } return NULL; +} +EXPORT_SYMBOL(ceph_msg_new2); + +struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, + bool can_fail) +{ + return ceph_msg_new2(type, front_len, 0, flags, can_fail); } EXPORT_SYMBOL(ceph_msg_new); @@ -3511,13 +3479,14 @@ { dout("%s %p\n", __func__, m); kvfree(m->front.iov_base); + kfree(m->data); kmem_cache_free(ceph_msg_cache, m); } static void ceph_msg_release(struct kref *kref) { struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); - struct ceph_msg_data *data, *next; + int i; dout("%s %p\n", __func__, m); WARN_ON(!list_empty(&m->list_head)); @@ -3530,11 +3499,8 @@ m->middle = NULL; } - list_for_each_entry_safe(data, next, &m->data, links) { - list_del_init(&data->links); - ceph_msg_data_destroy(data); - } - m->data_length = 0; + for (i = 0; i < m->num_data_items; i++) + ceph_msg_data_destroy(&m->data[i]); if (m->pool) ceph_msgpool_put(m->pool, m); -- Gitblit v1.6.2