hc
2024-10-22 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5
kernel/net/ceph/messenger.c
....@@ -156,7 +156,6 @@
156156 /* Slab caches for frequently-allocated structures */
157157
158158 static struct kmem_cache *ceph_msg_cache;
159
-static struct kmem_cache *ceph_msg_data_cache;
160159
161160 /* static tag bytes (protocol control messages) */
162161 static char tag_msg = CEPH_MSGR_TAG_MSG;
....@@ -187,30 +186,33 @@
187186
188187 static struct page *zero_page; /* used in certain error cases */
189188
190
-const char *ceph_pr_addr(const struct sockaddr_storage *ss)
189
+const char *ceph_pr_addr(const struct ceph_entity_addr *addr)
191190 {
192191 int i;
193192 char *s;
194
- struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
195
- struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
193
+ struct sockaddr_storage ss = addr->in_addr; /* align */
194
+ struct sockaddr_in *in4 = (struct sockaddr_in *)&ss;
195
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss;
196196
197197 i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK;
198198 s = addr_str[i];
199199
200
- switch (ss->ss_family) {
200
+ switch (ss.ss_family) {
201201 case AF_INET:
202
- snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr,
202
+ snprintf(s, MAX_ADDR_STR_LEN, "(%d)%pI4:%hu",
203
+ le32_to_cpu(addr->type), &in4->sin_addr,
203204 ntohs(in4->sin_port));
204205 break;
205206
206207 case AF_INET6:
207
- snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr,
208
+ snprintf(s, MAX_ADDR_STR_LEN, "(%d)[%pI6c]:%hu",
209
+ le32_to_cpu(addr->type), &in6->sin6_addr,
208210 ntohs(in6->sin6_port));
209211 break;
210212
211213 default:
212214 snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)",
213
- ss->ss_family);
215
+ ss.ss_family);
214216 }
215217
216218 return s;
....@@ -220,7 +222,7 @@
220222 static void encode_my_addr(struct ceph_messenger *msgr)
221223 {
222224 memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr));
223
- ceph_encode_addr(&msgr->my_enc_addr);
225
+ ceph_encode_banner_addr(&msgr->my_enc_addr);
224226 }
225227
226228 /*
....@@ -235,23 +237,11 @@
235237 if (!ceph_msg_cache)
236238 return -ENOMEM;
237239
238
- BUG_ON(ceph_msg_data_cache);
239
- ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0);
240
- if (ceph_msg_data_cache)
241
- return 0;
242
-
243
- kmem_cache_destroy(ceph_msg_cache);
244
- ceph_msg_cache = NULL;
245
-
246
- return -ENOMEM;
240
+ return 0;
247241 }
248242
249243 static void ceph_msgr_slab_exit(void)
250244 {
251
- BUG_ON(!ceph_msg_data_cache);
252
- kmem_cache_destroy(ceph_msg_data_cache);
253
- ceph_msg_data_cache = NULL;
254
-
255245 BUG_ON(!ceph_msg_cache);
256246 kmem_cache_destroy(ceph_msg_cache);
257247 ceph_msg_cache = NULL;
....@@ -422,7 +412,7 @@
422412 switch (sk->sk_state) {
423413 case TCP_CLOSE:
424414 dout("%s TCP_CLOSE\n", __func__);
425
- /* fall through */
415
+ fallthrough;
426416 case TCP_CLOSE_WAIT:
427417 dout("%s TCP_CLOSE_WAIT\n", __func__);
428418 con_sock_state_closing(con);
....@@ -462,7 +452,7 @@
462452 */
463453 static int ceph_tcp_connect(struct ceph_connection *con)
464454 {
465
- struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
455
+ struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */
466456 struct socket *sock;
467457 unsigned int noio_flag;
468458 int ret;
....@@ -471,7 +461,7 @@
471461
472462 /* sock_create_kern() allocates with GFP_KERNEL */
473463 noio_flag = memalloc_noio_save();
474
- ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
464
+ ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family,
475465 SOCK_STREAM, IPPROTO_TCP, &sock);
476466 memalloc_noio_restore(noio_flag);
477467 if (ret)
....@@ -484,31 +474,24 @@
484474
485475 set_sock_callbacks(sock, con);
486476
487
- dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
477
+ dout("connect %s\n", ceph_pr_addr(&con->peer_addr));
488478
489479 con_sock_state_connecting(con);
490
- ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
480
+ ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss),
491481 O_NONBLOCK);
492482 if (ret == -EINPROGRESS) {
493483 dout("connect %s EINPROGRESS sk_state = %u\n",
494
- ceph_pr_addr(&con->peer_addr.in_addr),
484
+ ceph_pr_addr(&con->peer_addr),
495485 sock->sk->sk_state);
496486 } else if (ret < 0) {
497487 pr_err("connect %s error %d\n",
498
- ceph_pr_addr(&con->peer_addr.in_addr), ret);
488
+ ceph_pr_addr(&con->peer_addr), ret);
499489 sock_release(sock);
500490 return ret;
501491 }
502492
503
- if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
504
- int optval = 1;
505
-
506
- ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
507
- (char *)&optval, sizeof(optval));
508
- if (ret)
509
- pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d",
510
- ret);
511
- }
493
+ if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY))
494
+ tcp_sock_set_nodelay(sock->sk);
512495
513496 con->sock = sock;
514497 return 0;
....@@ -526,7 +509,7 @@
526509 if (!buf)
527510 msg.msg_flags |= MSG_TRUNC;
528511
529
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
512
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len);
530513 r = sock_recvmsg(sock, &msg, msg.msg_flags);
531514 if (r == -EAGAIN)
532515 r = 0;
....@@ -545,7 +528,7 @@
545528 int r;
546529
547530 BUG_ON(page_offset + length > PAGE_SIZE);
548
- iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length);
531
+ iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length);
549532 r = sock_recvmsg(sock, &msg, msg.msg_flags);
550533 if (r == -EAGAIN)
551534 r = 0;
....@@ -557,7 +540,7 @@
557540 * shortly.
558541 */
559542 static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
560
- size_t kvlen, size_t len, int more)
543
+ size_t kvlen, size_t len, bool more)
561544 {
562545 struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
563546 int r;
....@@ -573,24 +556,15 @@
573556 return r;
574557 }
575558
576
-static int __ceph_tcp_sendpage(struct socket *sock, struct page *page,
577
- int offset, size_t size, bool more)
578
-{
579
- int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
580
- int ret;
581
-
582
- ret = kernel_sendpage(sock, page, offset, size, flags);
583
- if (ret == -EAGAIN)
584
- ret = 0;
585
-
586
- return ret;
587
-}
588
-
559
+/*
560
+ * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST
561
+ */
589562 static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
590
- int offset, size_t size, bool more)
563
+ int offset, size_t size, int more)
591564 {
592
- struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
593
- struct bio_vec bvec;
565
+ ssize_t (*sendpage)(struct socket *sock, struct page *page,
566
+ int offset, size_t size, int flags);
567
+ int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
594568 int ret;
595569
596570 /*
....@@ -601,20 +575,12 @@
601575 * coalescing neighboring slab objects into a single frag which
602576 * triggers one of hardened usercopy checks.
603577 */
604
- if (page_count(page) >= 1 && !PageSlab(page))
605
- return __ceph_tcp_sendpage(sock, page, offset, size, more);
606
-
607
- bvec.bv_page = page;
608
- bvec.bv_offset = offset;
609
- bvec.bv_len = size;
610
-
611
- if (more)
612
- msg.msg_flags |= MSG_MORE;
578
+ if (sendpage_ok(page))
579
+ sendpage = sock->ops->sendpage;
613580 else
614
- msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
581
+ sendpage = sock_no_sendpage;
615582
616
- iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size);
617
- ret = sock_sendmsg(sock, &msg);
583
+ ret = sendpage(sock, page, offset, size, flags);
618584 if (ret == -EAGAIN)
619585 ret = 0;
620586
....@@ -699,8 +665,7 @@
699665 void ceph_con_close(struct ceph_connection *con)
700666 {
701667 mutex_lock(&con->mutex);
702
- dout("con_close %p peer %s\n", con,
703
- ceph_pr_addr(&con->peer_addr.in_addr));
668
+ dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr));
704669 con->state = CON_STATE_CLOSED;
705670
706671 con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */
....@@ -724,7 +689,7 @@
724689 struct ceph_entity_addr *addr)
725690 {
726691 mutex_lock(&con->mutex);
727
- dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
692
+ dout("con_open %p %s\n", con, ceph_pr_addr(addr));
728693
729694 WARN_ON(con->state != CON_STATE_CLOSED);
730695 con->state = CON_STATE_PREOPEN;
....@@ -870,6 +835,7 @@
870835 size_t bytes)
871836 {
872837 struct ceph_bio_iter *it = &cursor->bio_iter;
838
+ struct page *page = bio_iter_page(it->bio, it->iter);
873839
874840 BUG_ON(bytes > cursor->resid);
875841 BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
....@@ -881,7 +847,8 @@
881847 return false; /* no more data */
882848 }
883849
884
- if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
850
+ if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done &&
851
+ page == bio_iter_page(it->bio, it->iter)))
885852 return false; /* more bytes to process in this segment */
886853
887854 if (!it->iter.bi_size) {
....@@ -929,6 +896,7 @@
929896 size_t bytes)
930897 {
931898 struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs;
899
+ struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter);
932900
933901 BUG_ON(bytes > cursor->resid);
934902 BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter));
....@@ -940,7 +908,8 @@
940908 return false; /* no more data */
941909 }
942910
943
- if (!bytes || cursor->bvec_iter.bi_bvec_done)
911
+ if (!bytes || (cursor->bvec_iter.bi_bvec_done &&
912
+ page == bvec_iter_page(bvecs, cursor->bvec_iter)))
944913 return false; /* more bytes to process in this segment */
945914
946915 BUG_ON(cursor->last_piece);
....@@ -1147,16 +1116,13 @@
11471116 static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
11481117 {
11491118 struct ceph_msg_data_cursor *cursor = &msg->cursor;
1150
- struct ceph_msg_data *data;
11511119
11521120 BUG_ON(!length);
11531121 BUG_ON(length > msg->data_length);
1154
- BUG_ON(list_empty(&msg->data));
1122
+ BUG_ON(!msg->num_data_items);
11551123
1156
- cursor->data_head = &msg->data;
11571124 cursor->total_resid = length;
1158
- data = list_first_entry(&msg->data, struct ceph_msg_data, links);
1159
- cursor->data = data;
1125
+ cursor->data = msg->data;
11601126
11611127 __ceph_msg_data_cursor_init(cursor);
11621128 }
....@@ -1237,8 +1203,7 @@
12371203
12381204 if (!cursor->resid && cursor->total_resid) {
12391205 WARN_ON(!cursor->last_piece);
1240
- BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
1241
- cursor->data = list_next_entry(cursor->data, links);
1206
+ cursor->data++;
12421207 __ceph_msg_data_cursor_init(cursor);
12431208 new_piece = true;
12441209 }
....@@ -1254,9 +1219,6 @@
12541219
12551220 static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
12561221 {
1257
- BUG_ON(!msg);
1258
- BUG_ON(!data_len);
1259
-
12601222 /* Initialize data cursor */
12611223
12621224 ceph_msg_data_cursor_init(msg, (size_t)data_len);
....@@ -1592,11 +1554,12 @@
15921554 struct ceph_msg *msg = con->out_msg;
15931555 struct ceph_msg_data_cursor *cursor = &msg->cursor;
15941556 bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
1557
+ int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
15951558 u32 crc;
15961559
15971560 dout("%s %p msg %p\n", __func__, con, msg);
15981561
1599
- if (list_empty(&msg->data))
1562
+ if (!msg->num_data_items)
16001563 return -EINVAL;
16011564
16021565 /*
....@@ -1612,7 +1575,6 @@
16121575 struct page *page;
16131576 size_t page_offset;
16141577 size_t length;
1615
- bool last_piece;
16161578 int ret;
16171579
16181580 if (!cursor->resid) {
....@@ -1620,10 +1582,11 @@
16201582 continue;
16211583 }
16221584
1623
- page = ceph_msg_data_next(cursor, &page_offset, &length,
1624
- &last_piece);
1625
- ret = ceph_tcp_sendpage(con->sock, page, page_offset,
1626
- length, !last_piece);
1585
+ page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
1586
+ if (length == cursor->total_resid)
1587
+ more = MSG_MORE;
1588
+ ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
1589
+ more);
16271590 if (ret <= 0) {
16281591 if (do_datacrc)
16291592 msg->footer.data_crc = cpu_to_le32(crc);
....@@ -1653,13 +1616,16 @@
16531616 */
16541617 static int write_partial_skip(struct ceph_connection *con)
16551618 {
1619
+ int more = MSG_MORE | MSG_SENDPAGE_NOTLAST;
16561620 int ret;
16571621
16581622 dout("%s %p %d left\n", __func__, con, con->out_skip);
16591623 while (con->out_skip > 0) {
16601624 size_t size = min(con->out_skip, (int) PAGE_SIZE);
16611625
1662
- ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
1626
+ if (size == con->out_skip)
1627
+ more = MSG_MORE;
1628
+ ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more);
16631629 if (ret <= 0)
16641630 goto out;
16651631 con->out_skip -= ret;
....@@ -1761,12 +1727,14 @@
17611727 ret = read_partial(con, end, size, &con->actual_peer_addr);
17621728 if (ret <= 0)
17631729 goto out;
1730
+ ceph_decode_banner_addr(&con->actual_peer_addr);
17641731
17651732 size = sizeof (con->peer_addr_for_me);
17661733 end += size;
17671734 ret = read_partial(con, end, size, &con->peer_addr_for_me);
17681735 if (ret <= 0)
17691736 goto out;
1737
+ ceph_decode_banner_addr(&con->peer_addr_for_me);
17701738
17711739 out:
17721740 return ret;
....@@ -1817,21 +1785,22 @@
18171785 {
18181786 if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
18191787 pr_err("connect to %s got bad banner\n",
1820
- ceph_pr_addr(&con->peer_addr.in_addr));
1788
+ ceph_pr_addr(&con->peer_addr));
18211789 con->error_msg = "protocol error, bad banner";
18221790 return -1;
18231791 }
18241792 return 0;
18251793 }
18261794
1827
-static bool addr_is_blank(struct sockaddr_storage *ss)
1795
+static bool addr_is_blank(struct ceph_entity_addr *addr)
18281796 {
1829
- struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr;
1830
- struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr;
1797
+ struct sockaddr_storage ss = addr->in_addr; /* align */
1798
+ struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr;
1799
+ struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr;
18311800
1832
- switch (ss->ss_family) {
1801
+ switch (ss.ss_family) {
18331802 case AF_INET:
1834
- return addr->s_addr == htonl(INADDR_ANY);
1803
+ return addr4->s_addr == htonl(INADDR_ANY);
18351804 case AF_INET6:
18361805 return ipv6_addr_any(addr6);
18371806 default:
....@@ -1839,25 +1808,25 @@
18391808 }
18401809 }
18411810
1842
-static int addr_port(struct sockaddr_storage *ss)
1811
+static int addr_port(struct ceph_entity_addr *addr)
18431812 {
1844
- switch (ss->ss_family) {
1813
+ switch (get_unaligned(&addr->in_addr.ss_family)) {
18451814 case AF_INET:
1846
- return ntohs(((struct sockaddr_in *)ss)->sin_port);
1815
+ return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port));
18471816 case AF_INET6:
1848
- return ntohs(((struct sockaddr_in6 *)ss)->sin6_port);
1817
+ return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port));
18491818 }
18501819 return 0;
18511820 }
18521821
1853
-static void addr_set_port(struct sockaddr_storage *ss, int p)
1822
+static void addr_set_port(struct ceph_entity_addr *addr, int p)
18541823 {
1855
- switch (ss->ss_family) {
1824
+ switch (get_unaligned(&addr->in_addr.ss_family)) {
18561825 case AF_INET:
1857
- ((struct sockaddr_in *)ss)->sin_port = htons(p);
1826
+ put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port);
18581827 break;
18591828 case AF_INET6:
1860
- ((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
1829
+ put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port);
18611830 break;
18621831 }
18631832 }
....@@ -1865,21 +1834,18 @@
18651834 /*
18661835 * Unlike other *_pton function semantics, zero indicates success.
18671836 */
1868
-static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss,
1837
+static int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr,
18691838 char delim, const char **ipend)
18701839 {
1871
- struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
1872
- struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
1840
+ memset(&addr->in_addr, 0, sizeof(addr->in_addr));
18731841
1874
- memset(ss, 0, sizeof(*ss));
1875
-
1876
- if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) {
1877
- ss->ss_family = AF_INET;
1842
+ if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) {
1843
+ put_unaligned(AF_INET, &addr->in_addr.ss_family);
18781844 return 0;
18791845 }
18801846
1881
- if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) {
1882
- ss->ss_family = AF_INET6;
1847
+ if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) {
1848
+ put_unaligned(AF_INET6, &addr->in_addr.ss_family);
18831849 return 0;
18841850 }
18851851
....@@ -1891,7 +1857,7 @@
18911857 */
18921858 #ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER
18931859 static int ceph_dns_resolve_name(const char *name, size_t namelen,
1894
- struct sockaddr_storage *ss, char delim, const char **ipend)
1860
+ struct ceph_entity_addr *addr, char delim, const char **ipend)
18951861 {
18961862 const char *end, *delim_p;
18971863 char *colon_p, *ip_addr = NULL;
....@@ -1918,9 +1884,10 @@
19181884 return -EINVAL;
19191885
19201886 /* do dns_resolve upcall */
1921
- ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL);
1887
+ ip_len = dns_query(current->nsproxy->net_ns,
1888
+ NULL, name, end - name, NULL, &ip_addr, NULL, false);
19221889 if (ip_len > 0)
1923
- ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL);
1890
+ ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL);
19241891 else
19251892 ret = -ESRCH;
19261893
....@@ -1929,13 +1896,13 @@
19291896 *ipend = end;
19301897
19311898 pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name,
1932
- ret, ret ? "failed" : ceph_pr_addr(ss));
1899
+ ret, ret ? "failed" : ceph_pr_addr(addr));
19331900
19341901 return ret;
19351902 }
19361903 #else
19371904 static inline int ceph_dns_resolve_name(const char *name, size_t namelen,
1938
- struct sockaddr_storage *ss, char delim, const char **ipend)
1905
+ struct ceph_entity_addr *addr, char delim, const char **ipend)
19391906 {
19401907 return -EINVAL;
19411908 }
....@@ -1946,13 +1913,13 @@
19461913 * then try to extract a hostname to resolve using userspace DNS upcall.
19471914 */
19481915 static int ceph_parse_server_name(const char *name, size_t namelen,
1949
- struct sockaddr_storage *ss, char delim, const char **ipend)
1916
+ struct ceph_entity_addr *addr, char delim, const char **ipend)
19501917 {
19511918 int ret;
19521919
1953
- ret = ceph_pton(name, namelen, ss, delim, ipend);
1920
+ ret = ceph_pton(name, namelen, addr, delim, ipend);
19541921 if (ret)
1955
- ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend);
1922
+ ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend);
19561923
19571924 return ret;
19581925 }
....@@ -1971,7 +1938,6 @@
19711938 dout("parse_ips on '%.*s'\n", (int)(end-c), c);
19721939 for (i = 0; i < max_count; i++) {
19731940 const char *ipend;
1974
- struct sockaddr_storage *ss = &addr[i].in_addr;
19751941 int port;
19761942 char delim = ',';
19771943
....@@ -1980,7 +1946,7 @@
19801946 p++;
19811947 }
19821948
1983
- ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend);
1949
+ ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
19841950 if (ret)
19851951 goto bad;
19861952 ret = -EINVAL;
....@@ -2011,9 +1977,10 @@
20111977 port = CEPH_MON_PORT;
20121978 }
20131979
2014
- addr_set_port(ss, port);
1980
+ addr_set_port(&addr[i], port);
1981
+ addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
20151982
2016
- dout("parse_ips got %s\n", ceph_pr_addr(ss));
1983
+ dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
20171984
20181985 if (p == end)
20191986 break;
....@@ -2030,10 +1997,8 @@
20301997 return 0;
20311998
20321999 bad:
2033
- pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
20342000 return ret;
20352001 }
2036
-EXPORT_SYMBOL(ceph_parse_ips);
20372002
20382003 static int process_banner(struct ceph_connection *con)
20392004 {
....@@ -2042,9 +2007,6 @@
20422007 if (verify_hello(con) < 0)
20432008 return -1;
20442009
2045
- ceph_decode_addr(&con->actual_peer_addr);
2046
- ceph_decode_addr(&con->peer_addr_for_me);
2047
-
20482010 /*
20492011 * Make sure the other end is who we wanted. note that the other
20502012 * end may not yet know their ip address, so if it's 0.0.0.0, give
....@@ -2052,13 +2014,13 @@
20522014 */
20532015 if (memcmp(&con->peer_addr, &con->actual_peer_addr,
20542016 sizeof(con->peer_addr)) != 0 &&
2055
- !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
2017
+ !(addr_is_blank(&con->actual_peer_addr) &&
20562018 con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
2057
- pr_warn("wrong peer, want %s/%d, got %s/%d\n",
2058
- ceph_pr_addr(&con->peer_addr.in_addr),
2059
- (int)le32_to_cpu(con->peer_addr.nonce),
2060
- ceph_pr_addr(&con->actual_peer_addr.in_addr),
2061
- (int)le32_to_cpu(con->actual_peer_addr.nonce));
2019
+ pr_warn("wrong peer, want %s/%u, got %s/%u\n",
2020
+ ceph_pr_addr(&con->peer_addr),
2021
+ le32_to_cpu(con->peer_addr.nonce),
2022
+ ceph_pr_addr(&con->actual_peer_addr),
2023
+ le32_to_cpu(con->actual_peer_addr.nonce));
20622024 con->error_msg = "wrong peer at address";
20632025 return -1;
20642026 }
....@@ -2066,16 +2028,16 @@
20662028 /*
20672029 * did we learn our address?
20682030 */
2069
- if (addr_is_blank(&con->msgr->inst.addr.in_addr)) {
2070
- int port = addr_port(&con->msgr->inst.addr.in_addr);
2031
+ if (addr_is_blank(&con->msgr->inst.addr)) {
2032
+ int port = addr_port(&con->msgr->inst.addr);
20712033
20722034 memcpy(&con->msgr->inst.addr.in_addr,
20732035 &con->peer_addr_for_me.in_addr,
20742036 sizeof(con->peer_addr_for_me.in_addr));
2075
- addr_set_port(&con->msgr->inst.addr.in_addr, port);
2037
+ addr_set_port(&con->msgr->inst.addr, port);
20762038 encode_my_addr(con->msgr);
20772039 dout("process_banner learned my addr is %s\n",
2078
- ceph_pr_addr(&con->msgr->inst.addr.in_addr));
2040
+ ceph_pr_addr(&con->msgr->inst.addr));
20792041 }
20802042
20812043 return 0;
....@@ -2126,7 +2088,7 @@
21262088 pr_err("%s%lld %s feature set mismatch,"
21272089 " my %llx < server's %llx, missing %llx\n",
21282090 ENTITY_NAME(con->peer_name),
2129
- ceph_pr_addr(&con->peer_addr.in_addr),
2091
+ ceph_pr_addr(&con->peer_addr),
21302092 sup_feat, server_feat, server_feat & ~sup_feat);
21312093 con->error_msg = "missing required protocol features";
21322094 reset_connection(con);
....@@ -2136,7 +2098,7 @@
21362098 pr_err("%s%lld %s protocol version mismatch,"
21372099 " my %d != server's %d\n",
21382100 ENTITY_NAME(con->peer_name),
2139
- ceph_pr_addr(&con->peer_addr.in_addr),
2101
+ ceph_pr_addr(&con->peer_addr),
21402102 le32_to_cpu(con->out_connect.protocol_version),
21412103 le32_to_cpu(con->in_reply.protocol_version));
21422104 con->error_msg = "protocol version mismatch";
....@@ -2170,7 +2132,7 @@
21702132 le32_to_cpu(con->in_reply.connect_seq));
21712133 pr_err("%s%lld %s connection reset\n",
21722134 ENTITY_NAME(con->peer_name),
2173
- ceph_pr_addr(&con->peer_addr.in_addr));
2135
+ ceph_pr_addr(&con->peer_addr));
21742136 reset_connection(con);
21752137 con_out_kvec_reset(con);
21762138 ret = prepare_write_connect(con);
....@@ -2227,7 +2189,7 @@
22272189 pr_err("%s%lld %s protocol feature mismatch,"
22282190 " my required %llx > server's %llx, need %llx\n",
22292191 ENTITY_NAME(con->peer_name),
2230
- ceph_pr_addr(&con->peer_addr.in_addr),
2192
+ ceph_pr_addr(&con->peer_addr),
22312193 req_feat, server_feat, req_feat & ~server_feat);
22322194 con->error_msg = "missing required protocol features";
22332195 reset_connection(con);
....@@ -2356,8 +2318,7 @@
23562318 u32 crc = 0;
23572319 int ret;
23582320
2359
- BUG_ON(!msg);
2360
- if (list_empty(&msg->data))
2321
+ if (!msg->num_data_items)
23612322 return -EIO;
23622323
23632324 if (do_datacrc)
....@@ -2435,7 +2396,7 @@
24352396 if ((s64)seq - (s64)con->in_seq < 1) {
24362397 pr_info("skipping %s%lld %s seq %lld expected %lld\n",
24372398 ENTITY_NAME(con->peer_name),
2438
- ceph_pr_addr(&con->peer_addr.in_addr),
2399
+ ceph_pr_addr(&con->peer_addr),
24392400 seq, con->in_seq + 1);
24402401 con->in_base_pos = -front_len - middle_len - data_len -
24412402 sizeof_footer(con);
....@@ -2790,7 +2751,7 @@
27902751 switch (ret) {
27912752 case -EBADMSG:
27922753 con->error_msg = "bad crc/signature";
2793
- /* fall through */
2754
+ fallthrough;
27942755 case -EBADE:
27952756 ret = -EIO;
27962757 break;
....@@ -2850,13 +2811,13 @@
28502811 return -ENOENT;
28512812 }
28522813
2814
+ dout("%s %p %lu\n", __func__, con, delay);
28532815 if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
28542816 dout("%s %p - already queued\n", __func__, con);
28552817 con->ops->put(con);
28562818 return -EBUSY;
28572819 }
28582820
2859
- dout("%s %p %lu\n", __func__, con, delay);
28602821 return 0;
28612822 }
28622823
....@@ -3014,10 +2975,10 @@
30142975 static void con_fault(struct ceph_connection *con)
30152976 {
30162977 dout("fault %p state %lu to peer %s\n",
3017
- con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
2978
+ con, con->state, ceph_pr_addr(&con->peer_addr));
30182979
30192980 pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
3020
- ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
2981
+ ceph_pr_addr(&con->peer_addr), con->error_msg);
30212982 con->error_msg = NULL;
30222983
30232984 WARN_ON(con->state != CON_STATE_CONNECTING &&
....@@ -3066,6 +3027,12 @@
30663027 }
30673028
30683029
3030
+void ceph_messenger_reset_nonce(struct ceph_messenger *msgr)
3031
+{
3032
+ u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000;
3033
+ msgr->inst.addr.nonce = cpu_to_le32(nonce);
3034
+ encode_my_addr(msgr);
3035
+}
30693036
30703037 /*
30713038 * initialize a new messenger instance
....@@ -3271,49 +3238,37 @@
32713238 return false;
32723239 }
32733240
3274
-static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
3241
+static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg)
32753242 {
3276
- struct ceph_msg_data *data;
3277
-
3278
- if (WARN_ON(!ceph_msg_data_type_valid(type)))
3279
- return NULL;
3280
-
3281
- data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
3282
- if (!data)
3283
- return NULL;
3284
-
3285
- data->type = type;
3286
- INIT_LIST_HEAD(&data->links);
3287
-
3288
- return data;
3243
+ BUG_ON(msg->num_data_items >= msg->max_data_items);
3244
+ return &msg->data[msg->num_data_items++];
32893245 }
32903246
32913247 static void ceph_msg_data_destroy(struct ceph_msg_data *data)
32923248 {
3293
- if (!data)
3294
- return;
3295
-
3296
- WARN_ON(!list_empty(&data->links));
3297
- if (data->type == CEPH_MSG_DATA_PAGELIST)
3249
+ if (data->type == CEPH_MSG_DATA_PAGES && data->own_pages) {
3250
+ int num_pages = calc_pages_for(data->alignment, data->length);
3251
+ ceph_release_page_vector(data->pages, num_pages);
3252
+ } else if (data->type == CEPH_MSG_DATA_PAGELIST) {
32983253 ceph_pagelist_release(data->pagelist);
3299
- kmem_cache_free(ceph_msg_data_cache, data);
3254
+ }
33003255 }
33013256
33023257 void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
3303
- size_t length, size_t alignment)
3258
+ size_t length, size_t alignment, bool own_pages)
33043259 {
33053260 struct ceph_msg_data *data;
33063261
33073262 BUG_ON(!pages);
33083263 BUG_ON(!length);
33093264
3310
- data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
3311
- BUG_ON(!data);
3265
+ data = ceph_msg_data_add(msg);
3266
+ data->type = CEPH_MSG_DATA_PAGES;
33123267 data->pages = pages;
33133268 data->length = length;
33143269 data->alignment = alignment & ~PAGE_MASK;
3270
+ data->own_pages = own_pages;
33153271
3316
- list_add_tail(&data->links, &msg->data);
33173272 msg->data_length += length;
33183273 }
33193274 EXPORT_SYMBOL(ceph_msg_data_add_pages);
....@@ -3326,11 +3281,11 @@
33263281 BUG_ON(!pagelist);
33273282 BUG_ON(!pagelist->length);
33283283
3329
- data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
3330
- BUG_ON(!data);
3284
+ data = ceph_msg_data_add(msg);
3285
+ data->type = CEPH_MSG_DATA_PAGELIST;
3286
+ refcount_inc(&pagelist->refcnt);
33313287 data->pagelist = pagelist;
33323288
3333
- list_add_tail(&data->links, &msg->data);
33343289 msg->data_length += pagelist->length;
33353290 }
33363291 EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
....@@ -3341,12 +3296,11 @@
33413296 {
33423297 struct ceph_msg_data *data;
33433298
3344
- data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
3345
- BUG_ON(!data);
3299
+ data = ceph_msg_data_add(msg);
3300
+ data->type = CEPH_MSG_DATA_BIO;
33463301 data->bio_pos = *bio_pos;
33473302 data->bio_length = length;
33483303
3349
- list_add_tail(&data->links, &msg->data);
33503304 msg->data_length += length;
33513305 }
33523306 EXPORT_SYMBOL(ceph_msg_data_add_bio);
....@@ -3357,11 +3311,10 @@
33573311 {
33583312 struct ceph_msg_data *data;
33593313
3360
- data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS);
3361
- BUG_ON(!data);
3314
+ data = ceph_msg_data_add(msg);
3315
+ data->type = CEPH_MSG_DATA_BVECS;
33623316 data->bvec_pos = *bvec_pos;
33633317
3364
- list_add_tail(&data->links, &msg->data);
33653318 msg->data_length += bvec_pos->iter.bi_size;
33663319 }
33673320 EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
....@@ -3370,8 +3323,8 @@
33703323 * construct a new message with given type, size
33713324 * the new msg has a ref count of 1.
33723325 */
3373
-struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
3374
- bool can_fail)
3326
+struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
3327
+ gfp_t flags, bool can_fail)
33753328 {
33763329 struct ceph_msg *m;
33773330
....@@ -3385,7 +3338,6 @@
33853338
33863339 INIT_LIST_HEAD(&m->list_head);
33873340 kref_init(&m->kref);
3388
- INIT_LIST_HEAD(&m->data);
33893341
33903342 /* front */
33913343 if (front_len) {
....@@ -3399,6 +3351,15 @@
33993351 m->front.iov_base = NULL;
34003352 }
34013353 m->front_alloc_len = m->front.iov_len = front_len;
3354
+
3355
+ if (max_data_items) {
3356
+ m->data = kmalloc_array(max_data_items, sizeof(*m->data),
3357
+ flags);
3358
+ if (!m->data)
3359
+ goto out2;
3360
+
3361
+ m->max_data_items = max_data_items;
3362
+ }
34023363
34033364 dout("ceph_msg_new %p front %d\n", m, front_len);
34043365 return m;
....@@ -3415,6 +3376,13 @@
34153376 front_len);
34163377 }
34173378 return NULL;
3379
+}
3380
+EXPORT_SYMBOL(ceph_msg_new2);
3381
+
3382
+struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
3383
+ bool can_fail)
3384
+{
3385
+ return ceph_msg_new2(type, front_len, 0, flags, can_fail);
34183386 }
34193387 EXPORT_SYMBOL(ceph_msg_new);
34203388
....@@ -3511,13 +3479,14 @@
35113479 {
35123480 dout("%s %p\n", __func__, m);
35133481 kvfree(m->front.iov_base);
3482
+ kfree(m->data);
35143483 kmem_cache_free(ceph_msg_cache, m);
35153484 }
35163485
35173486 static void ceph_msg_release(struct kref *kref)
35183487 {
35193488 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
3520
- struct ceph_msg_data *data, *next;
3489
+ int i;
35213490
35223491 dout("%s %p\n", __func__, m);
35233492 WARN_ON(!list_empty(&m->list_head));
....@@ -3530,11 +3499,8 @@
35303499 m->middle = NULL;
35313500 }
35323501
3533
- list_for_each_entry_safe(data, next, &m->data, links) {
3534
- list_del_init(&data->links);
3535
- ceph_msg_data_destroy(data);
3536
- }
3537
- m->data_length = 0;
3502
+ for (i = 0; i < m->num_data_items; i++)
3503
+ ceph_msg_data_destroy(&m->data[i]);
35383504
35393505 if (m->pool)
35403506 ceph_msgpool_put(m->pool, m);