.. | .. |
---|
156 | 156 | /* Slab caches for frequently-allocated structures */ |
---|
157 | 157 | |
---|
158 | 158 | static struct kmem_cache *ceph_msg_cache; |
---|
159 | | -static struct kmem_cache *ceph_msg_data_cache; |
---|
160 | 159 | |
---|
161 | 160 | /* static tag bytes (protocol control messages) */ |
---|
162 | 161 | static char tag_msg = CEPH_MSGR_TAG_MSG; |
---|
.. | .. |
---|
187 | 186 | |
---|
188 | 187 | static struct page *zero_page; /* used in certain error cases */ |
---|
189 | 188 | |
---|
190 | | -const char *ceph_pr_addr(const struct sockaddr_storage *ss) |
---|
| 189 | +const char *ceph_pr_addr(const struct ceph_entity_addr *addr) |
---|
191 | 190 | { |
---|
192 | 191 | int i; |
---|
193 | 192 | char *s; |
---|
194 | | - struct sockaddr_in *in4 = (struct sockaddr_in *) ss; |
---|
195 | | - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; |
---|
| 193 | + struct sockaddr_storage ss = addr->in_addr; /* align */ |
---|
| 194 | + struct sockaddr_in *in4 = (struct sockaddr_in *)&ss; |
---|
| 195 | + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss; |
---|
196 | 196 | |
---|
197 | 197 | i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK; |
---|
198 | 198 | s = addr_str[i]; |
---|
199 | 199 | |
---|
200 | | - switch (ss->ss_family) { |
---|
| 200 | + switch (ss.ss_family) { |
---|
201 | 201 | case AF_INET: |
---|
202 | | - snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr, |
---|
| 202 | + snprintf(s, MAX_ADDR_STR_LEN, "(%d)%pI4:%hu", |
---|
| 203 | + le32_to_cpu(addr->type), &in4->sin_addr, |
---|
203 | 204 | ntohs(in4->sin_port)); |
---|
204 | 205 | break; |
---|
205 | 206 | |
---|
206 | 207 | case AF_INET6: |
---|
207 | | - snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr, |
---|
| 208 | + snprintf(s, MAX_ADDR_STR_LEN, "(%d)[%pI6c]:%hu", |
---|
| 209 | + le32_to_cpu(addr->type), &in6->sin6_addr, |
---|
208 | 210 | ntohs(in6->sin6_port)); |
---|
209 | 211 | break; |
---|
210 | 212 | |
---|
211 | 213 | default: |
---|
212 | 214 | snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)", |
---|
213 | | - ss->ss_family); |
---|
| 215 | + ss.ss_family); |
---|
214 | 216 | } |
---|
215 | 217 | |
---|
216 | 218 | return s; |
---|
.. | .. |
---|
220 | 222 | static void encode_my_addr(struct ceph_messenger *msgr) |
---|
221 | 223 | { |
---|
222 | 224 | memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr)); |
---|
223 | | - ceph_encode_addr(&msgr->my_enc_addr); |
---|
| 225 | + ceph_encode_banner_addr(&msgr->my_enc_addr); |
---|
224 | 226 | } |
---|
225 | 227 | |
---|
226 | 228 | /* |
---|
.. | .. |
---|
235 | 237 | if (!ceph_msg_cache) |
---|
236 | 238 | return -ENOMEM; |
---|
237 | 239 | |
---|
238 | | - BUG_ON(ceph_msg_data_cache); |
---|
239 | | - ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0); |
---|
240 | | - if (ceph_msg_data_cache) |
---|
241 | | - return 0; |
---|
242 | | - |
---|
243 | | - kmem_cache_destroy(ceph_msg_cache); |
---|
244 | | - ceph_msg_cache = NULL; |
---|
245 | | - |
---|
246 | | - return -ENOMEM; |
---|
| 240 | + return 0; |
---|
247 | 241 | } |
---|
248 | 242 | |
---|
249 | 243 | static void ceph_msgr_slab_exit(void) |
---|
250 | 244 | { |
---|
251 | | - BUG_ON(!ceph_msg_data_cache); |
---|
252 | | - kmem_cache_destroy(ceph_msg_data_cache); |
---|
253 | | - ceph_msg_data_cache = NULL; |
---|
254 | | - |
---|
255 | 245 | BUG_ON(!ceph_msg_cache); |
---|
256 | 246 | kmem_cache_destroy(ceph_msg_cache); |
---|
257 | 247 | ceph_msg_cache = NULL; |
---|
.. | .. |
---|
422 | 412 | switch (sk->sk_state) { |
---|
423 | 413 | case TCP_CLOSE: |
---|
424 | 414 | dout("%s TCP_CLOSE\n", __func__); |
---|
425 | | - /* fall through */ |
---|
| 415 | + fallthrough; |
---|
426 | 416 | case TCP_CLOSE_WAIT: |
---|
427 | 417 | dout("%s TCP_CLOSE_WAIT\n", __func__); |
---|
428 | 418 | con_sock_state_closing(con); |
---|
.. | .. |
---|
462 | 452 | */ |
---|
463 | 453 | static int ceph_tcp_connect(struct ceph_connection *con) |
---|
464 | 454 | { |
---|
465 | | - struct sockaddr_storage *paddr = &con->peer_addr.in_addr; |
---|
| 455 | + struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */ |
---|
466 | 456 | struct socket *sock; |
---|
467 | 457 | unsigned int noio_flag; |
---|
468 | 458 | int ret; |
---|
.. | .. |
---|
471 | 461 | |
---|
472 | 462 | /* sock_create_kern() allocates with GFP_KERNEL */ |
---|
473 | 463 | noio_flag = memalloc_noio_save(); |
---|
474 | | - ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, |
---|
| 464 | + ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family, |
---|
475 | 465 | SOCK_STREAM, IPPROTO_TCP, &sock); |
---|
476 | 466 | memalloc_noio_restore(noio_flag); |
---|
477 | 467 | if (ret) |
---|
.. | .. |
---|
484 | 474 | |
---|
485 | 475 | set_sock_callbacks(sock, con); |
---|
486 | 476 | |
---|
487 | | - dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); |
---|
| 477 | + dout("connect %s\n", ceph_pr_addr(&con->peer_addr)); |
---|
488 | 478 | |
---|
489 | 479 | con_sock_state_connecting(con); |
---|
490 | | - ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), |
---|
| 480 | + ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss), |
---|
491 | 481 | O_NONBLOCK); |
---|
492 | 482 | if (ret == -EINPROGRESS) { |
---|
493 | 483 | dout("connect %s EINPROGRESS sk_state = %u\n", |
---|
494 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
---|
| 484 | + ceph_pr_addr(&con->peer_addr), |
---|
495 | 485 | sock->sk->sk_state); |
---|
496 | 486 | } else if (ret < 0) { |
---|
497 | 487 | pr_err("connect %s error %d\n", |
---|
498 | | - ceph_pr_addr(&con->peer_addr.in_addr), ret); |
---|
| 488 | + ceph_pr_addr(&con->peer_addr), ret); |
---|
499 | 489 | sock_release(sock); |
---|
500 | 490 | return ret; |
---|
501 | 491 | } |
---|
502 | 492 | |
---|
503 | | - if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) { |
---|
504 | | - int optval = 1; |
---|
505 | | - |
---|
506 | | - ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, |
---|
507 | | - (char *)&optval, sizeof(optval)); |
---|
508 | | - if (ret) |
---|
509 | | - pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d", |
---|
510 | | - ret); |
---|
511 | | - } |
---|
| 493 | + if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) |
---|
| 494 | + tcp_sock_set_nodelay(sock->sk); |
---|
512 | 495 | |
---|
513 | 496 | con->sock = sock; |
---|
514 | 497 | return 0; |
---|
.. | .. |
---|
526 | 509 | if (!buf) |
---|
527 | 510 | msg.msg_flags |= MSG_TRUNC; |
---|
528 | 511 | |
---|
529 | | - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len); |
---|
| 512 | + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len); |
---|
530 | 513 | r = sock_recvmsg(sock, &msg, msg.msg_flags); |
---|
531 | 514 | if (r == -EAGAIN) |
---|
532 | 515 | r = 0; |
---|
.. | .. |
---|
545 | 528 | int r; |
---|
546 | 529 | |
---|
547 | 530 | BUG_ON(page_offset + length > PAGE_SIZE); |
---|
548 | | - iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length); |
---|
| 531 | + iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length); |
---|
549 | 532 | r = sock_recvmsg(sock, &msg, msg.msg_flags); |
---|
550 | 533 | if (r == -EAGAIN) |
---|
551 | 534 | r = 0; |
---|
.. | .. |
---|
557 | 540 | * shortly. |
---|
558 | 541 | */ |
---|
559 | 542 | static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, |
---|
560 | | - size_t kvlen, size_t len, int more) |
---|
| 543 | + size_t kvlen, size_t len, bool more) |
---|
561 | 544 | { |
---|
562 | 545 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; |
---|
563 | 546 | int r; |
---|
.. | .. |
---|
573 | 556 | return r; |
---|
574 | 557 | } |
---|
575 | 558 | |
---|
576 | | -static int __ceph_tcp_sendpage(struct socket *sock, struct page *page, |
---|
577 | | - int offset, size_t size, bool more) |
---|
578 | | -{ |
---|
579 | | - int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); |
---|
580 | | - int ret; |
---|
581 | | - |
---|
582 | | - ret = kernel_sendpage(sock, page, offset, size, flags); |
---|
583 | | - if (ret == -EAGAIN) |
---|
584 | | - ret = 0; |
---|
585 | | - |
---|
586 | | - return ret; |
---|
587 | | -} |
---|
588 | | - |
---|
| 559 | +/* |
---|
| 560 | + * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST |
---|
| 561 | + */ |
---|
589 | 562 | static int ceph_tcp_sendpage(struct socket *sock, struct page *page, |
---|
590 | | - int offset, size_t size, bool more) |
---|
| 563 | + int offset, size_t size, int more) |
---|
591 | 564 | { |
---|
592 | | - struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; |
---|
593 | | - struct bio_vec bvec; |
---|
| 565 | + ssize_t (*sendpage)(struct socket *sock, struct page *page, |
---|
| 566 | + int offset, size_t size, int flags); |
---|
| 567 | + int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more; |
---|
594 | 568 | int ret; |
---|
595 | 569 | |
---|
596 | 570 | /* |
---|
.. | .. |
---|
601 | 575 | * coalescing neighboring slab objects into a single frag which |
---|
602 | 576 | * triggers one of hardened usercopy checks. |
---|
603 | 577 | */ |
---|
604 | | - if (page_count(page) >= 1 && !PageSlab(page)) |
---|
605 | | - return __ceph_tcp_sendpage(sock, page, offset, size, more); |
---|
606 | | - |
---|
607 | | - bvec.bv_page = page; |
---|
608 | | - bvec.bv_offset = offset; |
---|
609 | | - bvec.bv_len = size; |
---|
610 | | - |
---|
611 | | - if (more) |
---|
612 | | - msg.msg_flags |= MSG_MORE; |
---|
| 578 | + if (sendpage_ok(page)) |
---|
| 579 | + sendpage = sock->ops->sendpage; |
---|
613 | 580 | else |
---|
614 | | - msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ |
---|
| 581 | + sendpage = sock_no_sendpage; |
---|
615 | 582 | |
---|
616 | | - iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size); |
---|
617 | | - ret = sock_sendmsg(sock, &msg); |
---|
| 583 | + ret = sendpage(sock, page, offset, size, flags); |
---|
618 | 584 | if (ret == -EAGAIN) |
---|
619 | 585 | ret = 0; |
---|
620 | 586 | |
---|
.. | .. |
---|
699 | 665 | void ceph_con_close(struct ceph_connection *con) |
---|
700 | 666 | { |
---|
701 | 667 | mutex_lock(&con->mutex); |
---|
702 | | - dout("con_close %p peer %s\n", con, |
---|
703 | | - ceph_pr_addr(&con->peer_addr.in_addr)); |
---|
| 668 | + dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr)); |
---|
704 | 669 | con->state = CON_STATE_CLOSED; |
---|
705 | 670 | |
---|
706 | 671 | con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */ |
---|
.. | .. |
---|
724 | 689 | struct ceph_entity_addr *addr) |
---|
725 | 690 | { |
---|
726 | 691 | mutex_lock(&con->mutex); |
---|
727 | | - dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); |
---|
| 692 | + dout("con_open %p %s\n", con, ceph_pr_addr(addr)); |
---|
728 | 693 | |
---|
729 | 694 | WARN_ON(con->state != CON_STATE_CLOSED); |
---|
730 | 695 | con->state = CON_STATE_PREOPEN; |
---|
.. | .. |
---|
870 | 835 | size_t bytes) |
---|
871 | 836 | { |
---|
872 | 837 | struct ceph_bio_iter *it = &cursor->bio_iter; |
---|
| 838 | + struct page *page = bio_iter_page(it->bio, it->iter); |
---|
873 | 839 | |
---|
874 | 840 | BUG_ON(bytes > cursor->resid); |
---|
875 | 841 | BUG_ON(bytes > bio_iter_len(it->bio, it->iter)); |
---|
.. | .. |
---|
881 | 847 | return false; /* no more data */ |
---|
882 | 848 | } |
---|
883 | 849 | |
---|
884 | | - if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done)) |
---|
| 850 | + if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done && |
---|
| 851 | + page == bio_iter_page(it->bio, it->iter))) |
---|
885 | 852 | return false; /* more bytes to process in this segment */ |
---|
886 | 853 | |
---|
887 | 854 | if (!it->iter.bi_size) { |
---|
.. | .. |
---|
929 | 896 | size_t bytes) |
---|
930 | 897 | { |
---|
931 | 898 | struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs; |
---|
| 899 | + struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter); |
---|
932 | 900 | |
---|
933 | 901 | BUG_ON(bytes > cursor->resid); |
---|
934 | 902 | BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter)); |
---|
.. | .. |
---|
940 | 908 | return false; /* no more data */ |
---|
941 | 909 | } |
---|
942 | 910 | |
---|
943 | | - if (!bytes || cursor->bvec_iter.bi_bvec_done) |
---|
| 911 | + if (!bytes || (cursor->bvec_iter.bi_bvec_done && |
---|
| 912 | + page == bvec_iter_page(bvecs, cursor->bvec_iter))) |
---|
944 | 913 | return false; /* more bytes to process in this segment */ |
---|
945 | 914 | |
---|
946 | 915 | BUG_ON(cursor->last_piece); |
---|
.. | .. |
---|
1147 | 1116 | static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length) |
---|
1148 | 1117 | { |
---|
1149 | 1118 | struct ceph_msg_data_cursor *cursor = &msg->cursor; |
---|
1150 | | - struct ceph_msg_data *data; |
---|
1151 | 1119 | |
---|
1152 | 1120 | BUG_ON(!length); |
---|
1153 | 1121 | BUG_ON(length > msg->data_length); |
---|
1154 | | - BUG_ON(list_empty(&msg->data)); |
---|
| 1122 | + BUG_ON(!msg->num_data_items); |
---|
1155 | 1123 | |
---|
1156 | | - cursor->data_head = &msg->data; |
---|
1157 | 1124 | cursor->total_resid = length; |
---|
1158 | | - data = list_first_entry(&msg->data, struct ceph_msg_data, links); |
---|
1159 | | - cursor->data = data; |
---|
| 1125 | + cursor->data = msg->data; |
---|
1160 | 1126 | |
---|
1161 | 1127 | __ceph_msg_data_cursor_init(cursor); |
---|
1162 | 1128 | } |
---|
.. | .. |
---|
1237 | 1203 | |
---|
1238 | 1204 | if (!cursor->resid && cursor->total_resid) { |
---|
1239 | 1205 | WARN_ON(!cursor->last_piece); |
---|
1240 | | - BUG_ON(list_is_last(&cursor->data->links, cursor->data_head)); |
---|
1241 | | - cursor->data = list_next_entry(cursor->data, links); |
---|
| 1206 | + cursor->data++; |
---|
1242 | 1207 | __ceph_msg_data_cursor_init(cursor); |
---|
1243 | 1208 | new_piece = true; |
---|
1244 | 1209 | } |
---|
.. | .. |
---|
1254 | 1219 | |
---|
1255 | 1220 | static void prepare_message_data(struct ceph_msg *msg, u32 data_len) |
---|
1256 | 1221 | { |
---|
1257 | | - BUG_ON(!msg); |
---|
1258 | | - BUG_ON(!data_len); |
---|
1259 | | - |
---|
1260 | 1222 | /* Initialize data cursor */ |
---|
1261 | 1223 | |
---|
1262 | 1224 | ceph_msg_data_cursor_init(msg, (size_t)data_len); |
---|
.. | .. |
---|
1592 | 1554 | struct ceph_msg *msg = con->out_msg; |
---|
1593 | 1555 | struct ceph_msg_data_cursor *cursor = &msg->cursor; |
---|
1594 | 1556 | bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); |
---|
| 1557 | + int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; |
---|
1595 | 1558 | u32 crc; |
---|
1596 | 1559 | |
---|
1597 | 1560 | dout("%s %p msg %p\n", __func__, con, msg); |
---|
1598 | 1561 | |
---|
1599 | | - if (list_empty(&msg->data)) |
---|
| 1562 | + if (!msg->num_data_items) |
---|
1600 | 1563 | return -EINVAL; |
---|
1601 | 1564 | |
---|
1602 | 1565 | /* |
---|
.. | .. |
---|
1612 | 1575 | struct page *page; |
---|
1613 | 1576 | size_t page_offset; |
---|
1614 | 1577 | size_t length; |
---|
1615 | | - bool last_piece; |
---|
1616 | 1578 | int ret; |
---|
1617 | 1579 | |
---|
1618 | 1580 | if (!cursor->resid) { |
---|
.. | .. |
---|
1620 | 1582 | continue; |
---|
1621 | 1583 | } |
---|
1622 | 1584 | |
---|
1623 | | - page = ceph_msg_data_next(cursor, &page_offset, &length, |
---|
1624 | | - &last_piece); |
---|
1625 | | - ret = ceph_tcp_sendpage(con->sock, page, page_offset, |
---|
1626 | | - length, !last_piece); |
---|
| 1585 | + page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); |
---|
| 1586 | + if (length == cursor->total_resid) |
---|
| 1587 | + more = MSG_MORE; |
---|
| 1588 | + ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, |
---|
| 1589 | + more); |
---|
1627 | 1590 | if (ret <= 0) { |
---|
1628 | 1591 | if (do_datacrc) |
---|
1629 | 1592 | msg->footer.data_crc = cpu_to_le32(crc); |
---|
.. | .. |
---|
1653 | 1616 | */ |
---|
1654 | 1617 | static int write_partial_skip(struct ceph_connection *con) |
---|
1655 | 1618 | { |
---|
| 1619 | + int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; |
---|
1656 | 1620 | int ret; |
---|
1657 | 1621 | |
---|
1658 | 1622 | dout("%s %p %d left\n", __func__, con, con->out_skip); |
---|
1659 | 1623 | while (con->out_skip > 0) { |
---|
1660 | 1624 | size_t size = min(con->out_skip, (int) PAGE_SIZE); |
---|
1661 | 1625 | |
---|
1662 | | - ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true); |
---|
| 1626 | + if (size == con->out_skip) |
---|
| 1627 | + more = MSG_MORE; |
---|
| 1628 | + ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more); |
---|
1663 | 1629 | if (ret <= 0) |
---|
1664 | 1630 | goto out; |
---|
1665 | 1631 | con->out_skip -= ret; |
---|
.. | .. |
---|
1761 | 1727 | ret = read_partial(con, end, size, &con->actual_peer_addr); |
---|
1762 | 1728 | if (ret <= 0) |
---|
1763 | 1729 | goto out; |
---|
| 1730 | + ceph_decode_banner_addr(&con->actual_peer_addr); |
---|
1764 | 1731 | |
---|
1765 | 1732 | size = sizeof (con->peer_addr_for_me); |
---|
1766 | 1733 | end += size; |
---|
1767 | 1734 | ret = read_partial(con, end, size, &con->peer_addr_for_me); |
---|
1768 | 1735 | if (ret <= 0) |
---|
1769 | 1736 | goto out; |
---|
| 1737 | + ceph_decode_banner_addr(&con->peer_addr_for_me); |
---|
1770 | 1738 | |
---|
1771 | 1739 | out: |
---|
1772 | 1740 | return ret; |
---|
.. | .. |
---|
1817 | 1785 | { |
---|
1818 | 1786 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { |
---|
1819 | 1787 | pr_err("connect to %s got bad banner\n", |
---|
1820 | | - ceph_pr_addr(&con->peer_addr.in_addr)); |
---|
| 1788 | + ceph_pr_addr(&con->peer_addr)); |
---|
1821 | 1789 | con->error_msg = "protocol error, bad banner"; |
---|
1822 | 1790 | return -1; |
---|
1823 | 1791 | } |
---|
1824 | 1792 | return 0; |
---|
1825 | 1793 | } |
---|
1826 | 1794 | |
---|
1827 | | -static bool addr_is_blank(struct sockaddr_storage *ss) |
---|
| 1795 | +static bool addr_is_blank(struct ceph_entity_addr *addr) |
---|
1828 | 1796 | { |
---|
1829 | | - struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; |
---|
1830 | | - struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; |
---|
| 1797 | + struct sockaddr_storage ss = addr->in_addr; /* align */ |
---|
| 1798 | + struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr; |
---|
| 1799 | + struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr; |
---|
1831 | 1800 | |
---|
1832 | | - switch (ss->ss_family) { |
---|
| 1801 | + switch (ss.ss_family) { |
---|
1833 | 1802 | case AF_INET: |
---|
1834 | | - return addr->s_addr == htonl(INADDR_ANY); |
---|
| 1803 | + return addr4->s_addr == htonl(INADDR_ANY); |
---|
1835 | 1804 | case AF_INET6: |
---|
1836 | 1805 | return ipv6_addr_any(addr6); |
---|
1837 | 1806 | default: |
---|
.. | .. |
---|
1839 | 1808 | } |
---|
1840 | 1809 | } |
---|
1841 | 1810 | |
---|
1842 | | -static int addr_port(struct sockaddr_storage *ss) |
---|
| 1811 | +static int addr_port(struct ceph_entity_addr *addr) |
---|
1843 | 1812 | { |
---|
1844 | | - switch (ss->ss_family) { |
---|
| 1813 | + switch (get_unaligned(&addr->in_addr.ss_family)) { |
---|
1845 | 1814 | case AF_INET: |
---|
1846 | | - return ntohs(((struct sockaddr_in *)ss)->sin_port); |
---|
| 1815 | + return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port)); |
---|
1847 | 1816 | case AF_INET6: |
---|
1848 | | - return ntohs(((struct sockaddr_in6 *)ss)->sin6_port); |
---|
| 1817 | + return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port)); |
---|
1849 | 1818 | } |
---|
1850 | 1819 | return 0; |
---|
1851 | 1820 | } |
---|
1852 | 1821 | |
---|
1853 | | -static void addr_set_port(struct sockaddr_storage *ss, int p) |
---|
| 1822 | +static void addr_set_port(struct ceph_entity_addr *addr, int p) |
---|
1854 | 1823 | { |
---|
1855 | | - switch (ss->ss_family) { |
---|
| 1824 | + switch (get_unaligned(&addr->in_addr.ss_family)) { |
---|
1856 | 1825 | case AF_INET: |
---|
1857 | | - ((struct sockaddr_in *)ss)->sin_port = htons(p); |
---|
| 1826 | + put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port); |
---|
1858 | 1827 | break; |
---|
1859 | 1828 | case AF_INET6: |
---|
1860 | | - ((struct sockaddr_in6 *)ss)->sin6_port = htons(p); |
---|
| 1829 | + put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port); |
---|
1861 | 1830 | break; |
---|
1862 | 1831 | } |
---|
1863 | 1832 | } |
---|
.. | .. |
---|
1865 | 1834 | /* |
---|
1866 | 1835 | * Unlike other *_pton function semantics, zero indicates success. |
---|
1867 | 1836 | */ |
---|
1868 | | -static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, |
---|
| 1837 | +static int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr, |
---|
1869 | 1838 | char delim, const char **ipend) |
---|
1870 | 1839 | { |
---|
1871 | | - struct sockaddr_in *in4 = (struct sockaddr_in *) ss; |
---|
1872 | | - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; |
---|
| 1840 | + memset(&addr->in_addr, 0, sizeof(addr->in_addr)); |
---|
1873 | 1841 | |
---|
1874 | | - memset(ss, 0, sizeof(*ss)); |
---|
1875 | | - |
---|
1876 | | - if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) { |
---|
1877 | | - ss->ss_family = AF_INET; |
---|
| 1842 | + if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) { |
---|
| 1843 | + put_unaligned(AF_INET, &addr->in_addr.ss_family); |
---|
1878 | 1844 | return 0; |
---|
1879 | 1845 | } |
---|
1880 | 1846 | |
---|
1881 | | - if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) { |
---|
1882 | | - ss->ss_family = AF_INET6; |
---|
| 1847 | + if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) { |
---|
| 1848 | + put_unaligned(AF_INET6, &addr->in_addr.ss_family); |
---|
1883 | 1849 | return 0; |
---|
1884 | 1850 | } |
---|
1885 | 1851 | |
---|
.. | .. |
---|
1891 | 1857 | */ |
---|
1892 | 1858 | #ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER |
---|
1893 | 1859 | static int ceph_dns_resolve_name(const char *name, size_t namelen, |
---|
1894 | | - struct sockaddr_storage *ss, char delim, const char **ipend) |
---|
| 1860 | + struct ceph_entity_addr *addr, char delim, const char **ipend) |
---|
1895 | 1861 | { |
---|
1896 | 1862 | const char *end, *delim_p; |
---|
1897 | 1863 | char *colon_p, *ip_addr = NULL; |
---|
.. | .. |
---|
1918 | 1884 | return -EINVAL; |
---|
1919 | 1885 | |
---|
1920 | 1886 | /* do dns_resolve upcall */ |
---|
1921 | | - ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL); |
---|
| 1887 | + ip_len = dns_query(current->nsproxy->net_ns, |
---|
| 1888 | + NULL, name, end - name, NULL, &ip_addr, NULL, false); |
---|
1922 | 1889 | if (ip_len > 0) |
---|
1923 | | - ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL); |
---|
| 1890 | + ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL); |
---|
1924 | 1891 | else |
---|
1925 | 1892 | ret = -ESRCH; |
---|
1926 | 1893 | |
---|
.. | .. |
---|
1929 | 1896 | *ipend = end; |
---|
1930 | 1897 | |
---|
1931 | 1898 | pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name, |
---|
1932 | | - ret, ret ? "failed" : ceph_pr_addr(ss)); |
---|
| 1899 | + ret, ret ? "failed" : ceph_pr_addr(addr)); |
---|
1933 | 1900 | |
---|
1934 | 1901 | return ret; |
---|
1935 | 1902 | } |
---|
1936 | 1903 | #else |
---|
1937 | 1904 | static inline int ceph_dns_resolve_name(const char *name, size_t namelen, |
---|
1938 | | - struct sockaddr_storage *ss, char delim, const char **ipend) |
---|
| 1905 | + struct ceph_entity_addr *addr, char delim, const char **ipend) |
---|
1939 | 1906 | { |
---|
1940 | 1907 | return -EINVAL; |
---|
1941 | 1908 | } |
---|
.. | .. |
---|
1946 | 1913 | * then try to extract a hostname to resolve using userspace DNS upcall. |
---|
1947 | 1914 | */ |
---|
1948 | 1915 | static int ceph_parse_server_name(const char *name, size_t namelen, |
---|
1949 | | - struct sockaddr_storage *ss, char delim, const char **ipend) |
---|
| 1916 | + struct ceph_entity_addr *addr, char delim, const char **ipend) |
---|
1950 | 1917 | { |
---|
1951 | 1918 | int ret; |
---|
1952 | 1919 | |
---|
1953 | | - ret = ceph_pton(name, namelen, ss, delim, ipend); |
---|
| 1920 | + ret = ceph_pton(name, namelen, addr, delim, ipend); |
---|
1954 | 1921 | if (ret) |
---|
1955 | | - ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend); |
---|
| 1922 | + ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend); |
---|
1956 | 1923 | |
---|
1957 | 1924 | return ret; |
---|
1958 | 1925 | } |
---|
.. | .. |
---|
1971 | 1938 | dout("parse_ips on '%.*s'\n", (int)(end-c), c); |
---|
1972 | 1939 | for (i = 0; i < max_count; i++) { |
---|
1973 | 1940 | const char *ipend; |
---|
1974 | | - struct sockaddr_storage *ss = &addr[i].in_addr; |
---|
1975 | 1941 | int port; |
---|
1976 | 1942 | char delim = ','; |
---|
1977 | 1943 | |
---|
.. | .. |
---|
1980 | 1946 | p++; |
---|
1981 | 1947 | } |
---|
1982 | 1948 | |
---|
1983 | | - ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend); |
---|
| 1949 | + ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend); |
---|
1984 | 1950 | if (ret) |
---|
1985 | 1951 | goto bad; |
---|
1986 | 1952 | ret = -EINVAL; |
---|
.. | .. |
---|
2011 | 1977 | port = CEPH_MON_PORT; |
---|
2012 | 1978 | } |
---|
2013 | 1979 | |
---|
2014 | | - addr_set_port(ss, port); |
---|
| 1980 | + addr_set_port(&addr[i], port); |
---|
| 1981 | + addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY; |
---|
2015 | 1982 | |
---|
2016 | | - dout("parse_ips got %s\n", ceph_pr_addr(ss)); |
---|
| 1983 | + dout("parse_ips got %s\n", ceph_pr_addr(&addr[i])); |
---|
2017 | 1984 | |
---|
2018 | 1985 | if (p == end) |
---|
2019 | 1986 | break; |
---|
.. | .. |
---|
2030 | 1997 | return 0; |
---|
2031 | 1998 | |
---|
2032 | 1999 | bad: |
---|
2033 | | - pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); |
---|
2034 | 2000 | return ret; |
---|
2035 | 2001 | } |
---|
2036 | | -EXPORT_SYMBOL(ceph_parse_ips); |
---|
2037 | 2002 | |
---|
2038 | 2003 | static int process_banner(struct ceph_connection *con) |
---|
2039 | 2004 | { |
---|
.. | .. |
---|
2042 | 2007 | if (verify_hello(con) < 0) |
---|
2043 | 2008 | return -1; |
---|
2044 | 2009 | |
---|
2045 | | - ceph_decode_addr(&con->actual_peer_addr); |
---|
2046 | | - ceph_decode_addr(&con->peer_addr_for_me); |
---|
2047 | | - |
---|
2048 | 2010 | /* |
---|
2049 | 2011 | * Make sure the other end is who we wanted. note that the other |
---|
2050 | 2012 | * end may not yet know their ip address, so if it's 0.0.0.0, give |
---|
.. | .. |
---|
2052 | 2014 | */ |
---|
2053 | 2015 | if (memcmp(&con->peer_addr, &con->actual_peer_addr, |
---|
2054 | 2016 | sizeof(con->peer_addr)) != 0 && |
---|
2055 | | - !(addr_is_blank(&con->actual_peer_addr.in_addr) && |
---|
| 2017 | + !(addr_is_blank(&con->actual_peer_addr) && |
---|
2056 | 2018 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { |
---|
2057 | | - pr_warn("wrong peer, want %s/%d, got %s/%d\n", |
---|
2058 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
---|
2059 | | - (int)le32_to_cpu(con->peer_addr.nonce), |
---|
2060 | | - ceph_pr_addr(&con->actual_peer_addr.in_addr), |
---|
2061 | | - (int)le32_to_cpu(con->actual_peer_addr.nonce)); |
---|
| 2019 | + pr_warn("wrong peer, want %s/%u, got %s/%u\n", |
---|
| 2020 | + ceph_pr_addr(&con->peer_addr), |
---|
| 2021 | + le32_to_cpu(con->peer_addr.nonce), |
---|
| 2022 | + ceph_pr_addr(&con->actual_peer_addr), |
---|
| 2023 | + le32_to_cpu(con->actual_peer_addr.nonce)); |
---|
2062 | 2024 | con->error_msg = "wrong peer at address"; |
---|
2063 | 2025 | return -1; |
---|
2064 | 2026 | } |
---|
.. | .. |
---|
2066 | 2028 | /* |
---|
2067 | 2029 | * did we learn our address? |
---|
2068 | 2030 | */ |
---|
2069 | | - if (addr_is_blank(&con->msgr->inst.addr.in_addr)) { |
---|
2070 | | - int port = addr_port(&con->msgr->inst.addr.in_addr); |
---|
| 2031 | + if (addr_is_blank(&con->msgr->inst.addr)) { |
---|
| 2032 | + int port = addr_port(&con->msgr->inst.addr); |
---|
2071 | 2033 | |
---|
2072 | 2034 | memcpy(&con->msgr->inst.addr.in_addr, |
---|
2073 | 2035 | &con->peer_addr_for_me.in_addr, |
---|
2074 | 2036 | sizeof(con->peer_addr_for_me.in_addr)); |
---|
2075 | | - addr_set_port(&con->msgr->inst.addr.in_addr, port); |
---|
| 2037 | + addr_set_port(&con->msgr->inst.addr, port); |
---|
2076 | 2038 | encode_my_addr(con->msgr); |
---|
2077 | 2039 | dout("process_banner learned my addr is %s\n", |
---|
2078 | | - ceph_pr_addr(&con->msgr->inst.addr.in_addr)); |
---|
| 2040 | + ceph_pr_addr(&con->msgr->inst.addr)); |
---|
2079 | 2041 | } |
---|
2080 | 2042 | |
---|
2081 | 2043 | return 0; |
---|
.. | .. |
---|
2126 | 2088 | pr_err("%s%lld %s feature set mismatch," |
---|
2127 | 2089 | " my %llx < server's %llx, missing %llx\n", |
---|
2128 | 2090 | ENTITY_NAME(con->peer_name), |
---|
2129 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
---|
| 2091 | + ceph_pr_addr(&con->peer_addr), |
---|
2130 | 2092 | sup_feat, server_feat, server_feat & ~sup_feat); |
---|
2131 | 2093 | con->error_msg = "missing required protocol features"; |
---|
2132 | 2094 | reset_connection(con); |
---|
.. | .. |
---|
2136 | 2098 | pr_err("%s%lld %s protocol version mismatch," |
---|
2137 | 2099 | " my %d != server's %d\n", |
---|
2138 | 2100 | ENTITY_NAME(con->peer_name), |
---|
2139 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
---|
| 2101 | + ceph_pr_addr(&con->peer_addr), |
---|
2140 | 2102 | le32_to_cpu(con->out_connect.protocol_version), |
---|
2141 | 2103 | le32_to_cpu(con->in_reply.protocol_version)); |
---|
2142 | 2104 | con->error_msg = "protocol version mismatch"; |
---|
.. | .. |
---|
2170 | 2132 | le32_to_cpu(con->in_reply.connect_seq)); |
---|
2171 | 2133 | pr_err("%s%lld %s connection reset\n", |
---|
2172 | 2134 | ENTITY_NAME(con->peer_name), |
---|
2173 | | - ceph_pr_addr(&con->peer_addr.in_addr)); |
---|
| 2135 | + ceph_pr_addr(&con->peer_addr)); |
---|
2174 | 2136 | reset_connection(con); |
---|
2175 | 2137 | con_out_kvec_reset(con); |
---|
2176 | 2138 | ret = prepare_write_connect(con); |
---|
.. | .. |
---|
2227 | 2189 | pr_err("%s%lld %s protocol feature mismatch," |
---|
2228 | 2190 | " my required %llx > server's %llx, need %llx\n", |
---|
2229 | 2191 | ENTITY_NAME(con->peer_name), |
---|
2230 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
---|
| 2192 | + ceph_pr_addr(&con->peer_addr), |
---|
2231 | 2193 | req_feat, server_feat, req_feat & ~server_feat); |
---|
2232 | 2194 | con->error_msg = "missing required protocol features"; |
---|
2233 | 2195 | reset_connection(con); |
---|
.. | .. |
---|
2356 | 2318 | u32 crc = 0; |
---|
2357 | 2319 | int ret; |
---|
2358 | 2320 | |
---|
2359 | | - BUG_ON(!msg); |
---|
2360 | | - if (list_empty(&msg->data)) |
---|
| 2321 | + if (!msg->num_data_items) |
---|
2361 | 2322 | return -EIO; |
---|
2362 | 2323 | |
---|
2363 | 2324 | if (do_datacrc) |
---|
.. | .. |
---|
2435 | 2396 | if ((s64)seq - (s64)con->in_seq < 1) { |
---|
2436 | 2397 | pr_info("skipping %s%lld %s seq %lld expected %lld\n", |
---|
2437 | 2398 | ENTITY_NAME(con->peer_name), |
---|
2438 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
---|
| 2399 | + ceph_pr_addr(&con->peer_addr), |
---|
2439 | 2400 | seq, con->in_seq + 1); |
---|
2440 | 2401 | con->in_base_pos = -front_len - middle_len - data_len - |
---|
2441 | 2402 | sizeof_footer(con); |
---|
.. | .. |
---|
2790 | 2751 | switch (ret) { |
---|
2791 | 2752 | case -EBADMSG: |
---|
2792 | 2753 | con->error_msg = "bad crc/signature"; |
---|
2793 | | - /* fall through */ |
---|
| 2754 | + fallthrough; |
---|
2794 | 2755 | case -EBADE: |
---|
2795 | 2756 | ret = -EIO; |
---|
2796 | 2757 | break; |
---|
.. | .. |
---|
2850 | 2811 | return -ENOENT; |
---|
2851 | 2812 | } |
---|
2852 | 2813 | |
---|
| 2814 | + dout("%s %p %lu\n", __func__, con, delay); |
---|
2853 | 2815 | if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { |
---|
2854 | 2816 | dout("%s %p - already queued\n", __func__, con); |
---|
2855 | 2817 | con->ops->put(con); |
---|
2856 | 2818 | return -EBUSY; |
---|
2857 | 2819 | } |
---|
2858 | 2820 | |
---|
2859 | | - dout("%s %p %lu\n", __func__, con, delay); |
---|
2860 | 2821 | return 0; |
---|
2861 | 2822 | } |
---|
2862 | 2823 | |
---|
.. | .. |
---|
3014 | 2975 | static void con_fault(struct ceph_connection *con) |
---|
3015 | 2976 | { |
---|
3016 | 2977 | dout("fault %p state %lu to peer %s\n", |
---|
3017 | | - con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); |
---|
| 2978 | + con, con->state, ceph_pr_addr(&con->peer_addr)); |
---|
3018 | 2979 | |
---|
3019 | 2980 | pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), |
---|
3020 | | - ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); |
---|
| 2981 | + ceph_pr_addr(&con->peer_addr), con->error_msg); |
---|
3021 | 2982 | con->error_msg = NULL; |
---|
3022 | 2983 | |
---|
3023 | 2984 | WARN_ON(con->state != CON_STATE_CONNECTING && |
---|
.. | .. |
---|
3066 | 3027 | } |
---|
3067 | 3028 | |
---|
3068 | 3029 | |
---|
| 3030 | +void ceph_messenger_reset_nonce(struct ceph_messenger *msgr) |
---|
| 3031 | +{ |
---|
| 3032 | + u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000; |
---|
| 3033 | + msgr->inst.addr.nonce = cpu_to_le32(nonce); |
---|
| 3034 | + encode_my_addr(msgr); |
---|
| 3035 | +} |
---|
3069 | 3036 | |
---|
3070 | 3037 | /* |
---|
3071 | 3038 | * initialize a new messenger instance |
---|
.. | .. |
---|
3271 | 3238 | return false; |
---|
3272 | 3239 | } |
---|
3273 | 3240 | |
---|
3274 | | -static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) |
---|
| 3241 | +static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg) |
---|
3275 | 3242 | { |
---|
3276 | | - struct ceph_msg_data *data; |
---|
3277 | | - |
---|
3278 | | - if (WARN_ON(!ceph_msg_data_type_valid(type))) |
---|
3279 | | - return NULL; |
---|
3280 | | - |
---|
3281 | | - data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS); |
---|
3282 | | - if (!data) |
---|
3283 | | - return NULL; |
---|
3284 | | - |
---|
3285 | | - data->type = type; |
---|
3286 | | - INIT_LIST_HEAD(&data->links); |
---|
3287 | | - |
---|
3288 | | - return data; |
---|
| 3243 | + BUG_ON(msg->num_data_items >= msg->max_data_items); |
---|
| 3244 | + return &msg->data[msg->num_data_items++]; |
---|
3289 | 3245 | } |
---|
3290 | 3246 | |
---|
3291 | 3247 | static void ceph_msg_data_destroy(struct ceph_msg_data *data) |
---|
3292 | 3248 | { |
---|
3293 | | - if (!data) |
---|
3294 | | - return; |
---|
3295 | | - |
---|
3296 | | - WARN_ON(!list_empty(&data->links)); |
---|
3297 | | - if (data->type == CEPH_MSG_DATA_PAGELIST) |
---|
| 3249 | + if (data->type == CEPH_MSG_DATA_PAGES && data->own_pages) { |
---|
| 3250 | + int num_pages = calc_pages_for(data->alignment, data->length); |
---|
| 3251 | + ceph_release_page_vector(data->pages, num_pages); |
---|
| 3252 | + } else if (data->type == CEPH_MSG_DATA_PAGELIST) { |
---|
3298 | 3253 | ceph_pagelist_release(data->pagelist); |
---|
3299 | | - kmem_cache_free(ceph_msg_data_cache, data); |
---|
| 3254 | + } |
---|
3300 | 3255 | } |
---|
3301 | 3256 | |
---|
3302 | 3257 | void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, |
---|
3303 | | - size_t length, size_t alignment) |
---|
| 3258 | + size_t length, size_t alignment, bool own_pages) |
---|
3304 | 3259 | { |
---|
3305 | 3260 | struct ceph_msg_data *data; |
---|
3306 | 3261 | |
---|
3307 | 3262 | BUG_ON(!pages); |
---|
3308 | 3263 | BUG_ON(!length); |
---|
3309 | 3264 | |
---|
3310 | | - data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES); |
---|
3311 | | - BUG_ON(!data); |
---|
| 3265 | + data = ceph_msg_data_add(msg); |
---|
| 3266 | + data->type = CEPH_MSG_DATA_PAGES; |
---|
3312 | 3267 | data->pages = pages; |
---|
3313 | 3268 | data->length = length; |
---|
3314 | 3269 | data->alignment = alignment & ~PAGE_MASK; |
---|
| 3270 | + data->own_pages = own_pages; |
---|
3315 | 3271 | |
---|
3316 | | - list_add_tail(&data->links, &msg->data); |
---|
3317 | 3272 | msg->data_length += length; |
---|
3318 | 3273 | } |
---|
3319 | 3274 | EXPORT_SYMBOL(ceph_msg_data_add_pages); |
---|
.. | .. |
---|
3326 | 3281 | BUG_ON(!pagelist); |
---|
3327 | 3282 | BUG_ON(!pagelist->length); |
---|
3328 | 3283 | |
---|
3329 | | - data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST); |
---|
3330 | | - BUG_ON(!data); |
---|
| 3284 | + data = ceph_msg_data_add(msg); |
---|
| 3285 | + data->type = CEPH_MSG_DATA_PAGELIST; |
---|
| 3286 | + refcount_inc(&pagelist->refcnt); |
---|
3331 | 3287 | data->pagelist = pagelist; |
---|
3332 | 3288 | |
---|
3333 | | - list_add_tail(&data->links, &msg->data); |
---|
3334 | 3289 | msg->data_length += pagelist->length; |
---|
3335 | 3290 | } |
---|
3336 | 3291 | EXPORT_SYMBOL(ceph_msg_data_add_pagelist); |
---|
.. | .. |
---|
3341 | 3296 | { |
---|
3342 | 3297 | struct ceph_msg_data *data; |
---|
3343 | 3298 | |
---|
3344 | | - data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); |
---|
3345 | | - BUG_ON(!data); |
---|
| 3299 | + data = ceph_msg_data_add(msg); |
---|
| 3300 | + data->type = CEPH_MSG_DATA_BIO; |
---|
3346 | 3301 | data->bio_pos = *bio_pos; |
---|
3347 | 3302 | data->bio_length = length; |
---|
3348 | 3303 | |
---|
3349 | | - list_add_tail(&data->links, &msg->data); |
---|
3350 | 3304 | msg->data_length += length; |
---|
3351 | 3305 | } |
---|
3352 | 3306 | EXPORT_SYMBOL(ceph_msg_data_add_bio); |
---|
.. | .. |
---|
3357 | 3311 | { |
---|
3358 | 3312 | struct ceph_msg_data *data; |
---|
3359 | 3313 | |
---|
3360 | | - data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS); |
---|
3361 | | - BUG_ON(!data); |
---|
| 3314 | + data = ceph_msg_data_add(msg); |
---|
| 3315 | + data->type = CEPH_MSG_DATA_BVECS; |
---|
3362 | 3316 | data->bvec_pos = *bvec_pos; |
---|
3363 | 3317 | |
---|
3364 | | - list_add_tail(&data->links, &msg->data); |
---|
3365 | 3318 | msg->data_length += bvec_pos->iter.bi_size; |
---|
3366 | 3319 | } |
---|
3367 | 3320 | EXPORT_SYMBOL(ceph_msg_data_add_bvecs); |
---|
.. | .. |
---|
3370 | 3323 | * construct a new message with given type, size |
---|
3371 | 3324 | * the new msg has a ref count of 1. |
---|
3372 | 3325 | */ |
---|
3373 | | -struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, |
---|
3374 | | - bool can_fail) |
---|
| 3326 | +struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, |
---|
| 3327 | + gfp_t flags, bool can_fail) |
---|
3375 | 3328 | { |
---|
3376 | 3329 | struct ceph_msg *m; |
---|
3377 | 3330 | |
---|
.. | .. |
---|
3385 | 3338 | |
---|
3386 | 3339 | INIT_LIST_HEAD(&m->list_head); |
---|
3387 | 3340 | kref_init(&m->kref); |
---|
3388 | | - INIT_LIST_HEAD(&m->data); |
---|
3389 | 3341 | |
---|
3390 | 3342 | /* front */ |
---|
3391 | 3343 | if (front_len) { |
---|
.. | .. |
---|
3399 | 3351 | m->front.iov_base = NULL; |
---|
3400 | 3352 | } |
---|
3401 | 3353 | m->front_alloc_len = m->front.iov_len = front_len; |
---|
| 3354 | + |
---|
| 3355 | + if (max_data_items) { |
---|
| 3356 | + m->data = kmalloc_array(max_data_items, sizeof(*m->data), |
---|
| 3357 | + flags); |
---|
| 3358 | + if (!m->data) |
---|
| 3359 | + goto out2; |
---|
| 3360 | + |
---|
| 3361 | + m->max_data_items = max_data_items; |
---|
| 3362 | + } |
---|
3402 | 3363 | |
---|
3403 | 3364 | dout("ceph_msg_new %p front %d\n", m, front_len); |
---|
3404 | 3365 | return m; |
---|
.. | .. |
---|
3415 | 3376 | front_len); |
---|
3416 | 3377 | } |
---|
3417 | 3378 | return NULL; |
---|
| 3379 | +} |
---|
| 3380 | +EXPORT_SYMBOL(ceph_msg_new2); |
---|
| 3381 | + |
---|
| 3382 | +struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, |
---|
| 3383 | + bool can_fail) |
---|
| 3384 | +{ |
---|
| 3385 | + return ceph_msg_new2(type, front_len, 0, flags, can_fail); |
---|
3418 | 3386 | } |
---|
3419 | 3387 | EXPORT_SYMBOL(ceph_msg_new); |
---|
3420 | 3388 | |
---|
.. | .. |
---|
3511 | 3479 | { |
---|
3512 | 3480 | dout("%s %p\n", __func__, m); |
---|
3513 | 3481 | kvfree(m->front.iov_base); |
---|
| 3482 | + kfree(m->data); |
---|
3514 | 3483 | kmem_cache_free(ceph_msg_cache, m); |
---|
3515 | 3484 | } |
---|
3516 | 3485 | |
---|
3517 | 3486 | static void ceph_msg_release(struct kref *kref) |
---|
3518 | 3487 | { |
---|
3519 | 3488 | struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); |
---|
3520 | | - struct ceph_msg_data *data, *next; |
---|
| 3489 | + int i; |
---|
3521 | 3490 | |
---|
3522 | 3491 | dout("%s %p\n", __func__, m); |
---|
3523 | 3492 | WARN_ON(!list_empty(&m->list_head)); |
---|
.. | .. |
---|
3530 | 3499 | m->middle = NULL; |
---|
3531 | 3500 | } |
---|
3532 | 3501 | |
---|
3533 | | - list_for_each_entry_safe(data, next, &m->data, links) { |
---|
3534 | | - list_del_init(&data->links); |
---|
3535 | | - ceph_msg_data_destroy(data); |
---|
3536 | | - } |
---|
3537 | | - m->data_length = 0; |
---|
| 3502 | + for (i = 0; i < m->num_data_items; i++) |
---|
| 3503 | + ceph_msg_data_destroy(&m->data[i]); |
---|
3538 | 3504 | |
---|
3539 | 3505 | if (m->pool) |
---|
3540 | 3506 | ceph_msgpool_put(m->pool, m); |
---|