| .. | .. |
|---|
| 156 | 156 | /* Slab caches for frequently-allocated structures */ |
|---|
| 157 | 157 | |
|---|
| 158 | 158 | static struct kmem_cache *ceph_msg_cache; |
|---|
| 159 | | -static struct kmem_cache *ceph_msg_data_cache; |
|---|
| 160 | 159 | |
|---|
| 161 | 160 | /* static tag bytes (protocol control messages) */ |
|---|
| 162 | 161 | static char tag_msg = CEPH_MSGR_TAG_MSG; |
|---|
| .. | .. |
|---|
| 187 | 186 | |
|---|
| 188 | 187 | static struct page *zero_page; /* used in certain error cases */ |
|---|
| 189 | 188 | |
|---|
| 190 | | -const char *ceph_pr_addr(const struct sockaddr_storage *ss) |
|---|
| 189 | +const char *ceph_pr_addr(const struct ceph_entity_addr *addr) |
|---|
| 191 | 190 | { |
|---|
| 192 | 191 | int i; |
|---|
| 193 | 192 | char *s; |
|---|
| 194 | | - struct sockaddr_in *in4 = (struct sockaddr_in *) ss; |
|---|
| 195 | | - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; |
|---|
| 193 | + struct sockaddr_storage ss = addr->in_addr; /* align */ |
|---|
| 194 | + struct sockaddr_in *in4 = (struct sockaddr_in *)&ss; |
|---|
| 195 | + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss; |
|---|
| 196 | 196 | |
|---|
| 197 | 197 | i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK; |
|---|
| 198 | 198 | s = addr_str[i]; |
|---|
| 199 | 199 | |
|---|
| 200 | | - switch (ss->ss_family) { |
|---|
| 200 | + switch (ss.ss_family) { |
|---|
| 201 | 201 | case AF_INET: |
|---|
| 202 | | - snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr, |
|---|
| 202 | + snprintf(s, MAX_ADDR_STR_LEN, "(%d)%pI4:%hu", |
|---|
| 203 | + le32_to_cpu(addr->type), &in4->sin_addr, |
|---|
| 203 | 204 | ntohs(in4->sin_port)); |
|---|
| 204 | 205 | break; |
|---|
| 205 | 206 | |
|---|
| 206 | 207 | case AF_INET6: |
|---|
| 207 | | - snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr, |
|---|
| 208 | + snprintf(s, MAX_ADDR_STR_LEN, "(%d)[%pI6c]:%hu", |
|---|
| 209 | + le32_to_cpu(addr->type), &in6->sin6_addr, |
|---|
| 208 | 210 | ntohs(in6->sin6_port)); |
|---|
| 209 | 211 | break; |
|---|
| 210 | 212 | |
|---|
| 211 | 213 | default: |
|---|
| 212 | 214 | snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)", |
|---|
| 213 | | - ss->ss_family); |
|---|
| 215 | + ss.ss_family); |
|---|
| 214 | 216 | } |
|---|
| 215 | 217 | |
|---|
| 216 | 218 | return s; |
|---|
| .. | .. |
|---|
| 220 | 222 | static void encode_my_addr(struct ceph_messenger *msgr) |
|---|
| 221 | 223 | { |
|---|
| 222 | 224 | memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr)); |
|---|
| 223 | | - ceph_encode_addr(&msgr->my_enc_addr); |
|---|
| 225 | + ceph_encode_banner_addr(&msgr->my_enc_addr); |
|---|
| 224 | 226 | } |
|---|
| 225 | 227 | |
|---|
| 226 | 228 | /* |
|---|
| .. | .. |
|---|
| 235 | 237 | if (!ceph_msg_cache) |
|---|
| 236 | 238 | return -ENOMEM; |
|---|
| 237 | 239 | |
|---|
| 238 | | - BUG_ON(ceph_msg_data_cache); |
|---|
| 239 | | - ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0); |
|---|
| 240 | | - if (ceph_msg_data_cache) |
|---|
| 241 | | - return 0; |
|---|
| 242 | | - |
|---|
| 243 | | - kmem_cache_destroy(ceph_msg_cache); |
|---|
| 244 | | - ceph_msg_cache = NULL; |
|---|
| 245 | | - |
|---|
| 246 | | - return -ENOMEM; |
|---|
| 240 | + return 0; |
|---|
| 247 | 241 | } |
|---|
| 248 | 242 | |
|---|
| 249 | 243 | static void ceph_msgr_slab_exit(void) |
|---|
| 250 | 244 | { |
|---|
| 251 | | - BUG_ON(!ceph_msg_data_cache); |
|---|
| 252 | | - kmem_cache_destroy(ceph_msg_data_cache); |
|---|
| 253 | | - ceph_msg_data_cache = NULL; |
|---|
| 254 | | - |
|---|
| 255 | 245 | BUG_ON(!ceph_msg_cache); |
|---|
| 256 | 246 | kmem_cache_destroy(ceph_msg_cache); |
|---|
| 257 | 247 | ceph_msg_cache = NULL; |
|---|
| .. | .. |
|---|
| 422 | 412 | switch (sk->sk_state) { |
|---|
| 423 | 413 | case TCP_CLOSE: |
|---|
| 424 | 414 | dout("%s TCP_CLOSE\n", __func__); |
|---|
| 425 | | - /* fall through */ |
|---|
| 415 | + fallthrough; |
|---|
| 426 | 416 | case TCP_CLOSE_WAIT: |
|---|
| 427 | 417 | dout("%s TCP_CLOSE_WAIT\n", __func__); |
|---|
| 428 | 418 | con_sock_state_closing(con); |
|---|
| .. | .. |
|---|
| 462 | 452 | */ |
|---|
| 463 | 453 | static int ceph_tcp_connect(struct ceph_connection *con) |
|---|
| 464 | 454 | { |
|---|
| 465 | | - struct sockaddr_storage *paddr = &con->peer_addr.in_addr; |
|---|
| 455 | + struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */ |
|---|
| 466 | 456 | struct socket *sock; |
|---|
| 467 | 457 | unsigned int noio_flag; |
|---|
| 468 | 458 | int ret; |
|---|
| .. | .. |
|---|
| 471 | 461 | |
|---|
| 472 | 462 | /* sock_create_kern() allocates with GFP_KERNEL */ |
|---|
| 473 | 463 | noio_flag = memalloc_noio_save(); |
|---|
| 474 | | - ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, |
|---|
| 464 | + ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family, |
|---|
| 475 | 465 | SOCK_STREAM, IPPROTO_TCP, &sock); |
|---|
| 476 | 466 | memalloc_noio_restore(noio_flag); |
|---|
| 477 | 467 | if (ret) |
|---|
| .. | .. |
|---|
| 484 | 474 | |
|---|
| 485 | 475 | set_sock_callbacks(sock, con); |
|---|
| 486 | 476 | |
|---|
| 487 | | - dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); |
|---|
| 477 | + dout("connect %s\n", ceph_pr_addr(&con->peer_addr)); |
|---|
| 488 | 478 | |
|---|
| 489 | 479 | con_sock_state_connecting(con); |
|---|
| 490 | | - ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), |
|---|
| 480 | + ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss), |
|---|
| 491 | 481 | O_NONBLOCK); |
|---|
| 492 | 482 | if (ret == -EINPROGRESS) { |
|---|
| 493 | 483 | dout("connect %s EINPROGRESS sk_state = %u\n", |
|---|
| 494 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
|---|
| 484 | + ceph_pr_addr(&con->peer_addr), |
|---|
| 495 | 485 | sock->sk->sk_state); |
|---|
| 496 | 486 | } else if (ret < 0) { |
|---|
| 497 | 487 | pr_err("connect %s error %d\n", |
|---|
| 498 | | - ceph_pr_addr(&con->peer_addr.in_addr), ret); |
|---|
| 488 | + ceph_pr_addr(&con->peer_addr), ret); |
|---|
| 499 | 489 | sock_release(sock); |
|---|
| 500 | 490 | return ret; |
|---|
| 501 | 491 | } |
|---|
| 502 | 492 | |
|---|
| 503 | | - if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) { |
|---|
| 504 | | - int optval = 1; |
|---|
| 505 | | - |
|---|
| 506 | | - ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, |
|---|
| 507 | | - (char *)&optval, sizeof(optval)); |
|---|
| 508 | | - if (ret) |
|---|
| 509 | | - pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d", |
|---|
| 510 | | - ret); |
|---|
| 511 | | - } |
|---|
| 493 | + if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) |
|---|
| 494 | + tcp_sock_set_nodelay(sock->sk); |
|---|
| 512 | 495 | |
|---|
| 513 | 496 | con->sock = sock; |
|---|
| 514 | 497 | return 0; |
|---|
| .. | .. |
|---|
| 526 | 509 | if (!buf) |
|---|
| 527 | 510 | msg.msg_flags |= MSG_TRUNC; |
|---|
| 528 | 511 | |
|---|
| 529 | | - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len); |
|---|
| 512 | + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len); |
|---|
| 530 | 513 | r = sock_recvmsg(sock, &msg, msg.msg_flags); |
|---|
| 531 | 514 | if (r == -EAGAIN) |
|---|
| 532 | 515 | r = 0; |
|---|
| .. | .. |
|---|
| 545 | 528 | int r; |
|---|
| 546 | 529 | |
|---|
| 547 | 530 | BUG_ON(page_offset + length > PAGE_SIZE); |
|---|
| 548 | | - iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length); |
|---|
| 531 | + iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length); |
|---|
| 549 | 532 | r = sock_recvmsg(sock, &msg, msg.msg_flags); |
|---|
| 550 | 533 | if (r == -EAGAIN) |
|---|
| 551 | 534 | r = 0; |
|---|
| .. | .. |
|---|
| 557 | 540 | * shortly. |
|---|
| 558 | 541 | */ |
|---|
| 559 | 542 | static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, |
|---|
| 560 | | - size_t kvlen, size_t len, int more) |
|---|
| 543 | + size_t kvlen, size_t len, bool more) |
|---|
| 561 | 544 | { |
|---|
| 562 | 545 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; |
|---|
| 563 | 546 | int r; |
|---|
| .. | .. |
|---|
| 573 | 556 | return r; |
|---|
| 574 | 557 | } |
|---|
| 575 | 558 | |
|---|
| 576 | | -static int __ceph_tcp_sendpage(struct socket *sock, struct page *page, |
|---|
| 577 | | - int offset, size_t size, bool more) |
|---|
| 578 | | -{ |
|---|
| 579 | | - int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); |
|---|
| 580 | | - int ret; |
|---|
| 581 | | - |
|---|
| 582 | | - ret = kernel_sendpage(sock, page, offset, size, flags); |
|---|
| 583 | | - if (ret == -EAGAIN) |
|---|
| 584 | | - ret = 0; |
|---|
| 585 | | - |
|---|
| 586 | | - return ret; |
|---|
| 587 | | -} |
|---|
| 588 | | - |
|---|
| 559 | +/* |
|---|
| 560 | + * @more: either or both of MSG_MORE and MSG_SENDPAGE_NOTLAST |
|---|
| 561 | + */ |
|---|
| 589 | 562 | static int ceph_tcp_sendpage(struct socket *sock, struct page *page, |
|---|
| 590 | | - int offset, size_t size, bool more) |
|---|
| 563 | + int offset, size_t size, int more) |
|---|
| 591 | 564 | { |
|---|
| 592 | | - struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; |
|---|
| 593 | | - struct bio_vec bvec; |
|---|
| 565 | + ssize_t (*sendpage)(struct socket *sock, struct page *page, |
|---|
| 566 | + int offset, size_t size, int flags); |
|---|
| 567 | + int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more; |
|---|
| 594 | 568 | int ret; |
|---|
| 595 | 569 | |
|---|
| 596 | 570 | /* |
|---|
| .. | .. |
|---|
| 601 | 575 | * coalescing neighboring slab objects into a single frag which |
|---|
| 602 | 576 | * triggers one of hardened usercopy checks. |
|---|
| 603 | 577 | */ |
|---|
| 604 | | - if (page_count(page) >= 1 && !PageSlab(page)) |
|---|
| 605 | | - return __ceph_tcp_sendpage(sock, page, offset, size, more); |
|---|
| 606 | | - |
|---|
| 607 | | - bvec.bv_page = page; |
|---|
| 608 | | - bvec.bv_offset = offset; |
|---|
| 609 | | - bvec.bv_len = size; |
|---|
| 610 | | - |
|---|
| 611 | | - if (more) |
|---|
| 612 | | - msg.msg_flags |= MSG_MORE; |
|---|
| 578 | + if (sendpage_ok(page)) |
|---|
| 579 | + sendpage = sock->ops->sendpage; |
|---|
| 613 | 580 | else |
|---|
| 614 | | - msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ |
|---|
| 581 | + sendpage = sock_no_sendpage; |
|---|
| 615 | 582 | |
|---|
| 616 | | - iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size); |
|---|
| 617 | | - ret = sock_sendmsg(sock, &msg); |
|---|
| 583 | + ret = sendpage(sock, page, offset, size, flags); |
|---|
| 618 | 584 | if (ret == -EAGAIN) |
|---|
| 619 | 585 | ret = 0; |
|---|
| 620 | 586 | |
|---|
| .. | .. |
|---|
| 699 | 665 | void ceph_con_close(struct ceph_connection *con) |
|---|
| 700 | 666 | { |
|---|
| 701 | 667 | mutex_lock(&con->mutex); |
|---|
| 702 | | - dout("con_close %p peer %s\n", con, |
|---|
| 703 | | - ceph_pr_addr(&con->peer_addr.in_addr)); |
|---|
| 668 | + dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr)); |
|---|
| 704 | 669 | con->state = CON_STATE_CLOSED; |
|---|
| 705 | 670 | |
|---|
| 706 | 671 | con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */ |
|---|
| .. | .. |
|---|
| 724 | 689 | struct ceph_entity_addr *addr) |
|---|
| 725 | 690 | { |
|---|
| 726 | 691 | mutex_lock(&con->mutex); |
|---|
| 727 | | - dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); |
|---|
| 692 | + dout("con_open %p %s\n", con, ceph_pr_addr(addr)); |
|---|
| 728 | 693 | |
|---|
| 729 | 694 | WARN_ON(con->state != CON_STATE_CLOSED); |
|---|
| 730 | 695 | con->state = CON_STATE_PREOPEN; |
|---|
| .. | .. |
|---|
| 870 | 835 | size_t bytes) |
|---|
| 871 | 836 | { |
|---|
| 872 | 837 | struct ceph_bio_iter *it = &cursor->bio_iter; |
|---|
| 838 | + struct page *page = bio_iter_page(it->bio, it->iter); |
|---|
| 873 | 839 | |
|---|
| 874 | 840 | BUG_ON(bytes > cursor->resid); |
|---|
| 875 | 841 | BUG_ON(bytes > bio_iter_len(it->bio, it->iter)); |
|---|
| .. | .. |
|---|
| 881 | 847 | return false; /* no more data */ |
|---|
| 882 | 848 | } |
|---|
| 883 | 849 | |
|---|
| 884 | | - if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done)) |
|---|
| 850 | + if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done && |
|---|
| 851 | + page == bio_iter_page(it->bio, it->iter))) |
|---|
| 885 | 852 | return false; /* more bytes to process in this segment */ |
|---|
| 886 | 853 | |
|---|
| 887 | 854 | if (!it->iter.bi_size) { |
|---|
| .. | .. |
|---|
| 929 | 896 | size_t bytes) |
|---|
| 930 | 897 | { |
|---|
| 931 | 898 | struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs; |
|---|
| 899 | + struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter); |
|---|
| 932 | 900 | |
|---|
| 933 | 901 | BUG_ON(bytes > cursor->resid); |
|---|
| 934 | 902 | BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter)); |
|---|
| .. | .. |
|---|
| 940 | 908 | return false; /* no more data */ |
|---|
| 941 | 909 | } |
|---|
| 942 | 910 | |
|---|
| 943 | | - if (!bytes || cursor->bvec_iter.bi_bvec_done) |
|---|
| 911 | + if (!bytes || (cursor->bvec_iter.bi_bvec_done && |
|---|
| 912 | + page == bvec_iter_page(bvecs, cursor->bvec_iter))) |
|---|
| 944 | 913 | return false; /* more bytes to process in this segment */ |
|---|
| 945 | 914 | |
|---|
| 946 | 915 | BUG_ON(cursor->last_piece); |
|---|
| .. | .. |
|---|
| 1147 | 1116 | static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length) |
|---|
| 1148 | 1117 | { |
|---|
| 1149 | 1118 | struct ceph_msg_data_cursor *cursor = &msg->cursor; |
|---|
| 1150 | | - struct ceph_msg_data *data; |
|---|
| 1151 | 1119 | |
|---|
| 1152 | 1120 | BUG_ON(!length); |
|---|
| 1153 | 1121 | BUG_ON(length > msg->data_length); |
|---|
| 1154 | | - BUG_ON(list_empty(&msg->data)); |
|---|
| 1122 | + BUG_ON(!msg->num_data_items); |
|---|
| 1155 | 1123 | |
|---|
| 1156 | | - cursor->data_head = &msg->data; |
|---|
| 1157 | 1124 | cursor->total_resid = length; |
|---|
| 1158 | | - data = list_first_entry(&msg->data, struct ceph_msg_data, links); |
|---|
| 1159 | | - cursor->data = data; |
|---|
| 1125 | + cursor->data = msg->data; |
|---|
| 1160 | 1126 | |
|---|
| 1161 | 1127 | __ceph_msg_data_cursor_init(cursor); |
|---|
| 1162 | 1128 | } |
|---|
| .. | .. |
|---|
| 1237 | 1203 | |
|---|
| 1238 | 1204 | if (!cursor->resid && cursor->total_resid) { |
|---|
| 1239 | 1205 | WARN_ON(!cursor->last_piece); |
|---|
| 1240 | | - BUG_ON(list_is_last(&cursor->data->links, cursor->data_head)); |
|---|
| 1241 | | - cursor->data = list_next_entry(cursor->data, links); |
|---|
| 1206 | + cursor->data++; |
|---|
| 1242 | 1207 | __ceph_msg_data_cursor_init(cursor); |
|---|
| 1243 | 1208 | new_piece = true; |
|---|
| 1244 | 1209 | } |
|---|
| .. | .. |
|---|
| 1254 | 1219 | |
|---|
| 1255 | 1220 | static void prepare_message_data(struct ceph_msg *msg, u32 data_len) |
|---|
| 1256 | 1221 | { |
|---|
| 1257 | | - BUG_ON(!msg); |
|---|
| 1258 | | - BUG_ON(!data_len); |
|---|
| 1259 | | - |
|---|
| 1260 | 1222 | /* Initialize data cursor */ |
|---|
| 1261 | 1223 | |
|---|
| 1262 | 1224 | ceph_msg_data_cursor_init(msg, (size_t)data_len); |
|---|
| .. | .. |
|---|
| 1592 | 1554 | struct ceph_msg *msg = con->out_msg; |
|---|
| 1593 | 1555 | struct ceph_msg_data_cursor *cursor = &msg->cursor; |
|---|
| 1594 | 1556 | bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); |
|---|
| 1557 | + int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; |
|---|
| 1595 | 1558 | u32 crc; |
|---|
| 1596 | 1559 | |
|---|
| 1597 | 1560 | dout("%s %p msg %p\n", __func__, con, msg); |
|---|
| 1598 | 1561 | |
|---|
| 1599 | | - if (list_empty(&msg->data)) |
|---|
| 1562 | + if (!msg->num_data_items) |
|---|
| 1600 | 1563 | return -EINVAL; |
|---|
| 1601 | 1564 | |
|---|
| 1602 | 1565 | /* |
|---|
| .. | .. |
|---|
| 1612 | 1575 | struct page *page; |
|---|
| 1613 | 1576 | size_t page_offset; |
|---|
| 1614 | 1577 | size_t length; |
|---|
| 1615 | | - bool last_piece; |
|---|
| 1616 | 1578 | int ret; |
|---|
| 1617 | 1579 | |
|---|
| 1618 | 1580 | if (!cursor->resid) { |
|---|
| .. | .. |
|---|
| 1620 | 1582 | continue; |
|---|
| 1621 | 1583 | } |
|---|
| 1622 | 1584 | |
|---|
| 1623 | | - page = ceph_msg_data_next(cursor, &page_offset, &length, |
|---|
| 1624 | | - &last_piece); |
|---|
| 1625 | | - ret = ceph_tcp_sendpage(con->sock, page, page_offset, |
|---|
| 1626 | | - length, !last_piece); |
|---|
| 1585 | + page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); |
|---|
| 1586 | + if (length == cursor->total_resid) |
|---|
| 1587 | + more = MSG_MORE; |
|---|
| 1588 | + ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, |
|---|
| 1589 | + more); |
|---|
| 1627 | 1590 | if (ret <= 0) { |
|---|
| 1628 | 1591 | if (do_datacrc) |
|---|
| 1629 | 1592 | msg->footer.data_crc = cpu_to_le32(crc); |
|---|
| .. | .. |
|---|
| 1653 | 1616 | */ |
|---|
| 1654 | 1617 | static int write_partial_skip(struct ceph_connection *con) |
|---|
| 1655 | 1618 | { |
|---|
| 1619 | + int more = MSG_MORE | MSG_SENDPAGE_NOTLAST; |
|---|
| 1656 | 1620 | int ret; |
|---|
| 1657 | 1621 | |
|---|
| 1658 | 1622 | dout("%s %p %d left\n", __func__, con, con->out_skip); |
|---|
| 1659 | 1623 | while (con->out_skip > 0) { |
|---|
| 1660 | 1624 | size_t size = min(con->out_skip, (int) PAGE_SIZE); |
|---|
| 1661 | 1625 | |
|---|
| 1662 | | - ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true); |
|---|
| 1626 | + if (size == con->out_skip) |
|---|
| 1627 | + more = MSG_MORE; |
|---|
| 1628 | + ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, more); |
|---|
| 1663 | 1629 | if (ret <= 0) |
|---|
| 1664 | 1630 | goto out; |
|---|
| 1665 | 1631 | con->out_skip -= ret; |
|---|
| .. | .. |
|---|
| 1761 | 1727 | ret = read_partial(con, end, size, &con->actual_peer_addr); |
|---|
| 1762 | 1728 | if (ret <= 0) |
|---|
| 1763 | 1729 | goto out; |
|---|
| 1730 | + ceph_decode_banner_addr(&con->actual_peer_addr); |
|---|
| 1764 | 1731 | |
|---|
| 1765 | 1732 | size = sizeof (con->peer_addr_for_me); |
|---|
| 1766 | 1733 | end += size; |
|---|
| 1767 | 1734 | ret = read_partial(con, end, size, &con->peer_addr_for_me); |
|---|
| 1768 | 1735 | if (ret <= 0) |
|---|
| 1769 | 1736 | goto out; |
|---|
| 1737 | + ceph_decode_banner_addr(&con->peer_addr_for_me); |
|---|
| 1770 | 1738 | |
|---|
| 1771 | 1739 | out: |
|---|
| 1772 | 1740 | return ret; |
|---|
| .. | .. |
|---|
| 1817 | 1785 | { |
|---|
| 1818 | 1786 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { |
|---|
| 1819 | 1787 | pr_err("connect to %s got bad banner\n", |
|---|
| 1820 | | - ceph_pr_addr(&con->peer_addr.in_addr)); |
|---|
| 1788 | + ceph_pr_addr(&con->peer_addr)); |
|---|
| 1821 | 1789 | con->error_msg = "protocol error, bad banner"; |
|---|
| 1822 | 1790 | return -1; |
|---|
| 1823 | 1791 | } |
|---|
| 1824 | 1792 | return 0; |
|---|
| 1825 | 1793 | } |
|---|
| 1826 | 1794 | |
|---|
| 1827 | | -static bool addr_is_blank(struct sockaddr_storage *ss) |
|---|
| 1795 | +static bool addr_is_blank(struct ceph_entity_addr *addr) |
|---|
| 1828 | 1796 | { |
|---|
| 1829 | | - struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; |
|---|
| 1830 | | - struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; |
|---|
| 1797 | + struct sockaddr_storage ss = addr->in_addr; /* align */ |
|---|
| 1798 | + struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr; |
|---|
| 1799 | + struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr; |
|---|
| 1831 | 1800 | |
|---|
| 1832 | | - switch (ss->ss_family) { |
|---|
| 1801 | + switch (ss.ss_family) { |
|---|
| 1833 | 1802 | case AF_INET: |
|---|
| 1834 | | - return addr->s_addr == htonl(INADDR_ANY); |
|---|
| 1803 | + return addr4->s_addr == htonl(INADDR_ANY); |
|---|
| 1835 | 1804 | case AF_INET6: |
|---|
| 1836 | 1805 | return ipv6_addr_any(addr6); |
|---|
| 1837 | 1806 | default: |
|---|
| .. | .. |
|---|
| 1839 | 1808 | } |
|---|
| 1840 | 1809 | } |
|---|
| 1841 | 1810 | |
|---|
| 1842 | | -static int addr_port(struct sockaddr_storage *ss) |
|---|
| 1811 | +static int addr_port(struct ceph_entity_addr *addr) |
|---|
| 1843 | 1812 | { |
|---|
| 1844 | | - switch (ss->ss_family) { |
|---|
| 1813 | + switch (get_unaligned(&addr->in_addr.ss_family)) { |
|---|
| 1845 | 1814 | case AF_INET: |
|---|
| 1846 | | - return ntohs(((struct sockaddr_in *)ss)->sin_port); |
|---|
| 1815 | + return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port)); |
|---|
| 1847 | 1816 | case AF_INET6: |
|---|
| 1848 | | - return ntohs(((struct sockaddr_in6 *)ss)->sin6_port); |
|---|
| 1817 | + return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port)); |
|---|
| 1849 | 1818 | } |
|---|
| 1850 | 1819 | return 0; |
|---|
| 1851 | 1820 | } |
|---|
| 1852 | 1821 | |
|---|
| 1853 | | -static void addr_set_port(struct sockaddr_storage *ss, int p) |
|---|
| 1822 | +static void addr_set_port(struct ceph_entity_addr *addr, int p) |
|---|
| 1854 | 1823 | { |
|---|
| 1855 | | - switch (ss->ss_family) { |
|---|
| 1824 | + switch (get_unaligned(&addr->in_addr.ss_family)) { |
|---|
| 1856 | 1825 | case AF_INET: |
|---|
| 1857 | | - ((struct sockaddr_in *)ss)->sin_port = htons(p); |
|---|
| 1826 | + put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port); |
|---|
| 1858 | 1827 | break; |
|---|
| 1859 | 1828 | case AF_INET6: |
|---|
| 1860 | | - ((struct sockaddr_in6 *)ss)->sin6_port = htons(p); |
|---|
| 1829 | + put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port); |
|---|
| 1861 | 1830 | break; |
|---|
| 1862 | 1831 | } |
|---|
| 1863 | 1832 | } |
|---|
| .. | .. |
|---|
| 1865 | 1834 | /* |
|---|
| 1866 | 1835 | * Unlike other *_pton function semantics, zero indicates success. |
|---|
| 1867 | 1836 | */ |
|---|
| 1868 | | -static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, |
|---|
| 1837 | +static int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr, |
|---|
| 1869 | 1838 | char delim, const char **ipend) |
|---|
| 1870 | 1839 | { |
|---|
| 1871 | | - struct sockaddr_in *in4 = (struct sockaddr_in *) ss; |
|---|
| 1872 | | - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; |
|---|
| 1840 | + memset(&addr->in_addr, 0, sizeof(addr->in_addr)); |
|---|
| 1873 | 1841 | |
|---|
| 1874 | | - memset(ss, 0, sizeof(*ss)); |
|---|
| 1875 | | - |
|---|
| 1876 | | - if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) { |
|---|
| 1877 | | - ss->ss_family = AF_INET; |
|---|
| 1842 | + if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) { |
|---|
| 1843 | + put_unaligned(AF_INET, &addr->in_addr.ss_family); |
|---|
| 1878 | 1844 | return 0; |
|---|
| 1879 | 1845 | } |
|---|
| 1880 | 1846 | |
|---|
| 1881 | | - if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) { |
|---|
| 1882 | | - ss->ss_family = AF_INET6; |
|---|
| 1847 | + if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) { |
|---|
| 1848 | + put_unaligned(AF_INET6, &addr->in_addr.ss_family); |
|---|
| 1883 | 1849 | return 0; |
|---|
| 1884 | 1850 | } |
|---|
| 1885 | 1851 | |
|---|
| .. | .. |
|---|
| 1891 | 1857 | */ |
|---|
| 1892 | 1858 | #ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER |
|---|
| 1893 | 1859 | static int ceph_dns_resolve_name(const char *name, size_t namelen, |
|---|
| 1894 | | - struct sockaddr_storage *ss, char delim, const char **ipend) |
|---|
| 1860 | + struct ceph_entity_addr *addr, char delim, const char **ipend) |
|---|
| 1895 | 1861 | { |
|---|
| 1896 | 1862 | const char *end, *delim_p; |
|---|
| 1897 | 1863 | char *colon_p, *ip_addr = NULL; |
|---|
| .. | .. |
|---|
| 1918 | 1884 | return -EINVAL; |
|---|
| 1919 | 1885 | |
|---|
| 1920 | 1886 | /* do dns_resolve upcall */ |
|---|
| 1921 | | - ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL); |
|---|
| 1887 | + ip_len = dns_query(current->nsproxy->net_ns, |
|---|
| 1888 | + NULL, name, end - name, NULL, &ip_addr, NULL, false); |
|---|
| 1922 | 1889 | if (ip_len > 0) |
|---|
| 1923 | | - ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL); |
|---|
| 1890 | + ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL); |
|---|
| 1924 | 1891 | else |
|---|
| 1925 | 1892 | ret = -ESRCH; |
|---|
| 1926 | 1893 | |
|---|
| .. | .. |
|---|
| 1929 | 1896 | *ipend = end; |
|---|
| 1930 | 1897 | |
|---|
| 1931 | 1898 | pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name, |
|---|
| 1932 | | - ret, ret ? "failed" : ceph_pr_addr(ss)); |
|---|
| 1899 | + ret, ret ? "failed" : ceph_pr_addr(addr)); |
|---|
| 1933 | 1900 | |
|---|
| 1934 | 1901 | return ret; |
|---|
| 1935 | 1902 | } |
|---|
| 1936 | 1903 | #else |
|---|
| 1937 | 1904 | static inline int ceph_dns_resolve_name(const char *name, size_t namelen, |
|---|
| 1938 | | - struct sockaddr_storage *ss, char delim, const char **ipend) |
|---|
| 1905 | + struct ceph_entity_addr *addr, char delim, const char **ipend) |
|---|
| 1939 | 1906 | { |
|---|
| 1940 | 1907 | return -EINVAL; |
|---|
| 1941 | 1908 | } |
|---|
| .. | .. |
|---|
| 1946 | 1913 | * then try to extract a hostname to resolve using userspace DNS upcall. |
|---|
| 1947 | 1914 | */ |
|---|
| 1948 | 1915 | static int ceph_parse_server_name(const char *name, size_t namelen, |
|---|
| 1949 | | - struct sockaddr_storage *ss, char delim, const char **ipend) |
|---|
| 1916 | + struct ceph_entity_addr *addr, char delim, const char **ipend) |
|---|
| 1950 | 1917 | { |
|---|
| 1951 | 1918 | int ret; |
|---|
| 1952 | 1919 | |
|---|
| 1953 | | - ret = ceph_pton(name, namelen, ss, delim, ipend); |
|---|
| 1920 | + ret = ceph_pton(name, namelen, addr, delim, ipend); |
|---|
| 1954 | 1921 | if (ret) |
|---|
| 1955 | | - ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend); |
|---|
| 1922 | + ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend); |
|---|
| 1956 | 1923 | |
|---|
| 1957 | 1924 | return ret; |
|---|
| 1958 | 1925 | } |
|---|
| .. | .. |
|---|
| 1971 | 1938 | dout("parse_ips on '%.*s'\n", (int)(end-c), c); |
|---|
| 1972 | 1939 | for (i = 0; i < max_count; i++) { |
|---|
| 1973 | 1940 | const char *ipend; |
|---|
| 1974 | | - struct sockaddr_storage *ss = &addr[i].in_addr; |
|---|
| 1975 | 1941 | int port; |
|---|
| 1976 | 1942 | char delim = ','; |
|---|
| 1977 | 1943 | |
|---|
| .. | .. |
|---|
| 1980 | 1946 | p++; |
|---|
| 1981 | 1947 | } |
|---|
| 1982 | 1948 | |
|---|
| 1983 | | - ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend); |
|---|
| 1949 | + ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend); |
|---|
| 1984 | 1950 | if (ret) |
|---|
| 1985 | 1951 | goto bad; |
|---|
| 1986 | 1952 | ret = -EINVAL; |
|---|
| .. | .. |
|---|
| 2011 | 1977 | port = CEPH_MON_PORT; |
|---|
| 2012 | 1978 | } |
|---|
| 2013 | 1979 | |
|---|
| 2014 | | - addr_set_port(ss, port); |
|---|
| 1980 | + addr_set_port(&addr[i], port); |
|---|
| 1981 | + addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY; |
|---|
| 2015 | 1982 | |
|---|
| 2016 | | - dout("parse_ips got %s\n", ceph_pr_addr(ss)); |
|---|
| 1983 | + dout("parse_ips got %s\n", ceph_pr_addr(&addr[i])); |
|---|
| 2017 | 1984 | |
|---|
| 2018 | 1985 | if (p == end) |
|---|
| 2019 | 1986 | break; |
|---|
| .. | .. |
|---|
| 2030 | 1997 | return 0; |
|---|
| 2031 | 1998 | |
|---|
| 2032 | 1999 | bad: |
|---|
| 2033 | | - pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); |
|---|
| 2034 | 2000 | return ret; |
|---|
| 2035 | 2001 | } |
|---|
| 2036 | | -EXPORT_SYMBOL(ceph_parse_ips); |
|---|
| 2037 | 2002 | |
|---|
| 2038 | 2003 | static int process_banner(struct ceph_connection *con) |
|---|
| 2039 | 2004 | { |
|---|
| .. | .. |
|---|
| 2042 | 2007 | if (verify_hello(con) < 0) |
|---|
| 2043 | 2008 | return -1; |
|---|
| 2044 | 2009 | |
|---|
| 2045 | | - ceph_decode_addr(&con->actual_peer_addr); |
|---|
| 2046 | | - ceph_decode_addr(&con->peer_addr_for_me); |
|---|
| 2047 | | - |
|---|
| 2048 | 2010 | /* |
|---|
| 2049 | 2011 | * Make sure the other end is who we wanted. note that the other |
|---|
| 2050 | 2012 | * end may not yet know their ip address, so if it's 0.0.0.0, give |
|---|
| .. | .. |
|---|
| 2052 | 2014 | */ |
|---|
| 2053 | 2015 | if (memcmp(&con->peer_addr, &con->actual_peer_addr, |
|---|
| 2054 | 2016 | sizeof(con->peer_addr)) != 0 && |
|---|
| 2055 | | - !(addr_is_blank(&con->actual_peer_addr.in_addr) && |
|---|
| 2017 | + !(addr_is_blank(&con->actual_peer_addr) && |
|---|
| 2056 | 2018 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { |
|---|
| 2057 | | - pr_warn("wrong peer, want %s/%d, got %s/%d\n", |
|---|
| 2058 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
|---|
| 2059 | | - (int)le32_to_cpu(con->peer_addr.nonce), |
|---|
| 2060 | | - ceph_pr_addr(&con->actual_peer_addr.in_addr), |
|---|
| 2061 | | - (int)le32_to_cpu(con->actual_peer_addr.nonce)); |
|---|
| 2019 | + pr_warn("wrong peer, want %s/%u, got %s/%u\n", |
|---|
| 2020 | + ceph_pr_addr(&con->peer_addr), |
|---|
| 2021 | + le32_to_cpu(con->peer_addr.nonce), |
|---|
| 2022 | + ceph_pr_addr(&con->actual_peer_addr), |
|---|
| 2023 | + le32_to_cpu(con->actual_peer_addr.nonce)); |
|---|
| 2062 | 2024 | con->error_msg = "wrong peer at address"; |
|---|
| 2063 | 2025 | return -1; |
|---|
| 2064 | 2026 | } |
|---|
| .. | .. |
|---|
| 2066 | 2028 | /* |
|---|
| 2067 | 2029 | * did we learn our address? |
|---|
| 2068 | 2030 | */ |
|---|
| 2069 | | - if (addr_is_blank(&con->msgr->inst.addr.in_addr)) { |
|---|
| 2070 | | - int port = addr_port(&con->msgr->inst.addr.in_addr); |
|---|
| 2031 | + if (addr_is_blank(&con->msgr->inst.addr)) { |
|---|
| 2032 | + int port = addr_port(&con->msgr->inst.addr); |
|---|
| 2071 | 2033 | |
|---|
| 2072 | 2034 | memcpy(&con->msgr->inst.addr.in_addr, |
|---|
| 2073 | 2035 | &con->peer_addr_for_me.in_addr, |
|---|
| 2074 | 2036 | sizeof(con->peer_addr_for_me.in_addr)); |
|---|
| 2075 | | - addr_set_port(&con->msgr->inst.addr.in_addr, port); |
|---|
| 2037 | + addr_set_port(&con->msgr->inst.addr, port); |
|---|
| 2076 | 2038 | encode_my_addr(con->msgr); |
|---|
| 2077 | 2039 | dout("process_banner learned my addr is %s\n", |
|---|
| 2078 | | - ceph_pr_addr(&con->msgr->inst.addr.in_addr)); |
|---|
| 2040 | + ceph_pr_addr(&con->msgr->inst.addr)); |
|---|
| 2079 | 2041 | } |
|---|
| 2080 | 2042 | |
|---|
| 2081 | 2043 | return 0; |
|---|
| .. | .. |
|---|
| 2126 | 2088 | pr_err("%s%lld %s feature set mismatch," |
|---|
| 2127 | 2089 | " my %llx < server's %llx, missing %llx\n", |
|---|
| 2128 | 2090 | ENTITY_NAME(con->peer_name), |
|---|
| 2129 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
|---|
| 2091 | + ceph_pr_addr(&con->peer_addr), |
|---|
| 2130 | 2092 | sup_feat, server_feat, server_feat & ~sup_feat); |
|---|
| 2131 | 2093 | con->error_msg = "missing required protocol features"; |
|---|
| 2132 | 2094 | reset_connection(con); |
|---|
| .. | .. |
|---|
| 2136 | 2098 | pr_err("%s%lld %s protocol version mismatch," |
|---|
| 2137 | 2099 | " my %d != server's %d\n", |
|---|
| 2138 | 2100 | ENTITY_NAME(con->peer_name), |
|---|
| 2139 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
|---|
| 2101 | + ceph_pr_addr(&con->peer_addr), |
|---|
| 2140 | 2102 | le32_to_cpu(con->out_connect.protocol_version), |
|---|
| 2141 | 2103 | le32_to_cpu(con->in_reply.protocol_version)); |
|---|
| 2142 | 2104 | con->error_msg = "protocol version mismatch"; |
|---|
| .. | .. |
|---|
| 2170 | 2132 | le32_to_cpu(con->in_reply.connect_seq)); |
|---|
| 2171 | 2133 | pr_err("%s%lld %s connection reset\n", |
|---|
| 2172 | 2134 | ENTITY_NAME(con->peer_name), |
|---|
| 2173 | | - ceph_pr_addr(&con->peer_addr.in_addr)); |
|---|
| 2135 | + ceph_pr_addr(&con->peer_addr)); |
|---|
| 2174 | 2136 | reset_connection(con); |
|---|
| 2175 | 2137 | con_out_kvec_reset(con); |
|---|
| 2176 | 2138 | ret = prepare_write_connect(con); |
|---|
| .. | .. |
|---|
| 2227 | 2189 | pr_err("%s%lld %s protocol feature mismatch," |
|---|
| 2228 | 2190 | " my required %llx > server's %llx, need %llx\n", |
|---|
| 2229 | 2191 | ENTITY_NAME(con->peer_name), |
|---|
| 2230 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
|---|
| 2192 | + ceph_pr_addr(&con->peer_addr), |
|---|
| 2231 | 2193 | req_feat, server_feat, req_feat & ~server_feat); |
|---|
| 2232 | 2194 | con->error_msg = "missing required protocol features"; |
|---|
| 2233 | 2195 | reset_connection(con); |
|---|
| .. | .. |
|---|
| 2356 | 2318 | u32 crc = 0; |
|---|
| 2357 | 2319 | int ret; |
|---|
| 2358 | 2320 | |
|---|
| 2359 | | - BUG_ON(!msg); |
|---|
| 2360 | | - if (list_empty(&msg->data)) |
|---|
| 2321 | + if (!msg->num_data_items) |
|---|
| 2361 | 2322 | return -EIO; |
|---|
| 2362 | 2323 | |
|---|
| 2363 | 2324 | if (do_datacrc) |
|---|
| .. | .. |
|---|
| 2435 | 2396 | if ((s64)seq - (s64)con->in_seq < 1) { |
|---|
| 2436 | 2397 | pr_info("skipping %s%lld %s seq %lld expected %lld\n", |
|---|
| 2437 | 2398 | ENTITY_NAME(con->peer_name), |
|---|
| 2438 | | - ceph_pr_addr(&con->peer_addr.in_addr), |
|---|
| 2399 | + ceph_pr_addr(&con->peer_addr), |
|---|
| 2439 | 2400 | seq, con->in_seq + 1); |
|---|
| 2440 | 2401 | con->in_base_pos = -front_len - middle_len - data_len - |
|---|
| 2441 | 2402 | sizeof_footer(con); |
|---|
| .. | .. |
|---|
| 2790 | 2751 | switch (ret) { |
|---|
| 2791 | 2752 | case -EBADMSG: |
|---|
| 2792 | 2753 | con->error_msg = "bad crc/signature"; |
|---|
| 2793 | | - /* fall through */ |
|---|
| 2754 | + fallthrough; |
|---|
| 2794 | 2755 | case -EBADE: |
|---|
| 2795 | 2756 | ret = -EIO; |
|---|
| 2796 | 2757 | break; |
|---|
| .. | .. |
|---|
| 2850 | 2811 | return -ENOENT; |
|---|
| 2851 | 2812 | } |
|---|
| 2852 | 2813 | |
|---|
| 2814 | + dout("%s %p %lu\n", __func__, con, delay); |
|---|
| 2853 | 2815 | if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { |
|---|
| 2854 | 2816 | dout("%s %p - already queued\n", __func__, con); |
|---|
| 2855 | 2817 | con->ops->put(con); |
|---|
| 2856 | 2818 | return -EBUSY; |
|---|
| 2857 | 2819 | } |
|---|
| 2858 | 2820 | |
|---|
| 2859 | | - dout("%s %p %lu\n", __func__, con, delay); |
|---|
| 2860 | 2821 | return 0; |
|---|
| 2861 | 2822 | } |
|---|
| 2862 | 2823 | |
|---|
| .. | .. |
|---|
| 3014 | 2975 | static void con_fault(struct ceph_connection *con) |
|---|
| 3015 | 2976 | { |
|---|
| 3016 | 2977 | dout("fault %p state %lu to peer %s\n", |
|---|
| 3017 | | - con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); |
|---|
| 2978 | + con, con->state, ceph_pr_addr(&con->peer_addr)); |
|---|
| 3018 | 2979 | |
|---|
| 3019 | 2980 | pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), |
|---|
| 3020 | | - ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); |
|---|
| 2981 | + ceph_pr_addr(&con->peer_addr), con->error_msg); |
|---|
| 3021 | 2982 | con->error_msg = NULL; |
|---|
| 3022 | 2983 | |
|---|
| 3023 | 2984 | WARN_ON(con->state != CON_STATE_CONNECTING && |
|---|
| .. | .. |
|---|
| 3066 | 3027 | } |
|---|
| 3067 | 3028 | |
|---|
| 3068 | 3029 | |
|---|
| 3030 | +void ceph_messenger_reset_nonce(struct ceph_messenger *msgr) |
|---|
| 3031 | +{ |
|---|
| 3032 | + u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000; |
|---|
| 3033 | + msgr->inst.addr.nonce = cpu_to_le32(nonce); |
|---|
| 3034 | + encode_my_addr(msgr); |
|---|
| 3035 | +} |
|---|
| 3069 | 3036 | |
|---|
| 3070 | 3037 | /* |
|---|
| 3071 | 3038 | * initialize a new messenger instance |
|---|
| .. | .. |
|---|
| 3271 | 3238 | return false; |
|---|
| 3272 | 3239 | } |
|---|
| 3273 | 3240 | |
|---|
| 3274 | | -static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) |
|---|
| 3241 | +static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg) |
|---|
| 3275 | 3242 | { |
|---|
| 3276 | | - struct ceph_msg_data *data; |
|---|
| 3277 | | - |
|---|
| 3278 | | - if (WARN_ON(!ceph_msg_data_type_valid(type))) |
|---|
| 3279 | | - return NULL; |
|---|
| 3280 | | - |
|---|
| 3281 | | - data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS); |
|---|
| 3282 | | - if (!data) |
|---|
| 3283 | | - return NULL; |
|---|
| 3284 | | - |
|---|
| 3285 | | - data->type = type; |
|---|
| 3286 | | - INIT_LIST_HEAD(&data->links); |
|---|
| 3287 | | - |
|---|
| 3288 | | - return data; |
|---|
| 3243 | + BUG_ON(msg->num_data_items >= msg->max_data_items); |
|---|
| 3244 | + return &msg->data[msg->num_data_items++]; |
|---|
| 3289 | 3245 | } |
|---|
| 3290 | 3246 | |
|---|
| 3291 | 3247 | static void ceph_msg_data_destroy(struct ceph_msg_data *data) |
|---|
| 3292 | 3248 | { |
|---|
| 3293 | | - if (!data) |
|---|
| 3294 | | - return; |
|---|
| 3295 | | - |
|---|
| 3296 | | - WARN_ON(!list_empty(&data->links)); |
|---|
| 3297 | | - if (data->type == CEPH_MSG_DATA_PAGELIST) |
|---|
| 3249 | + if (data->type == CEPH_MSG_DATA_PAGES && data->own_pages) { |
|---|
| 3250 | + int num_pages = calc_pages_for(data->alignment, data->length); |
|---|
| 3251 | + ceph_release_page_vector(data->pages, num_pages); |
|---|
| 3252 | + } else if (data->type == CEPH_MSG_DATA_PAGELIST) { |
|---|
| 3298 | 3253 | ceph_pagelist_release(data->pagelist); |
|---|
| 3299 | | - kmem_cache_free(ceph_msg_data_cache, data); |
|---|
| 3254 | + } |
|---|
| 3300 | 3255 | } |
|---|
| 3301 | 3256 | |
|---|
| 3302 | 3257 | void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, |
|---|
| 3303 | | - size_t length, size_t alignment) |
|---|
| 3258 | + size_t length, size_t alignment, bool own_pages) |
|---|
| 3304 | 3259 | { |
|---|
| 3305 | 3260 | struct ceph_msg_data *data; |
|---|
| 3306 | 3261 | |
|---|
| 3307 | 3262 | BUG_ON(!pages); |
|---|
| 3308 | 3263 | BUG_ON(!length); |
|---|
| 3309 | 3264 | |
|---|
| 3310 | | - data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES); |
|---|
| 3311 | | - BUG_ON(!data); |
|---|
| 3265 | + data = ceph_msg_data_add(msg); |
|---|
| 3266 | + data->type = CEPH_MSG_DATA_PAGES; |
|---|
| 3312 | 3267 | data->pages = pages; |
|---|
| 3313 | 3268 | data->length = length; |
|---|
| 3314 | 3269 | data->alignment = alignment & ~PAGE_MASK; |
|---|
| 3270 | + data->own_pages = own_pages; |
|---|
| 3315 | 3271 | |
|---|
| 3316 | | - list_add_tail(&data->links, &msg->data); |
|---|
| 3317 | 3272 | msg->data_length += length; |
|---|
| 3318 | 3273 | } |
|---|
| 3319 | 3274 | EXPORT_SYMBOL(ceph_msg_data_add_pages); |
|---|
| .. | .. |
|---|
| 3326 | 3281 | BUG_ON(!pagelist); |
|---|
| 3327 | 3282 | BUG_ON(!pagelist->length); |
|---|
| 3328 | 3283 | |
|---|
| 3329 | | - data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST); |
|---|
| 3330 | | - BUG_ON(!data); |
|---|
| 3284 | + data = ceph_msg_data_add(msg); |
|---|
| 3285 | + data->type = CEPH_MSG_DATA_PAGELIST; |
|---|
| 3286 | + refcount_inc(&pagelist->refcnt); |
|---|
| 3331 | 3287 | data->pagelist = pagelist; |
|---|
| 3332 | 3288 | |
|---|
| 3333 | | - list_add_tail(&data->links, &msg->data); |
|---|
| 3334 | 3289 | msg->data_length += pagelist->length; |
|---|
| 3335 | 3290 | } |
|---|
| 3336 | 3291 | EXPORT_SYMBOL(ceph_msg_data_add_pagelist); |
|---|
| .. | .. |
|---|
| 3341 | 3296 | { |
|---|
| 3342 | 3297 | struct ceph_msg_data *data; |
|---|
| 3343 | 3298 | |
|---|
| 3344 | | - data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); |
|---|
| 3345 | | - BUG_ON(!data); |
|---|
| 3299 | + data = ceph_msg_data_add(msg); |
|---|
| 3300 | + data->type = CEPH_MSG_DATA_BIO; |
|---|
| 3346 | 3301 | data->bio_pos = *bio_pos; |
|---|
| 3347 | 3302 | data->bio_length = length; |
|---|
| 3348 | 3303 | |
|---|
| 3349 | | - list_add_tail(&data->links, &msg->data); |
|---|
| 3350 | 3304 | msg->data_length += length; |
|---|
| 3351 | 3305 | } |
|---|
| 3352 | 3306 | EXPORT_SYMBOL(ceph_msg_data_add_bio); |
|---|
| .. | .. |
|---|
| 3357 | 3311 | { |
|---|
| 3358 | 3312 | struct ceph_msg_data *data; |
|---|
| 3359 | 3313 | |
|---|
| 3360 | | - data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS); |
|---|
| 3361 | | - BUG_ON(!data); |
|---|
| 3314 | + data = ceph_msg_data_add(msg); |
|---|
| 3315 | + data->type = CEPH_MSG_DATA_BVECS; |
|---|
| 3362 | 3316 | data->bvec_pos = *bvec_pos; |
|---|
| 3363 | 3317 | |
|---|
| 3364 | | - list_add_tail(&data->links, &msg->data); |
|---|
| 3365 | 3318 | msg->data_length += bvec_pos->iter.bi_size; |
|---|
| 3366 | 3319 | } |
|---|
| 3367 | 3320 | EXPORT_SYMBOL(ceph_msg_data_add_bvecs); |
|---|
| .. | .. |
|---|
| 3370 | 3323 | * construct a new message with given type, size |
|---|
| 3371 | 3324 | * the new msg has a ref count of 1. |
|---|
| 3372 | 3325 | */ |
|---|
| 3373 | | -struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, |
|---|
| 3374 | | - bool can_fail) |
|---|
| 3326 | +struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, |
|---|
| 3327 | + gfp_t flags, bool can_fail) |
|---|
| 3375 | 3328 | { |
|---|
| 3376 | 3329 | struct ceph_msg *m; |
|---|
| 3377 | 3330 | |
|---|
| .. | .. |
|---|
| 3385 | 3338 | |
|---|
| 3386 | 3339 | INIT_LIST_HEAD(&m->list_head); |
|---|
| 3387 | 3340 | kref_init(&m->kref); |
|---|
| 3388 | | - INIT_LIST_HEAD(&m->data); |
|---|
| 3389 | 3341 | |
|---|
| 3390 | 3342 | /* front */ |
|---|
| 3391 | 3343 | if (front_len) { |
|---|
| .. | .. |
|---|
| 3399 | 3351 | m->front.iov_base = NULL; |
|---|
| 3400 | 3352 | } |
|---|
| 3401 | 3353 | m->front_alloc_len = m->front.iov_len = front_len; |
|---|
| 3354 | + |
|---|
| 3355 | + if (max_data_items) { |
|---|
| 3356 | + m->data = kmalloc_array(max_data_items, sizeof(*m->data), |
|---|
| 3357 | + flags); |
|---|
| 3358 | + if (!m->data) |
|---|
| 3359 | + goto out2; |
|---|
| 3360 | + |
|---|
| 3361 | + m->max_data_items = max_data_items; |
|---|
| 3362 | + } |
|---|
| 3402 | 3363 | |
|---|
| 3403 | 3364 | dout("ceph_msg_new %p front %d\n", m, front_len); |
|---|
| 3404 | 3365 | return m; |
|---|
| .. | .. |
|---|
| 3415 | 3376 | front_len); |
|---|
| 3416 | 3377 | } |
|---|
| 3417 | 3378 | return NULL; |
|---|
| 3379 | +} |
|---|
| 3380 | +EXPORT_SYMBOL(ceph_msg_new2); |
|---|
| 3381 | + |
|---|
| 3382 | +struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, |
|---|
| 3383 | + bool can_fail) |
|---|
| 3384 | +{ |
|---|
| 3385 | + return ceph_msg_new2(type, front_len, 0, flags, can_fail); |
|---|
| 3418 | 3386 | } |
|---|
| 3419 | 3387 | EXPORT_SYMBOL(ceph_msg_new); |
|---|
| 3420 | 3388 | |
|---|
| .. | .. |
|---|
| 3511 | 3479 | { |
|---|
| 3512 | 3480 | dout("%s %p\n", __func__, m); |
|---|
| 3513 | 3481 | kvfree(m->front.iov_base); |
|---|
| 3482 | + kfree(m->data); |
|---|
| 3514 | 3483 | kmem_cache_free(ceph_msg_cache, m); |
|---|
| 3515 | 3484 | } |
|---|
| 3516 | 3485 | |
|---|
| 3517 | 3486 | static void ceph_msg_release(struct kref *kref) |
|---|
| 3518 | 3487 | { |
|---|
| 3519 | 3488 | struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); |
|---|
| 3520 | | - struct ceph_msg_data *data, *next; |
|---|
| 3489 | + int i; |
|---|
| 3521 | 3490 | |
|---|
| 3522 | 3491 | dout("%s %p\n", __func__, m); |
|---|
| 3523 | 3492 | WARN_ON(!list_empty(&m->list_head)); |
|---|
| .. | .. |
|---|
| 3530 | 3499 | m->middle = NULL; |
|---|
| 3531 | 3500 | } |
|---|
| 3532 | 3501 | |
|---|
| 3533 | | - list_for_each_entry_safe(data, next, &m->data, links) { |
|---|
| 3534 | | - list_del_init(&data->links); |
|---|
| 3535 | | - ceph_msg_data_destroy(data); |
|---|
| 3536 | | - } |
|---|
| 3537 | | - m->data_length = 0; |
|---|
| 3502 | + for (i = 0; i < m->num_data_items; i++) |
|---|
| 3503 | + ceph_msg_data_destroy(&m->data[i]); |
|---|
| 3538 | 3504 | |
|---|
| 3539 | 3505 | if (m->pool) |
|---|
| 3540 | 3506 | ceph_msgpool_put(m->pool, m); |
|---|