| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Copyright (C) 2016 CNEX Labs |
|---|
| 3 | 4 | * Initial release: Javier Gonzalez <javier@cnexlabs.com> |
|---|
| .. | .. |
|---|
| 22 | 23 | |
|---|
| 23 | 24 | static DECLARE_RWSEM(pblk_rb_lock); |
|---|
| 24 | 25 | |
|---|
| 25 | | -void pblk_rb_data_free(struct pblk_rb *rb) |
|---|
| 26 | +static void pblk_rb_data_free(struct pblk_rb *rb) |
|---|
| 26 | 27 | { |
|---|
| 27 | 28 | struct pblk_rb_pages *p, *t; |
|---|
| 28 | 29 | |
|---|
| .. | .. |
|---|
| 35 | 36 | up_write(&pblk_rb_lock); |
|---|
| 36 | 37 | } |
|---|
| 37 | 38 | |
|---|
| 39 | +void pblk_rb_free(struct pblk_rb *rb) |
|---|
| 40 | +{ |
|---|
| 41 | + pblk_rb_data_free(rb); |
|---|
| 42 | + vfree(rb->entries); |
|---|
| 43 | +} |
|---|
| 44 | + |
|---|
| 45 | +/* |
|---|
| 46 | + * pblk_rb_calculate_size -- calculate the size of the write buffer |
|---|
| 47 | + */ |
|---|
| 48 | +static unsigned int pblk_rb_calculate_size(unsigned int nr_entries, |
|---|
| 49 | + unsigned int threshold) |
|---|
| 50 | +{ |
|---|
| 51 | + unsigned int thr_sz = 1 << (get_count_order(threshold + NVM_MAX_VLBA)); |
|---|
| 52 | + unsigned int max_sz = max(thr_sz, nr_entries); |
|---|
| 53 | + unsigned int max_io; |
|---|
| 54 | + |
|---|
| 55 | + /* Alloc a write buffer that can (i) fit at least two split bios |
|---|
| 56 | + * (considering max I/O size NVM_MAX_VLBA), and (ii) guarantee that the
|---|
| 57 | + * threshold will be respected |
|---|
| 58 | + */ |
|---|
| 59 | + max_io = (1 << max((int)(get_count_order(max_sz)), |
|---|
| 60 | + (int)(get_count_order(NVM_MAX_VLBA << 1)))); |
|---|
| 61 | + if ((threshold + NVM_MAX_VLBA) >= max_io) |
|---|
| 62 | + max_io <<= 1; |
|---|
| 63 | + |
|---|
| 64 | + return max_io; |
|---|
| 65 | +} |
|---|
| 66 | + |
|---|
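For reference, here is a worked run of the sizing helper above as a standalone userspace sketch. `get_count_order()` is re-implemented locally (in the kernel it returns the order of the next power of two), and `NVM_MAX_VLBA = 64` is an assumed value for illustration only:

```c
#include <stdio.h>

#define NVM_MAX_VLBA 64	/* assumed value, for illustration only */

/* Userspace stand-in for the kernel's get_count_order(): order of the
 * next power of two >= count.
 */
static int get_count_order(unsigned int count)
{
	int order = 0;

	while ((1U << order) < count)
		order++;
	return order;
}

int main(void)
{
	unsigned int nr_entries = 200, threshold = 8;
	unsigned int thr_sz = 1 << get_count_order(threshold + NVM_MAX_VLBA);
	unsigned int max_sz = thr_sz > nr_entries ? thr_sz : nr_entries;
	int ord_sz = get_count_order(max_sz);
	int ord_io = get_count_order(NVM_MAX_VLBA << 1);
	unsigned int max_io = 1U << (ord_sz > ord_io ? ord_sz : ord_io);

	if (threshold + NVM_MAX_VLBA >= max_io)
		max_io <<= 1;

	/* thr_sz = 128, max_sz = 200, rounded up to 256: room for two
	 * split bios (2 * NVM_MAX_VLBA) plus the flush threshold.
	 */
	printf("buffer entries: %u\n", max_io);	/* prints 256 */
	return 0;
}
```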
| 38 | 67 | /* |
|---|
| 39 | 68 | * Initialize ring buffer. The data and metadata buffers must be previously |
|---|
| 40 | 69 | * allocated and their size must be a power of two |
|---|
| 41 | 70 | * (Documentation/core-api/circular-buffers.rst) |
|---|
| 42 | 71 | */ |
|---|
| 43 | | -int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, |
|---|
| 44 | | - unsigned int power_size, unsigned int power_seg_sz) |
|---|
| 72 | +int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, |
|---|
| 73 | + unsigned int seg_size) |
|---|
| 45 | 74 | { |
|---|
| 46 | 75 | struct pblk *pblk = container_of(rb, struct pblk, rwb); |
|---|
| 76 | + struct pblk_rb_entry *entries; |
|---|
| 47 | 77 | unsigned int init_entry = 0; |
|---|
| 48 | | - unsigned int alloc_order = power_size; |
|---|
| 49 | 78 | unsigned int max_order = MAX_ORDER - 1; |
|---|
| 50 | | - unsigned int order, iter; |
|---|
| 79 | + unsigned int power_size, power_seg_sz; |
|---|
| 80 | + unsigned int alloc_order, order, iter; |
|---|
| 81 | + unsigned int nr_entries; |
|---|
| 82 | + |
|---|
| 83 | + nr_entries = pblk_rb_calculate_size(size, threshold); |
|---|
| 84 | + entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry))); |
|---|
| 85 | + if (!entries) |
|---|
| 86 | + return -ENOMEM; |
|---|
| 87 | + |
|---|
| 88 | + power_size = get_count_order(nr_entries); |
|---|
| 89 | + power_seg_sz = get_count_order(seg_size); |
|---|
| 51 | 90 | |
|---|
| 52 | 91 | down_write(&pblk_rb_lock); |
|---|
| 53 | | - rb->entries = rb_entry_base; |
|---|
| 92 | + rb->entries = entries; |
|---|
| 54 | 93 | rb->seg_size = (1 << power_seg_sz); |
|---|
| 55 | 94 | rb->nr_entries = (1 << power_size); |
|---|
| 56 | 95 | rb->mem = rb->subm = rb->sync = rb->l2p_update = 0; |
|---|
| 96 | + rb->back_thres = threshold; |
|---|
| 57 | 97 | rb->flush_point = EMPTY_ENTRY; |
|---|
| 58 | 98 | |
|---|
| 59 | 99 | spin_lock_init(&rb->w_lock); |
|---|
| .. | .. |
|---|
| 61 | 101 | |
|---|
| 62 | 102 | INIT_LIST_HEAD(&rb->pages); |
|---|
| 63 | 103 | |
|---|
| 104 | + alloc_order = power_size; |
|---|
| 64 | 105 | if (alloc_order >= max_order) { |
|---|
| 65 | 106 | order = max_order; |
|---|
| 66 | 107 | iter = (1 << (alloc_order - max_order)); |
|---|
| .. | .. |
|---|
| 79 | 120 | page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL); |
|---|
| 80 | 121 | if (!page_set) { |
|---|
| 81 | 122 | up_write(&pblk_rb_lock); |
|---|
| 123 | + vfree(entries); |
|---|
| 82 | 124 | return -ENOMEM; |
|---|
| 83 | 125 | } |
|---|
| 84 | 126 | |
|---|
| .. | .. |
|---|
| 88 | 130 | kfree(page_set); |
|---|
| 89 | 131 | pblk_rb_data_free(rb); |
|---|
| 90 | 132 | up_write(&pblk_rb_lock); |
|---|
| 133 | + vfree(entries); |
|---|
| 91 | 134 | return -ENOMEM; |
|---|
| 92 | 135 | } |
|---|
| 93 | 136 | kaddr = page_address(page_set->pages); |
|---|
| .. | .. |
|---|
| 117 | 160 | |
|---|
| 118 | 161 | /* |
|---|
| 119 | 162 | * Initialize rate-limiter, which controls access to the write buffer |
|---|
| 120 | | - * but user and GC I/O |
|---|
| 163 | + * by user and GC I/O |
|---|
| 121 | 164 | */ |
|---|
| 122 | | - pblk_rl_init(&pblk->rl, rb->nr_entries); |
|---|
| 165 | + pblk_rl_init(&pblk->rl, rb->nr_entries, threshold); |
|---|
| 123 | 166 | |
|---|
| 124 | 167 | return 0; |
|---|
| 125 | | -} |
|---|
| 126 | | - |
|---|
| 127 | | -/* |
|---|
| 128 | | - * pblk_rb_calculate_size -- calculate the size of the write buffer |
|---|
| 129 | | - */ |
|---|
| 130 | | -unsigned int pblk_rb_calculate_size(unsigned int nr_entries) |
|---|
| 131 | | -{ |
|---|
| 132 | | - /* Alloc a write buffer that can at least fit 128 entries */ |
|---|
| 133 | | - return (1 << max(get_count_order(nr_entries), 7)); |
|---|
| 134 | | -} |
|---|
| 135 | | - |
|---|
| 136 | | -void *pblk_rb_entries_ref(struct pblk_rb *rb) |
|---|
| 137 | | -{ |
|---|
| 138 | | - return rb->entries; |
|---|
| 139 | 168 | } |
|---|
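With this change the ring buffer owns its entry array: `pblk_rb_init()` computes the size, allocates the entries with `vzalloc()`, and frees them on every error path, while `pblk_rb_free()` releases them together with the data pages on teardown. A caller therefore shrinks to something like the sketch below (a hypothetical sketch with illustrative names; the real call site lives in pblk-init.c):

```c
/* Hypothetical caller sketch: the caller no longer allocates or frees
 * the entry array itself. Names are illustrative, not the actual
 * pblk-init.c code.
 */
static int pblk_setup_rwb(struct pblk *pblk, unsigned int buffer_size,
			  unsigned int threshold, unsigned int seg_size)
{
	int ret;

	ret = pblk_rb_init(&pblk->rwb, buffer_size, threshold, seg_size);
	if (ret)
		return ret;	/* entries already freed internally */

	/* ... on teardown, a single pblk_rb_free(&pblk->rwb) releases
	 * both the data pages and the entry array.
	 */
	return 0;
}
```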
| 140 | 169 | |
|---|
| 141 | 170 | static void clean_wctx(struct pblk_w_ctx *w_ctx) |
|---|
| .. | .. |
|---|
| 168 | 197 | return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries); |
|---|
| 169 | 198 | } |
|---|
| 170 | 199 | |
|---|
| 200 | +unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p, |
|---|
| 201 | + unsigned int nr_entries) |
|---|
| 202 | +{ |
|---|
| 203 | + return (p + nr_entries) & (rb->nr_entries - 1); |
|---|
| 204 | +} |
|---|
| 205 | + |
|---|
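`pblk_rb_ptr_wrap()` centralizes the `(p + nr_entries) & (rb->nr_entries - 1)` arithmetic that was previously open-coded at every pointer update below. It is correct only because `rb->nr_entries` is a power of two (which `pblk_rb_calculate_size()` guarantees), so the mask is equivalent to a modulo. A minimal userspace sketch of the semantics:

```c
#include <assert.h>

/* With a power-of-two ring size, masking with (size - 1) is the same
 * as (p + n) % size.
 */
static unsigned int ptr_wrap(unsigned int size, unsigned int p,
			     unsigned int n)
{
	return (p + n) & (size - 1);
}

int main(void)
{
	assert(ptr_wrap(256, 250, 10) == 4);	/* wraps past the end */
	assert(ptr_wrap(256, 10, 0) == 10);	/* n == 0 is a plain mask,
						 * as in pblk_rb_w_ctx() */
	return 0;
}
```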
| 171 | 206 | /* |
|---|
| 172 | 207 | * Buffer count is calculated with respect to the submission entry signaling the |
|---|
| 173 | 208 | * entries that are available to send to the media |
|---|
| .. | .. |
|---|
| 194 | 229 | |
|---|
| 195 | 230 | subm = READ_ONCE(rb->subm); |
|---|
| 196 | 231 | /* Commit read means updating submission pointer */ |
|---|
| 197 | | - smp_store_release(&rb->subm, |
|---|
| 198 | | - (subm + nr_entries) & (rb->nr_entries - 1)); |
|---|
| 232 | + smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries)); |
|---|
| 199 | 233 | |
|---|
| 200 | 234 | return subm; |
|---|
| 201 | 235 | } |
|---|
| .. | .. |
|---|
| 225 | 259 | pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, |
|---|
| 226 | 260 | entry->cacheline); |
|---|
| 227 | 261 | |
|---|
| 228 | | - line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)]; |
|---|
| 262 | + line = pblk_ppa_to_line(pblk, w_ctx->ppa); |
|---|
| 263 | + atomic_dec(&line->sec_to_update); |
|---|
| 229 | 264 | kref_put(&line->ref, pblk_line_put); |
|---|
| 230 | 265 | clean_wctx(w_ctx); |
|---|
| 231 | | - rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1); |
|---|
| 266 | + rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1); |
|---|
| 232 | 267 | } |
|---|
| 233 | 268 | |
|---|
| 234 | 269 | pblk_rl_out(&pblk->rl, user_io, gc_io); |
|---|
| .. | .. |
|---|
| 385 | 420 | { |
|---|
| 386 | 421 | unsigned int mem; |
|---|
| 387 | 422 | unsigned int sync; |
|---|
| 423 | + unsigned int threshold; |
|---|
| 388 | 424 | |
|---|
| 389 | 425 | sync = READ_ONCE(rb->sync); |
|---|
| 390 | 426 | mem = READ_ONCE(rb->mem); |
|---|
| 391 | 427 | |
|---|
| 392 | | - if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries) |
|---|
| 428 | + threshold = nr_entries + rb->back_thres; |
|---|
| 429 | + |
|---|
| 430 | + if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold) |
|---|
| 393 | 431 | return 0; |
|---|
| 394 | 432 | |
|---|
| 395 | 433 | if (pblk_rb_update_l2p(rb, nr_entries, mem, sync)) |
|---|
| .. | .. |
|---|
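Note the stricter admission check above: a write of `nr_entries` is now admitted only while `nr_entries + rb->back_thres` entries are free, keeping `back_thres` entries in reserve behind the `mem` pointer; per the init code earlier in this patch, `back_thres` is the same threshold handed to `pblk_rl_init()`. For example, with a 1024-entry ring, `back_thres = 64` and a 64-entry write, at least 128 entries must be free before the write proceeds.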
| 407 | 445 | return 0; |
|---|
| 408 | 446 | |
|---|
| 409 | 447 | /* Protect from read count */ |
|---|
| 410 | | - smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1)); |
|---|
| 448 | + smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries)); |
|---|
| 411 | 449 | return 1; |
|---|
| 412 | 450 | } |
|---|
| 413 | 451 | |
|---|
| .. | .. |
|---|
| 431 | 469 | if (!__pblk_rb_may_write(rb, nr_entries, pos)) |
|---|
| 432 | 470 | return 0; |
|---|
| 433 | 471 | |
|---|
| 434 | | - mem = (*pos + nr_entries) & (rb->nr_entries - 1); |
|---|
| 472 | + mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries); |
|---|
| 435 | 473 | *io_ret = NVM_IO_DONE; |
|---|
| 436 | 474 | |
|---|
| 437 | 475 | if (bio->bi_opf & REQ_PREFLUSH) { |
|---|
| .. | .. |
|---|
| 528 | 566 | to_read = count; |
|---|
| 529 | 567 | } |
|---|
| 530 | 568 | |
|---|
| 569 | + /* Add space for packed metadata if in use */
|---|
| 570 | + pad += (pblk->min_write_pgs - pblk->min_write_pgs_data); |
|---|
| 571 | + |
|---|
| 531 | 572 | c_ctx->sentry = pos; |
|---|
| 532 | 573 | c_ctx->nr_valid = to_read; |
|---|
| 533 | 574 | c_ctx->nr_padded = pad; |
|---|
| .. | .. |
|---|
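The padding hunk above is new bookkeeping for packed metadata: `pblk->min_write_pgs - pblk->min_write_pgs_data` is presumably the number of pages per minimal write reserved for emulated (packed) sector metadata, so it is added to the pad count whenever the feature is in use. For instance, with `min_write_pgs = 8` and `min_write_pgs_data = 7`, each write would carry one extra padded entry for the metadata payload.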
| 571 | 612 | /* Release flags on context. Protect from writes */ |
|---|
| 572 | 613 | smp_store_release(&entry->w_ctx.flags, flags); |
|---|
| 573 | 614 | |
|---|
| 574 | | - pos = (pos + 1) & (rb->nr_entries - 1); |
|---|
| 615 | + pos = pblk_rb_ptr_wrap(rb, pos, 1); |
|---|
| 575 | 616 | } |
|---|
| 576 | 617 | |
|---|
| 577 | 618 | if (pad) { |
|---|
| .. | .. |
|---|
| 601 | 642 | * be directed to disk. |
|---|
| 602 | 643 | */ |
|---|
| 603 | 644 | int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, |
|---|
| 604 | | - struct ppa_addr ppa, int bio_iter, bool advanced_bio) |
|---|
| 645 | + struct ppa_addr ppa) |
|---|
| 605 | 646 | { |
|---|
| 606 | 647 | struct pblk *pblk = container_of(rb, struct pblk, rwb); |
|---|
| 607 | 648 | struct pblk_rb_entry *entry; |
|---|
| .. | .. |
|---|
| 632 | 673 | ret = 0; |
|---|
| 633 | 674 | goto out; |
|---|
| 634 | 675 | } |
|---|
| 635 | | - |
|---|
| 636 | | - /* Only advance the bio if it hasn't been advanced already. If advanced, |
|---|
| 637 | | - * this bio is at least a partial bio (i.e., it has partially been |
|---|
| 638 | | - * filled with data from the cache). If part of the data resides on the |
|---|
| 639 | | - * media, we will read later on |
|---|
| 640 | | - */ |
|---|
| 641 | | - if (unlikely(!advanced_bio)) |
|---|
| 642 | | - bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE); |
|---|
| 643 | | - |
|---|
| 644 | 676 | data = bio_data(bio); |
|---|
| 645 | 677 | memcpy(data, entry->data, rb->seg_size); |
|---|
| 646 | 678 | |
|---|
| .. | .. |
|---|
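With `bio_iter` and `advanced_bio` gone, `pblk_rb_copy_to_bio()` no longer advances the bio itself: the caller is now expected to hand in a bio already positioned at the data to be filled (presumably by splitting or handling partial cache hits before reaching this helper), which reduces this function to a plain lookup-and-memcpy out of the cache.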
| 651 | 683 | |
|---|
| 652 | 684 | struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos) |
|---|
| 653 | 685 | { |
|---|
| 654 | | - unsigned int entry = pos & (rb->nr_entries - 1); |
|---|
| 686 | + unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0); |
|---|
| 655 | 687 | |
|---|
| 656 | 688 | return &rb->entries[entry].w_ctx; |
|---|
| 657 | 689 | } |
|---|
| .. | .. |
|---|
| 697 | 729 | } |
|---|
| 698 | 730 | } |
|---|
| 699 | 731 | |
|---|
| 700 | | - sync = (sync + nr_entries) & (rb->nr_entries - 1); |
|---|
| 732 | + sync = pblk_rb_ptr_wrap(rb, sync, nr_entries); |
|---|
| 701 | 733 | |
|---|
| 702 | 734 | /* Protect from counts */ |
|---|
| 703 | 735 | smp_store_release(&rb->sync, sync); |
|---|
| .. | .. |
|---|
| 726 | 758 | to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1; |
|---|
| 727 | 759 | |
|---|
| 728 | 760 | return (submitted < to_flush) ? (to_flush - submitted) : 0; |
|---|
| 729 | | -} |
|---|
| 730 | | - |
|---|
| 731 | | -/* |
|---|
| 732 | | - * Scan from the current position of the sync pointer to find the entry that |
|---|
| 733 | | - * corresponds to the given ppa. This is necessary since write requests can be |
|---|
| 734 | | - * completed out of order. The assumption is that the ppa is close to the sync |
|---|
| 735 | | - * pointer thus the search will not take long. |
|---|
| 736 | | - * |
|---|
| 737 | | - * The caller of this function must guarantee that the sync pointer will no |
|---|
| 738 | | - * reach the entry while it is using the metadata associated with it. With this |
|---|
| 739 | | - * assumption in mind, there is no need to take the sync lock. |
|---|
| 740 | | - */ |
|---|
| 741 | | -struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, |
|---|
| 742 | | - struct ppa_addr *ppa) |
|---|
| 743 | | -{ |
|---|
| 744 | | - unsigned int sync, subm, count; |
|---|
| 745 | | - unsigned int i; |
|---|
| 746 | | - |
|---|
| 747 | | - sync = READ_ONCE(rb->sync); |
|---|
| 748 | | - subm = READ_ONCE(rb->subm); |
|---|
| 749 | | - count = pblk_rb_ring_count(subm, sync, rb->nr_entries); |
|---|
| 750 | | - |
|---|
| 751 | | - for (i = 0; i < count; i++) |
|---|
| 752 | | - sync = (sync + 1) & (rb->nr_entries - 1); |
|---|
| 753 | | - |
|---|
| 754 | | - return NULL; |
|---|
| 755 | 761 | } |
|---|
| 756 | 762 | |
|---|
| 757 | 763 | int pblk_rb_tear_down_check(struct pblk_rb *rb) |
|---|