.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
---|
1 | 2 | /* |
---|
2 | 3 | * Copyright (C) 2015 IT University of Copenhagen (rrpc.c) |
---|
3 | 4 | * Copyright (C) 2016 CNEX Labs |
---|
.. | .. |
---|
19 | 20 | */ |
---|
20 | 21 | |
---|
21 | 22 | #include "pblk.h" |
---|
| 23 | +#include "pblk-trace.h" |
---|
22 | 24 | |
---|
23 | 25 | static unsigned int write_buffer_size; |
---|
24 | 26 | |
---|
25 | 27 | module_param(write_buffer_size, uint, 0644); |
---|
26 | 28 | MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer"); |
---|
27 | 29 | |
---|
28 | | -static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, |
---|
29 | | - *pblk_w_rq_cache; |
---|
30 | | -static DECLARE_RWSEM(pblk_lock); |
---|
| 30 | +struct pblk_global_caches { |
---|
| 31 | + struct kmem_cache *ws; |
---|
| 32 | + struct kmem_cache *rec; |
---|
| 33 | + struct kmem_cache *g_rq; |
---|
| 34 | + struct kmem_cache *w_rq; |
---|
| 35 | + |
---|
| 36 | + struct kref kref; |
---|
| 37 | + |
---|
| 38 | + struct mutex mutex; /* Ensures consistency between |
---|
| 39 | + * caches and kref |
---|
| 40 | + */ |
---|
| 41 | +}; |
---|
| 42 | + |
---|
| 43 | +static struct pblk_global_caches pblk_caches = { |
---|
| 44 | + .mutex = __MUTEX_INITIALIZER(pblk_caches.mutex), |
---|
| 45 | + .kref = KREF_INIT(0), |
---|
| 46 | +}; |
---|
| 47 | + |
---|
31 | 48 | struct bio_set pblk_bio_set; |
---|
32 | 49 | |
---|
33 | | -static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, |
---|
34 | | - struct bio *bio) |
---|
| 50 | +static blk_qc_t pblk_submit_bio(struct bio *bio) |
---|
35 | 51 | { |
---|
36 | | - int ret; |
---|
37 | | - |
---|
38 | | - /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap |
---|
39 | | - * constraint. Writes can be of arbitrary size. |
---|
40 | | - */ |
---|
41 | | - if (bio_data_dir(bio) == READ) { |
---|
42 | | - blk_queue_split(q, &bio); |
---|
43 | | - ret = pblk_submit_read(pblk, bio); |
---|
44 | | - if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED)) |
---|
45 | | - bio_put(bio); |
---|
46 | | - |
---|
47 | | - return ret; |
---|
48 | | - } |
---|
49 | | - |
---|
50 | | - /* Prevent deadlock in the case of a modest LUN configuration and large |
---|
51 | | - * user I/Os. Unless stalled, the rate limiter leaves at least 256KB |
---|
52 | | - * available for user I/O. |
---|
53 | | - */ |
---|
54 | | - if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) |
---|
55 | | - blk_queue_split(q, &bio); |
---|
56 | | - |
---|
57 | | - return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); |
---|
58 | | -} |
---|
59 | | - |
---|
60 | | -static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) |
---|
61 | | -{ |
---|
62 | | - struct pblk *pblk = q->queuedata; |
---|
| 52 | + struct pblk *pblk = bio->bi_disk->queue->queuedata; |
---|
63 | 53 | |
---|
64 | 54 | if (bio_op(bio) == REQ_OP_DISCARD) { |
---|
65 | 55 | pblk_discard(pblk, bio); |
---|
.. | .. |
---|
69 | 59 | } |
---|
70 | 60 | } |
---|
71 | 61 | |
---|
72 | | - switch (pblk_rw_io(q, pblk, bio)) { |
---|
73 | | - case NVM_IO_ERR: |
---|
74 | | - bio_io_error(bio); |
---|
75 | | - break; |
---|
76 | | - case NVM_IO_DONE: |
---|
77 | | - bio_endio(bio); |
---|
78 | | - break; |
---|
| 62 | + /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap |
---|
| 63 | + * constraint. Writes can be of arbitrary size. |
---|
| 64 | + */ |
---|
| 65 | + if (bio_data_dir(bio) == READ) { |
---|
| 66 | + blk_queue_split(&bio); |
---|
| 67 | + pblk_submit_read(pblk, bio); |
---|
| 68 | + } else { |
---|
| 69 | + /* Prevent deadlock in the case of a modest LUN configuration |
---|
| 70 | + * and large user I/Os. Unless stalled, the rate limiter |
---|
| 71 | + * leaves at least 256KB available for user I/O. |
---|
| 72 | + */ |
---|
| 73 | + if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) |
---|
| 74 | + blk_queue_split(&bio); |
---|
| 75 | + |
---|
| 76 | + pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); |
---|
79 | 77 | } |
---|
80 | 78 | |
---|
81 | 79 | return BLK_QC_T_NONE; |
---|
82 | 80 | } |
---|
| 81 | + |
---|
| 82 | +static const struct block_device_operations pblk_bops = { |
---|
| 83 | + .owner = THIS_MODULE, |
---|
| 84 | + .submit_bio = pblk_submit_bio, |
---|
| 85 | +}; |
---|
| 86 | + |
---|
83 | 87 | |
---|
84 | 88 | static size_t pblk_trans_map_size(struct pblk *pblk) |
---|
85 | 89 | { |
---|
.. | .. |
---|
88 | 92 | if (pblk->addrf_len < 32) |
---|
89 | 93 | entry_size = 4; |
---|
90 | 94 | |
---|
91 | | - return entry_size * pblk->rl.nr_secs; |
---|
| 95 | + return entry_size * pblk->capacity; |
---|
92 | 96 | } |
---|
93 | 97 | |
---|
94 | 98 | #ifdef CONFIG_NVM_PBLK_DEBUG |
---|
.. | .. |
---|
113 | 117 | struct pblk_line *line = NULL; |
---|
114 | 118 | |
---|
115 | 119 | if (factory_init) { |
---|
116 | | - pblk_setup_uuid(pblk); |
---|
| 120 | + guid_gen(&pblk->instance_uuid); |
---|
117 | 121 | } else { |
---|
118 | 122 | line = pblk_recov_l2p(pblk); |
---|
119 | 123 | if (IS_ERR(line)) { |
---|
.. | .. |
---|
147 | 151 | int ret = 0; |
---|
148 | 152 | |
---|
149 | 153 | map_size = pblk_trans_map_size(pblk); |
---|
150 | | - pblk->trans_map = vmalloc(map_size); |
---|
151 | | - if (!pblk->trans_map) |
---|
| 154 | + pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN | |
---|
| 155 | + __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM); |
---|
| 156 | + if (!pblk->trans_map) { |
---|
| 157 | + pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n", |
---|
| 158 | + map_size); |
---|
152 | 159 | return -ENOMEM; |
---|
| 160 | + } |
---|
153 | 161 | |
---|
154 | 162 | pblk_ppa_set_empty(&ppa); |
---|
155 | 163 | |
---|
156 | | - for (i = 0; i < pblk->rl.nr_secs; i++) |
---|
| 164 | + for (i = 0; i < pblk->capacity; i++) |
---|
157 | 165 | pblk_trans_map_set(pblk, i, ppa); |
---|
158 | 166 | |
---|
159 | 167 | ret = pblk_l2p_recover(pblk, factory_init); |
---|
.. | .. |
---|
168 | 176 | if (pblk_rb_tear_down_check(&pblk->rwb)) |
---|
169 | 177 | pblk_err(pblk, "write buffer error on tear down\n"); |
---|
170 | 178 | |
---|
171 | | - pblk_rb_data_free(&pblk->rwb); |
---|
172 | | - vfree(pblk_rb_entries_ref(&pblk->rwb)); |
---|
| 179 | + pblk_rb_free(&pblk->rwb); |
---|
173 | 180 | } |
---|
174 | 181 | |
---|
175 | 182 | static int pblk_rwb_init(struct pblk *pblk) |
---|
176 | 183 | { |
---|
177 | 184 | struct nvm_tgt_dev *dev = pblk->dev; |
---|
178 | 185 | struct nvm_geo *geo = &dev->geo; |
---|
179 | | - struct pblk_rb_entry *entries; |
---|
180 | | - unsigned long nr_entries, buffer_size; |
---|
181 | | - unsigned int power_size, power_seg_sz; |
---|
182 | | - int pgs_in_buffer; |
---|
| 186 | + unsigned long buffer_size; |
---|
| 187 | + int pgs_in_buffer, threshold; |
---|
183 | 188 | |
---|
| 189 | + threshold = geo->mw_cunits * geo->all_luns; |
---|
184 | 190 | pgs_in_buffer = (max(geo->mw_cunits, geo->ws_opt) + geo->ws_opt) |
---|
185 | 191 | * geo->all_luns; |
---|
186 | 192 | |
---|
.. | .. |
---|
189 | 195 | else |
---|
190 | 196 | buffer_size = pgs_in_buffer; |
---|
191 | 197 | |
---|
192 | | - nr_entries = pblk_rb_calculate_size(buffer_size); |
---|
193 | | - |
---|
194 | | - entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry))); |
---|
195 | | - if (!entries) |
---|
196 | | - return -ENOMEM; |
---|
197 | | - |
---|
198 | | - power_size = get_count_order(nr_entries); |
---|
199 | | - power_seg_sz = get_count_order(geo->csecs); |
---|
200 | | - |
---|
201 | | - return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz); |
---|
| 198 | + return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs); |
---|
202 | 199 | } |
---|
203 | | - |
---|
204 | | -/* Minimum pages needed within a lun */ |
---|
205 | | -#define ADDR_POOL_SIZE 64 |
---|
206 | 200 | |
---|
207 | 201 | static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo, |
---|
208 | 202 | struct nvm_addrf_12 *dst) |
---|
.. | .. |
---|
307 | 301 | return 0; |
---|
308 | 302 | } |
---|
309 | 303 | |
---|
310 | | -static int pblk_init_global_caches(struct pblk *pblk) |
---|
| 304 | +static int pblk_create_global_caches(void) |
---|
311 | 305 | { |
---|
312 | | - down_write(&pblk_lock); |
---|
313 | | - pblk_ws_cache = kmem_cache_create("pblk_blk_ws", |
---|
| 306 | + |
---|
| 307 | + pblk_caches.ws = kmem_cache_create("pblk_blk_ws", |
---|
314 | 308 | sizeof(struct pblk_line_ws), 0, 0, NULL); |
---|
315 | | - if (!pblk_ws_cache) { |
---|
316 | | - up_write(&pblk_lock); |
---|
| 309 | + if (!pblk_caches.ws) |
---|
317 | 310 | return -ENOMEM; |
---|
318 | | - } |
---|
319 | 311 | |
---|
320 | | - pblk_rec_cache = kmem_cache_create("pblk_rec", |
---|
| 312 | + pblk_caches.rec = kmem_cache_create("pblk_rec", |
---|
321 | 313 | sizeof(struct pblk_rec_ctx), 0, 0, NULL); |
---|
322 | | - if (!pblk_rec_cache) { |
---|
323 | | - kmem_cache_destroy(pblk_ws_cache); |
---|
324 | | - up_write(&pblk_lock); |
---|
325 | | - return -ENOMEM; |
---|
326 | | - } |
---|
| 314 | + if (!pblk_caches.rec) |
---|
| 315 | + goto fail_destroy_ws; |
---|
327 | 316 | |
---|
328 | | - pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, |
---|
| 317 | + pblk_caches.g_rq = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, |
---|
329 | 318 | 0, 0, NULL); |
---|
330 | | - if (!pblk_g_rq_cache) { |
---|
331 | | - kmem_cache_destroy(pblk_ws_cache); |
---|
332 | | - kmem_cache_destroy(pblk_rec_cache); |
---|
333 | | - up_write(&pblk_lock); |
---|
334 | | - return -ENOMEM; |
---|
335 | | - } |
---|
| 319 | + if (!pblk_caches.g_rq) |
---|
| 320 | + goto fail_destroy_rec; |
---|
336 | 321 | |
---|
337 | | - pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, |
---|
| 322 | + pblk_caches.w_rq = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, |
---|
338 | 323 | 0, 0, NULL); |
---|
339 | | - if (!pblk_w_rq_cache) { |
---|
340 | | - kmem_cache_destroy(pblk_ws_cache); |
---|
341 | | - kmem_cache_destroy(pblk_rec_cache); |
---|
342 | | - kmem_cache_destroy(pblk_g_rq_cache); |
---|
343 | | - up_write(&pblk_lock); |
---|
344 | | - return -ENOMEM; |
---|
345 | | - } |
---|
346 | | - up_write(&pblk_lock); |
---|
| 324 | + if (!pblk_caches.w_rq) |
---|
| 325 | + goto fail_destroy_g_rq; |
---|
347 | 326 | |
---|
348 | 327 | return 0; |
---|
| 328 | + |
---|
| 329 | +fail_destroy_g_rq: |
---|
| 330 | + kmem_cache_destroy(pblk_caches.g_rq); |
---|
| 331 | +fail_destroy_rec: |
---|
| 332 | + kmem_cache_destroy(pblk_caches.rec); |
---|
| 333 | +fail_destroy_ws: |
---|
| 334 | + kmem_cache_destroy(pblk_caches.ws); |
---|
| 335 | + |
---|
| 336 | + return -ENOMEM; |
---|
349 | 337 | } |
---|
350 | 338 | |
---|
351 | | -static void pblk_free_global_caches(struct pblk *pblk) |
---|
| 339 | +static int pblk_get_global_caches(void) |
---|
352 | 340 | { |
---|
353 | | - kmem_cache_destroy(pblk_ws_cache); |
---|
354 | | - kmem_cache_destroy(pblk_rec_cache); |
---|
355 | | - kmem_cache_destroy(pblk_g_rq_cache); |
---|
356 | | - kmem_cache_destroy(pblk_w_rq_cache); |
---|
| 341 | + int ret = 0; |
---|
| 342 | + |
---|
| 343 | + mutex_lock(&pblk_caches.mutex); |
---|
| 344 | + |
---|
| 345 | + if (kref_get_unless_zero(&pblk_caches.kref)) |
---|
| 346 | + goto out; |
---|
| 347 | + |
---|
| 348 | + ret = pblk_create_global_caches(); |
---|
| 349 | + if (!ret) |
---|
| 350 | + kref_init(&pblk_caches.kref); |
---|
| 351 | + |
---|
| 352 | +out: |
---|
| 353 | + mutex_unlock(&pblk_caches.mutex); |
---|
| 354 | + return ret; |
---|
| 355 | +} |
---|
| 356 | + |
---|
| 357 | +static void pblk_destroy_global_caches(struct kref *ref) |
---|
| 358 | +{ |
---|
| 359 | + struct pblk_global_caches *c; |
---|
| 360 | + |
---|
| 361 | + c = container_of(ref, struct pblk_global_caches, kref); |
---|
| 362 | + |
---|
| 363 | + kmem_cache_destroy(c->ws); |
---|
| 364 | + kmem_cache_destroy(c->rec); |
---|
| 365 | + kmem_cache_destroy(c->g_rq); |
---|
| 366 | + kmem_cache_destroy(c->w_rq); |
---|
| 367 | +} |
---|
| 368 | + |
---|
| 369 | +static void pblk_put_global_caches(void) |
---|
| 370 | +{ |
---|
| 371 | + mutex_lock(&pblk_caches.mutex); |
---|
| 372 | + kref_put(&pblk_caches.kref, pblk_destroy_global_caches); |
---|
| 373 | + mutex_unlock(&pblk_caches.mutex); |
---|
357 | 374 | } |
---|
358 | 375 | |
---|
359 | 376 | static int pblk_core_init(struct pblk *pblk) |
---|
.. | .. |
---|
373 | 390 | pblk->nr_flush_rst = 0; |
---|
374 | 391 | |
---|
375 | 392 | pblk->min_write_pgs = geo->ws_opt; |
---|
| 393 | + pblk->min_write_pgs_data = pblk->min_write_pgs; |
---|
376 | 394 | max_write_ppas = pblk->min_write_pgs * geo->all_luns; |
---|
377 | 395 | pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); |
---|
378 | 396 | pblk->max_write_pgs = min_t(int, pblk->max_write_pgs, |
---|
379 | 397 | queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT)); |
---|
380 | 398 | pblk_set_sec_per_write(pblk, pblk->min_write_pgs); |
---|
381 | 399 | |
---|
382 | | - if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) { |
---|
383 | | - pblk_err(pblk, "vector list too big(%u > %u)\n", |
---|
384 | | - pblk->max_write_pgs, PBLK_MAX_REQ_ADDRS); |
---|
385 | | - return -EINVAL; |
---|
| 400 | + pblk->oob_meta_size = geo->sos; |
---|
| 401 | + if (!pblk_is_oob_meta_supported(pblk)) { |
---|
| 402 | + /* For drives which do not have the OOB metadata feature, |
---|
| 403 | + * in order to support the recovery feature we need to use |
---|
| 404 | + * so-called packed metadata. Packed metadata will store |
---|
| 405 | + * the same information as OOB metadata (l2p table mapping), |
---|
| 406 | + * but in the form of a single page at the end of |
---|
| 407 | + * every write request. |
---|
| 408 | + */ |
---|
| 409 | + if (pblk->min_write_pgs |
---|
| 410 | + * sizeof(struct pblk_sec_meta) > PAGE_SIZE) { |
---|
| 411 | + /* We want to keep all the packed metadata on a single |
---|
| 412 | + * page per write request. So we need to ensure that |
---|
| 413 | + * it will fit. |
---|
| 414 | + * |
---|
| 415 | + * This is more of a sanity check, since there is |
---|
| 416 | + * no device with such a big minimal write size |
---|
| 417 | + * (above 1 megabyte). |
---|
| 418 | + */ |
---|
| 419 | + pblk_err(pblk, "Not supported min write size\n"); |
---|
| 420 | + return -EINVAL; |
---|
| 421 | + } |
---|
| 422 | + /* For the packed meta approach we do some simplification. |
---|
| 423 | + * On the read path we always issue requests whose size is |
---|
| 424 | + * equal to max_write_pgs, with all pages filled with |
---|
| 425 | + * user payload except for the last page, which will be |
---|
| 426 | + * filled with packed metadata. |
---|
| 427 | + */ |
---|
| 428 | + pblk->max_write_pgs = pblk->min_write_pgs; |
---|
| 429 | + pblk->min_write_pgs_data = pblk->min_write_pgs - 1; |
---|
386 | 430 | } |
---|
387 | 431 | |
---|
388 | 432 | pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), |
---|
.. | .. |
---|
390 | 434 | if (!pblk->pad_dist) |
---|
391 | 435 | return -ENOMEM; |
---|
392 | 436 | |
---|
393 | | - if (pblk_init_global_caches(pblk)) |
---|
| 437 | + if (pblk_get_global_caches()) |
---|
394 | 438 | goto fail_free_pad_dist; |
---|
395 | 439 | |
---|
396 | 440 | /* Internal bios can be at most the sectors signaled by the device. */ |
---|
.. | .. |
---|
399 | 443 | goto free_global_caches; |
---|
400 | 444 | |
---|
401 | 445 | ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE, |
---|
402 | | - pblk_ws_cache); |
---|
| 446 | + pblk_caches.ws); |
---|
403 | 447 | if (ret) |
---|
404 | 448 | goto free_page_bio_pool; |
---|
405 | 449 | |
---|
406 | 450 | ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns, |
---|
407 | | - pblk_rec_cache); |
---|
| 451 | + pblk_caches.rec); |
---|
408 | 452 | if (ret) |
---|
409 | 453 | goto free_gen_ws_pool; |
---|
410 | 454 | |
---|
411 | 455 | ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns, |
---|
412 | | - pblk_g_rq_cache); |
---|
| 456 | + pblk_caches.g_rq); |
---|
413 | 457 | if (ret) |
---|
414 | 458 | goto free_rec_pool; |
---|
415 | 459 | |
---|
416 | 460 | ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns, |
---|
417 | | - pblk_g_rq_cache); |
---|
| 461 | + pblk_caches.g_rq); |
---|
418 | 462 | if (ret) |
---|
419 | 463 | goto free_r_rq_pool; |
---|
420 | 464 | |
---|
421 | 465 | ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns, |
---|
422 | | - pblk_w_rq_cache); |
---|
| 466 | + pblk_caches.w_rq); |
---|
423 | 467 | if (ret) |
---|
424 | 468 | goto free_e_rq_pool; |
---|
425 | 469 | |
---|
.. | .. |
---|
465 | 509 | free_page_bio_pool: |
---|
466 | 510 | mempool_exit(&pblk->page_bio_pool); |
---|
467 | 511 | free_global_caches: |
---|
468 | | - pblk_free_global_caches(pblk); |
---|
| 512 | + pblk_put_global_caches(); |
---|
469 | 513 | fail_free_pad_dist: |
---|
470 | 514 | kfree(pblk->pad_dist); |
---|
471 | 515 | return -ENOMEM; |
---|
.. | .. |
---|
489 | 533 | mempool_exit(&pblk->e_rq_pool); |
---|
490 | 534 | mempool_exit(&pblk->w_rq_pool); |
---|
491 | 535 | |
---|
492 | | - pblk_free_global_caches(pblk); |
---|
| 536 | + pblk_put_global_caches(); |
---|
493 | 537 | kfree(pblk->pad_dist); |
---|
494 | 538 | } |
---|
495 | 539 | |
---|
.. | .. |
---|
504 | 548 | |
---|
505 | 549 | for (i = 0; i < PBLK_DATA_LINES; i++) { |
---|
506 | 550 | kfree(l_mg->sline_meta[i]); |
---|
507 | | - pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type); |
---|
| 551 | + kvfree(l_mg->eline_meta[i]->buf); |
---|
508 | 552 | kfree(l_mg->eline_meta[i]); |
---|
509 | 553 | } |
---|
| 554 | + |
---|
| 555 | + mempool_destroy(l_mg->bitmap_pool); |
---|
| 556 | + kmem_cache_destroy(l_mg->bitmap_cache); |
---|
510 | 557 | } |
---|
511 | 558 | |
---|
512 | 559 | static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg, |
---|
.. | .. |
---|
518 | 565 | kfree(line->erase_bitmap); |
---|
519 | 566 | kfree(line->chks); |
---|
520 | 567 | |
---|
521 | | - pblk_mfree(w_err_gc->lba_list, l_mg->emeta_alloc_type); |
---|
| 568 | + kvfree(w_err_gc->lba_list); |
---|
522 | 569 | kfree(w_err_gc); |
---|
523 | 570 | } |
---|
524 | 571 | |
---|
.. | .. |
---|
528 | 575 | struct pblk_line *line; |
---|
529 | 576 | int i; |
---|
530 | 577 | |
---|
531 | | - spin_lock(&l_mg->free_lock); |
---|
532 | 578 | for (i = 0; i < l_mg->nr_lines; i++) { |
---|
533 | 579 | line = &pblk->lines[i]; |
---|
534 | 580 | |
---|
535 | 581 | pblk_line_free(line); |
---|
536 | 582 | pblk_line_meta_free(l_mg, line); |
---|
537 | 583 | } |
---|
538 | | - spin_unlock(&l_mg->free_lock); |
---|
539 | 584 | |
---|
540 | 585 | pblk_line_mg_free(pblk); |
---|
541 | 586 | |
---|
542 | 587 | kfree(pblk->luns); |
---|
543 | 588 | kfree(pblk->lines); |
---|
544 | | -} |
---|
545 | | - |
---|
546 | | -static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun, |
---|
547 | | - u8 *blks, int nr_blks) |
---|
548 | | -{ |
---|
549 | | - struct ppa_addr ppa; |
---|
550 | | - int ret; |
---|
551 | | - |
---|
552 | | - ppa.ppa = 0; |
---|
553 | | - ppa.g.ch = rlun->bppa.g.ch; |
---|
554 | | - ppa.g.lun = rlun->bppa.g.lun; |
---|
555 | | - |
---|
556 | | - ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); |
---|
557 | | - if (ret) |
---|
558 | | - return ret; |
---|
559 | | - |
---|
560 | | - nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); |
---|
561 | | - if (nr_blks < 0) |
---|
562 | | - return -EIO; |
---|
563 | | - |
---|
564 | | - return 0; |
---|
565 | | -} |
---|
566 | | - |
---|
567 | | -static void *pblk_bb_get_meta(struct pblk *pblk) |
---|
568 | | -{ |
---|
569 | | - struct nvm_tgt_dev *dev = pblk->dev; |
---|
570 | | - struct nvm_geo *geo = &dev->geo; |
---|
571 | | - u8 *meta; |
---|
572 | | - int i, nr_blks, blk_per_lun; |
---|
573 | | - int ret; |
---|
574 | | - |
---|
575 | | - blk_per_lun = geo->num_chk * geo->pln_mode; |
---|
576 | | - nr_blks = blk_per_lun * geo->all_luns; |
---|
577 | | - |
---|
578 | | - meta = kmalloc(nr_blks, GFP_KERNEL); |
---|
579 | | - if (!meta) |
---|
580 | | - return ERR_PTR(-ENOMEM); |
---|
581 | | - |
---|
582 | | - for (i = 0; i < geo->all_luns; i++) { |
---|
583 | | - struct pblk_lun *rlun = &pblk->luns[i]; |
---|
584 | | - u8 *meta_pos = meta + i * blk_per_lun; |
---|
585 | | - |
---|
586 | | - ret = pblk_bb_get_tbl(dev, rlun, meta_pos, blk_per_lun); |
---|
587 | | - if (ret) { |
---|
588 | | - kfree(meta); |
---|
589 | | - return ERR_PTR(-EIO); |
---|
590 | | - } |
---|
591 | | - } |
---|
592 | | - |
---|
593 | | - return meta; |
---|
594 | | -} |
---|
595 | | - |
---|
596 | | -static void *pblk_chunk_get_meta(struct pblk *pblk) |
---|
597 | | -{ |
---|
598 | | - struct nvm_tgt_dev *dev = pblk->dev; |
---|
599 | | - struct nvm_geo *geo = &dev->geo; |
---|
600 | | - |
---|
601 | | - if (geo->version == NVM_OCSSD_SPEC_12) |
---|
602 | | - return pblk_bb_get_meta(pblk); |
---|
603 | | - else |
---|
604 | | - return pblk_chunk_get_info(pblk); |
---|
605 | 589 | } |
---|
606 | 590 | |
---|
607 | 591 | static int pblk_luns_init(struct pblk *pblk) |
---|
.. | .. |
---|
666 | 650 | return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]); |
---|
667 | 651 | } |
---|
668 | 652 | |
---|
669 | | -static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) |
---|
| 653 | +static int pblk_set_provision(struct pblk *pblk, int nr_free_chks) |
---|
670 | 654 | { |
---|
671 | 655 | struct nvm_tgt_dev *dev = pblk->dev; |
---|
672 | 656 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; |
---|
673 | 657 | struct pblk_line_meta *lm = &pblk->lm; |
---|
674 | 658 | struct nvm_geo *geo = &dev->geo; |
---|
675 | 659 | sector_t provisioned; |
---|
676 | | - int sec_meta, blk_meta; |
---|
| 660 | + int sec_meta, blk_meta, clba; |
---|
| 661 | + int minimum; |
---|
677 | 662 | |
---|
678 | 663 | if (geo->op == NVM_TARGET_DEFAULT_OP) |
---|
679 | 664 | pblk->op = PBLK_DEFAULT_OP; |
---|
680 | 665 | else |
---|
681 | 666 | pblk->op = geo->op; |
---|
682 | 667 | |
---|
683 | | - provisioned = nr_free_blks; |
---|
| 668 | + minimum = pblk_get_min_chks(pblk); |
---|
| 669 | + provisioned = nr_free_chks; |
---|
684 | 670 | provisioned *= (100 - pblk->op); |
---|
685 | 671 | sector_div(provisioned, 100); |
---|
686 | 672 | |
---|
687 | | - pblk->op_blks = nr_free_blks - provisioned; |
---|
| 673 | + if ((nr_free_chks - provisioned) < minimum) { |
---|
| 674 | + if (geo->op != NVM_TARGET_DEFAULT_OP) { |
---|
| 675 | + pblk_err(pblk, "OP too small to create a sane instance\n"); |
---|
| 676 | + return -EINTR; |
---|
| 677 | + } |
---|
| 678 | + |
---|
| 679 | + /* If the user did not specify an OP value, and PBLK_DEFAULT_OP |
---|
| 680 | + * is not enough, calculate and set a sane value |
---|
| 681 | + */ |
---|
| 682 | + |
---|
| 683 | + provisioned = nr_free_chks - minimum; |
---|
| 684 | + pblk->op = (100 * minimum) / nr_free_chks; |
---|
| 685 | + pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n", |
---|
| 686 | + pblk->op); |
---|
| 687 | + } |
---|
| 688 | + |
---|
| 689 | + pblk->op_blks = nr_free_chks - provisioned; |
---|
688 | 690 | |
---|
689 | 691 | /* Internally pblk manages all free blocks, but all calculations based |
---|
690 | 692 | * on user capacity consider only provisioned blocks |
---|
691 | 693 | */ |
---|
692 | | - pblk->rl.total_blocks = nr_free_blks; |
---|
693 | | - pblk->rl.nr_secs = nr_free_blks * geo->clba; |
---|
| 694 | + pblk->rl.total_blocks = nr_free_chks; |
---|
694 | 695 | |
---|
695 | 696 | /* Consider sectors used for metadata */ |
---|
696 | 697 | sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; |
---|
697 | 698 | blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); |
---|
698 | 699 | |
---|
699 | | - pblk->capacity = (provisioned - blk_meta) * geo->clba; |
---|
| 700 | + clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data; |
---|
| 701 | + pblk->capacity = (provisioned - blk_meta) * clba; |
---|
700 | 702 | |
---|
701 | | - atomic_set(&pblk->rl.free_blocks, nr_free_blks); |
---|
702 | | - atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); |
---|
| 703 | + atomic_set(&pblk->rl.free_blocks, nr_free_chks); |
---|
| 704 | + atomic_set(&pblk->rl.free_user_blocks, nr_free_chks); |
---|
| 705 | + |
---|
| 706 | + return 0; |
---|
703 | 707 | } |
---|
704 | 708 | |
---|
705 | | -static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line, |
---|
706 | | - void *chunk_meta) |
---|
707 | | -{ |
---|
708 | | - struct nvm_tgt_dev *dev = pblk->dev; |
---|
709 | | - struct nvm_geo *geo = &dev->geo; |
---|
710 | | - struct pblk_line_meta *lm = &pblk->lm; |
---|
711 | | - int i, chk_per_lun, nr_bad_chks = 0; |
---|
712 | | - |
---|
713 | | - chk_per_lun = geo->num_chk * geo->pln_mode; |
---|
714 | | - |
---|
715 | | - for (i = 0; i < lm->blk_per_line; i++) { |
---|
716 | | - struct pblk_lun *rlun = &pblk->luns[i]; |
---|
717 | | - struct nvm_chk_meta *chunk; |
---|
718 | | - int pos = pblk_ppa_to_pos(geo, rlun->bppa); |
---|
719 | | - u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun; |
---|
720 | | - |
---|
721 | | - chunk = &line->chks[pos]; |
---|
722 | | - |
---|
723 | | - /* |
---|
724 | | - * In 1.2 spec. chunk state is not persisted by the device. Thus |
---|
725 | | - * some of the values are reset each time pblk is instantiated, |
---|
726 | | - * so we have to assume that the block is closed. |
---|
727 | | - */ |
---|
728 | | - if (lun_bb_meta[line->id] == NVM_BLK_T_FREE) |
---|
729 | | - chunk->state = NVM_CHK_ST_CLOSED; |
---|
730 | | - else |
---|
731 | | - chunk->state = NVM_CHK_ST_OFFLINE; |
---|
732 | | - |
---|
733 | | - chunk->type = NVM_CHK_TP_W_SEQ; |
---|
734 | | - chunk->wi = 0; |
---|
735 | | - chunk->slba = -1; |
---|
736 | | - chunk->cnlb = geo->clba; |
---|
737 | | - chunk->wp = 0; |
---|
738 | | - |
---|
739 | | - if (!(chunk->state & NVM_CHK_ST_OFFLINE)) |
---|
740 | | - continue; |
---|
741 | | - |
---|
742 | | - set_bit(pos, line->blk_bitmap); |
---|
743 | | - nr_bad_chks++; |
---|
744 | | - } |
---|
745 | | - |
---|
746 | | - return nr_bad_chks; |
---|
747 | | -} |
---|
748 | | - |
---|
749 | | -static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, |
---|
| 709 | +static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, |
---|
750 | 710 | struct nvm_chk_meta *meta) |
---|
751 | 711 | { |
---|
752 | 712 | struct nvm_tgt_dev *dev = pblk->dev; |
---|
.. | .. |
---|
775 | 735 | chunk->cnlb = chunk_meta->cnlb; |
---|
776 | 736 | chunk->wp = chunk_meta->wp; |
---|
777 | 737 | |
---|
| 738 | + trace_pblk_chunk_state(pblk_disk_name(pblk), &ppa, |
---|
| 739 | + chunk->state); |
---|
| 740 | + |
---|
778 | 741 | if (chunk->type & NVM_CHK_TP_SZ_SPEC) { |
---|
779 | 742 | WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n"); |
---|
780 | 743 | continue; |
---|
.. | .. |
---|
793 | 756 | static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, |
---|
794 | 757 | void *chunk_meta, int line_id) |
---|
795 | 758 | { |
---|
796 | | - struct nvm_tgt_dev *dev = pblk->dev; |
---|
797 | | - struct nvm_geo *geo = &dev->geo; |
---|
798 | 759 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; |
---|
799 | 760 | struct pblk_line_meta *lm = &pblk->lm; |
---|
800 | 761 | long nr_bad_chks, chk_in_line; |
---|
.. | .. |
---|
807 | 768 | line->vsc = &l_mg->vsc_list[line_id]; |
---|
808 | 769 | spin_lock_init(&line->lock); |
---|
809 | 770 | |
---|
810 | | - if (geo->version == NVM_OCSSD_SPEC_12) |
---|
811 | | - nr_bad_chks = pblk_setup_line_meta_12(pblk, line, chunk_meta); |
---|
812 | | - else |
---|
813 | | - nr_bad_chks = pblk_setup_line_meta_20(pblk, line, chunk_meta); |
---|
| 771 | + nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta); |
---|
814 | 772 | |
---|
815 | 773 | chk_in_line = lm->blk_per_line - nr_bad_chks; |
---|
816 | 774 | if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line || |
---|
.. | .. |
---|
916 | 874 | goto fail_free_smeta; |
---|
917 | 875 | } |
---|
918 | 876 | |
---|
| 877 | + l_mg->bitmap_cache = kmem_cache_create("pblk_lm_bitmap", |
---|
| 878 | + lm->sec_bitmap_len, 0, 0, NULL); |
---|
| 879 | + if (!l_mg->bitmap_cache) |
---|
| 880 | + goto fail_free_smeta; |
---|
| 881 | + |
---|
| 882 | + /* the bitmap pool is used for both valid and map bitmaps */ |
---|
| 883 | + l_mg->bitmap_pool = mempool_create_slab_pool(PBLK_DATA_LINES * 2, |
---|
| 884 | + l_mg->bitmap_cache); |
---|
| 885 | + if (!l_mg->bitmap_pool) |
---|
| 886 | + goto fail_destroy_bitmap_cache; |
---|
| 887 | + |
---|
919 | 888 | /* emeta allocates three different buffers for managing metadata with |
---|
920 | 889 | * in-memory and in-media layouts |
---|
921 | 890 | */ |
---|
.. | .. |
---|
926 | 895 | if (!emeta) |
---|
927 | 896 | goto fail_free_emeta; |
---|
928 | 897 | |
---|
929 | | - if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) { |
---|
930 | | - l_mg->emeta_alloc_type = PBLK_VMALLOC_META; |
---|
931 | | - |
---|
932 | | - emeta->buf = vmalloc(lm->emeta_len[0]); |
---|
933 | | - if (!emeta->buf) { |
---|
934 | | - kfree(emeta); |
---|
935 | | - goto fail_free_emeta; |
---|
936 | | - } |
---|
937 | | - |
---|
938 | | - emeta->nr_entries = lm->emeta_sec[0]; |
---|
939 | | - l_mg->eline_meta[i] = emeta; |
---|
940 | | - } else { |
---|
941 | | - l_mg->emeta_alloc_type = PBLK_KMALLOC_META; |
---|
942 | | - |
---|
943 | | - emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL); |
---|
944 | | - if (!emeta->buf) { |
---|
945 | | - kfree(emeta); |
---|
946 | | - goto fail_free_emeta; |
---|
947 | | - } |
---|
948 | | - |
---|
949 | | - emeta->nr_entries = lm->emeta_sec[0]; |
---|
950 | | - l_mg->eline_meta[i] = emeta; |
---|
| 898 | + emeta->buf = kvmalloc(lm->emeta_len[0], GFP_KERNEL); |
---|
| 899 | + if (!emeta->buf) { |
---|
| 900 | + kfree(emeta); |
---|
| 901 | + goto fail_free_emeta; |
---|
951 | 902 | } |
---|
| 903 | + |
---|
| 904 | + emeta->nr_entries = lm->emeta_sec[0]; |
---|
| 905 | + l_mg->eline_meta[i] = emeta; |
---|
952 | 906 | } |
---|
953 | 907 | |
---|
954 | 908 | for (i = 0; i < l_mg->nr_lines; i++) |
---|
.. | .. |
---|
962 | 916 | |
---|
963 | 917 | fail_free_emeta: |
---|
964 | 918 | while (--i >= 0) { |
---|
965 | | - if (l_mg->emeta_alloc_type == PBLK_VMALLOC_META) |
---|
966 | | - vfree(l_mg->eline_meta[i]->buf); |
---|
967 | | - else |
---|
968 | | - kfree(l_mg->eline_meta[i]->buf); |
---|
| 919 | + kvfree(l_mg->eline_meta[i]->buf); |
---|
969 | 920 | kfree(l_mg->eline_meta[i]); |
---|
970 | 921 | } |
---|
| 922 | + |
---|
| 923 | + mempool_destroy(l_mg->bitmap_pool); |
---|
| 924 | +fail_destroy_bitmap_cache: |
---|
| 925 | + kmem_cache_destroy(l_mg->bitmap_cache); |
---|
971 | 926 | fail_free_smeta: |
---|
972 | 927 | for (i = 0; i < PBLK_DATA_LINES; i++) |
---|
973 | 928 | kfree(l_mg->sline_meta[i]); |
---|
.. | .. |
---|
1046 | 1001 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; |
---|
1047 | 1002 | struct pblk_line *line; |
---|
1048 | 1003 | void *chunk_meta; |
---|
1049 | | - long nr_free_chks = 0; |
---|
| 1004 | + int nr_free_chks = 0; |
---|
1050 | 1005 | int i, ret; |
---|
1051 | 1006 | |
---|
1052 | 1007 | ret = pblk_line_meta_init(pblk); |
---|
.. | .. |
---|
1061 | 1016 | if (ret) |
---|
1062 | 1017 | goto fail_free_meta; |
---|
1063 | 1018 | |
---|
1064 | | - chunk_meta = pblk_chunk_get_meta(pblk); |
---|
| 1019 | + chunk_meta = pblk_get_chunk_meta(pblk); |
---|
1065 | 1020 | if (IS_ERR(chunk_meta)) { |
---|
1066 | 1021 | ret = PTR_ERR(chunk_meta); |
---|
1067 | 1022 | goto fail_free_luns; |
---|
.. | .. |
---|
1082 | 1037 | goto fail_free_lines; |
---|
1083 | 1038 | |
---|
1084 | 1039 | nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i); |
---|
| 1040 | + |
---|
| 1041 | + trace_pblk_line_state(pblk_disk_name(pblk), line->id, |
---|
| 1042 | + line->state); |
---|
1085 | 1043 | } |
---|
1086 | 1044 | |
---|
1087 | 1045 | if (!nr_free_chks) { |
---|
.. | .. |
---|
1090 | 1048 | goto fail_free_lines; |
---|
1091 | 1049 | } |
---|
1092 | 1050 | |
---|
1093 | | - pblk_set_provision(pblk, nr_free_chks); |
---|
| 1051 | + ret = pblk_set_provision(pblk, nr_free_chks); |
---|
| 1052 | + if (ret) |
---|
| 1053 | + goto fail_free_lines; |
---|
1094 | 1054 | |
---|
1095 | | - kfree(chunk_meta); |
---|
| 1055 | + vfree(chunk_meta); |
---|
1096 | 1056 | return 0; |
---|
1097 | 1057 | |
---|
1098 | 1058 | fail_free_lines: |
---|
.. | .. |
---|
1100 | 1060 | pblk_line_meta_free(l_mg, &pblk->lines[i]); |
---|
1101 | 1061 | kfree(pblk->lines); |
---|
1102 | 1062 | fail_free_chunk_meta: |
---|
1103 | | - kfree(chunk_meta); |
---|
| 1063 | + vfree(chunk_meta); |
---|
1104 | 1064 | fail_free_luns: |
---|
1105 | 1065 | kfree(pblk->luns); |
---|
1106 | 1066 | fail_free_meta: |
---|
.. | .. |
---|
1169 | 1129 | { |
---|
1170 | 1130 | struct pblk *pblk = private; |
---|
1171 | 1131 | |
---|
1172 | | - down_write(&pblk_lock); |
---|
1173 | 1132 | pblk_gc_exit(pblk, graceful); |
---|
1174 | 1133 | pblk_tear_down(pblk, graceful); |
---|
1175 | 1134 | |
---|
.. | .. |
---|
1178 | 1137 | #endif |
---|
1179 | 1138 | |
---|
1180 | 1139 | pblk_free(pblk); |
---|
1181 | | - up_write(&pblk_lock); |
---|
1182 | 1140 | } |
---|
1183 | 1141 | |
---|
1184 | 1142 | static sector_t pblk_capacity(void *private) |
---|
.. | .. |
---|
1204 | 1162 | pblk->dev = dev; |
---|
1205 | 1163 | pblk->disk = tdisk; |
---|
1206 | 1164 | pblk->state = PBLK_STATE_RUNNING; |
---|
| 1165 | + trace_pblk_state(pblk_disk_name(pblk), pblk->state); |
---|
1207 | 1166 | pblk->gc.gc_enabled = 0; |
---|
1208 | 1167 | |
---|
1209 | 1168 | if (!(geo->version == NVM_OCSSD_SPEC_12 || |
---|
.. | .. |
---|
1214 | 1173 | return ERR_PTR(-EINVAL); |
---|
1215 | 1174 | } |
---|
1216 | 1175 | |
---|
1217 | | - if (geo->version == NVM_OCSSD_SPEC_12 && geo->dom & NVM_RSP_L2P) { |
---|
1218 | | - pblk_err(pblk, "host-side L2P table not supported. (%x)\n", |
---|
1219 | | - geo->dom); |
---|
| 1176 | + if (geo->ext) { |
---|
| 1177 | + pblk_err(pblk, "extended metadata not supported\n"); |
---|
1220 | 1178 | kfree(pblk); |
---|
1221 | 1179 | return ERR_PTR(-EINVAL); |
---|
1222 | 1180 | } |
---|
.. | .. |
---|
1298 | 1256 | |
---|
1299 | 1257 | pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n", |
---|
1300 | 1258 | geo->all_luns, pblk->l_mg.nr_lines, |
---|
1301 | | - (unsigned long long)pblk->rl.nr_secs, |
---|
| 1259 | + (unsigned long long)pblk->capacity, |
---|
1302 | 1260 | pblk->rwb.nr_entries); |
---|
1303 | 1261 | |
---|
1304 | 1262 | wake_up_process(pblk->writer_ts); |
---|
.. | .. |
---|
1328 | 1286 | .name = "pblk", |
---|
1329 | 1287 | .version = {1, 0, 0}, |
---|
1330 | 1288 | |
---|
1331 | | - .make_rq = pblk_make_rq, |
---|
| 1289 | + .bops = &pblk_bops, |
---|
1332 | 1290 | .capacity = pblk_capacity, |
---|
1333 | 1291 | |
---|
1334 | 1292 | .init = pblk_init, |
---|