2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/lightnvm/pblk-recovery.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2016 CNEX Labs
  * Initial: Javier Gonzalez <javier@cnexlabs.com>
@@ -12,9 +13,13 @@
  * General Public License for more details.
  *
  * pblk-recovery.c - pblk's recovery path
+ *
+ * The L2P recovery path is single threaded as the L2P table is updated in order
+ * following the line sequence ID.
  */
 
 #include "pblk.h"
+#include "pblk-trace.h"
 
 int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
 {
@@ -85,134 +90,63 @@
 	return 0;
 }
 
-static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
+static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line,
+				u64 written_secs)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	int i;
+
+	for (i = 0; i < written_secs; i += pblk->min_write_pgs)
+		__pblk_alloc_page(pblk, line, pblk->min_write_pgs);
+
+	spin_lock(&l_mg->free_lock);
+	if (written_secs > line->left_msecs) {
+		/*
+		 * We have all data sectors written
+		 * and some emeta sectors written too.
+		 */
+		line->left_msecs = 0;
+	} else {
+		/* We have only some data sectors written. */
+		line->left_msecs -= written_secs;
+	}
+	spin_unlock(&l_mg->free_lock);
+}
+
+static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
+{
 	struct pblk_line_meta *lm = &pblk->lm;
 	int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
+	u64 written_secs = 0;
+	int valid_chunks = 0;
+	int i;
 
-	return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
-				nr_bb * geo->clba;
+	for (i = 0; i < lm->blk_per_line; i++) {
+		struct nvm_chk_meta *chunk = &line->chks[i];
+
+		if (chunk->state & NVM_CHK_ST_OFFLINE)
+			continue;
+
+		written_secs += chunk->wp;
+		valid_chunks++;
+	}
+
+	if (lm->blk_per_line - nr_bb != valid_chunks)
+		pblk_err(pblk, "recovery line %d is bad\n", line->id);
+
+	pblk_update_line_wp(pblk, line, written_secs - lm->smeta_sec);
+
+	return written_secs;
 }
 
 struct pblk_recov_alloc {
 	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
+	void *meta_list;
 	struct nvm_rq *rqd;
 	void *data;
 	dma_addr_t dma_ppa_list;
 	dma_addr_t dma_meta_list;
 };
-
-static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
-			       struct pblk_recov_alloc p, u64 r_ptr)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
-	struct nvm_rq *rqd;
-	struct bio *bio;
-	void *data;
-	dma_addr_t dma_ppa_list, dma_meta_list;
-	u64 r_ptr_int;
-	int left_ppas;
-	int rq_ppas, rq_len;
-	int i, j;
-	int ret = 0;
-
-	ppa_list = p.ppa_list;
-	meta_list = p.meta_list;
-	rqd = p.rqd;
-	data = p.data;
-	dma_ppa_list = p.dma_ppa_list;
-	dma_meta_list = p.dma_meta_list;
-
-	left_ppas = line->cur_sec - r_ptr;
-	if (!left_ppas)
-		return 0;
-
-	r_ptr_int = r_ptr;
-
-next_read_rq:
-	memset(rqd, 0, pblk_g_rq_size);
-
-	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
-	if (!rq_ppas)
-		rq_ppas = pblk->min_write_pgs;
-	rq_len = rq_ppas * geo->csecs;
-
-	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_READ, 0);
-
-	rqd->bio = bio;
-	rqd->opcode = NVM_OP_PREAD;
-	rqd->meta_list = meta_list;
-	rqd->nr_ppas = rq_ppas;
-	rqd->ppa_list = ppa_list;
-	rqd->dma_ppa_list = dma_ppa_list;
-	rqd->dma_meta_list = dma_meta_list;
-
-	if (pblk_io_aligned(pblk, rq_ppas))
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
-	else
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
-
-	for (i = 0; i < rqd->nr_ppas; ) {
-		struct ppa_addr ppa;
-		int pos;
-
-		ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
-		pos = pblk_ppa_to_pos(geo, ppa);
-
-		while (test_bit(pos, line->blk_bitmap)) {
-			r_ptr_int += pblk->min_write_pgs;
-			ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
-			pos = pblk_ppa_to_pos(geo, ppa);
-		}
-
-		for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++)
-			rqd->ppa_list[i] =
-				addr_to_gen_ppa(pblk, r_ptr_int, line->id);
-	}
-
-	/* If read fails, more padding is needed */
-	ret = pblk_submit_io_sync(pblk, rqd);
-	if (ret) {
-		pblk_err(pblk, "I/O submission failed: %d\n", ret);
-		return ret;
-	}
-
-	atomic_dec(&pblk->inflight_io);
-
-	/* At this point, the read should not fail. If it does, it is a problem
-	 * we cannot recover from here. Need FTL log.
-	 */
-	if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
-		pblk_err(pblk, "L2P recovery failed (%d)\n", rqd->error);
-		return -EINTR;
-	}
-
-	for (i = 0; i < rqd->nr_ppas; i++) {
-		u64 lba = le64_to_cpu(meta_list[i].lba);
-
-		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
-			continue;
-
-		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
-	}
-
-	left_ppas -= rq_ppas;
-	if (left_ppas > 0)
-		goto next_read_rq;
-
-	return 0;
-}
 
 static void pblk_recov_complete(struct kref *ref)
 {
@@ -223,10 +157,11 @@
 
 static void pblk_end_io_recov(struct nvm_rq *rqd)
 {
+	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 	struct pblk_pad_rq *pad_rq = rqd->private;
 	struct pblk *pblk = pad_rq->pblk;
 
-	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+	pblk_up_chunk(pblk, ppa_list[0]);
 
 	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
 
@@ -234,21 +169,20 @@
 	kref_put(&pad_rq->ref, pblk_recov_complete);
 }
 
-static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
-			      int left_ppas)
+/* pad line using line bitmap. */
+static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
+			       int left_ppas)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
+	void *meta_list;
 	struct pblk_pad_rq *pad_rq;
 	struct nvm_rq *rqd;
-	struct bio *bio;
+	struct ppa_addr *ppa_list;
 	void *data;
-	dma_addr_t dma_ppa_list, dma_meta_list;
 	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
 	u64 w_ptr = line->cur_sec;
-	int left_line_ppas, rq_ppas, rq_len;
+	int left_line_ppas, rq_ppas;
 	int i, j;
 	int ret = 0;
 
@@ -271,45 +205,29 @@
 	kref_init(&pad_rq->ref);
 
 next_pad_rq:
-	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
 	if (rq_ppas < pblk->min_write_pgs) {
 		pblk_err(pblk, "corrupted pad line %d\n", line->id);
-		goto fail_free_pad;
+		goto fail_complete;
 	}
-
-	rq_len = rq_ppas * geo->csecs;
-
-	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
-	if (!meta_list) {
-		ret = -ENOMEM;
-		goto fail_free_pad;
-	}
-
-	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
-	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
-
-	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
-				PBLK_VMALLOC_META, GFP_KERNEL);
-	if (IS_ERR(bio)) {
-		ret = PTR_ERR(bio);
-		goto fail_free_meta;
-	}
-
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 	rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
 
-	rqd->bio = bio;
+	ret = pblk_alloc_rqd_meta(pblk, rqd);
+	if (ret) {
+		pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
+		goto fail_complete;
+	}
+
+	rqd->bio = NULL;
 	rqd->opcode = NVM_OP_PWRITE;
-	rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
-	rqd->meta_list = meta_list;
+	rqd->is_seq = 1;
 	rqd->nr_ppas = rq_ppas;
-	rqd->ppa_list = ppa_list;
-	rqd->dma_ppa_list = dma_ppa_list;
-	rqd->dma_meta_list = dma_meta_list;
 	rqd->end_io = pblk_end_io_recov;
 	rqd->private = pad_rq;
+
+	ppa_list = nvm_rq_to_ppa_list(rqd);
+	meta_list = rqd->meta_list;
 
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
@@ -327,24 +245,29 @@
 
 		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
 			struct ppa_addr dev_ppa;
+			struct pblk_sec_meta *meta;
 			__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
 
 			dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
 
 			pblk_map_invalidate(pblk, dev_ppa);
-			lba_list[w_ptr] = meta_list[i].lba = addr_empty;
-			rqd->ppa_list[i] = dev_ppa;
+			lba_list[w_ptr] = addr_empty;
+			meta = pblk_get_meta(pblk, meta_list, i);
+			meta->lba = addr_empty;
+			ppa_list[i] = dev_ppa;
 		}
 	}
 
 	kref_get(&pad_rq->ref);
-	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+	pblk_down_chunk(pblk, ppa_list[0]);
 
-	ret = pblk_submit_io(pblk, rqd);
+	ret = pblk_submit_io(pblk, rqd, data);
 	if (ret) {
 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
-		pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
-		goto fail_free_bio;
+		pblk_up_chunk(pblk, ppa_list[0]);
+		kref_put(&pad_rq->ref, pblk_recov_complete);
+		pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
+		goto fail_complete;
 	}
 
 	left_line_ppas -= rq_ppas;
@@ -352,13 +275,9 @@
 	if (left_ppas && left_line_ppas)
 		goto next_pad_rq;
 
+fail_complete:
 	kref_put(&pad_rq->ref, pblk_recov_complete);
-
-	if (!wait_for_completion_io_timeout(&pad_rq->wait,
-				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
-		pblk_err(pblk, "pad write timed out\n");
-		ret = -ETIME;
-	}
+	wait_for_completion(&pad_rq->wait);
 
 	if (!pblk_line_is_full(line))
 		pblk_err(pblk, "corrupted padded line: %d\n", line->id);
@@ -367,172 +286,92 @@
 free_rq:
 	kfree(pad_rq);
 	return ret;
-
-fail_free_bio:
-	bio_put(bio);
-fail_free_meta:
-	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-fail_free_pad:
-	kfree(pad_rq);
-	vfree(data);
-	return ret;
 }
 
-/* When this function is called, it means that not all upper pages have been
- * written in a page that contains valid data. In order to recover this data, we
- * first find the write pointer on the device, then we pad all necessary
- * sectors, and finally attempt to read the valid data
- */
-static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
-				   struct pblk_recov_alloc p)
+static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
-	struct nvm_rq *rqd;
-	struct bio *bio;
-	void *data;
-	dma_addr_t dma_ppa_list, dma_meta_list;
-	u64 w_ptr = 0, r_ptr;
-	int rq_ppas, rq_len;
-	int i, j;
-	int ret = 0;
-	int rec_round;
-	int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec;
+	int distance = geo->mw_cunits * geo->all_luns * geo->ws_opt;
 
-	ppa_list = p.ppa_list;
-	meta_list = p.meta_list;
-	rqd = p.rqd;
-	data = p.data;
-	dma_ppa_list = p.dma_ppa_list;
-	dma_meta_list = p.dma_meta_list;
+	return (distance > line->left_msecs) ? line->left_msecs : distance;
+}
 
-	/* we could recover up until the line write pointer */
-	r_ptr = line->cur_sec;
-	rec_round = 0;
+/* Return a chunk belonging to a line by stripe(write order) index */
+static struct nvm_chk_meta *pblk_get_stripe_chunk(struct pblk *pblk,
+						  struct pblk_line *line,
+						  int index)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_lun *rlun;
+	struct ppa_addr ppa;
+	int pos;
 
-next_rq:
-	memset(rqd, 0, pblk_g_rq_size);
+	rlun = &pblk->luns[index];
+	ppa = rlun->bppa;
+	pos = pblk_ppa_to_pos(geo, ppa);
 
-	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
-	if (!rq_ppas)
-		rq_ppas = pblk->min_write_pgs;
-	rq_len = rq_ppas * geo->csecs;
+	return &line->chks[pos];
+}
 
-	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
+static int pblk_line_wps_are_unbalanced(struct pblk *pblk,
+					struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	int blk_in_line = lm->blk_per_line;
+	struct nvm_chk_meta *chunk;
+	u64 max_wp, min_wp;
+	int i;
 
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_READ, 0);
+	i = find_first_zero_bit(line->blk_bitmap, blk_in_line);
 
-	rqd->bio = bio;
-	rqd->opcode = NVM_OP_PREAD;
-	rqd->meta_list = meta_list;
-	rqd->nr_ppas = rq_ppas;
-	rqd->ppa_list = ppa_list;
-	rqd->dma_ppa_list = dma_ppa_list;
-	rqd->dma_meta_list = dma_meta_list;
-
-	if (pblk_io_aligned(pblk, rq_ppas))
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
-	else
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
-
-	for (i = 0; i < rqd->nr_ppas; ) {
-		struct ppa_addr ppa;
-		int pos;
-
-		w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
-		ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
-		pos = pblk_ppa_to_pos(geo, ppa);
-
-		while (test_bit(pos, line->blk_bitmap)) {
-			w_ptr += pblk->min_write_pgs;
-			ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
-			pos = pblk_ppa_to_pos(geo, ppa);
-		}
-
-		for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++)
-			rqd->ppa_list[i] =
-				addr_to_gen_ppa(pblk, w_ptr, line->id);
-	}
-
-	ret = pblk_submit_io_sync(pblk, rqd);
-	if (ret) {
-		pblk_err(pblk, "I/O submission failed: %d\n", ret);
-		return ret;
-	}
-
-	atomic_dec(&pblk->inflight_io);
-
-	/* This should not happen since the read failed during normal recovery,
-	 * but the media works funny sometimes...
+	/* If there is one or zero good chunks in the line,
+	 * the write pointers can't be unbalanced.
 	 */
-	if (!rec_round++ && !rqd->error) {
-		rec_round = 0;
-		for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) {
-			u64 lba = le64_to_cpu(meta_list[i].lba);
+	if (i >= (blk_in_line - 1))
+		return 0;
 
-			if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
-				continue;
+	chunk = pblk_get_stripe_chunk(pblk, line, i);
+	max_wp = chunk->wp;
+	if (max_wp > pblk->max_write_pgs)
+		min_wp = max_wp - pblk->max_write_pgs;
+	else
+		min_wp = 0;
 
-			pblk_update_map(pblk, lba, rqd->ppa_list[i]);
-		}
+	i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
+	while (i < blk_in_line) {
+		chunk = pblk_get_stripe_chunk(pblk, line, i);
+		if (chunk->wp > max_wp || chunk->wp < min_wp)
+			return 1;
+
+		i = find_next_zero_bit(line->blk_bitmap, blk_in_line, i + 1);
 	}
 
-	/* Reached the end of the written line */
-	if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
-		int pad_secs, nr_error_bits, bit;
-		int ret;
-
-		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
-		nr_error_bits = rqd->nr_ppas - bit;
-
-		/* Roll back failed sectors */
-		line->cur_sec -= nr_error_bits;
-		line->left_msecs += nr_error_bits;
-		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
-
-		pad_secs = pblk_pad_distance(pblk);
-		if (pad_secs > line->left_msecs)
-			pad_secs = line->left_msecs;
-
-		ret = pblk_recov_pad_oob(pblk, line, pad_secs);
-		if (ret)
-			pblk_err(pblk, "OOB padding failed (err:%d)\n", ret);
-
-		ret = pblk_recov_read_oob(pblk, line, p, r_ptr);
-		if (ret)
-			pblk_err(pblk, "OOB read failed (err:%d)\n", ret);
-
-		left_ppas = 0;
-	}
-
-	left_ppas -= rq_ppas;
-	if (left_ppas > 0)
-		goto next_rq;
-
-	return ret;
+	return 0;
 }
 
 static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
-			       struct pblk_recov_alloc p, int *done)
+			       struct pblk_recov_alloc p)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
+	struct pblk_line_meta *lm = &pblk->lm;
 	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
+	void *meta_list;
 	struct nvm_rq *rqd;
-	struct bio *bio;
 	void *data;
 	dma_addr_t dma_ppa_list, dma_meta_list;
-	u64 paddr;
-	int rq_ppas, rq_len;
+	__le64 *lba_list;
+	u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
+	bool padded = false;
+	int rq_ppas;
 	int i, j;
-	int ret = 0;
-	int left_ppas = pblk_calc_sec_in_line(pblk, line);
+	int ret;
+	u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec;
+
+	if (pblk_line_wps_are_unbalanced(pblk, line))
+		pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id);
 
 	ppa_list = p.ppa_list;
 	meta_list = p.meta_list;
@@ -541,41 +380,32 @@
 	dma_ppa_list = p.dma_ppa_list;
 	dma_meta_list = p.dma_meta_list;
 
-	*done = 1;
+	lba_list = emeta_to_lbas(pblk, line->emeta->buf);
 
 next_rq:
 	memset(rqd, 0, pblk_g_rq_size);
 
-	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
 	if (!rq_ppas)
 		rq_ppas = pblk->min_write_pgs;
-	rq_len = rq_ppas * geo->csecs;
 
-	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_READ, 0);
-
-	rqd->bio = bio;
+retry_rq:
+	rqd->bio = NULL;
 	rqd->opcode = NVM_OP_PREAD;
 	rqd->meta_list = meta_list;
 	rqd->nr_ppas = rq_ppas;
 	rqd->ppa_list = ppa_list;
 	rqd->dma_ppa_list = dma_ppa_list;
 	rqd->dma_meta_list = dma_meta_list;
+	ppa_list = nvm_rq_to_ppa_list(rqd);
 
 	if (pblk_io_aligned(pblk, rq_ppas))
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
-	else
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+		rqd->is_seq = 1;
 
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
 		int pos;
 
-		paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
 		ppa = addr_to_gen_ppa(pblk, paddr, line->id);
 		pos = pblk_ppa_to_pos(geo, ppa);
 
@@ -585,53 +415,62 @@
 			pos = pblk_ppa_to_pos(geo, ppa);
 		}
 
-		for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++)
-			rqd->ppa_list[i] =
-				addr_to_gen_ppa(pblk, paddr, line->id);
+		for (j = 0; j < pblk->min_write_pgs; j++, i++)
+			ppa_list[i] =
+				addr_to_gen_ppa(pblk, paddr + j, line->id);
 	}
 
-	ret = pblk_submit_io_sync(pblk, rqd);
+	ret = pblk_submit_io_sync(pblk, rqd, data);
 	if (ret) {
 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
-		bio_put(bio);
 		return ret;
 	}
 
 	atomic_dec(&pblk->inflight_io);
 
-	/* Reached the end of the written line */
-	if (rqd->error) {
-		int nr_error_bits, bit;
+	/* If a read fails, do a best effort by padding the line and retrying */
+	if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
+		int pad_distance, ret;
 
-		bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
-		nr_error_bits = rqd->nr_ppas - bit;
+		if (padded) {
+			pblk_log_read_err(pblk, rqd);
+			return -EINTR;
+		}
 
-		/* Roll back failed sectors */
-		line->cur_sec -= nr_error_bits;
-		line->left_msecs += nr_error_bits;
-		bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
+		pad_distance = pblk_pad_distance(pblk, line);
+		ret = pblk_recov_pad_line(pblk, line, pad_distance);
+		if (ret) {
+			return ret;
+		}
 
-		left_ppas = 0;
-		rqd->nr_ppas = bit;
-
-		if (rqd->error != NVM_RSP_ERR_EMPTYPAGE)
-			*done = 0;
+		padded = true;
+		goto retry_rq;
 	}
 
-	for (i = 0; i < rqd->nr_ppas; i++) {
-		u64 lba = le64_to_cpu(meta_list[i].lba);
+	pblk_get_packed_meta(pblk, rqd);
 
-		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
+	for (i = 0; i < rqd->nr_ppas; i++) {
+		struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
+		u64 lba = le64_to_cpu(meta->lba);
+
+		lba_list[paddr++] = cpu_to_le64(lba);
+
+		if (lba == ADDR_EMPTY || lba >= pblk->capacity)
 			continue;
 
-		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
+		line->nr_valid_lbas++;
+		pblk_update_map(pblk, lba, ppa_list[i]);
 	}
 
 	left_ppas -= rq_ppas;
 	if (left_ppas > 0)
 		goto next_rq;
 
-	return ret;
+#ifdef CONFIG_NVM_PBLK_DEBUG
+	WARN_ON(padded && !pblk_line_is_full(line));
+#endif
+
+	return 0;
 }
 
 /* Scan line for lbas on out of bound area */
@@ -641,18 +480,18 @@
 	struct nvm_geo *geo = &dev->geo;
 	struct nvm_rq *rqd;
 	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
+	void *meta_list;
 	struct pblk_recov_alloc p;
 	void *data;
 	dma_addr_t dma_ppa_list, dma_meta_list;
-	int done, ret = 0;
+	int ret = 0;
 
 	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
 	if (!meta_list)
 		return -ENOMEM;
 
-	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
-	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+	ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
+	dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
 
 	data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
 	if (!data) {
@@ -660,7 +499,8 @@
 		goto free_meta_list;
 	}
 
-	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
+	rqd = mempool_alloc(&pblk->r_rq_pool, GFP_KERNEL);
+	memset(rqd, 0, pblk_g_rq_size);
 
 	p.ppa_list = ppa_list;
 	p.meta_list = meta_list;
@@ -669,24 +509,17 @@
 	p.dma_ppa_list = dma_ppa_list;
 	p.dma_meta_list = dma_meta_list;
 
-	ret = pblk_recov_scan_oob(pblk, line, p, &done);
+	ret = pblk_recov_scan_oob(pblk, line, p);
 	if (ret) {
-		pblk_err(pblk, "could not recover L2P from OOB\n");
+		pblk_err(pblk, "could not recover L2P form OOB\n");
 		goto out;
-	}
-
-	if (!done) {
-		ret = pblk_recov_scan_all_oob(pblk, line, p);
-		if (ret) {
-			pblk_err(pblk, "could not recover L2P from OOB\n");
-			goto out;
-		}
 	}
 
 	if (pblk_line_is_full(line))
 		pblk_line_recov_close(pblk, line);
 
 out:
+	mempool_free(rqd, &pblk->r_rq_pool);
 	kfree(data);
 free_meta_list:
 	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
@@ -775,7 +608,7 @@
 }
 
 static int pblk_line_was_written(struct pblk_line *line,
-					struct pblk *pblk)
+				 struct pblk *pblk)
 {
 
 	struct pblk_line_meta *lm = &pblk->lm;
@@ -795,10 +628,24 @@
 	bppa = pblk->luns[smeta_blk].bppa;
 	chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
 
-	if (chunk->state & NVM_CHK_ST_FREE)
-		return 0;
+	if (chunk->state & NVM_CHK_ST_CLOSED ||
+	    (chunk->state & NVM_CHK_ST_OPEN
+	     && chunk->wp >= lm->smeta_sec))
+		return 1;
 
-	return 1;
+	return 0;
+}
+
+static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	int i;
+
+	for (i = 0; i < lm->blk_per_line; i++)
+		if (line->chks[i].state & NVM_CHK_ST_OPEN)
+			return true;
+
+	return false;
 }
 
 struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
@@ -841,7 +688,7 @@
 			continue;
 
 		/* Lines that cannot be read are assumed as not written here */
-		if (pblk_line_read_smeta(pblk, line))
+		if (pblk_line_smeta_read(pblk, line))
 			continue;
 		crc = pblk_calc_smeta_crc(pblk, smeta_buf);
847694 crc = pblk_calc_smeta_crc(pblk, smeta_buf);
....@@ -859,11 +706,13 @@
859706
860707 /* The first valid instance uuid is used for initialization */
861708 if (!valid_uuid) {
862
- memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
709
+ guid_copy(&pblk->instance_uuid,
710
+ (guid_t *)&smeta_buf->header.uuid);
863711 valid_uuid = 1;
864712 }
865713
866
- if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
714
+ if (!guid_equal(&pblk->instance_uuid,
715
+ (guid_t *)&smeta_buf->header.uuid)) {
867716 pblk_debug(pblk, "ignore line %u due to uuid mismatch\n",
868717 i);
869718 continue;
@@ -893,7 +742,7 @@
 	}
 
 	if (!found_lines) {
-		pblk_setup_uuid(pblk);
+		guid_gen(&pblk->instance_uuid);
 
 		spin_lock(&l_mg->free_lock);
 		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
@@ -911,7 +760,12 @@
 		line->emeta = emeta;
 		memset(line->emeta->buf, 0, lm->emeta_len[0]);
 
-		if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
+		if (pblk_line_is_open(pblk, line)) {
+			pblk_recov_l2p_from_oob(pblk, line);
+			goto next;
+		}
+
+		if (pblk_line_emeta_read(pblk, line, line->emeta->buf)) {
 			pblk_recov_l2p_from_oob(pblk, line);
 			goto next;
 		}
@@ -935,6 +789,8 @@
 
 			spin_lock(&line->lock);
 			line->state = PBLK_LINESTATE_CLOSED;
+			trace_pblk_line_state(pblk_disk_name(pblk), line->id,
+					line->state);
 			move_list = pblk_line_gc_list(pblk, line);
 			spin_unlock(&line->lock);
 
@@ -942,17 +798,25 @@
 			list_move_tail(&line->list, move_list);
 			spin_unlock(&l_mg->gc_lock);
 
-			kfree(line->map_bitmap);
+			mempool_free(line->map_bitmap, l_mg->bitmap_pool);
 			line->map_bitmap = NULL;
 			line->smeta = NULL;
 			line->emeta = NULL;
 		} else {
-			if (open_lines > 1)
-				pblk_err(pblk, "failed to recover L2P\n");
+			spin_lock(&line->lock);
+			line->state = PBLK_LINESTATE_OPEN;
+			spin_unlock(&line->lock);
+
+			line->emeta->mem = 0;
+			atomic_set(&line->emeta->sync, 0);
+
+			trace_pblk_line_state(pblk_disk_name(pblk), line->id,
+					line->state);
+
+			data_line = line;
+			line->meta_line = meta_line;
 
 			open_lines++;
-			line->meta_line = meta_line;
-			data_line = line;
 		}
 	}
 
@@ -961,9 +825,9 @@
 		WARN_ON_ONCE(!test_and_clear_bit(meta_line,
 						 &l_mg->meta_bitmap));
 		spin_unlock(&l_mg->free_lock);
-		pblk_line_replace_data(pblk);
 	} else {
 		spin_lock(&l_mg->free_lock);
+		l_mg->data_line = data_line;
 		/* Allocate next line for preparation */
 		l_mg->data_next = pblk_line_get(pblk);
 		if (l_mg->data_next) {
@@ -1000,7 +864,7 @@
 	left_msecs = line->left_msecs;
 	spin_unlock(&l_mg->free_lock);
 
-	ret = pblk_recov_pad_oob(pblk, line, left_msecs);
+	ret = pblk_recov_pad_line(pblk, line, left_msecs);
 	if (ret) {
 		pblk_err(pblk, "tear down padding failed (%d)\n", ret);
 		return ret;