2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/drivers/md/bcache/journal.c
@@ -47,7 +47,7 @@
 
 	closure_init_stack(&cl);
 
-	pr_debug("reading %u", bucket_index);
+	pr_debug("reading %u\n", bucket_index);
 
 	while (offset < ca->sb.bucket_size) {
 reread:		left = ca->sb.bucket_size - offset;
@@ -78,13 +78,13 @@
 			size_t blocks, bytes = set_bytes(j);
 
 			if (j->magic != jset_magic(&ca->sb)) {
-				pr_debug("%u: bad magic", bucket_index);
+				pr_debug("%u: bad magic\n", bucket_index);
 				return ret;
 			}
 
 			if (bytes > left << 9 ||
 			    bytes > PAGE_SIZE << JSET_BITS) {
-				pr_info("%u: too big, %zu bytes, offset %u",
+				pr_info("%u: too big, %zu bytes, offset %u\n",
 					bucket_index, bytes, offset);
 				return ret;
 			}
@@ -93,13 +93,27 @@
 				goto reread;
 
 			if (j->csum != csum_set(j)) {
-				pr_info("%u: bad csum, %zu bytes, offset %u",
+				pr_info("%u: bad csum, %zu bytes, offset %u\n",
 					bucket_index, bytes, offset);
 				return ret;
 			}
 
-			blocks = set_blocks(j, block_bytes(ca->set));
+			blocks = set_blocks(j, block_bytes(ca));
 
+			/*
+			 * Nodes in 'list' are in linear increasing order of
+			 * i->j.seq, the node on head has the smallest (oldest)
+			 * journal seq, the node on tail has the biggest
+			 * (latest) journal seq.
+			 */
+
+			/*
+			 * Check from the oldest jset for last_seq. If
+			 * i->j.seq < j->last_seq, it means the oldest jset
+			 * in list is expired and useless, remove it from
+			 * this list. Otherwise, j is a candidate jset for
+			 * further following checks.
+			 */
 			while (!list_empty(list)) {
 				i = list_first_entry(list,
 					struct journal_replay, list);
@@ -109,13 +123,22 @@
 				kfree(i);
 			}
 
+			/* iterate list in reverse order (from latest jset) */
 			list_for_each_entry_reverse(i, list, list) {
 				if (j->seq == i->j.seq)
 					goto next_set;
 
+				/*
+				 * if j->seq is less than any i->j.last_seq
+				 * in list, j is an expired and useless jset.
+				 */
 				if (j->seq < i->j.last_seq)
 					goto next_set;
 
+				/*
+				 * 'where' points to the first jset in list
+				 * which is older than j.
+				 */
 				if (j->seq > i->j.seq) {
 					where = &i->list;
 					goto add;
@@ -129,10 +152,12 @@
 			if (!i)
 				return -ENOMEM;
 			memcpy(&i->j, j, bytes);
+			/* Add to the location after 'where' points to */
 			list_add(&i->list, where);
 			ret = 1;
 
-			ja->seq[bucket_index] = j->seq;
+			if (j->seq > ja->seq[bucket_index])
+				ja->seq[bucket_index] = j->seq;
 next_set:
 			offset	+= blocks * ca->sb.block_size;
 			len	-= blocks * ca->sb.block_size;
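
Note: the hunks above keep the replay list ordered — jsets whose seq is older than the incoming jset's last_seq are dropped from the head, the list is walked from the tail (newest seq) to skip duplicates and find the insertion point, and ja->seq[bucket_index] only ever moves forward. A minimal userspace sketch of just the ordered, de-duplicating insert (illustrative names, not bcache code; the last_seq trimming is left out):

/* Sketch: keep replay entries sorted by seq, newest at the tail, skipping
 * duplicates -- the same invariant the new comments in journal_read_bucket()
 * describe. All names here are made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

struct replay {
	uint64_t seq;
	struct replay *prev, *next;
};

/* circular list with a sentinel head: head->next is oldest, head->prev is newest */
static void list_init(struct replay *head)
{
	head->prev = head->next = head;
}

static void insert_after(struct replay *pos, struct replay *new)
{
	new->prev = pos;
	new->next = pos->next;
	pos->next->prev = new;
	pos->next = new;
}

/* Scan from the newest entry backwards; return 0 if seq is already present. */
static int add_replay(struct replay *head, uint64_t seq)
{
	struct replay *i, *new;
	struct replay *where = head;		/* default: becomes the new oldest */

	for (i = head->prev; i != head; i = i->prev) {
		if (i->seq == seq)
			return 0;		/* duplicate jset, ignore */
		if (i->seq < seq) {
			where = i;		/* first entry older than seq */
			break;
		}
	}

	new = malloc(sizeof(*new));
	new->seq = seq;
	insert_after(where, new);		/* list stays ascending by seq */
	return 1;
}

int main(void)
{
	struct replay head, *i;
	uint64_t in[] = { 12, 10, 15, 12, 11 };

	list_init(&head);
	for (unsigned int n = 0; n < sizeof(in) / sizeof(in[0]); n++)
		add_replay(&head, in[n]);

	for (i = head.next; i != &head; i = i->next)
		printf("%llu ", (unsigned long long)i->seq);
	printf("\n");				/* prints: 10 11 12 15 */
	return 0;
}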
@@ -147,121 +172,116 @@
 {
 #define read_bucket(b)						\
 	({								\
-		int ret = journal_read_bucket(ca, list, b);		\
+		ret = journal_read_bucket(ca, list, b);			\
 		__set_bit(b, bitmap);					\
 		if (ret < 0)						\
 			return ret;					\
 		ret;							\
 	})
 
-	struct cache *ca;
-	unsigned int iter;
+	struct cache *ca = c->cache;
+	int ret = 0;
+	struct journal_device *ja = &ca->journal;
+	DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS);
+	unsigned int i, l, r, m;
+	uint64_t seq;
 
-	for_each_cache(ca, c, iter) {
-		struct journal_device *ja = &ca->journal;
-		DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS);
-		unsigned int i, l, r, m;
-		uint64_t seq;
+	bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
+	pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
 
-		bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
-		pr_debug("%u journal buckets", ca->sb.njournal_buckets);
-
+	/*
+	 * Read journal buckets ordered by golden ratio hash to quickly
+	 * find a sequence of buckets with valid journal entries
+	 */
+	for (i = 0; i < ca->sb.njournal_buckets; i++) {
 		/*
-		 * Read journal buckets ordered by golden ratio hash to quickly
-		 * find a sequence of buckets with valid journal entries
+		 * We must try the index l with ZERO first for
+		 * correctness due to the scenario that the journal
+		 * bucket is circular buffer which might have wrapped
 		 */
-		for (i = 0; i < ca->sb.njournal_buckets; i++) {
-			/*
-			 * We must try the index l with ZERO first for
-			 * correctness due to the scenario that the journal
-			 * bucket is circular buffer which might have wrapped
-			 */
-			l = (i * 2654435769U) % ca->sb.njournal_buckets;
+		l = (i * 2654435769U) % ca->sb.njournal_buckets;
 
-			if (test_bit(l, bitmap))
-				break;
+		if (test_bit(l, bitmap))
+			break;
 
-			if (read_bucket(l))
-				goto bsearch;
-		}
-
-		/*
-		 * If that fails, check all the buckets we haven't checked
-		 * already
-		 */
-		pr_debug("falling back to linear search");
-
-		for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
-		     l < ca->sb.njournal_buckets;
-		     l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets,
-					    l + 1))
-			if (read_bucket(l))
-				goto bsearch;
-
-		/* no journal entries on this device? */
-		if (l == ca->sb.njournal_buckets)
-			continue;
-bsearch:
-		BUG_ON(list_empty(list));
-
-		/* Binary search */
-		m = l;
-		r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
-		pr_debug("starting binary search, l %u r %u", l, r);
-
-		while (l + 1 < r) {
-			seq = list_entry(list->prev, struct journal_replay,
-					 list)->j.seq;
-
-			m = (l + r) >> 1;
-			read_bucket(m);
-
-			if (seq != list_entry(list->prev, struct journal_replay,
-					      list)->j.seq)
-				l = m;
-			else
-				r = m;
-		}
-
-		/*
-		 * Read buckets in reverse order until we stop finding more
-		 * journal entries
-		 */
-		pr_debug("finishing up: m %u njournal_buckets %u",
-			 m, ca->sb.njournal_buckets);
-		l = m;
-
-		while (1) {
-			if (!l--)
-				l = ca->sb.njournal_buckets - 1;
-
-			if (l == m)
-				break;
-
-			if (test_bit(l, bitmap))
-				continue;
-
-			if (!read_bucket(l))
-				break;
-		}
-
-		seq = 0;
-
-		for (i = 0; i < ca->sb.njournal_buckets; i++)
-			if (ja->seq[i] > seq) {
-				seq = ja->seq[i];
-				/*
-				 * When journal_reclaim() goes to allocate for
-				 * the first time, it'll use the bucket after
-				 * ja->cur_idx
-				 */
-				ja->cur_idx = i;
-				ja->last_idx = ja->discard_idx = (i + 1) %
-					ca->sb.njournal_buckets;
-
-			}
+		if (read_bucket(l))
+			goto bsearch;
 	}
 
+	/*
+	 * If that fails, check all the buckets we haven't checked
+	 * already
+	 */
+	pr_debug("falling back to linear search\n");
+
+	for_each_clear_bit(l, bitmap, ca->sb.njournal_buckets)
+		if (read_bucket(l))
+			goto bsearch;
+
+	/* no journal entries on this device? */
+	if (l == ca->sb.njournal_buckets)
+		goto out;
+bsearch:
+	BUG_ON(list_empty(list));
+
+	/* Binary search */
+	m = l;
+	r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
+	pr_debug("starting binary search, l %u r %u\n", l, r);
+
+	while (l + 1 < r) {
+		seq = list_entry(list->prev, struct journal_replay,
+				 list)->j.seq;
+
+		m = (l + r) >> 1;
+		read_bucket(m);
+
+		if (seq != list_entry(list->prev, struct journal_replay,
+				      list)->j.seq)
+			l = m;
+		else
+			r = m;
+	}
+
+	/*
+	 * Read buckets in reverse order until we stop finding more
+	 * journal entries
+	 */
+	pr_debug("finishing up: m %u njournal_buckets %u\n",
+		 m, ca->sb.njournal_buckets);
+	l = m;
+
+	while (1) {
+		if (!l--)
+			l = ca->sb.njournal_buckets - 1;
+
+		if (l == m)
+			break;
+
+		if (test_bit(l, bitmap))
+			continue;
+
+		if (!read_bucket(l))
+			break;
+	}
+
+	seq = 0;
+
+	for (i = 0; i < ca->sb.njournal_buckets; i++)
+		if (ja->seq[i] > seq) {
+			seq = ja->seq[i];
+			/*
+			 * When journal_reclaim() goes to allocate for
+			 * the first time, it'll use the bucket after
+			 * ja->cur_idx
+			 */
+			ja->cur_idx = i;
+			ja->last_idx = ja->discard_idx = (i + 1) %
				ca->sb.njournal_buckets;
+
+		}
+
+out:
 	if (!list_empty(list))
 		c->journal.seq = list_entry(list->prev,
 					    struct journal_replay,
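
Note: the rewritten bch_journal_read() above still probes buckets in golden-ratio (Fibonacci hash) order — multiplying the loop index by 2654435769, roughly 2^32 divided by the golden ratio, scatters the first reads across the journal area so a run of buckets with valid entries is found quickly; only if that fails does it fall back to a linear scan, and a binary search then narrows down the bucket holding the newest entries. A small standalone sketch of the probe order (NBUCKETS is an arbitrary illustrative value, not read from any superblock):

/* Sketch: print the golden-ratio probe order used by bch_journal_read(). */
#include <stdio.h>
#include <stdint.h>

#define NBUCKETS 10u

int main(void)
{
	uint32_t i, l;

	for (i = 0; i < NBUCKETS; i++) {
		/* same expression as the kernel code; the multiply wraps mod 2^32 */
		l = (uint32_t)(i * 2654435769U) % NBUCKETS;
		printf("probe %u -> bucket %u\n", i, l);
	}
	return 0;
}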
@@ -317,14 +337,12 @@
 	}
 }
 
-bool is_discard_enabled(struct cache_set *s)
+static bool is_discard_enabled(struct cache_set *s)
 {
-	struct cache *ca;
-	unsigned int i;
+	struct cache *ca = s->cache;
 
-	for_each_cache(ca, s, i)
-		if (ca->discard)
-			return true;
+	if (ca->discard)
+		return true;
 
 	return false;
 }
@@ -344,10 +362,10 @@
 
 		if (n != i->j.seq) {
 			if (n == start && is_discard_enabled(s))
-				pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)",
+				pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n",
 					n, i->j.seq - 1, start, end);
 			else {
-				pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)",
+				pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n",
 					n, i->j.seq - 1, start, end);
 				ret = -EIO;
 				goto err;
@@ -377,7 +395,7 @@
 		entries++;
 	}
 
-	pr_info("journal replay done, %i keys in %i entries, seq %llu",
+	pr_info("journal replay done, %i keys in %i entries, seq %llu\n",
 		keys, entries, end);
 err:
 	while (!list_empty(list)) {
@@ -389,52 +407,164 @@
 	return ret;
 }
 
+void bch_journal_space_reserve(struct journal *j)
+{
+	j->do_reserve = true;
+}
+
 /* Journalling */
 
 static void btree_flush_write(struct cache_set *c)
 {
-	/*
-	 * Try to find the btree node with that references the oldest journal
-	 * entry, best is our current candidate and is locked if non NULL:
-	 */
-	struct btree *b, *best;
-	unsigned int i;
+	struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
+	unsigned int i, nr;
+	int ref_nr;
+	atomic_t *fifo_front_p, *now_fifo_front_p;
+	size_t mask;
 
+	if (c->journal.btree_flushing)
+		return;
+
+	spin_lock(&c->journal.flush_write_lock);
+	if (c->journal.btree_flushing) {
+		spin_unlock(&c->journal.flush_write_lock);
+		return;
+	}
+	c->journal.btree_flushing = true;
+	spin_unlock(&c->journal.flush_write_lock);
+
+	/* get the oldest journal entry and check its refcount */
+	spin_lock(&c->journal.lock);
+	fifo_front_p = &fifo_front(&c->journal.pin);
+	ref_nr = atomic_read(fifo_front_p);
+	if (ref_nr <= 0) {
+		/*
+		 * do nothing if no btree node references
+		 * the oldest journal entry
+		 */
+		spin_unlock(&c->journal.lock);
+		goto out;
+	}
+	spin_unlock(&c->journal.lock);
+
+	mask = c->journal.pin.mask;
+	nr = 0;
 	atomic_long_inc(&c->flush_write);
-retry:
-	best = NULL;
+	memset(btree_nodes, 0, sizeof(btree_nodes));
 
 	mutex_lock(&c->bucket_lock);
-	for_each_cached_btree(b, c, i)
-		if (btree_current_write(b)->journal) {
-			if (!best)
-				best = b;
-			else if (journal_pin_cmp(c,
-					btree_current_write(best)->journal,
-					btree_current_write(b)->journal)) {
-				best = b;
-			}
+	list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
+		/*
+		 * It is safe to get now_fifo_front_p without holding
+		 * c->journal.lock here, because we don't need to know
+		 * the exactly accurate value, just check whether the
+		 * front pointer of c->journal.pin is changed.
+		 */
+		now_fifo_front_p = &fifo_front(&c->journal.pin);
+		/*
+		 * If the oldest journal entry is reclaimed and front
+		 * pointer of c->journal.pin changes, it is unnecessary
+		 * to scan c->btree_cache anymore, just quit the loop and
+		 * flush out what we have already.
+		 */
+		if (now_fifo_front_p != fifo_front_p)
+			break;
+		/*
+		 * quit this loop if all matching btree nodes are
+		 * scanned and recorded in btree_nodes[] already.
+		 */
+		ref_nr = atomic_read(fifo_front_p);
+		if (nr >= ref_nr)
+			break;
+
+		if (btree_node_journal_flush(b))
+			pr_err("BUG: flush_write bit should not be set here!\n");
+
+		mutex_lock(&b->write_lock);
+
+		if (!btree_node_dirty(b)) {
+			mutex_unlock(&b->write_lock);
+			continue;
 		}
 
-	b = best;
-	if (b)
+		if (!btree_current_write(b)->journal) {
+			mutex_unlock(&b->write_lock);
+			continue;
+		}
+
+		/*
+		 * Only select the btree node which exactly references
+		 * the oldest journal entry.
+		 *
+		 * If the journal entry pointed by fifo_front_p is
+		 * reclaimed in parallel, don't worry:
+		 * - the list_for_each_xxx loop will quit when checking
+		 *   next now_fifo_front_p.
+		 * - If there are matched nodes recorded in btree_nodes[],
+		 *   they are clean now (this is why and how the oldest
+		 *   journal entry can be reclaimed). These selected nodes
+		 *   will be ignored and skipped in the following for-loop.
+		 */
+		if (((btree_current_write(b)->journal - fifo_front_p) &
+		     mask) != 0) {
+			mutex_unlock(&b->write_lock);
+			continue;
+		}
+
 		set_btree_node_journal_flush(b);
+
+		mutex_unlock(&b->write_lock);
+
+		btree_nodes[nr++] = b;
+		/*
+		 * To avoid holding c->bucket_lock too long time,
+		 * only scan for BTREE_FLUSH_NR matched btree nodes
+		 * at most. If there are more btree nodes reference
+		 * the oldest journal entry, try to flush them next
+		 * time when btree_flush_write() is called.
+		 */
+		if (nr == BTREE_FLUSH_NR)
+			break;
+	}
 	mutex_unlock(&c->bucket_lock);
 
-	if (b) {
+	for (i = 0; i < nr; i++) {
+		b = btree_nodes[i];
+		if (!b) {
+			pr_err("BUG: btree_nodes[%d] is NULL\n", i);
+			continue;
+		}
+
+		/* safe to check without holding b->write_lock */
+		if (!btree_node_journal_flush(b)) {
+			pr_err("BUG: bnode %p: journal_flush bit cleaned\n", b);
+			continue;
+		}
+
 		mutex_lock(&b->write_lock);
 		if (!btree_current_write(b)->journal) {
 			clear_bit(BTREE_NODE_journal_flush, &b->flags);
 			mutex_unlock(&b->write_lock);
-			/* We raced */
-			atomic_long_inc(&c->retry_flush_write);
-			goto retry;
+			pr_debug("bnode %p: written by others\n", b);
+			continue;
+		}
+
+		if (!btree_node_dirty(b)) {
+			clear_bit(BTREE_NODE_journal_flush, &b->flags);
+			mutex_unlock(&b->write_lock);
+			pr_debug("bnode %p: dirty bit cleaned by others\n", b);
+			continue;
 		}
 
 		__bch_btree_node_write(b, NULL);
 		clear_bit(BTREE_NODE_journal_flush, &b->flags);
 		mutex_unlock(&b->write_lock);
 	}
+
+out:
+	spin_lock(&c->journal.flush_write_lock);
+	c->journal.btree_flushing = false;
+	spin_unlock(&c->journal.flush_write_lock);
 }
 
 #define last_seq(j)	((j)->seq - fifo_used(&(j)->pin) + 1)
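
Note: the new btree_flush_write() above only picks btree nodes whose journal pin is exactly the oldest entry of the c->journal.pin FIFO, and it tests that with pointer arithmetic: because the FIFO's backing array has a power-of-two size, ((entry - front) & mask) is zero only for the front slot, regardless of wrap-around. A standalone sketch of that test (sizes and names are made up, not the kernel FIFO API):

/* Sketch: detect whether a pointer into a power-of-two ring refers to the
 * oldest (front) slot, the same shape as
 * ((btree_current_write(b)->journal - fifo_front_p) & mask) == 0.
 */
#include <stdio.h>
#include <stddef.h>

#define RING_SIZE 8u			/* must be a power of two */

struct ring {
	int data[RING_SIZE];
	size_t mask;			/* RING_SIZE - 1 */
	size_t front;			/* running index of the oldest element */
};

static int is_oldest(const struct ring *r, const int *entry)
{
	const int *front = &r->data[r->front & r->mask];

	/* the pointer difference wraps correctly because RING_SIZE is a power of 2 */
	return (((size_t)(entry - front)) & r->mask) == 0;
}

int main(void)
{
	struct ring r = { .mask = RING_SIZE - 1, .front = 13 };	/* front has wrapped */
	const int *oldest = &r.data[r.front & r.mask];		/* slot 5 */
	const int *other  = &r.data[(r.front + 3) & r.mask];	/* slot 0 */

	printf("oldest slot matches: %d\n", is_oldest(&r, oldest));	/* prints 1 */
	printf("other slot matches:  %d\n", is_oldest(&r, other));	/* prints 0 */
	return 0;
}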
@@ -478,7 +608,7 @@
 			ca->sb.njournal_buckets;
 
 		atomic_set(&ja->discard_in_flight, DISCARD_READY);
-		/* fallthrough */
+		fallthrough;
 
 	case DISCARD_READY:
 		if (ja->discard_idx == ja->last_idx)
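
Note: the hunk above replaces the /* fallthrough */ comment with the kernel's fallthrough pseudo-keyword, which expands to __attribute__((__fallthrough__)) and lets -Wimplicit-fallthrough verify that continuing from the DISCARD_IN_FLIGHT case into DISCARD_READY is intentional. A standalone sketch of the same construct outside the kernel tree (the compatibility define and state values here are illustrative):

/* Sketch: explicit switch fall-through with a checked pseudo-keyword. */
#include <stdio.h>

#ifndef __has_attribute
#define __has_attribute(x) 0
#endif

#if __has_attribute(__fallthrough__)
#define fallthrough	__attribute__((__fallthrough__))
#else
#define fallthrough	do {} while (0)	/* fallback for older compilers */
#endif

static const char *discard_state(int state)
{
	switch (state) {
	case 0:				/* think DISCARD_IN_FLIGHT */
		fallthrough;		/* deliberately continue into the next case */
	case 1:				/* think DISCARD_READY */
		return "ready or finishing";
	default:
		return "idle";
	}
}

int main(void)
{
	printf("%s\n", discard_state(0));	/* prints "ready or finishing" */
	return 0;
}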
@@ -500,12 +630,31 @@
 	}
 }
 
+static unsigned int free_journal_buckets(struct cache_set *c)
+{
+	struct journal *j = &c->journal;
+	struct cache *ca = c->cache;
+	struct journal_device *ja = &c->cache->journal;
+	unsigned int n;
+
+	/* In case njournal_buckets is not power of 2 */
+	if (ja->cur_idx >= ja->discard_idx)
+		n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx;
+	else
+		n = ja->discard_idx - ja->cur_idx;
+
+	if (n > (1 + j->do_reserve))
+		return n - (1 + j->do_reserve);
+
+	return 0;
+}
+
 static void journal_reclaim(struct cache_set *c)
 {
 	struct bkey *k = &c->journal.key;
-	struct cache *ca;
+	struct cache *ca = c->cache;
 	uint64_t last_seq;
-	unsigned int iter, n = 0;
+	struct journal_device *ja = &ca->journal;
 	atomic_t p __maybe_unused;
 
 	atomic_long_inc(&c->reclaim);
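
Note: free_journal_buckets() added above measures how far ja->cur_idx may advance before it catches up with ja->discard_idx on the circular journal, then holds back one bucket, or two once bch_journal_space_reserve() has set do_reserve. A standalone sketch of the same arithmetic with made-up numbers:

/* Sketch of the ring arithmetic in free_journal_buckets(); values are illustrative. */
#include <stdio.h>
#include <stdbool.h>

static unsigned int free_buckets(unsigned int nbuckets, unsigned int cur_idx,
				 unsigned int discard_idx, bool do_reserve)
{
	unsigned int n;

	/* distance from cur_idx forward to discard_idx on the ring */
	if (cur_idx >= discard_idx)
		n = nbuckets + discard_idx - cur_idx;
	else
		n = discard_idx - cur_idx;

	/* hold back one bucket, plus one more when a reserve is requested */
	if (n > (1u + do_reserve))
		return n - (1u + do_reserve);

	return 0;
}

int main(void)
{
	/* 8 journal buckets, writer at 6, discard caught up to 2 */
	printf("%u\n", free_buckets(8, 6, 2, false));	/* 8 + 2 - 6 - 1 = 3 */
	printf("%u\n", free_buckets(8, 6, 2, true));	/* one more held in reserve: 2 */
	printf("%u\n", free_buckets(8, 3, 4, true));	/* 4 - 3 = 1 <= 2, so 0 */
	return 0;
}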
@@ -517,45 +666,29 @@
 
 	/* Update last_idx */
 
-	for_each_cache(ca, c, iter) {
-		struct journal_device *ja = &ca->journal;
+	while (ja->last_idx != ja->cur_idx &&
+	       ja->seq[ja->last_idx] < last_seq)
+		ja->last_idx = (ja->last_idx + 1) %
+			ca->sb.njournal_buckets;
 
-		while (ja->last_idx != ja->cur_idx &&
-		       ja->seq[ja->last_idx] < last_seq)
-			ja->last_idx = (ja->last_idx + 1) %
-				ca->sb.njournal_buckets;
-	}
-
-	for_each_cache(ca, c, iter)
-		do_journal_discard(ca);
+	do_journal_discard(ca);
 
 	if (c->journal.blocks_free)
 		goto out;
 
-	/*
-	 * Allocate:
-	 * XXX: Sort by free journal space
-	 */
+	if (!free_journal_buckets(c))
+		goto out;
 
-	for_each_cache(ca, c, iter) {
-		struct journal_device *ja = &ca->journal;
-		unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+	ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+	k->ptr[0] = MAKE_PTR(0,
			     bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
			     ca->sb.nr_this_dev);
+	atomic_long_inc(&c->reclaimed_journal_buckets);
 
-		/* No space available on this device */
-		if (next == ja->discard_idx)
-			continue;
+	bkey_init(k);
+	SET_KEY_PTRS(k, 1);
+	c->journal.blocks_free = ca->sb.bucket_size >> c->block_bits;
 
-		ja->cur_idx = next;
-		k->ptr[n++] = MAKE_PTR(0,
-				       bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
-				       ca->sb.nr_this_dev);
-	}
-
-	if (n) {
-		bkey_init(k);
-		SET_KEY_PTRS(k, n);
-		c->journal.blocks_free = c->sb.bucket_size >> c->block_bits;
-	}
 out:
 	if (!journal_full(&c->journal))
 		__closure_wake_up(&c->journal.wait);
@@ -582,7 +715,7 @@
 	j->cur->data->keys = 0;
 
 	if (fifo_full(&j->pin))
-		pr_debug("journal_pin full (%zu)", fifo_used(&j->pin));
+		pr_debug("journal_pin full (%zu)\n", fifo_used(&j->pin));
 }
 
 static void journal_write_endio(struct bio *bio)
@@ -619,11 +752,11 @@
 	__releases(c->journal.lock)
 {
 	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
-	struct cache *ca;
+	struct cache *ca = c->cache;
 	struct journal_write *w = c->journal.cur;
 	struct bkey *k = &c->journal.key;
-	unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) *
-		c->sb.block_size;
+	unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) *
+		ca->sb.block_size;
 
 	struct bio *bio;
 	struct bio_list list;
@@ -642,17 +775,15 @@
 		return;
 	}
 
-	c->journal.blocks_free -= set_blocks(w->data, block_bytes(c));
+	c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca));
 
 	w->data->btree_level = c->root->level;
 
 	bkey_copy(&w->data->btree_root, &c->root->key);
 	bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket);
 
-	for_each_cache(ca, c, i)
-		w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
-
-	w->data->magic		= jset_magic(&c->sb);
+	w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
+	w->data->magic		= jset_magic(&ca->sb);
 	w->data->version	= BCACHE_JSET_VERSION;
 	w->data->last_seq	= last_seq(&c->journal);
 	w->data->csum		= csum_set(w->data);
@@ -674,7 +805,7 @@
 				  REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
 		bch_bio_map(bio, w->data);
 
-		trace_bcache_journal_write(bio);
+		trace_bcache_journal_write(bio, w->data->keys);
 		bio_list_add(&list, bio);
 
 		SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + sectors);
@@ -728,6 +859,7 @@
 	size_t sectors;
 	struct closure cl;
 	bool wait = false;
+	struct cache *ca = c->cache;
 
 	closure_init_stack(&cl);
 
@@ -737,10 +869,10 @@
 		struct journal_write *w = c->journal.cur;
 
 		sectors = __set_blocks(w->data, w->data->keys + nkeys,
-				       block_bytes(c)) * c->sb.block_size;
+				       block_bytes(ca)) * ca->sb.block_size;
 
 		if (sectors <= min_t(size_t,
-				     c->journal.blocks_free * c->sb.block_size,
+				     c->journal.blocks_free * ca->sb.block_size,
 				     PAGE_SECTORS << JSET_BITS))
 			return w;
 
@@ -805,7 +937,7 @@
 	if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags)))
 		return NULL;
 
-	if (!CACHE_SYNC(&c->sb))
+	if (!CACHE_SYNC(&c->cache->sb))
 		return NULL;
 
 	w = journal_wait_for_write(c, bch_keylist_nkeys(keys));
@@ -821,8 +953,8 @@
 		journal_try_write(c);
 	} else if (!w->dirty) {
 		w->dirty = true;
-		schedule_delayed_work(&c->journal.work,
-				      msecs_to_jiffies(c->journal_delay_ms));
+		queue_delayed_work(bch_flush_wq, &c->journal.work,
+				   msecs_to_jiffies(c->journal_delay_ms));
 		spin_unlock(&c->journal.lock);
 	} else {
 		spin_unlock(&c->journal.lock);
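
Note: the hunk above queues the delayed journal write on a dedicated workqueue (bch_flush_wq) via queue_delayed_work() instead of schedule_delayed_work() on the shared system workqueue, presumably so journal flushes are not held up behind unrelated system_wq work. A minimal kernel-module sketch of that pattern; the demo workqueue name and the WQ_MEM_RECLAIM flag are illustrative choices, not taken from bcache:

/* Sketch: delayed work on a dedicated workqueue instead of system_wq. */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *demo_flush_wq;
static struct delayed_work demo_work;

static void demo_work_fn(struct work_struct *w)
{
	pr_info("demo: delayed flush ran\n");
}

static int __init demo_init(void)
{
	/* WQ_MEM_RECLAIM chosen for the demo: flush work may be needed for forward progress */
	demo_flush_wq = alloc_workqueue("demo_flush", WQ_MEM_RECLAIM, 0);
	if (!demo_flush_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&demo_work, demo_work_fn);
	queue_delayed_work(demo_flush_wq, &demo_work, msecs_to_jiffies(100));
	return 0;
}

static void __exit demo_exit(void)
{
	cancel_delayed_work_sync(&demo_work);
	destroy_workqueue(demo_flush_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");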
@@ -856,6 +988,7 @@
 	struct journal *j = &c->journal;
 
 	spin_lock_init(&j->lock);
+	spin_lock_init(&j->flush_write_lock);
 	INIT_DELAYED_WORK(&j->work, journal_write_work);
 
 	c->journal_delay_ms = 100;