2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/drivers/md/bcache/super.c
@@ -13,20 +13,21 @@
1313 #include "extents.h"
1414 #include "request.h"
1515 #include "writeback.h"
16
+#include "features.h"
1617
1718 #include <linux/blkdev.h>
18
-#include <linux/buffer_head.h>
1919 #include <linux/debugfs.h>
2020 #include <linux/genhd.h>
2121 #include <linux/idr.h>
2222 #include <linux/kthread.h>
23
+#include <linux/workqueue.h>
2324 #include <linux/module.h>
2425 #include <linux/random.h>
2526 #include <linux/reboot.h>
2627 #include <linux/sysfs.h>
2728
28
-MODULE_LICENSE("GPL");
29
-MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
29
+unsigned int bch_cutoff_writeback;
30
+unsigned int bch_cutoff_writeback_sync;
3031
3132 static const char bcache_magic[] = {
3233 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
@@ -40,6 +41,7 @@
4041
4142 static struct kobject *bcache_kobj;
4243 struct mutex bch_register_lock;
44
+bool bcache_is_reboot;
4345 LIST_HEAD(bch_cache_sets);
4446 static LIST_HEAD(uncached_devices);
4547
@@ -47,7 +49,9 @@
4749 static DEFINE_IDA(bcache_device_idx);
4850 static wait_queue_head_t unregister_wait;
4951 struct workqueue_struct *bcache_wq;
52
+struct workqueue_struct *bch_flush_wq;
5053 struct workqueue_struct *bch_journal_wq;
54
+
5155
5256 #define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE)
5357 /* limitation of partitions number on single bcache device */
@@ -57,18 +61,121 @@
5761
5862 /* Superblock */
5963
60
-static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
61
- struct page **res)
64
+static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s)
65
+{
66
+ unsigned int bucket_size = le16_to_cpu(s->bucket_size);
67
+
68
+ if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) {
69
+ if (bch_has_feature_large_bucket(sb)) {
70
+ unsigned int max, order;
71
+
72
+ max = sizeof(unsigned int) * BITS_PER_BYTE - 1;
73
+ order = le16_to_cpu(s->bucket_size);
74
+ /*
75
+ * bcache tool will make sure the overflow won't
76
+ * happen, an error message here is enough.
77
+ */
78
+ if (order > max)
79
+ pr_err("Bucket size (1 << %u) overflows\n",
80
+ order);
81
+ bucket_size = 1 << order;
82
+ } else if (bch_has_feature_obso_large_bucket(sb)) {
83
+ bucket_size +=
84
+ le16_to_cpu(s->obso_bucket_size_hi) << 16;
85
+ }
86
+ }
87
+
88
+ return bucket_size;
89
+}
90
+
91
+static const char *read_super_common(struct cache_sb *sb, struct block_device *bdev,
92
+ struct cache_sb_disk *s)
6293 {
6394 const char *err;
64
- struct cache_sb *s;
65
- struct buffer_head *bh = __bread(bdev, 1, SB_SIZE);
6695 unsigned int i;
6796
68
- if (!bh)
69
- return "IO error";
97
+ sb->first_bucket= le16_to_cpu(s->first_bucket);
98
+ sb->nbuckets = le64_to_cpu(s->nbuckets);
99
+ sb->bucket_size = get_bucket_size(sb, s);
70100
71
- s = (struct cache_sb *) bh->b_data;
101
+ sb->nr_in_set = le16_to_cpu(s->nr_in_set);
102
+ sb->nr_this_dev = le16_to_cpu(s->nr_this_dev);
103
+
104
+ err = "Too many journal buckets";
105
+ if (sb->keys > SB_JOURNAL_BUCKETS)
106
+ goto err;
107
+
108
+ err = "Too many buckets";
109
+ if (sb->nbuckets > LONG_MAX)
110
+ goto err;
111
+
112
+ err = "Not enough buckets";
113
+ if (sb->nbuckets < 1 << 7)
114
+ goto err;
115
+
116
+ err = "Bad block size (not power of 2)";
117
+ if (!is_power_of_2(sb->block_size))
118
+ goto err;
119
+
120
+ err = "Bad block size (larger than page size)";
121
+ if (sb->block_size > PAGE_SECTORS)
122
+ goto err;
123
+
124
+ err = "Bad bucket size (not power of 2)";
125
+ if (!is_power_of_2(sb->bucket_size))
126
+ goto err;
127
+
128
+ err = "Bad bucket size (smaller than page size)";
129
+ if (sb->bucket_size < PAGE_SECTORS)
130
+ goto err;
131
+
132
+ err = "Invalid superblock: device too small";
133
+ if (get_capacity(bdev->bd_disk) <
134
+ sb->bucket_size * sb->nbuckets)
135
+ goto err;
136
+
137
+ err = "Bad UUID";
138
+ if (bch_is_zero(sb->set_uuid, 16))
139
+ goto err;
140
+
141
+ err = "Bad cache device number in set";
142
+ if (!sb->nr_in_set ||
143
+ sb->nr_in_set <= sb->nr_this_dev ||
144
+ sb->nr_in_set > MAX_CACHES_PER_SET)
145
+ goto err;
146
+
147
+ err = "Journal buckets not sequential";
148
+ for (i = 0; i < sb->keys; i++)
149
+ if (sb->d[i] != sb->first_bucket + i)
150
+ goto err;
151
+
152
+ err = "Too many journal buckets";
153
+ if (sb->first_bucket + sb->keys > sb->nbuckets)
154
+ goto err;
155
+
156
+ err = "Invalid superblock: first bucket comes before end of super";
157
+ if (sb->first_bucket * sb->bucket_size < 16)
158
+ goto err;
159
+
160
+ err = NULL;
161
+err:
162
+ return err;
163
+}
164
+
165
+
166
+static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
167
+ struct cache_sb_disk **res)
168
+{
169
+ const char *err;
170
+ struct cache_sb_disk *s;
171
+ struct page *page;
172
+ unsigned int i;
173
+
174
+ page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
175
+ SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
176
+ if (IS_ERR(page))
177
+ return "IO error";
178
+ s = page_address(page) + offset_in_page(SB_OFFSET);
72179
73180 sb->offset = le64_to_cpu(s->offset);
74181 sb->version = le64_to_cpu(s->version);
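
Editor's note on the hunk above: the new get_bucket_size() decodes the on-disk bucket size three different ways depending on the feature bits. A minimal userspace sketch of the same rules may help when reading it; decode_bucket_size_example() and its parameters are hypothetical names, only the three cases are taken from the patch.

#include <stdint.h>
#include <stdio.h>

/* Editor's sketch, not part of the patch. */
static unsigned int decode_bucket_size_example(int has_large_bucket,
					       int has_obso_large_bucket,
					       uint16_t on_disk,
					       uint16_t obso_hi)
{
	if (has_large_bucket)		/* field stores log2 of the bucket size */
		return 1U << on_disk;
	if (has_obso_large_bucket)	/* obsoleted layout keeps the high 16 bits apart */
		return (unsigned int)on_disk + ((unsigned int)obso_hi << 16);
	return on_disk;			/* plain 16-bit bucket size in sectors */
}

int main(void)
{
	/* order 11 with the large_bucket feature -> 2048-sector buckets */
	printf("%u\n", decode_bucket_size_example(1, 0, 11, 0));
	return 0;
}
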
@@ -81,24 +188,20 @@
81188 sb->flags = le64_to_cpu(s->flags);
82189 sb->seq = le64_to_cpu(s->seq);
83190 sb->last_mount = le32_to_cpu(s->last_mount);
84
- sb->first_bucket = le16_to_cpu(s->first_bucket);
85191 sb->keys = le16_to_cpu(s->keys);
86192
87193 for (i = 0; i < SB_JOURNAL_BUCKETS; i++)
88194 sb->d[i] = le64_to_cpu(s->d[i]);
89195
90
- pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
196
+ pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n",
91197 sb->version, sb->flags, sb->seq, sb->keys);
92198
93
- err = "Not a bcache superblock";
199
+ err = "Not a bcache superblock (bad offset)";
94200 if (sb->offset != SB_SECTOR)
95201 goto err;
96202
203
+ err = "Not a bcache superblock (bad magic)";
97204 if (memcmp(sb->magic, bcache_magic, 16))
98
- goto err;
99
-
100
- err = "Too many journal buckets";
101
- if (sb->keys > SB_JOURNAL_BUCKETS)
102205 goto err;
103206
104207 err = "Bad checksum";
@@ -120,6 +223,7 @@
120223 sb->data_offset = BDEV_DATA_START_DEFAULT;
121224 break;
122225 case BCACHE_SB_VERSION_BDEV_WITH_OFFSET:
226
+ case BCACHE_SB_VERSION_BDEV_WITH_FEATURES:
123227 sb->data_offset = le64_to_cpu(s->data_offset);
124228
125229 err = "Bad data offset";
@@ -129,55 +233,35 @@
129233 break;
130234 case BCACHE_SB_VERSION_CDEV:
131235 case BCACHE_SB_VERSION_CDEV_WITH_UUID:
132
- sb->nbuckets = le64_to_cpu(s->nbuckets);
133
- sb->bucket_size = le16_to_cpu(s->bucket_size);
236
+ err = read_super_common(sb, bdev, s);
237
+ if (err)
238
+ goto err;
239
+ break;
240
+ case BCACHE_SB_VERSION_CDEV_WITH_FEATURES:
241
+ /*
242
+ * Feature bits are needed in read_super_common(),
243
+ * convert them firstly.
244
+ */
245
+ sb->feature_compat = le64_to_cpu(s->feature_compat);
246
+ sb->feature_incompat = le64_to_cpu(s->feature_incompat);
247
+ sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat);
134248
135
- sb->nr_in_set = le16_to_cpu(s->nr_in_set);
136
- sb->nr_this_dev = le16_to_cpu(s->nr_this_dev);
137
-
138
- err = "Too many buckets";
139
- if (sb->nbuckets > LONG_MAX)
249
+ /* Check incompatible features */
250
+ err = "Unsupported compatible feature found";
251
+ if (bch_has_unknown_compat_features(sb))
140252 goto err;
141253
142
- err = "Not enough buckets";
143
- if (sb->nbuckets < 1 << 7)
254
+ err = "Unsupported read-only compatible feature found";
255
+ if (bch_has_unknown_ro_compat_features(sb))
144256 goto err;
145257
146
- err = "Bad block/bucket size";
147
- if (!is_power_of_2(sb->block_size) ||
148
- sb->block_size > PAGE_SECTORS ||
149
- !is_power_of_2(sb->bucket_size) ||
150
- sb->bucket_size < PAGE_SECTORS)
258
+ err = "Unsupported incompatible feature found";
259
+ if (bch_has_unknown_incompat_features(sb))
151260 goto err;
152261
153
- err = "Invalid superblock: device too small";
154
- if (get_capacity(bdev->bd_disk) <
155
- sb->bucket_size * sb->nbuckets)
262
+ err = read_super_common(sb, bdev, s);
263
+ if (err)
156264 goto err;
157
-
158
- err = "Bad UUID";
159
- if (bch_is_zero(sb->set_uuid, 16))
160
- goto err;
161
-
162
- err = "Bad cache device number in set";
163
- if (!sb->nr_in_set ||
164
- sb->nr_in_set <= sb->nr_this_dev ||
165
- sb->nr_in_set > MAX_CACHES_PER_SET)
166
- goto err;
167
-
168
- err = "Journal buckets not sequential";
169
- for (i = 0; i < sb->keys; i++)
170
- if (sb->d[i] != sb->first_bucket + i)
171
- goto err;
172
-
173
- err = "Too many journal buckets";
174
- if (sb->first_bucket + sb->keys > sb->nbuckets)
175
- goto err;
176
-
177
- err = "Invalid superblock: first bucket comes before end of super";
178
- if (sb->first_bucket * sb->bucket_size < 16)
179
- goto err;
180
-
181265 break;
182266 default:
183267 err = "Unsupported superblock version";
@@ -185,35 +269,34 @@
185269 }
186270
187271 sb->last_mount = (u32)ktime_get_real_seconds();
188
- err = NULL;
189
-
190
- get_page(bh->b_page);
191
- *res = bh->b_page;
272
+ *res = s;
273
+ return NULL;
192274 err:
193
- put_bh(bh);
275
+ put_page(page);
194276 return err;
195277 }
196278
197279 static void write_bdev_super_endio(struct bio *bio)
198280 {
199281 struct cached_dev *dc = bio->bi_private;
200
- /* XXX: error checking */
282
+
283
+ if (bio->bi_status)
284
+ bch_count_backing_io_errors(dc, bio);
201285
202286 closure_put(&dc->sb_write);
203287 }
204288
205
-static void __write_super(struct cache_sb *sb, struct bio *bio)
289
+static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
290
+ struct bio *bio)
206291 {
207
- struct cache_sb *out = page_address(bio_first_page_all(bio));
208292 unsigned int i;
209293
294
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
210295 bio->bi_iter.bi_sector = SB_SECTOR;
211
- bio->bi_iter.bi_size = SB_SIZE;
212
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
213
- bch_bio_map(bio, NULL);
296
+ __bio_add_page(bio, virt_to_page(out), SB_SIZE,
297
+ offset_in_page(out));
214298
215299 out->offset = cpu_to_le64(sb->offset);
216
- out->version = cpu_to_le64(sb->version);
217300
218301 memcpy(out->uuid, sb->uuid, 16);
219302 memcpy(out->set_uuid, sb->set_uuid, 16);
@@ -229,9 +312,16 @@
229312 for (i = 0; i < sb->keys; i++)
230313 out->d[i] = cpu_to_le64(sb->d[i]);
231314
315
+ if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) {
316
+ out->feature_compat = cpu_to_le64(sb->feature_compat);
317
+ out->feature_incompat = cpu_to_le64(sb->feature_incompat);
318
+ out->feature_ro_compat = cpu_to_le64(sb->feature_ro_compat);
319
+ }
320
+
321
+ out->version = cpu_to_le64(sb->version);
232322 out->csum = csum_set(out);
233323
234
- pr_debug("ver %llu, flags %llu, seq %llu",
324
+ pr_debug("ver %llu, flags %llu, seq %llu\n",
235325 sb->version, sb->flags, sb->seq);
236326
237327 submit_bio(bio);
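
Editor's note on the hunk above: __write_super() no longer maps the bio by hand; it attaches the in-memory superblock with a single bvec via __bio_add_page(). A hedged sketch of that pattern follows; write_blob_example() and its parameters are hypothetical, while bio_init() (pre-5.18 signature), bio_set_dev(), __bio_add_page(), virt_to_page() and offset_in_page() are the real helpers used above.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>

/* Editor's sketch, not part of the patch. */
static void write_blob_example(struct block_device *bdev, sector_t sector,
			       void *buf, unsigned int len,
			       struct bio *bio, struct bio_vec *bv,
			       bio_end_io_t *done, void *private)
{
	bio_init(bio, bv, 1);		/* one segment is enough */
	bio_set_dev(bio, bdev);
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = done;
	bio->bi_private = private;
	/* buf must not cross a page boundary when using a single bvec */
	__bio_add_page(bio, virt_to_page(buf), len, offset_in_page(buf));
	submit_bio(bio);
}
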
@@ -252,14 +342,14 @@
252342 down(&dc->sb_write_mutex);
253343 closure_init(cl, parent);
254344
255
- bio_reset(bio);
345
+ bio_init(bio, dc->sb_bv, 1);
256346 bio_set_dev(bio, dc->bdev);
257347 bio->bi_end_io = write_bdev_super_endio;
258348 bio->bi_private = dc;
259349
260350 closure_get(cl);
261351 /* I/O request sent to backing device */
262
- __write_super(&dc->sb, bio);
352
+ __write_super(&dc->sb, dc->sb_disk, bio);
263353
264354 closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
265355 }
@@ -284,31 +374,25 @@
284374 void bcache_write_super(struct cache_set *c)
285375 {
286376 struct closure *cl = &c->sb_write;
287
- struct cache *ca;
288
- unsigned int i;
377
+ struct cache *ca = c->cache;
378
+ struct bio *bio = &ca->sb_bio;
379
+ unsigned int version = BCACHE_SB_VERSION_CDEV_WITH_UUID;
289380
290381 down(&c->sb_write_mutex);
291382 closure_init(cl, &c->cl);
292383
293
- c->sb.seq++;
384
+ ca->sb.seq++;
294385
295
- for_each_cache(ca, c, i) {
296
- struct bio *bio = &ca->sb_bio;
386
+ if (ca->sb.version < version)
387
+ ca->sb.version = version;
297388
298
- ca->sb.version = BCACHE_SB_VERSION_CDEV_WITH_UUID;
299
- ca->sb.seq = c->sb.seq;
300
- ca->sb.last_mount = c->sb.last_mount;
389
+ bio_init(bio, ca->sb_bv, 1);
390
+ bio_set_dev(bio, ca->bdev);
391
+ bio->bi_end_io = write_super_endio;
392
+ bio->bi_private = ca;
301393
302
- SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb));
303
-
304
- bio_reset(bio);
305
- bio_set_dev(bio, ca->bdev);
306
- bio->bi_end_io = write_super_endio;
307
- bio->bi_private = ca;
308
-
309
- closure_get(cl);
310
- __write_super(&ca->sb, bio);
311
- }
394
+ closure_get(cl);
395
+ __write_super(&ca->sb, ca->sb_disk, bio);
312396
313397 closure_return_with_destructor(cl, bcache_write_super_unlock);
314398 }
@@ -362,11 +446,11 @@
362446 }
363447
364448 bch_extent_to_text(buf, sizeof(buf), k);
365
- pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? "wrote" : "read", buf);
449
+ pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf);
366450
367451 for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
368452 if (!bch_is_zero(u->uuid, 16))
369
- pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u",
453
+ pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n",
370454 u - c->uuids, u->uuid, u->label,
371455 u->first_reg, u->last_reg, u->invalidated);
372456
@@ -418,20 +502,21 @@
418502 {
419503 BKEY_PADDED(key) k;
420504 struct closure cl;
421
- struct cache *ca;
505
+ struct cache *ca = c->cache;
506
+ unsigned int size;
422507
423508 closure_init_stack(&cl);
424509 lockdep_assert_held(&bch_register_lock);
425510
426
- if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true))
511
+ if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, true))
427512 return 1;
428513
429
- SET_KEY_SIZE(&k.key, c->sb.bucket_size);
514
+ size = meta_bucket_pages(&ca->sb) * PAGE_SECTORS;
515
+ SET_KEY_SIZE(&k.key, size);
430516 uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl);
431517 closure_sync(&cl);
432518
433519 /* Only one bucket used for uuid write */
434
- ca = PTR_CACHE(c, &k.key, 0);
435520 atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written);
436521
437522 bkey_copy(&c->uuid_bucket, &k.key);
@@ -514,7 +599,7 @@
514599
515600 bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size;
516601 bio_set_dev(bio, ca->bdev);
517
- bio->bi_iter.bi_size = bucket_bytes(ca);
602
+ bio->bi_iter.bi_size = meta_bucket_bytes(&ca->sb);
518603
519604 bio->bi_end_io = prio_endio;
520605 bio->bi_private = ca;
@@ -531,7 +616,7 @@
531616 struct bucket *b;
532617 struct closure cl;
533618
534
- pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu",
619
+ pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n",
535620 fifo_used(&ca->free[RESERVE_PRIO]),
536621 fifo_used(&ca->free[RESERVE_NONE]),
537622 fifo_used(&ca->free_inc));
@@ -572,7 +657,7 @@
572657
573658 p->next_bucket = ca->prio_buckets[i + 1];
574659 p->magic = pset_magic(&ca->sb);
575
- p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8);
660
+ p->csum = bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8);
576661
577662 bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait);
578663 BUG_ON(bucket == -1);
@@ -606,12 +691,13 @@
606691 return 0;
607692 }
608693
609
-static void prio_read(struct cache *ca, uint64_t bucket)
694
+static int prio_read(struct cache *ca, uint64_t bucket)
610695 {
611696 struct prio_set *p = ca->disk_buckets;
612697 struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
613698 struct bucket *b;
614699 unsigned int bucket_nr = 0;
700
+ int ret = -EIO;
615701
616702 for (b = ca->buckets;
617703 b < ca->buckets + ca->sb.nbuckets;
@@ -624,11 +710,15 @@
624710 prio_io(ca, bucket, REQ_OP_READ, 0);
625711
626712 if (p->csum !=
627
- bch_crc64(&p->magic, bucket_bytes(ca) - 8))
628
- pr_warn("bad csum reading priorities");
713
+ bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8)) {
714
+ pr_warn("bad csum reading priorities\n");
715
+ goto out;
716
+ }
629717
630
- if (p->magic != pset_magic(&ca->sb))
631
- pr_warn("bad magic reading priorities");
718
+ if (p->magic != pset_magic(&ca->sb)) {
719
+ pr_warn("bad magic reading priorities\n");
720
+ goto out;
721
+ }
632722
633723 bucket = p->next_bucket;
634724 d = p->data;
@@ -637,6 +727,10 @@
637727 b->prio = le16_to_cpu(d->prio);
638728 b->gen = b->last_gc = d->gen;
639729 }
730
+
731
+ ret = 0;
732
+out:
733
+ return ret;
640734 }
641735
642736 /* Bcache device */
@@ -667,7 +761,16 @@
667761 return d->ioctl(d, mode, cmd, arg);
668762 }
669763
670
-static const struct block_device_operations bcache_ops = {
764
+static const struct block_device_operations bcache_cached_ops = {
765
+ .submit_bio = cached_dev_submit_bio,
766
+ .open = open_dev,
767
+ .release = release_dev,
768
+ .ioctl = ioctl_dev,
769
+ .owner = THIS_MODULE,
770
+};
771
+
772
+static const struct block_device_operations bcache_flash_ops = {
773
+ .submit_bio = flash_dev_submit_bio,
671774 .open = open_dev,
672775 .release = release_dev,
673776 .ioctl = ioctl_dev,
@@ -677,6 +780,11 @@
677780 void bcache_device_stop(struct bcache_device *d)
678781 {
679782 if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags))
783
+ /*
784
+ * closure_fn set to
785
+ * - cached device: cached_dev_flush()
786
+ * - flash dev: flash_dev_flush()
787
+ */
680788 closure_queue(&d->cl);
681789 }
682790
@@ -685,32 +793,33 @@
685793 lockdep_assert_held(&bch_register_lock);
686794
687795 if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) {
688
- unsigned int i;
689
- struct cache *ca;
796
+ struct cache *ca = d->c->cache;
690797
691798 sysfs_remove_link(&d->c->kobj, d->name);
692799 sysfs_remove_link(&d->kobj, "cache");
693800
694
- for_each_cache(ca, d->c, i)
695
- bd_unlink_disk_holder(ca->bdev, d->disk);
801
+ bd_unlink_disk_holder(ca->bdev, d->disk);
696802 }
697803 }
698804
699805 static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
700806 const char *name)
701807 {
702
- unsigned int i;
703
- struct cache *ca;
808
+ struct cache *ca = c->cache;
809
+ int ret;
704810
705
- for_each_cache(ca, d->c, i)
706
- bd_link_disk_holder(ca->bdev, d->disk);
811
+ bd_link_disk_holder(ca->bdev, d->disk);
707812
708813 snprintf(d->name, BCACHEDEVNAME_SIZE,
709814 "%s%u", name, d->id);
710815
711
- WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
712
- sysfs_create_link(&c->kobj, &d->kobj, d->name),
713
- "Couldn't create device <-> cache set symlinks");
816
+ ret = sysfs_create_link(&d->kobj, &c->kobj, "cache");
817
+ if (ret < 0)
818
+ pr_err("Couldn't create device -> cache set symlink\n");
819
+
820
+ ret = sysfs_create_link(&c->kobj, &d->kobj, d->name);
821
+ if (ret < 0)
822
+ pr_err("Couldn't create cache set -> device symlink\n");
714823
715824 clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
716825 }
@@ -767,9 +876,9 @@
767876 lockdep_assert_held(&bch_register_lock);
768877
769878 if (disk)
770
- pr_info("%s stopped", disk->disk_name);
879
+ pr_info("%s stopped\n", disk->disk_name);
771880 else
772
- pr_err("bcache device (NULL gendisk) stopped");
881
+ pr_err("bcache device (NULL gendisk) stopped\n");
773882
774883 if (d->c)
775884 bcache_device_detach(d);
@@ -797,24 +906,25 @@
797906 }
798907
799908 static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
800
- sector_t sectors)
909
+ sector_t sectors, struct block_device *cached_bdev,
910
+ const struct block_device_operations *ops)
801911 {
802912 struct request_queue *q;
803913 const size_t max_stripes = min_t(size_t, INT_MAX,
804914 SIZE_MAX / sizeof(atomic_t));
805
- size_t n;
915
+ uint64_t n;
806916 int idx;
807917
808918 if (!d->stripe_size)
809919 d->stripe_size = 1 << 31;
810920
811
- d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
812
-
813
- if (!d->nr_stripes || d->nr_stripes > max_stripes) {
814
- pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
815
- (unsigned int)d->nr_stripes);
921
+ n = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
922
+ if (!n || n > max_stripes) {
923
+ pr_err("nr_stripes too large or invalid: %llu (start sector beyond end of disk?)\n",
924
+ n);
816925 return -ENOMEM;
817926 }
927
+ d->nr_stripes = n;
818928
819929 n = d->nr_stripes * sizeof(atomic_t);
820930 d->stripe_sectors_dirty = kvzalloc(n, GFP_KERNEL);
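
Editor's note on the hunk above: bcache_device_init() now computes the stripe count in a 64-bit local and validates it before assigning d->nr_stripes, so an oversized backing device is rejected instead of being silently truncated. A small userspace sketch of the same check follows; stripe_count_ok_example() and the sizeof(int) stand-in for sizeof(atomic_t) are assumptions.

#include <stdint.h>
#include <stddef.h>
#include <limits.h>

/* Editor's sketch, not part of the patch. */
static int stripe_count_ok_example(uint64_t sectors, uint64_t stripe_size,
				   uint32_t *nr_stripes)
{
	const uint64_t max_stripes = INT_MAX < SIZE_MAX / sizeof(int) ?
				     INT_MAX : SIZE_MAX / sizeof(int);
	uint64_t n;

	if (!stripe_size)			/* the patch defaults this to 1 << 31 */
		stripe_size = 1ULL << 31;
	n = (sectors + stripe_size - 1) / stripe_size;	/* DIV_ROUND_UP_ULL */
	if (!n || n > max_stripes)
		return 0;			/* reject before truncating */
	*nr_stripes = (uint32_t)n;
	return 1;
}
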
@@ -844,17 +954,14 @@
844954
845955 d->disk->major = bcache_major;
846956 d->disk->first_minor = idx_to_first_minor(idx);
847
- d->disk->fops = &bcache_ops;
957
+ d->disk->fops = ops;
848958 d->disk->private_data = d;
849959
850
- q = blk_alloc_queue(GFP_KERNEL);
960
+ q = blk_alloc_queue(NUMA_NO_NODE);
851961 if (!q)
852962 return -ENOMEM;
853963
854
- blk_queue_make_request(q, NULL);
855964 d->disk->queue = q;
856
- q->queuedata = d;
857
- q->backing_dev_info->congested_data = d;
858965 q->limits.max_hw_sectors = UINT_MAX;
859966 q->limits.max_sectors = UINT_MAX;
860967 q->limits.max_segment_size = UINT_MAX;
@@ -864,6 +971,20 @@
864971 q->limits.io_min = block_size;
865972 q->limits.logical_block_size = block_size;
866973 q->limits.physical_block_size = block_size;
974
+
975
+ if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) {
976
+ /*
977
+ * This should only happen with BCACHE_SB_VERSION_BDEV.
978
+ * Block/page size is checked for BCACHE_SB_VERSION_CDEV.
979
+ */
980
+ pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
981
+ d->disk->disk_name, q->limits.logical_block_size,
982
+ PAGE_SIZE, bdev_logical_block_size(cached_bdev));
983
+
984
+ /* This also adjusts physical block size/min io size if needed */
985
+ blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev));
986
+ }
987
+
867988 blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
868989 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
869990 blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);
@@ -916,11 +1037,11 @@
9161037 dc->offline_seconds = 0;
9171038
9181039 if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
919
- pr_err("%s: device offline for %d seconds",
1040
+ pr_err("%s: device offline for %d seconds\n",
9201041 dc->backing_dev_name,
9211042 BACKING_DEV_OFFLINE_TIMEOUT);
922
- pr_err("%s: disable I/O request due to backing "
923
- "device offline", dc->disk.name);
1043
+ pr_err("%s: disable I/O request due to backing device offline\n",
1044
+ dc->disk.name);
9241045 dc->io_disable = true;
9251046 /* let others know earlier that io_disable is true */
9261047 smp_mb();
@@ -935,25 +1056,33 @@
9351056 }
9361057
9371058
938
-void bch_cached_dev_run(struct cached_dev *dc)
1059
+int bch_cached_dev_run(struct cached_dev *dc)
9391060 {
9401061 struct bcache_device *d = &dc->disk;
941
- char buf[SB_LABEL_SIZE + 1];
1062
+ char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL);
9421063 char *env[] = {
9431064 "DRIVER=bcache",
9441065 kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid),
945
- NULL,
1066
+ kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf ? : ""),
9461067 NULL,
9471068 };
9481069
949
- memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
950
- buf[SB_LABEL_SIZE] = '\0';
951
- env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
1070
+ if (dc->io_disable) {
1071
+ pr_err("I/O disabled on cached dev %s\n",
1072
+ dc->backing_dev_name);
1073
+ kfree(env[1]);
1074
+ kfree(env[2]);
1075
+ kfree(buf);
1076
+ return -EIO;
1077
+ }
9521078
9531079 if (atomic_xchg(&dc->running, 1)) {
9541080 kfree(env[1]);
9551081 kfree(env[2]);
956
- return;
1082
+ kfree(buf);
1083
+ pr_info("cached dev %s is running already\n",
1084
+ dc->backing_dev_name);
1085
+ return -EBUSY;
9571086 }
9581087
9591088 if (!d->c &&
@@ -976,18 +1105,22 @@
9761105 kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
9771106 kfree(env[1]);
9781107 kfree(env[2]);
1108
+ kfree(buf);
9791109
9801110 if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
981
- sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
982
- pr_debug("error creating sysfs link");
1111
+ sysfs_create_link(&disk_to_dev(d->disk)->kobj,
1112
+ &d->kobj, "bcache")) {
1113
+ pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n");
1114
+ return -ENOMEM;
1115
+ }
9831116
9841117 dc->status_update_thread = kthread_run(cached_dev_status_update,
9851118 dc, "bcache_status_update");
9861119 if (IS_ERR(dc->status_update_thread)) {
987
- pr_warn("failed to create bcache_status_update kthread, "
988
- "continue to run without monitoring backing "
989
- "device status");
1120
+ pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n");
9901121 }
1122
+
1123
+ return 0;
9911124 }
9921125
9931126 /*
@@ -1010,7 +1143,7 @@
10101143 } while (time_out > 0);
10111144
10121145 if (time_out == 0)
1013
- pr_warn("give up waiting for dc->writeback_write_update to quit");
1146
+ pr_warn("give up waiting for dc->writeback_write_update to quit\n");
10141147
10151148 cancel_delayed_work_sync(&dc->writeback_rate_update);
10161149 }
@@ -1025,7 +1158,6 @@
10251158 BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
10261159 BUG_ON(refcount_read(&dc->count));
10271160
1028
- mutex_lock(&bch_register_lock);
10291161
10301162 if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
10311163 cancel_writeback_rate_update_dwork(dc);
@@ -1041,6 +1173,8 @@
10411173 bch_write_bdev_super(dc, &cl);
10421174 closure_sync(&cl);
10431175
1176
+ mutex_lock(&bch_register_lock);
1177
+
10441178 calc_cached_dev_sectors(dc->disk.c);
10451179 bcache_device_detach(&dc->disk);
10461180 list_move(&dc->list, &uncached_devices);
@@ -1050,7 +1184,7 @@
10501184
10511185 mutex_unlock(&bch_register_lock);
10521186
1053
- pr_info("Caching disabled for %s", dc->backing_dev_name);
1187
+ pr_info("Caching disabled for %s\n", dc->backing_dev_name);
10541188
10551189 /* Drop ref we took in cached_dev_detach() */
10561190 closure_put(&dc->disk.cl);
@@ -1083,26 +1217,27 @@
10831217 uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds());
10841218 struct uuid_entry *u;
10851219 struct cached_dev *exist_dc, *t;
1220
+ int ret = 0;
10861221
1087
- if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
1088
- (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
1222
+ if ((set_uuid && memcmp(set_uuid, c->set_uuid, 16)) ||
1223
+ (!set_uuid && memcmp(dc->sb.set_uuid, c->set_uuid, 16)))
10891224 return -ENOENT;
10901225
10911226 if (dc->disk.c) {
1092
- pr_err("Can't attach %s: already attached",
1227
+ pr_err("Can't attach %s: already attached\n",
10931228 dc->backing_dev_name);
10941229 return -EINVAL;
10951230 }
10961231
10971232 if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
1098
- pr_err("Can't attach %s: shutting down",
1233
+ pr_err("Can't attach %s: shutting down\n",
10991234 dc->backing_dev_name);
11001235 return -EINVAL;
11011236 }
11021237
1103
- if (dc->sb.block_size < c->sb.block_size) {
1238
+ if (dc->sb.block_size < c->cache->sb.block_size) {
11041239 /* Will die */
1105
- pr_err("Couldn't attach %s: block size less than set's block size",
1240
+ pr_err("Couldn't attach %s: block size less than set's block size\n",
11061241 dc->backing_dev_name);
11071242 return -EINVAL;
11081243 }
@@ -1110,7 +1245,7 @@
11101245 /* Check whether already attached */
11111246 list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
11121247 if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
1113
- pr_err("Tried to attach %s but duplicate UUID already attached",
1248
+ pr_err("Tried to attach %s but duplicate UUID already attached\n",
11141249 dc->backing_dev_name);
11151250
11161251 return -EINVAL;
@@ -1129,14 +1264,14 @@
11291264
11301265 if (!u) {
11311266 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
1132
- pr_err("Couldn't find uuid for %s in set",
1267
+ pr_err("Couldn't find uuid for %s in set\n",
11331268 dc->backing_dev_name);
11341269 return -ENOENT;
11351270 }
11361271
11371272 u = uuid_find_empty(c);
11381273 if (!u) {
1139
- pr_err("Not caching %s, no room for UUID",
1274
+ pr_err("Not caching %s, no room for UUID\n",
11401275 dc->backing_dev_name);
11411276 return -EINVAL;
11421277 }
@@ -1157,7 +1292,7 @@
11571292 u->first_reg = u->last_reg = rtime;
11581293 bch_uuid_write(c);
11591294
1160
- memcpy(dc->sb.set_uuid, c->sb.set_uuid, 16);
1295
+ memcpy(dc->sb.set_uuid, c->set_uuid, 16);
11611296 SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
11621297
11631298 bch_write_bdev_super(dc, &cl);
@@ -1182,6 +1317,8 @@
11821317 down_write(&dc->writeback_lock);
11831318 if (bch_cached_dev_writeback_start(dc)) {
11841319 up_write(&dc->writeback_lock);
1320
+ pr_err("Couldn't start writeback facilities for %s\n",
1321
+ dc->disk.disk->disk_name);
11851322 return -ENOMEM;
11861323 }
11871324
@@ -1192,20 +1329,42 @@
11921329
11931330 bch_sectors_dirty_init(&dc->disk);
11941331
1195
- bch_cached_dev_run(dc);
1332
+ ret = bch_cached_dev_run(dc);
1333
+ if (ret && (ret != -EBUSY)) {
1334
+ up_write(&dc->writeback_lock);
1335
+ /*
1336
+ * bch_register_lock is held, bcache_device_stop() is not
1337
+ * able to be directly called. The kthread and kworker
1338
+ * created previously in bch_cached_dev_writeback_start()
1339
+ * have to be stopped manually here.
1340
+ */
1341
+ kthread_stop(dc->writeback_thread);
1342
+ cancel_writeback_rate_update_dwork(dc);
1343
+ pr_err("Couldn't run cached device %s\n",
1344
+ dc->backing_dev_name);
1345
+ return ret;
1346
+ }
1347
+
11961348 bcache_device_link(&dc->disk, c, "bdev");
11971349 atomic_inc(&c->attached_dev_nr);
1350
+
1351
+ if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) {
1352
+ pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n");
1353
+ pr_err("Please update to the latest bcache-tools to create the cache device\n");
1354
+ set_disk_ro(dc->disk.disk, 1);
1355
+ }
11981356
11991357 /* Allow the writeback thread to proceed */
12001358 up_write(&dc->writeback_lock);
12011359
1202
- pr_info("Caching %s as %s on set %pU",
1360
+ pr_info("Caching %s as %s on set %pU\n",
12031361 dc->backing_dev_name,
12041362 dc->disk.disk->disk_name,
1205
- dc->disk.c->sb.set_uuid);
1363
+ dc->disk.c->set_uuid);
12061364 return 0;
12071365 }
12081366
1367
+/* when dc->disk.kobj released */
12091368 void bch_cached_dev_release(struct kobject *kobj)
12101369 {
12111370 struct cached_dev *dc = container_of(kobj, struct cached_dev,
@@ -1235,8 +1394,8 @@
12351394
12361395 mutex_unlock(&bch_register_lock);
12371396
1238
- if (dc->sb_bio.bi_inline_vecs[0].bv_page)
1239
- put_page(bio_first_page_all(&dc->sb_bio));
1397
+ if (dc->sb_disk)
1398
+ put_page(virt_to_page(dc->sb_disk));
12401399
12411400 if (!IS_ERR_OR_NULL(dc->bdev))
12421401 blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
@@ -1292,13 +1451,13 @@
12921451 q->limits.raid_partial_stripes_expensive;
12931452
12941453 ret = bcache_device_init(&dc->disk, block_size,
1295
- dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
1454
+ dc->bdev->bd_part->nr_sects - dc->sb.data_offset,
1455
+ dc->bdev, &bcache_cached_ops);
12961456 if (ret)
12971457 return ret;
12981458
1299
- dc->disk.disk->queue->backing_dev_info->ra_pages =
1300
- max(dc->disk.disk->queue->backing_dev_info->ra_pages,
1301
- q->backing_dev_info->ra_pages);
1459
+ blk_queue_io_opt(dc->disk.disk->queue,
1460
+ max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q)));
13021461
13031462 atomic_set(&dc->io_errors, 0);
13041463 dc->io_disable = false;
@@ -1313,22 +1472,19 @@
13131472
13141473 /* Cached device - bcache superblock */
13151474
1316
-static void register_bdev(struct cache_sb *sb, struct page *sb_page,
1475
+static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
13171476 struct block_device *bdev,
13181477 struct cached_dev *dc)
13191478 {
13201479 const char *err = "cannot allocate memory";
13211480 struct cache_set *c;
1481
+ int ret = -ENOMEM;
13221482
13231483 bdevname(bdev, dc->backing_dev_name);
13241484 memcpy(&dc->sb, sb, sizeof(struct cache_sb));
13251485 dc->bdev = bdev;
13261486 dc->bdev->bd_holder = dc;
1327
-
1328
- bio_init(&dc->sb_bio, dc->sb_bio.bi_inline_vecs, 1);
1329
- bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page;
1330
- get_page(sb_page);
1331
-
1487
+ dc->sb_disk = sb_disk;
13321488
13331489 if (cached_dev_init(dc, sb->block_size << 9))
13341490 goto err;
@@ -1340,7 +1496,7 @@
13401496 if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
13411497 goto err;
13421498
1343
- pr_info("registered backing device %s", dc->backing_dev_name);
1499
+ pr_info("registered backing device %s\n", dc->backing_dev_name);
13441500
13451501 list_add(&dc->list, &uncached_devices);
13461502 /* attach to a matched cache set if it exists */
@@ -1348,17 +1504,23 @@
13481504 bch_cached_dev_attach(dc, c, NULL);
13491505
13501506 if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
1351
- BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
1352
- bch_cached_dev_run(dc);
1507
+ BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) {
1508
+ err = "failed to run cached device";
1509
+ ret = bch_cached_dev_run(dc);
1510
+ if (ret)
1511
+ goto err;
1512
+ }
13531513
1354
- return;
1514
+ return 0;
13551515 err:
1356
- pr_notice("error %s: %s", dc->backing_dev_name, err);
1516
+ pr_notice("error %s: %s\n", dc->backing_dev_name, err);
13571517 bcache_device_stop(&dc->disk);
1518
+ return ret;
13581519 }
13591520
13601521 /* Flash only volumes */
13611522
1523
+/* When d->kobj released */
13621524 void bch_flash_dev_release(struct kobject *kobj)
13631525 {
13641526 struct bcache_device *d = container_of(kobj, struct bcache_device,
@@ -1401,7 +1563,8 @@
14011563
14021564 kobject_init(&d->kobj, &bch_flash_dev_ktype);
14031565
1404
- if (bcache_device_init(d, block_bytes(c), u->sectors))
1566
+ if (bcache_device_init(d, block_bytes(c->cache), u->sectors,
1567
+ NULL, &bcache_flash_ops))
14051568 goto err;
14061569
14071570 bcache_device_attach(d, c, u - c->uuids);
@@ -1413,6 +1576,12 @@
14131576 goto err;
14141577
14151578 bcache_device_link(d, c, "volume");
1579
+
1580
+ if (bch_has_feature_obso_large_bucket(&c->cache->sb)) {
1581
+ pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n");
1582
+ pr_err("Please update to the latest bcache-tools to create the cache device\n");
1583
+ set_disk_ro(d->disk, 1);
1584
+ }
14161585
14171586 return 0;
14181587 err:
@@ -1446,7 +1615,7 @@
14461615
14471616 u = uuid_find_empty(c);
14481617 if (!u) {
1449
- pr_err("Can't create volume, no room for UUID");
1618
+ pr_err("Can't create volume, no room for UUID\n");
14501619 return -EINVAL;
14511620 }
14521621
@@ -1472,7 +1641,7 @@
14721641 smp_mb();
14731642
14741643 pr_err("stop %s: too many IO errors on backing device %s\n",
1475
- dc->disk.disk->disk_name, dc->backing_dev_name);
1644
+ dc->disk.disk->disk_name, dc->backing_dev_name);
14761645
14771646 bcache_device_stop(&dc->disk);
14781647 return true;
@@ -1483,6 +1652,7 @@
14831652 __printf(2, 3)
14841653 bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
14851654 {
1655
+ struct va_format vaf;
14861656 va_list args;
14871657
14881658 if (c->on_error != ON_ERROR_PANIC &&
@@ -1490,20 +1660,22 @@
14901660 return false;
14911661
14921662 if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
1493
- pr_info("CACHE_SET_IO_DISABLE already set");
1663
+ pr_info("CACHE_SET_IO_DISABLE already set\n");
14941664
14951665 /*
14961666 * XXX: we can be called from atomic context
14971667 * acquire_console_sem();
14981668 */
14991669
1500
- pr_err("bcache: error on %pU: ", c->sb.set_uuid);
1501
-
15021670 va_start(args, fmt);
1503
- vprintk(fmt, args);
1504
- va_end(args);
15051671
1506
- pr_err(", disabling caching\n");
1672
+ vaf.fmt = fmt;
1673
+ vaf.va = &args;
1674
+
1675
+ pr_err("error on %pU: %pV, disabling caching\n",
1676
+ c->set_uuid, &vaf);
1677
+
1678
+ va_end(args);
15071679
15081680 if (c->on_error == ON_ERROR_PANIC)
15091681 panic("panic forced after error\n");
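
Editor's note on the hunk above: the rewritten bch_cache_set_error() uses struct va_format and the %pV printk extension so the set UUID, the caller's message, and the "disabling caching" suffix come out as a single log line. A hedged, stand-alone sketch of that pattern follows; report_error_example() is a hypothetical name, while va_format/%pV are the real kernel facilities relied on above.

#include <linux/kernel.h>
#include <linux/printk.h>

/* Editor's sketch, not part of the patch. */
static __printf(1, 2) void report_error_example(const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	/* one atomic log line instead of several partial pr_err() calls */
	pr_err("error: %pV, disabling caching\n", &vaf);
	va_end(args);
}
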
@@ -1512,6 +1684,7 @@
15121684 return true;
15131685 }
15141686
1687
+/* When c->kobj released */
15151688 void bch_cache_set_release(struct kobject *kobj)
15161689 {
15171690 struct cache_set *c = container_of(kobj, struct cache_set, kobj);
@@ -1524,7 +1697,6 @@
15241697 {
15251698 struct cache_set *c = container_of(cl, struct cache_set, cl);
15261699 struct cache *ca;
1527
- unsigned int i;
15281700
15291701 debugfs_remove(c->debug);
15301702
....@@ -1533,15 +1705,16 @@
15331705 bch_journal_free(c);
15341706
15351707 mutex_lock(&bch_register_lock);
1536
- for_each_cache(ca, c, i)
1537
- if (ca) {
1538
- ca->set = NULL;
1539
- c->cache[ca->sb.nr_this_dev] = NULL;
1540
- kobject_put(&ca->kobj);
1541
- }
1542
-
15431708 bch_bset_sort_state_free(&c->sort);
1544
- free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
1709
+ free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->cache->sb)));
1710
+
1711
+ ca = c->cache;
1712
+ if (ca) {
1713
+ ca->set = NULL;
1714
+ c->cache = NULL;
1715
+ kobject_put(&ca->kobj);
1716
+ }
1717
+
15451718
15461719 if (c->moving_gc_wq)
15471720 destroy_workqueue(c->moving_gc_wq);
@@ -1554,7 +1727,7 @@
15541727 list_del(&c->list);
15551728 mutex_unlock(&bch_register_lock);
15561729
1557
- pr_info("Cache set %pU unregistered", c->sb.set_uuid);
1730
+ pr_info("Cache set %pU unregistered\n", c->set_uuid);
15581731 wake_up(&unregister_wait);
15591732
15601733 closure_debug_destroy(&c->cl);
@@ -1564,9 +1737,8 @@
15641737 static void cache_set_flush(struct closure *cl)
15651738 {
15661739 struct cache_set *c = container_of(cl, struct cache_set, caching);
1567
- struct cache *ca;
1740
+ struct cache *ca = c->cache;
15681741 struct btree *b;
1569
- unsigned int i;
15701742
15711743 bch_cache_accounting_destroy(&c->accounting);
15721744
@@ -1576,20 +1748,23 @@
15761748 if (!IS_ERR_OR_NULL(c->gc_thread))
15771749 kthread_stop(c->gc_thread);
15781750
1579
- if (!IS_ERR_OR_NULL(c->root))
1751
+ if (!IS_ERR(c->root))
15801752 list_add(&c->root->list, &c->btree_cache);
15811753
1582
- /* Should skip this if we're unregistering because of an error */
1583
- list_for_each_entry(b, &c->btree_cache, list) {
1584
- mutex_lock(&b->write_lock);
1585
- if (btree_node_dirty(b))
1586
- __bch_btree_node_write(b, NULL);
1587
- mutex_unlock(&b->write_lock);
1588
- }
1754
+ /*
1755
+ * Avoid flushing cached nodes if cache set is retiring
1756
+ * due to too many I/O errors detected.
1757
+ */
1758
+ if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags))
1759
+ list_for_each_entry(b, &c->btree_cache, list) {
1760
+ mutex_lock(&b->write_lock);
1761
+ if (btree_node_dirty(b))
1762
+ __bch_btree_node_write(b, NULL);
1763
+ mutex_unlock(&b->write_lock);
1764
+ }
15891765
1590
- for_each_cache(ca, c, i)
1591
- if (ca->alloc_thread)
1592
- kthread_stop(ca->alloc_thread);
1766
+ if (ca->alloc_thread)
1767
+ kthread_stop(ca->alloc_thread);
15931768
15941769 if (c->journal.cur) {
15951770 cancel_delayed_work_sync(&c->journal.work);
@@ -1621,37 +1796,37 @@
16211796 struct cached_dev *dc)
16221797 {
16231798 if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
1624
- pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
1625
- d->disk->disk_name, c->sb.set_uuid);
1799
+ pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n",
1800
+ d->disk->disk_name, c->set_uuid);
16261801 bcache_device_stop(d);
16271802 } else if (atomic_read(&dc->has_dirty)) {
16281803 /*
16291804 * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
16301805 * and dc->has_dirty == 1
16311806 */
1632
- pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
1807
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.\n",
16331808 d->disk->disk_name);
1634
- /*
1635
- * There might be a small time gap that cache set is
1636
- * released but bcache device is not. Inside this time
1637
- * gap, regular I/O requests will directly go into
1638
- * backing device as no cache set attached to. This
1639
- * behavior may also introduce potential inconsistence
1640
- * data in writeback mode while cache is dirty.
1641
- * Therefore before calling bcache_device_stop() due
1642
- * to a broken cache device, dc->io_disable should be
1643
- * explicitly set to true.
1644
- */
1645
- dc->io_disable = true;
1646
- /* make others know io_disable is true earlier */
1647
- smp_mb();
1648
- bcache_device_stop(d);
1809
+ /*
1810
+ * There might be a small time gap that cache set is
1811
+ * released but bcache device is not. Inside this time
1812
+ * gap, regular I/O requests will directly go into
1813
+ * backing device as no cache set attached to. This
1814
+ * behavior may also introduce potential inconsistence
1815
+ * data in writeback mode while cache is dirty.
1816
+ * Therefore before calling bcache_device_stop() due
1817
+ * to a broken cache device, dc->io_disable should be
1818
+ * explicitly set to true.
1819
+ */
1820
+ dc->io_disable = true;
1821
+ /* make others know io_disable is true earlier */
1822
+ smp_mb();
1823
+ bcache_device_stop(d);
16491824 } else {
16501825 /*
16511826 * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
16521827 * and dc->has_dirty == 0
16531828 */
1654
- pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
1829
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n",
16551830 d->disk->disk_name);
16561831 }
16571832 }
@@ -1689,6 +1864,7 @@
16891864 void bch_cache_set_stop(struct cache_set *c)
16901865 {
16911866 if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags))
1867
+ /* closure_fn set to __cache_set_unregister() */
16921868 closure_queue(&c->caching);
16931869 }
16941870
@@ -1698,12 +1874,13 @@
16981874 bch_cache_set_stop(c);
16991875 }
17001876
1701
-#define alloc_bucket_pages(gfp, c) \
1702
- ((void *) __get_free_pages(__GFP_ZERO|__GFP_COMP|gfp, ilog2(bucket_pages(c))))
1877
+#define alloc_meta_bucket_pages(gfp, sb) \
1878
+ ((void *) __get_free_pages(__GFP_ZERO|__GFP_COMP|gfp, ilog2(meta_bucket_pages(sb))))
17031879
17041880 struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
17051881 {
17061882 int iter_size;
1883
+ struct cache *ca = container_of(sb, struct cache, sb);
17071884 struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL);
17081885
17091886 if (!c)
@@ -1725,17 +1902,16 @@
17251902
17261903 bch_cache_accounting_init(&c->accounting, &c->cl);
17271904
1728
- memcpy(c->sb.set_uuid, sb->set_uuid, 16);
1729
- c->sb.block_size = sb->block_size;
1730
- c->sb.bucket_size = sb->bucket_size;
1731
- c->sb.nr_in_set = sb->nr_in_set;
1732
- c->sb.last_mount = sb->last_mount;
1905
+ memcpy(c->set_uuid, sb->set_uuid, 16);
1906
+
1907
+ c->cache = ca;
1908
+ c->cache->set = c;
17331909 c->bucket_bits = ilog2(sb->bucket_size);
17341910 c->block_bits = ilog2(sb->block_size);
1735
- c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry);
1911
+ c->nr_uuids = meta_bucket_bytes(sb) / sizeof(struct uuid_entry);
17361912 c->devices_max_used = 0;
17371913 atomic_set(&c->attached_dev_nr, 0);
1738
- c->btree_pages = bucket_pages(c);
1914
+ c->btree_pages = meta_bucket_pages(sb);
17391915 if (c->btree_pages > BTREE_MAX_PAGES)
17401916 c->btree_pages = max_t(int, c->btree_pages / 4,
17411917 BTREE_MAX_PAGES);
@@ -1761,29 +1937,52 @@
17611937 INIT_LIST_HEAD(&c->btree_cache_freed);
17621938 INIT_LIST_HEAD(&c->data_buckets);
17631939
1764
- iter_size = (sb->bucket_size / sb->block_size + 1) *
1940
+ iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) *
17651941 sizeof(struct btree_iter_set);
17661942
1767
- if (!(c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL)) ||
1768
- mempool_init_slab_pool(&c->search, 32, bch_search_cache) ||
1769
- mempool_init_kmalloc_pool(&c->bio_meta, 2,
1770
- sizeof(struct bbio) + sizeof(struct bio_vec) *
1771
- bucket_pages(c)) ||
1772
- mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
1773
- bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
1774
- BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
1775
- !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
1776
- !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
1777
- WQ_MEM_RECLAIM, 0)) ||
1778
- bch_journal_alloc(c) ||
1779
- bch_btree_cache_alloc(c) ||
1780
- bch_open_buckets_alloc(c) ||
1781
- bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
1943
+ c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
1944
+ if (!c->devices)
1945
+ goto err;
1946
+
1947
+ if (mempool_init_slab_pool(&c->search, 32, bch_search_cache))
1948
+ goto err;
1949
+
1950
+ if (mempool_init_kmalloc_pool(&c->bio_meta, 2,
1951
+ sizeof(struct bbio) +
1952
+ sizeof(struct bio_vec) * meta_bucket_pages(sb)))
1953
+ goto err;
1954
+
1955
+ if (mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size))
1956
+ goto err;
1957
+
1958
+ if (bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
1959
+ BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
1960
+ goto err;
1961
+
1962
+ c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, sb);
1963
+ if (!c->uuids)
1964
+ goto err;
1965
+
1966
+ c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0);
1967
+ if (!c->moving_gc_wq)
1968
+ goto err;
1969
+
1970
+ if (bch_journal_alloc(c))
1971
+ goto err;
1972
+
1973
+ if (bch_btree_cache_alloc(c))
1974
+ goto err;
1975
+
1976
+ if (bch_open_buckets_alloc(c))
1977
+ goto err;
1978
+
1979
+ if (bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
17821980 goto err;
17831981
17841982 c->congested_read_threshold_us = 2000;
17851983 c->congested_write_threshold_us = 20000;
17861984 c->error_limit = DEFAULT_IO_ERROR_LIMIT;
1985
+ c->idle_max_writeback_rate_enabled = 1;
17871986 WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
17881987
17891988 return c;
@@ -1796,19 +1995,17 @@
17961995 {
17971996 const char *err = "cannot allocate memory";
17981997 struct cached_dev *dc, *t;
1799
- struct cache *ca;
1998
+ struct cache *ca = c->cache;
18001999 struct closure cl;
1801
- unsigned int i;
18022000 LIST_HEAD(journal);
18032001 struct journal_replay *l;
18042002
18052003 closure_init_stack(&cl);
18062004
1807
- for_each_cache(ca, c, i)
1808
- c->nbuckets += ca->sb.nbuckets;
2005
+ c->nbuckets = ca->sb.nbuckets;
18092006 set_gc_sectors(c);
18102007
1811
- if (CACHE_SYNC(&c->sb)) {
2008
+ if (CACHE_SYNC(&c->cache->sb)) {
18122009 struct bkey *k;
18132010 struct jset *j;
18142011
@@ -1816,7 +2013,7 @@
18162013 if (bch_journal_read(c, &journal))
18172014 goto err;
18182015
1819
- pr_debug("btree_journal_read() done");
2016
+ pr_debug("btree_journal_read() done\n");
18202017
18212018 err = "no journal entries found";
18222019 if (list_empty(&journal))
@@ -1825,8 +2022,8 @@
18252022 j = &list_entry(journal.prev, struct journal_replay, list)->j;
18262023
18272024 err = "IO error reading priorities";
1828
- for_each_cache(ca, c, i)
1829
- prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]);
2025
+ if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]))
2026
+ goto err;
18302027
18312028 /*
18322029 * If prio_read() fails it'll call cache_set_error and we'll
@@ -1860,7 +2057,7 @@
18602057
18612058 bch_journal_mark(c, &journal);
18622059 bch_initial_gc_finish(c);
1863
- pr_debug("btree_check() done");
2060
+ pr_debug("btree_check() done\n");
18642061
18652062 /*
18662063 * bcache_journal_next() can't happen sooner, or
@@ -1870,9 +2067,8 @@
18702067 bch_journal_next(&c->journal);
18712068
18722069 err = "error starting allocator thread";
1873
- for_each_cache(ca, c, i)
1874
- if (bch_cache_allocator_start(ca))
1875
- goto err;
2070
+ if (bch_cache_allocator_start(ca))
2071
+ goto err;
18762072
18772073 /*
18782074 * First place it's safe to allocate: btree_check() and
@@ -1891,28 +2087,23 @@
18912087 if (bch_journal_replay(c, &journal))
18922088 goto err;
18932089 } else {
1894
- pr_notice("invalidating existing data");
2090
+ unsigned int j;
18952091
1896
- for_each_cache(ca, c, i) {
1897
- unsigned int j;
2092
+ pr_notice("invalidating existing data\n");
2093
+ ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
2094
+ 2, SB_JOURNAL_BUCKETS);
18982095
1899
- ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
1900
- 2, SB_JOURNAL_BUCKETS);
1901
-
1902
- for (j = 0; j < ca->sb.keys; j++)
1903
- ca->sb.d[j] = ca->sb.first_bucket + j;
1904
- }
2096
+ for (j = 0; j < ca->sb.keys; j++)
2097
+ ca->sb.d[j] = ca->sb.first_bucket + j;
19052098
19062099 bch_initial_gc_finish(c);
19072100
19082101 err = "error starting allocator thread";
1909
- for_each_cache(ca, c, i)
1910
- if (bch_cache_allocator_start(ca))
1911
- goto err;
2102
+ if (bch_cache_allocator_start(ca))
2103
+ goto err;
19122104
19132105 mutex_lock(&c->bucket_lock);
1914
- for_each_cache(ca, c, i)
1915
- bch_prio_write(ca, true);
2106
+ bch_prio_write(ca, true);
19162107 mutex_unlock(&c->bucket_lock);
19172108
19182109 err = "cannot allocate new UUID bucket";
@@ -1921,7 +2112,7 @@
19212112
19222113 err = "cannot allocate new btree root";
19232114 c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
1924
- if (IS_ERR_OR_NULL(c->root))
2115
+ if (IS_ERR(c->root))
19252116 goto err;
19262117
19272118 mutex_lock(&c->root->write_lock);
@@ -1937,7 +2128,7 @@
19372128 * everything is set up - fortunately journal entries won't be
19382129 * written until the SET_CACHE_SYNC() here:
19392130 */
1940
- SET_CACHE_SYNC(&c->sb, true);
2131
+ SET_CACHE_SYNC(&c->cache->sb, true);
19412132
19422133 bch_journal_next(&c->journal);
19432134 bch_journal_meta(c, &cl);
@@ -1948,14 +2139,18 @@
19482139 goto err;
19492140
19502141 closure_sync(&cl);
1951
- c->sb.last_mount = (u32)ktime_get_real_seconds();
2142
+ c->cache->sb.last_mount = (u32)ktime_get_real_seconds();
19522143 bcache_write_super(c);
2144
+
2145
+ if (bch_has_feature_obso_large_bucket(&c->cache->sb))
2146
+ pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n");
19532147
19542148 list_for_each_entry_safe(dc, t, &uncached_devices, list)
19552149 bch_cached_dev_attach(dc, c, NULL);
19562150
19572151 flash_devs_run(c);
19582152
2153
+ bch_journal_space_reserve(&c->journal);
19592154 set_bit(CACHE_SET_RUNNING, &c->flags);
19602155 return 0;
19612156 err:
@@ -1966,17 +2161,10 @@
19662161 }
19672162
19682163 closure_sync(&cl);
1969
- /* XXX: test this, it's broken */
2164
+
19702165 bch_cache_set_error(c, "%s", err);
19712166
19722167 return -EIO;
1973
-}
1974
-
1975
-static bool can_attach_cache(struct cache *ca, struct cache_set *c)
1976
-{
1977
- return ca->sb.block_size == c->sb.block_size &&
1978
- ca->sb.bucket_size == c->sb.bucket_size &&
1979
- ca->sb.nr_in_set == c->sb.nr_in_set;
19802168 }
19812169
19822170 static const char *register_cache_set(struct cache *ca)
....@@ -1986,15 +2174,9 @@
19862174 struct cache_set *c;
19872175
19882176 list_for_each_entry(c, &bch_cache_sets, list)
1989
- if (!memcmp(c->sb.set_uuid, ca->sb.set_uuid, 16)) {
1990
- if (c->cache[ca->sb.nr_this_dev])
2177
+ if (!memcmp(c->set_uuid, ca->sb.set_uuid, 16)) {
2178
+ if (c->cache)
19912179 return "duplicate cache set member";
1992
-
1993
- if (!can_attach_cache(ca, c))
1994
- return "cache sb does not match set";
1995
-
1996
- if (!CACHE_SYNC(&ca->sb))
1997
- SET_CACHE_SYNC(&c->sb, false);
19982180
19992181 goto found;
20002182 }
@@ -2004,7 +2186,7 @@
20042186 return err;
20052187
20062188 err = "error creating kobject";
2007
- if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->sb.set_uuid) ||
2189
+ if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->set_uuid) ||
20082190 kobject_add(&c->internal, &c->kobj, "internal"))
20092191 goto err;
20102192
@@ -2020,31 +2202,13 @@
20202202 sysfs_create_link(&c->kobj, &ca->kobj, buf))
20212203 goto err;
20222204
2023
- /*
2024
- * A special case is both ca->sb.seq and c->sb.seq are 0,
2025
- * such condition happens on a new created cache device whose
2026
- * super block is never flushed yet. In this case c->sb.version
2027
- * and other members should be updated too, otherwise we will
2028
- * have a mistaken super block version in cache set.
2029
- */
2030
- if (ca->sb.seq > c->sb.seq || c->sb.seq == 0) {
2031
- c->sb.version = ca->sb.version;
2032
- memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16);
2033
- c->sb.flags = ca->sb.flags;
2034
- c->sb.seq = ca->sb.seq;
2035
- pr_debug("set version = %llu", c->sb.version);
2036
- }
2037
-
20382205 kobject_get(&ca->kobj);
20392206 ca->set = c;
2040
- ca->set->cache[ca->sb.nr_this_dev] = ca;
2041
- c->cache_by_alloc[c->caches_loaded++] = ca;
2207
+ ca->set->cache = ca;
20422208
2043
- if (c->caches_loaded == c->sb.nr_in_set) {
2044
- err = "failed to run cache set";
2045
- if (run_cache_set(c) < 0)
2046
- goto err;
2047
- }
2209
+ err = "failed to run cache set";
2210
+ if (run_cache_set(c) < 0)
2211
+ goto err;
20482212
20492213 return NULL;
20502214 err:
@@ -2054,17 +2218,18 @@
20542218
20552219 /* Cache device */
20562220
2221
+/* When ca->kobj released */
20572222 void bch_cache_release(struct kobject *kobj)
20582223 {
20592224 struct cache *ca = container_of(kobj, struct cache, kobj);
20602225 unsigned int i;
20612226
20622227 if (ca->set) {
2063
- BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
2064
- ca->set->cache[ca->sb.nr_this_dev] = NULL;
2228
+ BUG_ON(ca->set->cache != ca);
2229
+ ca->set->cache = NULL;
20652230 }
20662231
2067
- free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca)));
2232
+ free_pages((unsigned long) ca->disk_buckets, ilog2(meta_bucket_pages(&ca->sb)));
20682233 kfree(ca->prio_buckets);
20692234 vfree(ca->buckets);
20702235
@@ -2074,8 +2239,8 @@
20742239 for (i = 0; i < RESERVE_NR; i++)
20752240 free_fifo(&ca->free[i]);
20762241
2077
- if (ca->sb_bio.bi_inline_vecs[0].bv_page)
2078
- put_page(bio_first_page_all(&ca->sb_bio));
2242
+ if (ca->sb_disk)
2243
+ put_page(virt_to_page(ca->sb_disk));
20792244
20802245 if (!IS_ERR_OR_NULL(ca->bdev))
20812246 blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
@@ -2089,6 +2254,8 @@
20892254 size_t free;
20902255 size_t btree_buckets;
20912256 struct bucket *b;
2257
+ int ret = -ENOMEM;
2258
+ const char *err = NULL;
20922259
20932260 __module_get(THIS_MODULE);
20942261 kobject_init(&ca->kobj, &bch_cache_ktype);
@@ -2106,30 +2273,96 @@
21062273 */
21072274 btree_buckets = ca->sb.njournal_buckets ?: 8;
21082275 free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
2276
+ if (!free) {
2277
+ ret = -EPERM;
2278
+ err = "ca->sb.nbuckets is too small";
2279
+ goto err_free;
2280
+ }
21092281
2110
- if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets, GFP_KERNEL) ||
2111
- !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
2112
- !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
2113
- !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
2114
- !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
2115
- !init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
2116
- !(ca->buckets = vzalloc(array_size(sizeof(struct bucket),
2117
- ca->sb.nbuckets))) ||
2118
- !(ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t),
2119
- prio_buckets(ca), 2),
2120
- GFP_KERNEL)) ||
2121
- !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)))
2122
- return -ENOMEM;
2282
+ if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets,
2283
+ GFP_KERNEL)) {
2284
+ err = "ca->free[RESERVE_BTREE] alloc failed";
2285
+ goto err_btree_alloc;
2286
+ }
2287
+
2288
+ if (!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca),
2289
+ GFP_KERNEL)) {
2290
+ err = "ca->free[RESERVE_PRIO] alloc failed";
2291
+ goto err_prio_alloc;
2292
+ }
2293
+
2294
+ if (!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL)) {
2295
+ err = "ca->free[RESERVE_MOVINGGC] alloc failed";
2296
+ goto err_movinggc_alloc;
2297
+ }
2298
+
2299
+ if (!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL)) {
2300
+ err = "ca->free[RESERVE_NONE] alloc failed";
2301
+ goto err_none_alloc;
2302
+ }
2303
+
2304
+ if (!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL)) {
2305
+ err = "ca->free_inc alloc failed";
2306
+ goto err_free_inc_alloc;
2307
+ }
2308
+
2309
+ if (!init_heap(&ca->heap, free << 3, GFP_KERNEL)) {
2310
+ err = "ca->heap alloc failed";
2311
+ goto err_heap_alloc;
2312
+ }
2313
+
2314
+ ca->buckets = vzalloc(array_size(sizeof(struct bucket),
2315
+ ca->sb.nbuckets));
2316
+ if (!ca->buckets) {
2317
+ err = "ca->buckets alloc failed";
2318
+ goto err_buckets_alloc;
2319
+ }
2320
+
2321
+ ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t),
2322
+ prio_buckets(ca), 2),
2323
+ GFP_KERNEL);
2324
+ if (!ca->prio_buckets) {
2325
+ err = "ca->prio_buckets alloc failed";
2326
+ goto err_prio_buckets_alloc;
2327
+ }
2328
+
2329
+ ca->disk_buckets = alloc_meta_bucket_pages(GFP_KERNEL, &ca->sb);
2330
+ if (!ca->disk_buckets) {
2331
+ err = "ca->disk_buckets alloc failed";
2332
+ goto err_disk_buckets_alloc;
2333
+ }
21232334
21242335 ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca);
21252336
21262337 for_each_bucket(b, ca)
21272338 atomic_set(&b->pin, 0);
2128
-
21292339 return 0;
2340
+
2341
+err_disk_buckets_alloc:
2342
+ kfree(ca->prio_buckets);
2343
+err_prio_buckets_alloc:
2344
+ vfree(ca->buckets);
2345
+err_buckets_alloc:
2346
+ free_heap(&ca->heap);
2347
+err_heap_alloc:
2348
+ free_fifo(&ca->free_inc);
2349
+err_free_inc_alloc:
2350
+ free_fifo(&ca->free[RESERVE_NONE]);
2351
+err_none_alloc:
2352
+ free_fifo(&ca->free[RESERVE_MOVINGGC]);
2353
+err_movinggc_alloc:
2354
+ free_fifo(&ca->free[RESERVE_PRIO]);
2355
+err_prio_alloc:
2356
+ free_fifo(&ca->free[RESERVE_BTREE]);
2357
+err_btree_alloc:
2358
+err_free:
2359
+ module_put(THIS_MODULE);
2360
+ if (err)
2361
+ pr_notice("error %s: %s\n", ca->cache_dev_name, err);
2362
+ return ret;
21302363 }
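
The reworked cache_alloc() above replaces the single compound `if (!a || !b || ...) return -ENOMEM;` with one check per allocation and a chain of error labels, so each failure unwinds exactly the resources acquired before it and reports which allocation failed. A minimal userspace sketch of that unwind pattern, with hypothetical resource names that only stand in for the bcache fields, might look like this:

	#include <stdio.h>
	#include <stdlib.h>

	/* Hypothetical container; the fields merely stand in for the per-step
	 * allocations of cache_alloc(), they are not bcache structures. */
	struct demo_cache {
		int *free_btree;
		int *free_prio;
		int *buckets;
	};

	static int demo_cache_alloc(struct demo_cache *ca, size_t nbuckets)
	{
		const char *err;

		ca->free_btree = calloc(8, sizeof(int));
		if (!ca->free_btree) {
			err = "free_btree alloc failed";
			goto err_btree_alloc;
		}

		ca->free_prio = calloc(8, sizeof(int));
		if (!ca->free_prio) {
			err = "free_prio alloc failed";
			goto err_prio_alloc;
		}

		ca->buckets = calloc(nbuckets, sizeof(int));
		if (!ca->buckets) {
			err = "buckets alloc failed";
			goto err_buckets_alloc;
		}

		return 0;

		/* Labels are ordered so each one releases only what was already
		 * acquired, mirroring the err_* chain in cache_alloc(). */
	err_buckets_alloc:
		free(ca->free_prio);
	err_prio_alloc:
		free(ca->free_btree);
	err_btree_alloc:
		fprintf(stderr, "error: %s\n", err);
		return -1;
	}

	int main(void)
	{
		struct demo_cache ca = { 0 };

		if (demo_cache_alloc(&ca, 1024) == 0) {
			puts("allocated");
			free(ca.buckets);
			free(ca.free_prio);
			free(ca.free_btree);
		}
		return 0;
	}

The ordering of the labels is the whole point of the pattern: a failure at step N jumps past the labels for steps N and later, so nothing is ever freed twice or freed before it exists.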
21312364
2132
-static int register_cache(struct cache_sb *sb, struct page *sb_page,
2365
+static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
21332366 struct block_device *bdev, struct cache *ca)
21342367 {
21352368 const char *err = NULL; /* must be set for any error case */
....@@ -2139,19 +2372,24 @@
21392372 memcpy(&ca->sb, sb, sizeof(struct cache_sb));
21402373 ca->bdev = bdev;
21412374 ca->bdev->bd_holder = ca;
2142
-
2143
- bio_init(&ca->sb_bio, ca->sb_bio.bi_inline_vecs, 1);
2144
- bio_first_bvec_all(&ca->sb_bio)->bv_page = sb_page;
2145
- get_page(sb_page);
2375
+ ca->sb_disk = sb_disk;
21462376
21472377 if (blk_queue_discard(bdev_get_queue(bdev)))
21482378 ca->discard = CACHE_DISCARD(&ca->sb);
21492379
21502380 ret = cache_alloc(ca);
21512381 if (ret != 0) {
2382
+ /*
2383
+ * If we failed here, it means ca->kobj is not initialized yet,
2384
+ * kobject_put() won't be called and there is no chance to
2385
+		 * call blkdev_put() on bdev in bch_cache_release(). So we
2386
+ * explicitly call blkdev_put() here.
2387
+ */
21522388 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
21532389 if (ret == -ENOMEM)
21542390 err = "cache_alloc(): -ENOMEM";
2391
+ else if (ret == -EPERM)
2392
+ err = "cache_alloc(): cache device is too small";
21552393 else
21562394 err = "cache_alloc(): unknown error";
21572395 goto err;
....@@ -2174,14 +2412,14 @@
21742412 goto out;
21752413 }
21762414
2177
- pr_info("registered cache device %s", ca->cache_dev_name);
2415
+ pr_info("registered cache device %s\n", ca->cache_dev_name);
21782416
21792417 out:
21802418 kobject_put(&ca->kobj);
21812419
21822420 err:
21832421 if (err)
2184
- pr_notice("error %s: %s", ca->cache_dev_name, err);
2422
+ pr_notice("error %s: %s\n", ca->cache_dev_name, err);
21852423
21862424 return ret;
21872425 }
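
The comment added to register_cache()'s error path codifies an ownership rule: once the kobject is initialized, bch_cache_release() owns the bdev and will drop it when the kobject reference is put; before that point the caller must drop it by hand. A compact sketch of the same rule under stated assumptions, using a FILE handle and hypothetical names in place of the block device and kobject:

	#include <stdio.h>
	#include <stdlib.h>

	/* Hypothetical stand-ins: "handle" plays the bdev, demo_release() plays
	 * bch_cache_release(), and "armed" plays the initialized kobject. */
	struct demo_cache {
		FILE *handle;
		int armed;
	};

	static void demo_release(struct demo_cache *ca)
	{
		/* Only a fully registered object owns and closes the handle,
		 * like blkdev_put() in bch_cache_release(). */
		if (ca->armed && ca->handle)
			fclose(ca->handle);
		free(ca);
	}

	static int demo_register(struct demo_cache *ca, FILE *handle, int simulate_failure)
	{
		ca->handle = handle;

		if (simulate_failure) {
			/* The release callback will never run for this object, so
			 * the handle must be dropped by hand, exactly as
			 * register_cache() calls blkdev_put() when cache_alloc()
			 * fails. */
			fclose(handle);
			ca->handle = NULL;
			return -1;
		}

		ca->armed = 1;	/* from here on, demo_release() owns the handle */
		return 0;
	}

	int main(void)
	{
		struct demo_cache *ca = calloc(1, sizeof(*ca));
		FILE *f = tmpfile();

		if (!ca || !f)
			return 1;
		if (demo_register(ca, f, 1) != 0)
			fprintf(stderr, "registration failed, handle already closed\n");
		demo_release(ca);
		return 0;
	}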
....@@ -2190,9 +2428,13 @@
21902428
21912429 static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
21922430 const char *buffer, size_t size);
2431
+static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
2432
+ struct kobj_attribute *attr,
2433
+ const char *buffer, size_t size);
21932434
21942435 kobj_attribute_write(register, register_bcache);
21952436 kobj_attribute_write(register_quiet, register_bcache);
2437
+kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
21962438
21972439 static bool bch_is_open_backing(struct block_device *bdev)
21982440 {
....@@ -2212,13 +2454,14 @@
22122454 static bool bch_is_open_cache(struct block_device *bdev)
22132455 {
22142456 struct cache_set *c, *tc;
2215
- struct cache *ca;
2216
- unsigned int i;
22172457
2218
- list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
2219
- for_each_cache(ca, c, i)
2220
- if (ca->bdev == bdev)
2221
- return true;
2458
+ list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
2459
+ struct cache *ca = c->cache;
2460
+
2461
+ if (ca->bdev == bdev)
2462
+ return true;
2463
+ }
2464
+
22222465 return false;
22232466 }
22242467
....@@ -2227,27 +2470,121 @@
22272470 return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
22282471 }
22292472
2473
+struct async_reg_args {
2474
+ struct delayed_work reg_work;
2475
+ char *path;
2476
+ struct cache_sb *sb;
2477
+ struct cache_sb_disk *sb_disk;
2478
+ struct block_device *bdev;
2479
+};
2480
+
2481
+static void register_bdev_worker(struct work_struct *work)
2482
+{
2483
+ int fail = false;
2484
+ struct async_reg_args *args =
2485
+ container_of(work, struct async_reg_args, reg_work.work);
2486
+ struct cached_dev *dc;
2487
+
2488
+ dc = kzalloc(sizeof(*dc), GFP_KERNEL);
2489
+ if (!dc) {
2490
+ fail = true;
2491
+ put_page(virt_to_page(args->sb_disk));
2492
+ blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2493
+ goto out;
2494
+ }
2495
+
2496
+ mutex_lock(&bch_register_lock);
2497
+ if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0)
2498
+ fail = true;
2499
+ mutex_unlock(&bch_register_lock);
2500
+
2501
+out:
2502
+ if (fail)
2503
+		pr_info("error %s: failed to register backing device\n",
2504
+ args->path);
2505
+ kfree(args->sb);
2506
+ kfree(args->path);
2507
+ kfree(args);
2508
+ module_put(THIS_MODULE);
2509
+}
2510
+
2511
+static void register_cache_worker(struct work_struct *work)
2512
+{
2513
+ int fail = false;
2514
+ struct async_reg_args *args =
2515
+ container_of(work, struct async_reg_args, reg_work.work);
2516
+ struct cache *ca;
2517
+
2518
+ ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2519
+ if (!ca) {
2520
+ fail = true;
2521
+ put_page(virt_to_page(args->sb_disk));
2522
+ blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2523
+ goto out;
2524
+ }
2525
+
2526
+ /* blkdev_put() will be called in bch_cache_release() */
2527
+ if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0)
2528
+ fail = true;
2529
+
2530
+out:
2531
+ if (fail)
2532
+		pr_info("error %s: failed to register cache device\n",
2533
+ args->path);
2534
+ kfree(args->sb);
2535
+ kfree(args->path);
2536
+ kfree(args);
2537
+ module_put(THIS_MODULE);
2538
+}
2539
+
2540
+static void register_device_async(struct async_reg_args *args)
2541
+{
2542
+ if (SB_IS_BDEV(args->sb))
2543
+ INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker);
2544
+ else
2545
+ INIT_DELAYED_WORK(&args->reg_work, register_cache_worker);
2546
+
2547
+ /* 10 jiffies is enough for a delay */
2548
+ queue_delayed_work(system_wq, &args->reg_work, 10);
2549
+}
2550
+
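
The helpers above (the two registration workers plus the small dispatcher that queues them) move the slow part of device registration off the sysfs write path onto a delayed work item: the write returns immediately, and the worker later performs the real register_bdev()/register_cache() call, frees the argument bundle, and drops the module reference. A self-contained kernel-module sketch of the same hand-off, with hypothetical names in place of struct async_reg_args and the bcache-specific payload, might look roughly like this:

	#include <linux/module.h>
	#include <linux/slab.h>
	#include <linux/string.h>
	#include <linux/workqueue.h>

	/* Hypothetical argument bundle, analogous to struct async_reg_args. */
	struct demo_reg_args {
		struct delayed_work reg_work;
		char *path;
	};

	static void demo_reg_worker(struct work_struct *work)
	{
		struct demo_reg_args *args =
			container_of(work, struct demo_reg_args, reg_work.work);

		/* The slow part of "registration" runs here, off the caller's context. */
		pr_info("demo: registering %s\n", args->path);

		kfree(args->path);
		kfree(args);
		module_put(THIS_MODULE);
	}

	/* Called from a fast path (e.g. a sysfs store); queues the real work. */
	static int demo_register_async(const char *path)
	{
		struct demo_reg_args *args = kzalloc(sizeof(*args), GFP_KERNEL);

		if (!args)
			return -ENOMEM;
		args->path = kstrdup(path, GFP_KERNEL);
		if (!args->path) {
			kfree(args);
			return -ENOMEM;
		}

		__module_get(THIS_MODULE);	/* dropped by the worker */
		INIT_DELAYED_WORK(&args->reg_work, demo_reg_worker);
		queue_delayed_work(system_wq, &args->reg_work, 10);	/* small delay */
		return 0;
	}

	static int __init demo_init(void)
	{
		return demo_register_async("/dev/hypothetical");
	}

	static void __exit demo_exit(void)
	{
		flush_scheduled_work();	/* defensive: wait for any queued worker */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");

The module reference taken before queuing is what keeps the module pinned until the worker has run and freed its arguments, which is the same lifetime trick the bcache workers use.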

22302551 static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
22312552 const char *buffer, size_t size)
22322553 {
2233
- ssize_t ret = size;
2234
- const char *err = "cannot allocate memory";
2554
+ const char *err;
22352555 char *path = NULL;
2236
- struct cache_sb *sb = NULL;
2237
- struct block_device *bdev = NULL;
2238
- struct page *sb_page = NULL;
2556
+ struct cache_sb *sb;
2557
+ struct cache_sb_disk *sb_disk;
2558
+ struct block_device *bdev;
2559
+ ssize_t ret;
2560
+ bool async_registration = false;
22392561
2562
+#ifdef CONFIG_BCACHE_ASYNC_REGISTRATION
2563
+ async_registration = true;
2564
+#endif
2565
+
2566
+ ret = -EBUSY;
2567
+ err = "failed to reference bcache module";
22402568 if (!try_module_get(THIS_MODULE))
2241
- return -EBUSY;
2569
+ goto out;
22422570
2571
+ /* For latest state of bcache_is_reboot */
2572
+ smp_mb();
2573
+ err = "bcache is in reboot";
2574
+ if (bcache_is_reboot)
2575
+ goto out_module_put;
2576
+
2577
+ ret = -ENOMEM;
2578
+ err = "cannot allocate memory";
22432579 path = kstrndup(buffer, size, GFP_KERNEL);
22442580 if (!path)
2245
- goto err;
2581
+ goto out_module_put;
22462582
22472583 sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL);
22482584 if (!sb)
2249
- goto err;
2585
+ goto out_free_path;
22502586
2587
+ ret = -EINVAL;
22512588 err = "failed to open device";
22522589 bdev = blkdev_get_by_path(strim(path),
22532590 FMODE_READ|FMODE_WRITE|FMODE_EXCL,
....@@ -2264,56 +2601,142 @@
22642601 if (!IS_ERR(bdev))
22652602 bdput(bdev);
22662603 if (attr == &ksysfs_register_quiet)
2267
- goto out;
2604
+ goto done;
22682605 }
2269
- goto err;
2606
+ goto out_free_sb;
22702607 }
22712608
22722609 err = "failed to set blocksize";
22732610 if (set_blocksize(bdev, 4096))
2274
- goto err_close;
2611
+ goto out_blkdev_put;
22752612
2276
- err = read_super(sb, bdev, &sb_page);
2613
+ err = read_super(sb, bdev, &sb_disk);
22772614 if (err)
2278
- goto err_close;
2615
+ goto out_blkdev_put;
22792616
22802617 err = "failed to register device";
2618
+
2619
+ if (async_registration) {
2620
+		/* register asynchronously */
2621
+ struct async_reg_args *args =
2622
+ kzalloc(sizeof(struct async_reg_args), GFP_KERNEL);
2623
+
2624
+ if (!args) {
2625
+ ret = -ENOMEM;
2626
+ err = "cannot allocate memory";
2627
+ goto out_put_sb_page;
2628
+ }
2629
+
2630
+ args->path = path;
2631
+ args->sb = sb;
2632
+ args->sb_disk = sb_disk;
2633
+ args->bdev = bdev;
2634
+		register_device_async(args);
2635
+		/* Return to user space without waiting */
2636
+ goto async_done;
2637
+ }
2638
+
22812639 if (SB_IS_BDEV(sb)) {
22822640 struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
22832641
22842642 if (!dc)
2285
- goto err_close;
2643
+ goto out_put_sb_page;
22862644
22872645 mutex_lock(&bch_register_lock);
2288
- register_bdev(sb, sb_page, bdev, dc);
2646
+ ret = register_bdev(sb, sb_disk, bdev, dc);
22892647 mutex_unlock(&bch_register_lock);
2648
+ /* blkdev_put() will be called in cached_dev_free() */
2649
+ if (ret < 0)
2650
+ goto out_free_sb;
22902651 } else {
22912652 struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
22922653
22932654 if (!ca)
2294
- goto err_close;
2655
+ goto out_put_sb_page;
22952656
2296
- if (register_cache(sb, sb_page, bdev, ca) != 0)
2297
- goto err;
2657
+ /* blkdev_put() will be called in bch_cache_release() */
2658
+ if (register_cache(sb, sb_disk, bdev, ca) != 0)
2659
+ goto out_free_sb;
22982660 }
2299
-out:
2300
- if (sb_page)
2301
- put_page(sb_page);
2661
+
2662
+done:
23022663 kfree(sb);
23032664 kfree(path);
23042665 module_put(THIS_MODULE);
2305
- return ret;
2666
+async_done:
2667
+ return size;
23062668
2307
-err_close:
2308
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2309
-err:
2310
- pr_info("error %s: %s", path, err);
2311
- ret = -EINVAL;
2312
- goto out;
2669
+out_put_sb_page:
2670
+ put_page(virt_to_page(sb_disk));
2671
+out_blkdev_put:
2672
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2673
+out_free_sb:
2674
+ kfree(sb);
2675
+out_free_path:
2676
+ kfree(path);
2677
+ path = NULL;
2678
+out_module_put:
2679
+ module_put(THIS_MODULE);
2680
+out:
2681
+	pr_info("error %s: %s\n", path ? path : "", err);
2682
+ return ret;
2683
+}
2684
+
2685
+
2686
+struct pdev {
2687
+ struct list_head list;
2688
+ struct cached_dev *dc;
2689
+};
2690
+
2691
+static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
2692
+ struct kobj_attribute *attr,
2693
+ const char *buffer,
2694
+ size_t size)
2695
+{
2696
+ LIST_HEAD(pending_devs);
2697
+ ssize_t ret = size;
2698
+ struct cached_dev *dc, *tdc;
2699
+ struct pdev *pdev, *tpdev;
2700
+ struct cache_set *c, *tc;
2701
+
2702
+ mutex_lock(&bch_register_lock);
2703
+ list_for_each_entry_safe(dc, tdc, &uncached_devices, list) {
2704
+ pdev = kmalloc(sizeof(struct pdev), GFP_KERNEL);
2705
+ if (!pdev)
2706
+ break;
2707
+ pdev->dc = dc;
2708
+ list_add(&pdev->list, &pending_devs);
2709
+ }
2710
+
2711
+ list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
2712
+ list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
2713
+ char *pdev_set_uuid = pdev->dc->sb.set_uuid;
2714
+ char *set_uuid = c->set_uuid;
2715
+
2716
+ if (!memcmp(pdev_set_uuid, set_uuid, 16)) {
2717
+ list_del(&pdev->list);
2718
+ kfree(pdev);
2719
+ break;
2720
+ }
2721
+ }
2722
+ }
2723
+ mutex_unlock(&bch_register_lock);
2724
+
2725
+ list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
2726
+ pr_info("delete pdev %p\n", pdev);
2727
+ list_del(&pdev->list);
2728
+ bcache_device_stop(&pdev->dc->disk);
2729
+ kfree(pdev);
2730
+ }
2731
+
2732
+ return ret;
23132733 }
23142734
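
bch_pending_bdevs_cleanup() backs the new pendings_cleanup attribute registered in bcache_init(): it stops every backing device still sitting on uncached_devices whose cache set never appeared. Assuming the usual sysfs location of the bcache kobject, /sys/fs/bcache (the kobject creation itself is not in this hunk, so treat the path as an assumption), the knob can be triggered from userspace with any non-empty write; a minimal C example:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Path assumed from the bcache kobject; adjust if your sysfs differs. */
		const char *attr = "/sys/fs/bcache/pendings_cleanup";
		int fd = open(attr, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* The handler ignores the buffer contents, so any write works. */
		if (write(fd, "1", 1) != 1)
			perror("write");
		close(fd);
		return 0;
	}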
23152735 static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
23162736 {
2737
+ if (bcache_is_reboot)
2738
+ return NOTIFY_DONE;
2739
+
23172740 if (code == SYS_DOWN ||
23182741 code == SYS_HALT ||
23192742 code == SYS_POWER_OFF) {
....@@ -2326,22 +2749,57 @@
23262749
23272750 mutex_lock(&bch_register_lock);
23282751
2752
+ if (bcache_is_reboot)
2753
+ goto out;
2754
+
2755
+ /* New registration is rejected since now */
2756
+ bcache_is_reboot = true;
2757
+ /*
2758
+ * Make registering caller (if there is) on other CPU
2759
+ * core know bcache_is_reboot set to true earlier
2760
+ */
2761
+ smp_mb();
2762
+
23292763 if (list_empty(&bch_cache_sets) &&
23302764 list_empty(&uncached_devices))
23312765 goto out;
23322766
2333
- pr_info("Stopping all devices:");
2767
+ mutex_unlock(&bch_register_lock);
23342768
2769
+ pr_info("Stopping all devices:\n");
2770
+
2771
+ /*
2772
+ * The reason bch_register_lock is not held to call
2773
+ * bch_cache_set_stop() and bcache_device_stop() is to
2774
+ * avoid potential deadlock during reboot, because cache
2775
+		 * set or bcache device stopping process will acquire
2776
+ * bch_register_lock too.
2777
+ *
2778
+		 * We are safe here because bcache_is_reboot is already set
2779
+		 * to true, so register_bcache() will reject any new
2780
+		 * registration. bcache_is_reboot also makes sure
2781
+		 * bcache_reboot() won't be re-entered by another thread,
2782
+		 * so there is no race in the following list iteration by
2783
+ * list_for_each_entry_safe().
2784
+ */
23352785 list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
23362786 bch_cache_set_stop(c);
23372787
23382788 list_for_each_entry_safe(dc, tdc, &uncached_devices, list)
23392789 bcache_device_stop(&dc->disk);
23402790
2791
+
2792
+ /*
2793
+ * Give an early chance for other kthreads and
2794
+ * kworkers to stop themselves
2795
+ */
2796
+ schedule();
2797
+
23412798 /* What's a condition variable? */
23422799 while (1) {
2343
- long timeout = start + 2 * HZ - jiffies;
2800
+ long timeout = start + 10 * HZ - jiffies;
23442801
2802
+ mutex_lock(&bch_register_lock);
23452803 stopped = list_empty(&bch_cache_sets) &&
23462804 list_empty(&uncached_devices);
23472805
....@@ -2353,15 +2811,14 @@
23532811
23542812 mutex_unlock(&bch_register_lock);
23552813 schedule_timeout(timeout);
2356
- mutex_lock(&bch_register_lock);
23572814 }
23582815
23592816 finish_wait(&unregister_wait, &wait);
23602817
23612818 if (stopped)
2362
- pr_info("All devices stopped");
2819
+ pr_info("All devices stopped\n");
23632820 else
2364
- pr_notice("Timeout waiting for devices to be closed");
2821
+ pr_notice("Timeout waiting for devices to be closed\n");
23652822 out:
23662823 mutex_unlock(&bch_register_lock);
23672824 }
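
The reboot path and register_bcache() now coordinate through the bcache_is_reboot flag: the notifier sets it under bch_register_lock and issues smp_mb(), while register_bcache() issues smp_mb() before testing it, so a registration racing with shutdown is either completed or rejected, never started after devices begin stopping. A simplified userspace analogue of that publish-and-check pattern, using C11 seq_cst atomics in place of the kernel's smp_mb() pairing (all names here are illustrative):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Plays the role of bcache_is_reboot. */
	static atomic_bool shutting_down = false;

	/* Analogue of register_bcache(): refuse new work once shutdown has begun. */
	static void *register_thread(void *arg)
	{
		(void)arg;
		for (int i = 0; i < 5; i++) {
			if (atomic_load(&shutting_down)) {
				puts("register: rejected, shutdown in progress");
				return NULL;
			}
			puts("register: accepted one device");
			usleep(1000);
		}
		return NULL;
	}

	/* Analogue of bcache_reboot(): publish the flag, then stop everything. */
	static void *reboot_thread(void *arg)
	{
		(void)arg;
		usleep(2000);
		atomic_store(&shutting_down, true);	/* seq_cst store publishes the flag */
		puts("reboot: no new registrations from now on");
		return NULL;
	}

	int main(void)
	{
		pthread_t a, b;

		pthread_create(&a, NULL, register_thread, NULL);
		pthread_create(&b, NULL, reboot_thread, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		return 0;
	}

The kernel version additionally drops bch_register_lock before stopping devices, precisely because the stop paths re-acquire that lock; the flag is what makes that unlocked iteration safe.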
....@@ -2384,6 +2841,9 @@
23842841 destroy_workqueue(bcache_wq);
23852842 if (bch_journal_wq)
23862843 destroy_workqueue(bch_journal_wq);
2844
+ if (bch_flush_wq)
2845
+ destroy_workqueue(bch_flush_wq);
2846
+ bch_btree_exit();
23872847
23882848 if (bcache_major)
23892849 unregister_blkdev(bcache_major, "bcache");
....@@ -2391,13 +2851,42 @@
23912851 mutex_destroy(&bch_register_lock);
23922852 }
23932853
2854
+/* Check and fixup module parameters */
2855
+static void check_module_parameters(void)
2856
+{
2857
+ if (bch_cutoff_writeback_sync == 0)
2858
+ bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC;
2859
+ else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) {
2860
+ pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n",
2861
+ bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX);
2862
+ bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX;
2863
+ }
2864
+
2865
+ if (bch_cutoff_writeback == 0)
2866
+ bch_cutoff_writeback = CUTOFF_WRITEBACK;
2867
+ else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) {
2868
+ pr_warn("set bch_cutoff_writeback (%u) to max value %u\n",
2869
+ bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX);
2870
+ bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX;
2871
+ }
2872
+
2873
+ if (bch_cutoff_writeback > bch_cutoff_writeback_sync) {
2874
+ pr_warn("set bch_cutoff_writeback (%u) to %u\n",
2875
+ bch_cutoff_writeback, bch_cutoff_writeback_sync);
2876
+ bch_cutoff_writeback = bch_cutoff_writeback_sync;
2877
+ }
2878
+}
2879
+
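
check_module_parameters() normalizes the two writeback cutoffs once at load time: zero means "use the built-in default", anything above the per-parameter maximum is clamped down, and the soft cutoff is never allowed to exceed the sync cutoff. A compact restatement of the same policy as a pure function, useful for exercising the rule outside the kernel (the numeric bounds below are placeholders standing in for the CUTOFF_WRITEBACK* macros from writeback.h):

	#include <assert.h>

	/* Placeholder bounds; the kernel takes the real ones from writeback.h. */
	#define DEF_CUTOFF	 40
	#define MAX_CUTOFF	 70
	#define DEF_CUTOFF_SYNC	 70
	#define MAX_CUTOFF_SYNC	 90

	struct cutoffs {
		unsigned int writeback;
		unsigned int writeback_sync;
	};

	/* 0 means "default", values above the max are clamped, and the soft
	 * cutoff can never exceed the sync cutoff. */
	static struct cutoffs normalize_cutoffs(unsigned int wb, unsigned int wb_sync)
	{
		struct cutoffs c;

		c.writeback_sync = wb_sync ? wb_sync : DEF_CUTOFF_SYNC;
		if (c.writeback_sync > MAX_CUTOFF_SYNC)
			c.writeback_sync = MAX_CUTOFF_SYNC;

		c.writeback = wb ? wb : DEF_CUTOFF;
		if (c.writeback > MAX_CUTOFF)
			c.writeback = MAX_CUTOFF;

		if (c.writeback > c.writeback_sync)
			c.writeback = c.writeback_sync;

		return c;
	}

	int main(void)
	{
		assert(normalize_cutoffs(0, 0).writeback == DEF_CUTOFF);
		assert(normalize_cutoffs(95, 0).writeback == MAX_CUTOFF);
		assert(normalize_cutoffs(60, 50).writeback == 50);
		return 0;
	}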
23942880 static int __init bcache_init(void)
23952881 {
23962882 static const struct attribute *files[] = {
23972883 &ksysfs_register.attr,
23982884 &ksysfs_register_quiet.attr,
2885
+ &ksysfs_pendings_cleanup.attr,
23992886 NULL
24002887 };
2888
+
2889
+ check_module_parameters();
24012890
24022891 mutex_init(&bch_register_lock);
24032892 init_waitqueue_head(&unregister_wait);
....@@ -2410,8 +2899,24 @@
24102899 return bcache_major;
24112900 }
24122901
2902
+ if (bch_btree_init())
2903
+ goto err;
2904
+
24132905 bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
24142906 if (!bcache_wq)
2907
+ goto err;
2908
+
2909
+ /*
2910
+ * Let's not make this `WQ_MEM_RECLAIM` for the following reasons:
2911
+ *
2912
+	 * 1. It used `system_wq` before, which also does no memory reclaim.
2912
+	 * 2. With `WQ_MEM_RECLAIM`, desktop stalls, increased boot times, and
2914
+ * reduced throughput can be observed.
2915
+ *
2916
+	 * We still want to use our own queue so as not to congest the `system_wq`.
2917
+ */
2918
+ bch_flush_wq = alloc_workqueue("bch_flush", 0, 0);
2919
+ if (!bch_flush_wq)
24152920 goto err;
24162921
24172922 bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
....@@ -2426,8 +2931,10 @@
24262931 sysfs_create_files(bcache_kobj, files))
24272932 goto err;
24282933
2429
- bch_debug_init(bcache_kobj);
2934
+ bch_debug_init();
24302935 closure_debug_init();
2936
+
2937
+ bcache_is_reboot = false;
24312938
24322939 return 0;
24332940 err:
....@@ -2435,5 +2942,18 @@
24352942 return -ENOMEM;
24362943 }
24372944
2945
+/*
2946
+ * Module hooks
2947
+ */
24382948 module_exit(bcache_exit);
24392949 module_init(bcache_init);
2950
+
2951
+module_param(bch_cutoff_writeback, uint, 0);
2952
+MODULE_PARM_DESC(bch_cutoff_writeback, "threshold to cutoff writeback");
2953
+
2954
+module_param(bch_cutoff_writeback_sync, uint, 0);
2955
+MODULE_PARM_DESC(bch_cutoff_writeback_sync, "hard threshold to cutoff writeback");
2956
+
2957
+MODULE_DESCRIPTION("Bcache: a Linux block layer cache");
2958
+MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
2959
+MODULE_LICENSE("GPL");
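
Both parameters are registered with permission 0, so they never appear under /sys/module/bcache/parameters and can only be set when the module is loaded; out-of-range values are then normalized by check_module_parameters() during bcache_init(). A hypothetical load-time configuration (the percentages shown are illustrative, not recommendations) would look like:

	# /etc/modprobe.d/bcache.conf  (illustrative values)
	options bcache bch_cutoff_writeback=40 bch_cutoff_writeback_sync=70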