hc
2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/dax/super.c
....@@ -1,18 +1,11 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright(c) 2017 Intel Corporation. All rights reserved.
3
- *
4
- * This program is free software; you can redistribute it and/or modify
5
- * it under the terms of version 2 of the GNU General Public License as
6
- * published by the Free Software Foundation.
7
- *
8
- * This program is distributed in the hope that it will be useful, but
9
- * WITHOUT ANY WARRANTY; without even the implied warranty of
10
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
- * General Public License for more details.
124 */
135 #include <linux/pagemap.h>
146 #include <linux/module.h>
157 #include <linux/mount.h>
8
+#include <linux/pseudo_fs.h>
169 #include <linux/magic.h>
1710 #include <linux/genhd.h>
1811 #include <linux/pfn_t.h>
....@@ -22,6 +15,7 @@
2215 #include <linux/uio.h>
2316 #include <linux/dax.h>
2417 #include <linux/fs.h>
18
+#include "dax-private.h"
2519
2620 static dev_t dax_devt;
2721 DEFINE_STATIC_SRCU(dax_srcu);
....@@ -52,7 +46,8 @@
5246 int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
5347 pgoff_t *pgoff)
5448 {
55
- phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
49
+ sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
50
+ phys_addr_t phys_off = (start_sect + sector) * 512;
5651
5752 if (pgoff)
5853 *pgoff = PHYS_PFN(phys_off);
....@@ -65,70 +60,61 @@
6560 #if IS_ENABLED(CONFIG_FS_DAX)
6661 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
6762 {
68
- if (!blk_queue_dax(bdev->bd_queue))
63
+ if (!blk_queue_dax(bdev->bd_disk->queue))
6964 return NULL;
70
- return fs_dax_get_by_host(bdev->bd_disk->disk_name);
65
+ return dax_get_by_host(bdev->bd_disk->disk_name);
7166 }
7267 EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
7368 #endif
7469
75
-/**
76
- * __bdev_dax_supported() - Check if the device supports dax for filesystem
77
- * @bdev: block device to check
78
- * @blocksize: The block size of the device
79
- *
80
- * This is a library function for filesystems to check if the block device
81
- * can be mounted with dax option.
82
- *
83
- * Return: true if supported, false if unsupported
84
- */
85
-bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
70
+bool __generic_fsdax_supported(struct dax_device *dax_dev,
71
+ struct block_device *bdev, int blocksize, sector_t start,
72
+ sector_t sectors)
8673 {
87
- struct dax_device *dax_dev;
8874 bool dax_enabled = false;
89
- struct request_queue *q;
90
- pgoff_t pgoff;
91
- int err, id;
92
- pfn_t pfn;
93
- long len;
75
+ pgoff_t pgoff, pgoff_end;
9476 char buf[BDEVNAME_SIZE];
77
+ void *kaddr, *end_kaddr;
78
+ pfn_t pfn, end_pfn;
79
+ sector_t last_page;
80
+ long len, len2;
81
+ int err, id;
9582
9683 if (blocksize != PAGE_SIZE) {
97
- pr_debug("%s: error: unsupported blocksize for dax\n",
84
+ pr_info("%s: error: unsupported blocksize for dax\n",
9885 bdevname(bdev, buf));
9986 return false;
10087 }
10188
102
- q = bdev_get_queue(bdev);
103
- if (!q || !blk_queue_dax(q)) {
104
- pr_debug("%s: error: request queue doesn't support dax\n",
105
- bdevname(bdev, buf));
106
- return false;
107
- }
108
-
109
- err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
110
- if (err) {
111
- pr_debug("%s: error: unaligned partition for dax\n",
112
- bdevname(bdev, buf));
113
- return false;
114
- }
115
-
116
- dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
11789 if (!dax_dev) {
118
- pr_debug("%s: error: device does not support dax\n",
90
+ pr_debug("%s: error: dax unsupported by block device\n",
91
+ bdevname(bdev, buf));
92
+ return false;
93
+ }
94
+
95
+ err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
96
+ if (err) {
97
+ pr_info("%s: error: unaligned partition for dax\n",
98
+ bdevname(bdev, buf));
99
+ return false;
100
+ }
101
+
102
+ last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
103
+ err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
104
+ if (err) {
105
+ pr_info("%s: error: unaligned partition for dax\n",
119106 bdevname(bdev, buf));
120107 return false;
121108 }
122109
123110 id = dax_read_lock();
124
- len = dax_direct_access(dax_dev, pgoff, 1, NULL, &pfn);
125
- dax_read_unlock(id);
111
+ len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
112
+ len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
126113
127
- put_dax(dax_dev);
128
-
129
- if (len < 1) {
130
- pr_debug("%s: error: dax access failed (%ld)\n",
131
- bdevname(bdev, buf), len);
114
+ if (len < 1 || len2 < 1) {
115
+ pr_info("%s: error: dax access failed (%ld)\n",
116
+ bdevname(bdev, buf), len < 1 ? len : len2);
117
+ dax_read_unlock(id);
132118 return false;
133119 }
134120
....@@ -143,21 +129,72 @@
143129 */
144130 WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
145131 dax_enabled = true;
146
- } else if (pfn_t_devmap(pfn)) {
147
- struct dev_pagemap *pgmap;
132
+ } else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) {
133
+ struct dev_pagemap *pgmap, *end_pgmap;
148134
149135 pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
150
- if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
136
+ end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL);
137
+ if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX
138
+ && pfn_t_to_page(pfn)->pgmap == pgmap
139
+ && pfn_t_to_page(end_pfn)->pgmap == pgmap
140
+ && pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr))
141
+ && pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr)))
151142 dax_enabled = true;
152143 put_dev_pagemap(pgmap);
144
+ put_dev_pagemap(end_pgmap);
145
+
153146 }
147
+ dax_read_unlock(id);
154148
155149 if (!dax_enabled) {
156
- pr_debug("%s: error: dax support not enabled\n",
150
+ pr_info("%s: error: dax support not enabled\n",
157151 bdevname(bdev, buf));
158152 return false;
159153 }
160154 return true;
155
+}
156
+EXPORT_SYMBOL_GPL(__generic_fsdax_supported);
157
+
158
+/**
159
+ * __bdev_dax_supported() - Check if the device supports dax for filesystem
160
+ * @bdev: block device to check
161
+ * @blocksize: The block size of the device
162
+ *
163
+ * This is a library function for filesystems to check if the block device
164
+ * can be mounted with dax option.
165
+ *
166
+ * Return: true if supported, false if unsupported
167
+ */
168
+bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
169
+{
170
+ struct dax_device *dax_dev;
171
+ struct request_queue *q;
172
+ char buf[BDEVNAME_SIZE];
173
+ bool ret;
174
+ int id;
175
+
176
+ q = bdev_get_queue(bdev);
177
+ if (!q || !blk_queue_dax(q)) {
178
+ pr_debug("%s: error: request queue doesn't support dax\n",
179
+ bdevname(bdev, buf));
180
+ return false;
181
+ }
182
+
183
+ dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
184
+ if (!dax_dev) {
185
+ pr_debug("%s: error: device does not support dax\n",
186
+ bdevname(bdev, buf));
187
+ return false;
188
+ }
189
+
190
+ id = dax_read_lock();
191
+ ret = dax_supported(dax_dev, bdev, blocksize, 0,
192
+ i_size_read(bdev->bd_inode) / 512);
193
+ dax_read_unlock(id);
194
+
195
+ put_dax(dax_dev);
196
+
197
+ return ret;
161198 }
162199 EXPORT_SYMBOL_GPL(__bdev_dax_supported);
163200 #endif
....@@ -167,6 +204,8 @@
167204 DAXDEV_ALIVE,
168205 /* gate whether dax_flush() calls the low level flush routine */
169206 DAXDEV_WRITE_CACHE,
207
+ /* flag to check if device supports synchronous flush */
208
+ DAXDEV_SYNC,
170209 };
171210
172211 /**
....@@ -284,6 +323,19 @@
284323 }
285324 EXPORT_SYMBOL_GPL(dax_direct_access);
286325
326
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
327
+ int blocksize, sector_t start, sector_t len)
328
+{
329
+ if (!dax_dev)
330
+ return false;
331
+
332
+ if (!dax_alive(dax_dev))
333
+ return false;
334
+
335
+ return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
336
+}
337
+EXPORT_SYMBOL_GPL(dax_supported);
338
+
287339 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
288340 size_t bytes, struct iov_iter *i)
289341 {
....@@ -303,6 +355,23 @@
303355 return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i);
304356 }
305357 EXPORT_SYMBOL_GPL(dax_copy_to_iter);
358
+
359
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
360
+ size_t nr_pages)
361
+{
362
+ if (!dax_alive(dax_dev))
363
+ return -ENXIO;
364
+ /*
365
+ * There are no callers that want to zero more than one page as of now.
366
+ * Once users are there, this check can be removed after the
367
+ * device mapper code has been updated to split ranges across targets.
368
+ */
369
+ if (nr_pages != 1)
370
+ return -EIO;
371
+
372
+ return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
373
+}
374
+EXPORT_SYMBOL_GPL(dax_zero_page_range);
306375
307376 #ifdef CONFIG_ARCH_HAS_PMEM_API
308377 void arch_wb_cache_pmem(void *addr, size_t size);
....@@ -335,6 +404,18 @@
335404 }
336405 EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
337406
407
+bool __dax_synchronous(struct dax_device *dax_dev)
408
+{
409
+ return test_bit(DAXDEV_SYNC, &dax_dev->flags);
410
+}
411
+EXPORT_SYMBOL_GPL(__dax_synchronous);
412
+
413
+void __set_dax_synchronous(struct dax_device *dax_dev)
414
+{
415
+ set_bit(DAXDEV_SYNC, &dax_dev->flags);
416
+}
417
+EXPORT_SYMBOL_GPL(__set_dax_synchronous);
418
+
338419 bool dax_alive(struct dax_device *dax_dev)
339420 {
340421 lockdep_assert_held(&dax_srcu);
....@@ -365,10 +446,14 @@
365446 spin_lock(&dax_host_lock);
366447 hlist_del_init(&dax_dev->list);
367448 spin_unlock(&dax_host_lock);
368
-
369
- dax_dev->private = NULL;
370449 }
371450 EXPORT_SYMBOL_GPL(kill_dax);
451
+
452
+void run_dax(struct dax_device *dax_dev)
453
+{
454
+ set_bit(DAXDEV_ALIVE, &dax_dev->flags);
455
+}
456
+EXPORT_SYMBOL_GPL(run_dax);
372457
373458 static struct inode *dax_alloc_inode(struct super_block *sb)
374459 {
....@@ -389,11 +474,9 @@
389474 return container_of(inode, struct dax_device, inode);
390475 }
391476
392
-static void dax_i_callback(struct rcu_head *head)
477
+static void dax_free_inode(struct inode *inode)
393478 {
394
- struct inode *inode = container_of(head, struct inode, i_rcu);
395479 struct dax_device *dax_dev = to_dax_dev(inode);
396
-
397480 kfree(dax_dev->host);
398481 dax_dev->host = NULL;
399482 if (inode->i_rdev)
....@@ -404,29 +487,31 @@
404487 static void dax_destroy_inode(struct inode *inode)
405488 {
406489 struct dax_device *dax_dev = to_dax_dev(inode);
407
-
408490 WARN_ONCE(test_bit(DAXDEV_ALIVE, &dax_dev->flags),
409491 "kill_dax() must be called before final iput()\n");
410
- call_rcu(&inode->i_rcu, dax_i_callback);
411492 }
412493
413494 static const struct super_operations dax_sops = {
414495 .statfs = simple_statfs,
415496 .alloc_inode = dax_alloc_inode,
416497 .destroy_inode = dax_destroy_inode,
498
+ .free_inode = dax_free_inode,
417499 .drop_inode = generic_delete_inode,
418500 };
419501
420
-static struct dentry *dax_mount(struct file_system_type *fs_type,
421
- int flags, const char *dev_name, void *data)
502
+static int dax_init_fs_context(struct fs_context *fc)
422503 {
423
- return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
504
+ struct pseudo_fs_context *ctx = init_pseudo(fc, DAXFS_MAGIC);
505
+ if (!ctx)
506
+ return -ENOMEM;
507
+ ctx->ops = &dax_sops;
508
+ return 0;
424509 }
425510
426511 static struct file_system_type dax_fs_type = {
427
- .name = "dax",
428
- .mount = dax_mount,
429
- .kill_sb = kill_anon_super,
512
+ .name = "dax",
513
+ .init_fs_context = dax_init_fs_context,
514
+ .kill_sb = kill_anon_super,
430515 };
431516
432517 static int dax_test(struct inode *inode, void *data)
....@@ -488,16 +573,23 @@
488573 }
489574
490575 struct dax_device *alloc_dax(void *private, const char *__host,
491
- const struct dax_operations *ops)
576
+ const struct dax_operations *ops, unsigned long flags)
492577 {
493578 struct dax_device *dax_dev;
494579 const char *host;
495580 dev_t devt;
496581 int minor;
497582
583
+ if (ops && !ops->zero_page_range) {
584
+ pr_debug("%s: error: device does not provide dax"
585
+ " operation zero_page_range()\n",
586
+ __host ? __host : "Unknown");
587
+ return ERR_PTR(-EINVAL);
588
+ }
589
+
498590 host = kstrdup(__host, GFP_KERNEL);
499591 if (__host && !host)
500
- return NULL;
592
+ return ERR_PTR(-ENOMEM);
501593
502594 minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL);
503595 if (minor < 0)
....@@ -511,13 +603,16 @@
511603 dax_add_host(dax_dev, host);
512604 dax_dev->ops = ops;
513605 dax_dev->private = private;
606
+ if (flags & DAXDEV_F_SYNC)
607
+ set_dax_synchronous(dax_dev);
608
+
514609 return dax_dev;
515610
516611 err_dev:
517612 ida_simple_remove(&dax_minor_ida, minor);
518613 err_minor:
519614 kfree(host);
520
- return NULL;
615
+ return ERR_PTR(-ENOMEM);
521616 }
522617 EXPORT_SYMBOL_GPL(alloc_dax);
523618
....@@ -584,6 +679,8 @@
584679
585680 void *dax_get_private(struct dax_device *dax_dev)
586681 {
682
+ if (!test_bit(DAXDEV_ALIVE, &dax_dev->flags))
683
+ return NULL;
587684 return dax_dev->private;
588685 }
589686 EXPORT_SYMBOL_GPL(dax_get_private);
....@@ -597,7 +694,7 @@
597694 inode_init_once(inode);
598695 }
599696
600
-static int __dax_fs_init(void)
697
+static int dax_fs_init(void)
601698 {
602699 int rc;
603700
....@@ -607,10 +704,6 @@
607704 init_once);
608705 if (!dax_cache)
609706 return -ENOMEM;
610
-
611
- rc = register_filesystem(&dax_fs_type);
612
- if (rc)
613
- goto err_register_fs;
614707
615708 dax_mnt = kern_mount(&dax_fs_type);
616709 if (IS_ERR(dax_mnt)) {
....@@ -622,42 +715,51 @@
622715 return 0;
623716
624717 err_mount:
625
- unregister_filesystem(&dax_fs_type);
626
- err_register_fs:
627718 kmem_cache_destroy(dax_cache);
628719
629720 return rc;
630721 }
631722
632
-static void __dax_fs_exit(void)
723
+static void dax_fs_exit(void)
633724 {
634725 kern_unmount(dax_mnt);
635
- unregister_filesystem(&dax_fs_type);
726
+ rcu_barrier();
636727 kmem_cache_destroy(dax_cache);
637728 }
638729
639
-static int __init dax_fs_init(void)
730
+static int __init dax_core_init(void)
640731 {
641732 int rc;
642733
643
- rc = __dax_fs_init();
734
+ rc = dax_fs_init();
644735 if (rc)
645736 return rc;
646737
647738 rc = alloc_chrdev_region(&dax_devt, 0, MINORMASK+1, "dax");
648739 if (rc)
649
- __dax_fs_exit();
650
- return rc;
740
+ goto err_chrdev;
741
+
742
+ rc = dax_bus_init();
743
+ if (rc)
744
+ goto err_bus;
745
+ return 0;
746
+
747
+err_bus:
748
+ unregister_chrdev_region(dax_devt, MINORMASK+1);
749
+err_chrdev:
750
+ dax_fs_exit();
751
+ return 0;
651752 }
652753
653
-static void __exit dax_fs_exit(void)
754
+static void __exit dax_core_exit(void)
654755 {
756
+ dax_bus_exit();
655757 unregister_chrdev_region(dax_devt, MINORMASK+1);
656758 ida_destroy(&dax_minor_ida);
657
- __dax_fs_exit();
759
+ dax_fs_exit();
658760 }
659761
660762 MODULE_AUTHOR("Intel Corporation");
661763 MODULE_LICENSE("GPL v2");
662
-subsys_initcall(dax_fs_init);
663
-module_exit(dax_fs_exit);
764
+subsys_initcall(dax_core_init);
765
+module_exit(dax_core_exit);