From 01573e231f18eb2d99162747186f59511f56b64d Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Fri, 08 Dec 2023 10:40:48 +0000
Subject: [PATCH] 移去rt (remove rt)
---
kernel/fs/hugetlbfs/inode.c | 541 ++++++++++++++++++++++++++++++++++-------------------
1 files changed, 348 insertions(+), 193 deletions(-)
diff --git a/kernel/fs/hugetlbfs/inode.c b/kernel/fs/hugetlbfs/inode.c
index 806573e..a2f43f1 100644
--- a/kernel/fs/hugetlbfs/inode.c
+++ b/kernel/fs/hugetlbfs/inode.c
@@ -27,7 +27,7 @@
#include <linux/backing-dev.h>
#include <linux/hugetlb.h>
#include <linux/pagevec.h>
-#include <linux/parser.h>
+#include <linux/fs_parser.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/dnotify.h>
@@ -38,6 +38,7 @@
#include <linux/uio.h>
#include <linux/uaccess.h>
+#include <linux/sched/mm.h>
static const struct super_operations hugetlbfs_ops;
static const struct address_space_operations hugetlbfs_aops;
@@ -45,11 +46,17 @@
static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations;
-struct hugetlbfs_config {
+enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
+
+struct hugetlbfs_fs_context {
struct hstate *hstate;
+ unsigned long long max_size_opt;
+ unsigned long long min_size_opt;
long max_hpages;
long nr_inodes;
long min_hpages;
+ enum hugetlbfs_size_type max_val_type;
+ enum hugetlbfs_size_type min_val_type;
kuid_t uid;
kgid_t gid;
umode_t mode;
@@ -57,22 +64,25 @@
int sysctl_hugetlb_shm_group;
-enum {
- Opt_size, Opt_nr_inodes,
- Opt_mode, Opt_uid, Opt_gid,
- Opt_pagesize, Opt_min_size,
- Opt_err,
+enum hugetlb_param {
+ Opt_gid,
+ Opt_min_size,
+ Opt_mode,
+ Opt_nr_inodes,
+ Opt_pagesize,
+ Opt_size,
+ Opt_uid,
};
-static const match_table_t tokens = {
- {Opt_size, "size=%s"},
- {Opt_nr_inodes, "nr_inodes=%s"},
- {Opt_mode, "mode=%o"},
- {Opt_uid, "uid=%u"},
- {Opt_gid, "gid=%u"},
- {Opt_pagesize, "pagesize=%s"},
- {Opt_min_size, "min_size=%s"},
- {Opt_err, NULL},
+static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
+ fsparam_u32 ("gid", Opt_gid),
+ fsparam_string("min_size", Opt_min_size),
+ fsparam_u32oct("mode", Opt_mode),
+ fsparam_string("nr_inodes", Opt_nr_inodes),
+ fsparam_string("pagesize", Opt_pagesize),
+ fsparam_string("size", Opt_size),
+ fsparam_u32 ("uid", Opt_uid),
+ {}
};
#ifdef CONFIG_NUMA
@@ -121,6 +131,7 @@
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
+ struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
@@ -130,11 +141,15 @@
* already been checked by prepare_hugepage_range. If you add
* any error returns here, do so after setting VM_HUGETLB, so
* is_vm_hugetlb_page tests below unmap_region go the right
- * way when do_mmap_pgoff unwinds (may be important on powerpc
+ * way when do_mmap unwinds (may be important on powerpc
* and ia64).
*/
vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma->vm_ops = &hugetlb_vm_ops;
+
+ ret = seal_check_future_write(info->seals, vma);
+ if (ret)
+ return ret;
/*
* page based offset in vm_pgoff could be sufficiently large to
@@ -177,10 +192,58 @@
}
/*
- * Called under down_write(mmap_sem).
+ * Called under mmap_write_lock(mm).
*/
#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+static unsigned long
+hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = current->mm->mmap_base;
+ info.high_limit = arch_get_mmap_end(addr);
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
+}
+
+static unsigned long
+hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+ info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ addr = vm_unmapped_area(&info);
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ if (unlikely(offset_in_page(addr))) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = current->mm->mmap_base;
+ info.high_limit = arch_get_mmap_end(addr);
+ addr = vm_unmapped_area(&info);
+ }
+
+ return addr;
+}
+
static unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
@@ -188,7 +251,7 @@
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ const unsigned long mmap_end = arch_get_mmap_end(addr);
if (len & ~huge_page_mask(h))
return -EINVAL;
@@ -204,18 +267,21 @@
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
+ if (mmap_end - len >= addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
- info.flags = 0;
- info.length = len;
- info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = TASK_SIZE;
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
- return vm_unmapped_area(&info);
+ /*
+ * Use mm->get_unmapped_area value as a hint to use topdown routine.
+ * If architectures have special needs, they should define their own
+ * version of hugetlb_get_unmapped_area.
+ */
+ if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
+ return hugetlb_get_unmapped_area_topdown(file, addr, len,
+ pgoff, flags);
+ return hugetlb_get_unmapped_area_bottomup(file, addr, len,
+ pgoff, flags);
}
#endif
@@ -384,10 +450,9 @@
* In this case, we first scan the range and release found pages.
* After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
* maps and global counts. Page faults can not race with truncation
- * in this routine. hugetlb_no_page() prevents page faults in the
- * truncated range. It checks i_size before allocation, and again after
- * with the page table lock for the page held. The same lock must be
- * acquired to unmap a page.
+ * in this routine. hugetlb_no_page() holds i_mmap_rwsem and prevents
+ * page faults in the truncated range by checking i_size. i_size is
+ * modified while holding i_mmap_rwsem.
* hole punch is indicated if end is not LLONG_MAX
* In the hole punch case we scan the range and release found pages.
* Only when releasing a page is the associated region/reserv map
@@ -426,8 +491,16 @@
u32 hash;
index = page->index;
- hash = hugetlb_fault_mutex_hash(h, mapping, index);
- mutex_lock(&hugetlb_fault_mutex_table[hash]);
+ hash = hugetlb_fault_mutex_hash(mapping, index);
+ if (!truncate_op) {
+ /*
+ * Only need to hold the fault mutex in the
+ * hole punch case. This prevents races with
+ * page faults. Races are not possible in the
+ * case of truncation.
+ */
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+ }
/*
* If page is mapped, it was faulted in after being
@@ -441,7 +514,9 @@
if (unlikely(page_mapped(page))) {
BUG_ON(truncate_op);
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_lock_write(mapping);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
hugetlb_vmdelete_list(&mapping->i_mmap,
index * pages_per_huge_page(h),
(index + 1) * pages_per_huge_page(h));
@@ -468,7 +543,8 @@
}
unlock_page(page);
- mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ if (!truncate_op)
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
}
huge_pagevec_release(&pvec);
cond_resched();
@@ -483,8 +559,15 @@
struct resv_map *resv_map;
remove_inode_hugepages(inode, 0, LLONG_MAX);
- resv_map = (struct resv_map *)inode->i_mapping->private_data;
- /* root inode doesn't have the resv_map, so we should check it */
+
+ /*
+ * Get the resv_map from the address space embedded in the inode.
+ * This is the address space which points to any resv_map allocated
+ * at inode creation time. If this is a device special inode,
+ * i_mapping may not point to the original address space.
+ */
+ resv_map = (struct resv_map *)(&inode->i_data)->private_data;
+ /* Only regular and link inodes have associated reserve maps */
if (resv_map)
resv_map_release(&resv_map->refs);
clear_inode(inode);
@@ -499,8 +582,8 @@
BUG_ON(offset & ~huge_page_mask(h));
pgoff = offset >> PAGE_SHIFT;
- i_size_write(inode, offset);
i_mmap_lock_write(mapping);
+ i_size_write(inode, offset);
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
i_mmap_unlock_write(mapping);
@@ -622,8 +705,12 @@
/* addr is the offset within the file (zero based) */
addr = index * hpage_size;
- /* mutex taken here, fault path and hole punch */
- hash = hugetlb_fault_mutex_hash(h, mapping, index);
+ /*
+ * fault mutex taken here, protects against fault path
+ * and hole punch. inode_lock previously taken protects
+ * against truncation.
+ */
+ hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
@@ -706,16 +793,16 @@
}
static struct inode *hugetlbfs_get_root(struct super_block *sb,
- struct hugetlbfs_config *config)
+ struct hugetlbfs_fs_context *ctx)
{
struct inode *inode;
inode = new_inode(sb);
if (inode) {
inode->i_ino = get_next_ino();
- inode->i_mode = S_IFDIR | config->mode;
- inode->i_uid = config->uid;
- inode->i_gid = config->gid;
+ inode->i_mode = S_IFDIR | ctx->mode;
+ inode->i_uid = ctx->uid;
+ inode->i_gid = ctx->gid;
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
inode->i_op = &hugetlbfs_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
@@ -795,8 +882,11 @@
/*
* File creation. Allocate an inode, and we're done..
*/
-static int hugetlbfs_mknod(struct inode *dir,
- struct dentry *dentry, umode_t mode, dev_t dev)
+static int do_hugetlbfs_mknod(struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode,
+ dev_t dev,
+ bool tmpfile)
{
struct inode *inode;
int error = -ENOSPC;
@@ -804,11 +894,21 @@
inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
if (inode) {
dir->i_ctime = dir->i_mtime = current_time(dir);
- d_instantiate(dentry, inode);
- dget(dentry); /* Extra count - pin the dentry in core */
+ if (tmpfile) {
+ d_tmpfile(dentry, inode);
+ } else {
+ d_instantiate(dentry, inode);
+ dget(dentry);/* Extra count - pin the dentry in core */
+ }
error = 0;
}
return error;
+}
+
+static int hugetlbfs_mknod(struct inode *dir,
+ struct dentry *dentry, umode_t mode, dev_t dev)
+{
+ return do_hugetlbfs_mknod(dir, dentry, mode, dev, false);
}
static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
@@ -822,6 +922,12 @@
static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
{
return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
+}
+
+static int hugetlbfs_tmpfile(struct inode *dir,
+ struct dentry *dentry, umode_t mode)
+{
+ return do_hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0, true);
}
static int hugetlbfs_symlink(struct inode *dir,
@@ -1035,9 +1141,8 @@
return &p->vfs_inode;
}
-static void hugetlbfs_i_callback(struct rcu_head *head)
+static void hugetlbfs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
}
@@ -1045,7 +1150,6 @@
{
hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
- call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
}
static const struct address_space_operations hugetlbfs_aops = {
@@ -1084,6 +1188,7 @@
.mknod = hugetlbfs_mknod,
.rename = simple_rename,
.setattr = hugetlbfs_setattr,
+ .tmpfile = hugetlbfs_tmpfile,
};
static const struct inode_operations hugetlbfs_inode_operations = {
@@ -1092,14 +1197,13 @@
static const struct super_operations hugetlbfs_ops = {
.alloc_inode = hugetlbfs_alloc_inode,
+ .free_inode = hugetlbfs_free_inode,
.destroy_inode = hugetlbfs_destroy_inode,
.evict_inode = hugetlbfs_evict_inode,
.statfs = hugetlbfs_statfs,
.put_super = hugetlbfs_put_super,
.show_options = hugetlbfs_show_options,
};
-
-enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
/*
* Convert size option passed from command line to number of huge pages
@@ -1123,170 +1227,157 @@
return size_opt;
}
-static int
-hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
+/*
+ * Parse one mount parameter.
+ */
+static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- char *p, *rest;
- substring_t args[MAX_OPT_ARGS];
- int option;
- unsigned long long max_size_opt = 0, min_size_opt = 0;
- enum hugetlbfs_size_type max_val_type = NO_SIZE, min_val_type = NO_SIZE;
+ struct hugetlbfs_fs_context *ctx = fc->fs_private;
+ struct fs_parse_result result;
+ char *rest;
+ unsigned long ps;
+ int opt;
- if (!options)
+ opt = fs_parse(fc, hugetlb_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_uid:
+ ctx->uid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(ctx->uid))
+ goto bad_val;
return 0;
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
+ case Opt_gid:
+ ctx->gid = make_kgid(current_user_ns(), result.uint_32);
+ if (!gid_valid(ctx->gid))
+ goto bad_val;
+ return 0;
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_uid:
- if (match_int(&args[0], &option))
- goto bad_val;
- pconfig->uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(pconfig->uid))
- goto bad_val;
- break;
+ case Opt_mode:
+ ctx->mode = result.uint_32 & 01777U;
+ return 0;
- case Opt_gid:
- if (match_int(&args[0], &option))
- goto bad_val;
- pconfig->gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(pconfig->gid))
- goto bad_val;
- break;
+ case Opt_size:
+ /* memparse() will accept a K/M/G without a digit */
+ if (!isdigit(param->string[0]))
+ goto bad_val;
+ ctx->max_size_opt = memparse(param->string, &rest);
+ ctx->max_val_type = SIZE_STD;
+ if (*rest == '%')
+ ctx->max_val_type = SIZE_PERCENT;
+ return 0;
- case Opt_mode:
- if (match_octal(&args[0], &option))
- goto bad_val;
- pconfig->mode = option & 01777U;
- break;
+ case Opt_nr_inodes:
+ /* memparse() will accept a K/M/G without a digit */
+ if (!isdigit(param->string[0]))
+ goto bad_val;
+ ctx->nr_inodes = memparse(param->string, &rest);
+ return 0;
- case Opt_size: {
- /* memparse() will accept a K/M/G without a digit */
- if (!isdigit(*args[0].from))
- goto bad_val;
- max_size_opt = memparse(args[0].from, &rest);
- max_val_type = SIZE_STD;
- if (*rest == '%')
- max_val_type = SIZE_PERCENT;
- break;
- }
-
- case Opt_nr_inodes:
- /* memparse() will accept a K/M/G without a digit */
- if (!isdigit(*args[0].from))
- goto bad_val;
- pconfig->nr_inodes = memparse(args[0].from, &rest);
- break;
-
- case Opt_pagesize: {
- unsigned long ps;
- ps = memparse(args[0].from, &rest);
- pconfig->hstate = size_to_hstate(ps);
- if (!pconfig->hstate) {
- pr_err("Unsupported page size %lu MB\n",
- ps >> 20);
- return -EINVAL;
- }
- break;
- }
-
- case Opt_min_size: {
- /* memparse() will accept a K/M/G without a digit */
- if (!isdigit(*args[0].from))
- goto bad_val;
- min_size_opt = memparse(args[0].from, &rest);
- min_val_type = SIZE_STD;
- if (*rest == '%')
- min_val_type = SIZE_PERCENT;
- break;
- }
-
- default:
- pr_err("Bad mount option: \"%s\"\n", p);
+ case Opt_pagesize:
+ ps = memparse(param->string, &rest);
+ ctx->hstate = size_to_hstate(ps);
+ if (!ctx->hstate) {
+ pr_err("Unsupported page size %lu MB\n", ps >> 20);
return -EINVAL;
- break;
}
+ return 0;
+
+ case Opt_min_size:
+ /* memparse() will accept a K/M/G without a digit */
+ if (!isdigit(param->string[0]))
+ goto bad_val;
+ ctx->min_size_opt = memparse(param->string, &rest);
+ ctx->min_val_type = SIZE_STD;
+ if (*rest == '%')
+ ctx->min_val_type = SIZE_PERCENT;
+ return 0;
+
+ default:
+ return -EINVAL;
}
+
+bad_val:
+ return invalfc(fc, "Bad value '%s' for mount option '%s'\n",
+ param->string, param->key);
+}
+
+/*
+ * Validate the parsed options.
+ */
+static int hugetlbfs_validate(struct fs_context *fc)
+{
+ struct hugetlbfs_fs_context *ctx = fc->fs_private;
/*
* Use huge page pool size (in hstate) to convert the size
* options to number of huge pages. If NO_SIZE, -1 is returned.
*/
- pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
- max_size_opt, max_val_type);
- pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
- min_size_opt, min_val_type);
+ ctx->max_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
+ ctx->max_size_opt,
+ ctx->max_val_type);
+ ctx->min_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
+ ctx->min_size_opt,
+ ctx->min_val_type);
/*
* If max_size was specified, then min_size must be smaller
*/
- if (max_val_type > NO_SIZE &&
- pconfig->min_hpages > pconfig->max_hpages) {
- pr_err("minimum size can not be greater than maximum size\n");
+ if (ctx->max_val_type > NO_SIZE &&
+ ctx->min_hpages > ctx->max_hpages) {
+ pr_err("Minimum size can not be greater than maximum size\n");
return -EINVAL;
}
return 0;
-
-bad_val:
- pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p);
- return -EINVAL;
}
static int
-hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
+hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
- int ret;
- struct hugetlbfs_config config;
+ struct hugetlbfs_fs_context *ctx = fc->fs_private;
struct hugetlbfs_sb_info *sbinfo;
-
- config.max_hpages = -1; /* No limit on size by default */
- config.nr_inodes = -1; /* No limit on number of inodes by default */
- config.uid = current_fsuid();
- config.gid = current_fsgid();
- config.mode = 0755;
- config.hstate = &default_hstate;
- config.min_hpages = -1; /* No default minimum size */
- ret = hugetlbfs_parse_options(data, &config);
- if (ret)
- return ret;
sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
if (!sbinfo)
return -ENOMEM;
sb->s_fs_info = sbinfo;
- sbinfo->hstate = config.hstate;
spin_lock_init(&sbinfo->stat_lock);
- sbinfo->max_inodes = config.nr_inodes;
- sbinfo->free_inodes = config.nr_inodes;
- sbinfo->spool = NULL;
- sbinfo->uid = config.uid;
- sbinfo->gid = config.gid;
- sbinfo->mode = config.mode;
+ sbinfo->hstate = ctx->hstate;
+ sbinfo->max_inodes = ctx->nr_inodes;
+ sbinfo->free_inodes = ctx->nr_inodes;
+ sbinfo->spool = NULL;
+ sbinfo->uid = ctx->uid;
+ sbinfo->gid = ctx->gid;
+ sbinfo->mode = ctx->mode;
/*
* Allocate and initialize subpool if maximum or minimum size is
* specified. Any needed reservations (for minimim size) are taken
* taken when the subpool is created.
*/
- if (config.max_hpages != -1 || config.min_hpages != -1) {
- sbinfo->spool = hugepage_new_subpool(config.hstate,
- config.max_hpages,
- config.min_hpages);
+ if (ctx->max_hpages != -1 || ctx->min_hpages != -1) {
+ sbinfo->spool = hugepage_new_subpool(ctx->hstate,
+ ctx->max_hpages,
+ ctx->min_hpages);
if (!sbinfo->spool)
goto out_free;
}
sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_blocksize = huge_page_size(config.hstate);
- sb->s_blocksize_bits = huge_page_shift(config.hstate);
+ sb->s_blocksize = huge_page_size(ctx->hstate);
+ sb->s_blocksize_bits = huge_page_shift(ctx->hstate);
sb->s_magic = HUGETLBFS_MAGIC;
sb->s_op = &hugetlbfs_ops;
sb->s_time_gran = 1;
- sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
+
+ /*
+ * Due to the special and limited functionality of hugetlbfs, it does
+ * not work well as a stacking filesystem.
+ */
+ sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
+ sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx));
if (!sb->s_root)
goto out_free;
return 0;
@@ -1296,16 +1387,52 @@
return -ENOMEM;
}
-static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int hugetlbfs_get_tree(struct fs_context *fc)
{
- return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
+ int err = hugetlbfs_validate(fc);
+ if (err)
+ return err;
+ return get_tree_nodev(fc, hugetlbfs_fill_super);
+}
+
+static void hugetlbfs_fs_context_free(struct fs_context *fc)
+{
+ kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations hugetlbfs_fs_context_ops = {
+ .free = hugetlbfs_fs_context_free,
+ .parse_param = hugetlbfs_parse_param,
+ .get_tree = hugetlbfs_get_tree,
+};
+
+static int hugetlbfs_init_fs_context(struct fs_context *fc)
+{
+ struct hugetlbfs_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct hugetlbfs_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->max_hpages = -1; /* No limit on size by default */
+ ctx->nr_inodes = -1; /* No limit on number of inodes by default */
+ ctx->uid = current_fsuid();
+ ctx->gid = current_fsgid();
+ ctx->mode = 0755;
+ ctx->hstate = &default_hstate;
+ ctx->min_hpages = -1; /* No default minimum size */
+ ctx->max_val_type = NO_SIZE;
+ ctx->min_val_type = NO_SIZE;
+ fc->fs_private = ctx;
+ fc->ops = &hugetlbfs_fs_context_ops;
+ return 0;
}
static struct file_system_type hugetlbfs_fs_type = {
- .name = "hugetlbfs",
- .mount = hugetlbfs_mount,
- .kill_sb = kill_litter_super,
+ .name = "hugetlbfs",
+ .init_fs_context = hugetlbfs_init_fs_context,
+ .parameters = hugetlb_fs_parameters,
+ .kill_sb = kill_litter_super,
};
static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
@@ -1390,8 +1517,29 @@
return file;
}
+static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
+{
+ struct fs_context *fc;
+ struct vfsmount *mnt;
+
+ fc = fs_context_for_mount(&hugetlbfs_fs_type, SB_KERNMOUNT);
+ if (IS_ERR(fc)) {
+ mnt = ERR_CAST(fc);
+ } else {
+ struct hugetlbfs_fs_context *ctx = fc->fs_private;
+ ctx->hstate = h;
+ mnt = fc_mount(fc);
+ put_fs_context(fc);
+ }
+ if (IS_ERR(mnt))
+ pr_err("Cannot mount internal hugetlbfs for page size %uK",
+ 1U << (h->order + PAGE_SHIFT - 10));
+ return mnt;
+}
+
static int __init init_hugetlbfs_fs(void)
{
+ struct vfsmount *mnt;
struct hstate *h;
int error;
int i;
@@ -1406,36 +1554,43 @@
sizeof(struct hugetlbfs_inode_info),
0, SLAB_ACCOUNT, init_once);
if (hugetlbfs_inode_cachep == NULL)
- goto out2;
+ goto out;
error = register_filesystem(&hugetlbfs_fs_type);
if (error)
- goto out;
+ goto out_free;
+ /* default hstate mount is required */
+ mnt = mount_one_hugetlbfs(&hstates[default_hstate_idx]);
+ if (IS_ERR(mnt)) {
+ error = PTR_ERR(mnt);
+ goto out_unreg;
+ }
+ hugetlbfs_vfsmount[default_hstate_idx] = mnt;
+
+ /* other hstates are optional */
i = 0;
for_each_hstate(h) {
- char buf[50];
- unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10);
-
- snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb);
- hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type,
- buf);
-
- if (IS_ERR(hugetlbfs_vfsmount[i])) {
- pr_err("Cannot mount internal hugetlbfs for "
- "page size %uK", ps_kb);
- error = PTR_ERR(hugetlbfs_vfsmount[i]);
- hugetlbfs_vfsmount[i] = NULL;
+ if (i == default_hstate_idx) {
+ i++;
+ continue;
}
+
+ mnt = mount_one_hugetlbfs(h);
+ if (IS_ERR(mnt))
+ hugetlbfs_vfsmount[i] = NULL;
+ else
+ hugetlbfs_vfsmount[i] = mnt;
i++;
}
- /* Non default hstates are optional */
- if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx]))
- return 0;
- out:
+ return 0;
+
+ out_unreg:
+ (void)unregister_filesystem(&hugetlbfs_fs_type);
+ out_free:
kmem_cache_destroy(hugetlbfs_inode_cachep);
- out2:
+ out:
return error;
}
fs_initcall(init_hugetlbfs_fs)
--
Gitblit v1.6.2