hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/hugetlbfs/inode.c
....@@ -27,7 +27,7 @@
2727 #include <linux/backing-dev.h>
2828 #include <linux/hugetlb.h>
2929 #include <linux/pagevec.h>
30
-#include <linux/parser.h>
30
+#include <linux/fs_parser.h>
3131 #include <linux/mman.h>
3232 #include <linux/slab.h>
3333 #include <linux/dnotify.h>
....@@ -38,6 +38,7 @@
3838 #include <linux/uio.h>
3939
4040 #include <linux/uaccess.h>
41
+#include <linux/sched/mm.h>
4142
4243 static const struct super_operations hugetlbfs_ops;
4344 static const struct address_space_operations hugetlbfs_aops;
....@@ -45,11 +46,17 @@
4546 static const struct inode_operations hugetlbfs_dir_inode_operations;
4647 static const struct inode_operations hugetlbfs_inode_operations;
4748
48
-struct hugetlbfs_config {
49
+enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
50
+
51
+struct hugetlbfs_fs_context {
4952 struct hstate *hstate;
53
+ unsigned long long max_size_opt;
54
+ unsigned long long min_size_opt;
5055 long max_hpages;
5156 long nr_inodes;
5257 long min_hpages;
58
+ enum hugetlbfs_size_type max_val_type;
59
+ enum hugetlbfs_size_type min_val_type;
5360 kuid_t uid;
5461 kgid_t gid;
5562 umode_t mode;
....@@ -57,22 +64,25 @@
5764
5865 int sysctl_hugetlb_shm_group;
5966
60
-enum {
61
- Opt_size, Opt_nr_inodes,
62
- Opt_mode, Opt_uid, Opt_gid,
63
- Opt_pagesize, Opt_min_size,
64
- Opt_err,
67
+enum hugetlb_param {
68
+ Opt_gid,
69
+ Opt_min_size,
70
+ Opt_mode,
71
+ Opt_nr_inodes,
72
+ Opt_pagesize,
73
+ Opt_size,
74
+ Opt_uid,
6575 };
6676
67
-static const match_table_t tokens = {
68
- {Opt_size, "size=%s"},
69
- {Opt_nr_inodes, "nr_inodes=%s"},
70
- {Opt_mode, "mode=%o"},
71
- {Opt_uid, "uid=%u"},
72
- {Opt_gid, "gid=%u"},
73
- {Opt_pagesize, "pagesize=%s"},
74
- {Opt_min_size, "min_size=%s"},
75
- {Opt_err, NULL},
77
+static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
78
+ fsparam_u32 ("gid", Opt_gid),
79
+ fsparam_string("min_size", Opt_min_size),
80
+ fsparam_u32oct("mode", Opt_mode),
81
+ fsparam_string("nr_inodes", Opt_nr_inodes),
82
+ fsparam_string("pagesize", Opt_pagesize),
83
+ fsparam_string("size", Opt_size),
84
+ fsparam_u32 ("uid", Opt_uid),
85
+ {}
7686 };
7787
7888 #ifdef CONFIG_NUMA
....@@ -121,6 +131,7 @@
121131 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
122132 {
123133 struct inode *inode = file_inode(file);
134
+ struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
124135 loff_t len, vma_len;
125136 int ret;
126137 struct hstate *h = hstate_file(file);
....@@ -130,11 +141,15 @@
130141 * already been checked by prepare_hugepage_range. If you add
131142 * any error returns here, do so after setting VM_HUGETLB, so
132143 * is_vm_hugetlb_page tests below unmap_region go the right
133
- * way when do_mmap_pgoff unwinds (may be important on powerpc
144
+ * way when do_mmap unwinds (may be important on powerpc
134145 * and ia64).
135146 */
136147 vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
137148 vma->vm_ops = &hugetlb_vm_ops;
149
+
150
+ ret = seal_check_future_write(info->seals, vma);
151
+ if (ret)
152
+ return ret;
138153
139154 /*
140155 * page based offset in vm_pgoff could be sufficiently large to
....@@ -177,10 +192,58 @@
177192 }
178193
179194 /*
180
- * Called under down_write(mmap_sem).
195
+ * Called under mmap_write_lock(mm).
181196 */
182197
183198 #ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
199
+static unsigned long
200
+hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
201
+ unsigned long len, unsigned long pgoff, unsigned long flags)
202
+{
203
+ struct hstate *h = hstate_file(file);
204
+ struct vm_unmapped_area_info info;
205
+
206
+ info.flags = 0;
207
+ info.length = len;
208
+ info.low_limit = current->mm->mmap_base;
209
+ info.high_limit = arch_get_mmap_end(addr);
210
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
211
+ info.align_offset = 0;
212
+ return vm_unmapped_area(&info);
213
+}
214
+
215
+static unsigned long
216
+hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
217
+ unsigned long len, unsigned long pgoff, unsigned long flags)
218
+{
219
+ struct hstate *h = hstate_file(file);
220
+ struct vm_unmapped_area_info info;
221
+
222
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
223
+ info.length = len;
224
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
225
+ info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
226
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
227
+ info.align_offset = 0;
228
+ addr = vm_unmapped_area(&info);
229
+
230
+ /*
231
+ * A failed mmap() very likely causes application failure,
232
+ * so fall back to the bottom-up function here. This scenario
233
+ * can happen with large stack limits and large mmap()
234
+ * allocations.
235
+ */
236
+ if (unlikely(offset_in_page(addr))) {
237
+ VM_BUG_ON(addr != -ENOMEM);
238
+ info.flags = 0;
239
+ info.low_limit = current->mm->mmap_base;
240
+ info.high_limit = arch_get_mmap_end(addr);
241
+ addr = vm_unmapped_area(&info);
242
+ }
243
+
244
+ return addr;
245
+}
246
+
184247 static unsigned long
185248 hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
186249 unsigned long len, unsigned long pgoff, unsigned long flags)
....@@ -188,7 +251,7 @@
188251 struct mm_struct *mm = current->mm;
189252 struct vm_area_struct *vma;
190253 struct hstate *h = hstate_file(file);
191
- struct vm_unmapped_area_info info;
254
+ const unsigned long mmap_end = arch_get_mmap_end(addr);
192255
193256 if (len & ~huge_page_mask(h))
194257 return -EINVAL;
....@@ -204,18 +267,21 @@
204267 if (addr) {
205268 addr = ALIGN(addr, huge_page_size(h));
206269 vma = find_vma(mm, addr);
207
- if (TASK_SIZE - len >= addr &&
270
+ if (mmap_end - len >= addr &&
208271 (!vma || addr + len <= vm_start_gap(vma)))
209272 return addr;
210273 }
211274
212
- info.flags = 0;
213
- info.length = len;
214
- info.low_limit = TASK_UNMAPPED_BASE;
215
- info.high_limit = TASK_SIZE;
216
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
217
- info.align_offset = 0;
218
- return vm_unmapped_area(&info);
275
+ /*
276
+ * Use mm->get_unmapped_area value as a hint to use topdown routine.
277
+ * If architectures have special needs, they should define their own
278
+ * version of hugetlb_get_unmapped_area.
279
+ */
280
+ if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
281
+ return hugetlb_get_unmapped_area_topdown(file, addr, len,
282
+ pgoff, flags);
283
+ return hugetlb_get_unmapped_area_bottomup(file, addr, len,
284
+ pgoff, flags);
219285 }
220286 #endif
221287
....@@ -384,10 +450,9 @@
384450 * In this case, we first scan the range and release found pages.
385451 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
386452 * maps and global counts. Page faults can not race with truncation
387
- * in this routine. hugetlb_no_page() prevents page faults in the
388
- * truncated range. It checks i_size before allocation, and again after
389
- * with the page table lock for the page held. The same lock must be
390
- * acquired to unmap a page.
453
+ * in this routine. hugetlb_no_page() holds i_mmap_rwsem and prevents
454
+ * page faults in the truncated range by checking i_size. i_size is
455
+ * modified while holding i_mmap_rwsem.
391456 * hole punch is indicated if end is not LLONG_MAX
392457 * In the hole punch case we scan the range and release found pages.
393458 * Only when releasing a page is the associated region/reserv map
....@@ -426,8 +491,16 @@
426491 u32 hash;
427492
428493 index = page->index;
429
- hash = hugetlb_fault_mutex_hash(h, mapping, index);
430
- mutex_lock(&hugetlb_fault_mutex_table[hash]);
494
+ hash = hugetlb_fault_mutex_hash(mapping, index);
495
+ if (!truncate_op) {
496
+ /*
497
+ * Only need to hold the fault mutex in the
498
+ * hole punch case. This prevents races with
499
+ * page faults. Races are not possible in the
500
+ * case of truncation.
501
+ */
502
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
503
+ }
431504
432505 /*
433506 * If page is mapped, it was faulted in after being
....@@ -441,7 +514,9 @@
441514 if (unlikely(page_mapped(page))) {
442515 BUG_ON(truncate_op);
443516
517
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
444518 i_mmap_lock_write(mapping);
519
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
445520 hugetlb_vmdelete_list(&mapping->i_mmap,
446521 index * pages_per_huge_page(h),
447522 (index + 1) * pages_per_huge_page(h));
....@@ -468,7 +543,8 @@
468543 }
469544
470545 unlock_page(page);
471
- mutex_unlock(&hugetlb_fault_mutex_table[hash]);
546
+ if (!truncate_op)
547
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
472548 }
473549 huge_pagevec_release(&pvec);
474550 cond_resched();
....@@ -483,8 +559,15 @@
483559 struct resv_map *resv_map;
484560
485561 remove_inode_hugepages(inode, 0, LLONG_MAX);
486
- resv_map = (struct resv_map *)inode->i_mapping->private_data;
487
- /* root inode doesn't have the resv_map, so we should check it */
562
+
563
+ /*
564
+ * Get the resv_map from the address space embedded in the inode.
565
+ * This is the address space which points to any resv_map allocated
566
+ * at inode creation time. If this is a device special inode,
567
+ * i_mapping may not point to the original address space.
568
+ */
569
+ resv_map = (struct resv_map *)(&inode->i_data)->private_data;
570
+ /* Only regular and link inodes have associated reserve maps */
488571 if (resv_map)
489572 resv_map_release(&resv_map->refs);
490573 clear_inode(inode);
....@@ -499,8 +582,8 @@
499582 BUG_ON(offset & ~huge_page_mask(h));
500583 pgoff = offset >> PAGE_SHIFT;
501584
502
- i_size_write(inode, offset);
503585 i_mmap_lock_write(mapping);
586
+ i_size_write(inode, offset);
504587 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
505588 hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
506589 i_mmap_unlock_write(mapping);
....@@ -622,8 +705,12 @@
622705 /* addr is the offset within the file (zero based) */
623706 addr = index * hpage_size;
624707
625
- /* mutex taken here, fault path and hole punch */
626
- hash = hugetlb_fault_mutex_hash(h, mapping, index);
708
+ /*
709
+ * fault mutex taken here, protects against fault path
710
+ * and hole punch. inode_lock previously taken protects
711
+ * against truncation.
712
+ */
713
+ hash = hugetlb_fault_mutex_hash(mapping, index);
627714 mutex_lock(&hugetlb_fault_mutex_table[hash]);
628715
629716 /* See if already present in mapping to avoid alloc/free */
....@@ -706,16 +793,16 @@
706793 }
707794
708795 static struct inode *hugetlbfs_get_root(struct super_block *sb,
709
- struct hugetlbfs_config *config)
796
+ struct hugetlbfs_fs_context *ctx)
710797 {
711798 struct inode *inode;
712799
713800 inode = new_inode(sb);
714801 if (inode) {
715802 inode->i_ino = get_next_ino();
716
- inode->i_mode = S_IFDIR | config->mode;
717
- inode->i_uid = config->uid;
718
- inode->i_gid = config->gid;
803
+ inode->i_mode = S_IFDIR | ctx->mode;
804
+ inode->i_uid = ctx->uid;
805
+ inode->i_gid = ctx->gid;
719806 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
720807 inode->i_op = &hugetlbfs_dir_inode_operations;
721808 inode->i_fop = &simple_dir_operations;
....@@ -795,8 +882,11 @@
795882 /*
796883 * File creation. Allocate an inode, and we're done..
797884 */
798
-static int hugetlbfs_mknod(struct inode *dir,
799
- struct dentry *dentry, umode_t mode, dev_t dev)
885
+static int do_hugetlbfs_mknod(struct inode *dir,
886
+ struct dentry *dentry,
887
+ umode_t mode,
888
+ dev_t dev,
889
+ bool tmpfile)
800890 {
801891 struct inode *inode;
802892 int error = -ENOSPC;
....@@ -804,11 +894,21 @@
804894 inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
805895 if (inode) {
806896 dir->i_ctime = dir->i_mtime = current_time(dir);
807
- d_instantiate(dentry, inode);
808
- dget(dentry); /* Extra count - pin the dentry in core */
897
+ if (tmpfile) {
898
+ d_tmpfile(dentry, inode);
899
+ } else {
900
+ d_instantiate(dentry, inode);
901
+ dget(dentry);/* Extra count - pin the dentry in core */
902
+ }
809903 error = 0;
810904 }
811905 return error;
906
+}
907
+
908
+static int hugetlbfs_mknod(struct inode *dir,
909
+ struct dentry *dentry, umode_t mode, dev_t dev)
910
+{
911
+ return do_hugetlbfs_mknod(dir, dentry, mode, dev, false);
812912 }
813913
814914 static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
....@@ -822,6 +922,12 @@
822922 static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
823923 {
824924 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
925
+}
926
+
927
+static int hugetlbfs_tmpfile(struct inode *dir,
928
+ struct dentry *dentry, umode_t mode)
929
+{
930
+ return do_hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0, true);
825931 }
826932
827933 static int hugetlbfs_symlink(struct inode *dir,
....@@ -1035,9 +1141,8 @@
10351141 return &p->vfs_inode;
10361142 }
10371143
1038
-static void hugetlbfs_i_callback(struct rcu_head *head)
1144
+static void hugetlbfs_free_inode(struct inode *inode)
10391145 {
1040
- struct inode *inode = container_of(head, struct inode, i_rcu);
10411146 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
10421147 }
10431148
....@@ -1045,7 +1150,6 @@
10451150 {
10461151 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
10471152 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
1048
- call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
10491153 }
10501154
10511155 static const struct address_space_operations hugetlbfs_aops = {
....@@ -1084,6 +1188,7 @@
10841188 .mknod = hugetlbfs_mknod,
10851189 .rename = simple_rename,
10861190 .setattr = hugetlbfs_setattr,
1191
+ .tmpfile = hugetlbfs_tmpfile,
10871192 };
10881193
10891194 static const struct inode_operations hugetlbfs_inode_operations = {
....@@ -1092,14 +1197,13 @@
10921197
10931198 static const struct super_operations hugetlbfs_ops = {
10941199 .alloc_inode = hugetlbfs_alloc_inode,
1200
+ .free_inode = hugetlbfs_free_inode,
10951201 .destroy_inode = hugetlbfs_destroy_inode,
10961202 .evict_inode = hugetlbfs_evict_inode,
10971203 .statfs = hugetlbfs_statfs,
10981204 .put_super = hugetlbfs_put_super,
10991205 .show_options = hugetlbfs_show_options,
11001206 };
1101
-
1102
-enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
11031207
11041208 /*
11051209 * Convert size option passed from command line to number of huge pages
....@@ -1123,170 +1227,157 @@
11231227 return size_opt;
11241228 }
11251229
1126
-static int
1127
-hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
1230
+/*
1231
+ * Parse one mount parameter.
1232
+ */
1233
+static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
11281234 {
1129
- char *p, *rest;
1130
- substring_t args[MAX_OPT_ARGS];
1131
- int option;
1132
- unsigned long long max_size_opt = 0, min_size_opt = 0;
1133
- enum hugetlbfs_size_type max_val_type = NO_SIZE, min_val_type = NO_SIZE;
1235
+ struct hugetlbfs_fs_context *ctx = fc->fs_private;
1236
+ struct fs_parse_result result;
1237
+ char *rest;
1238
+ unsigned long ps;
1239
+ int opt;
11341240
1135
- if (!options)
1241
+ opt = fs_parse(fc, hugetlb_fs_parameters, param, &result);
1242
+ if (opt < 0)
1243
+ return opt;
1244
+
1245
+ switch (opt) {
1246
+ case Opt_uid:
1247
+ ctx->uid = make_kuid(current_user_ns(), result.uint_32);
1248
+ if (!uid_valid(ctx->uid))
1249
+ goto bad_val;
11361250 return 0;
11371251
1138
- while ((p = strsep(&options, ",")) != NULL) {
1139
- int token;
1140
- if (!*p)
1141
- continue;
1252
+ case Opt_gid:
1253
+ ctx->gid = make_kgid(current_user_ns(), result.uint_32);
1254
+ if (!gid_valid(ctx->gid))
1255
+ goto bad_val;
1256
+ return 0;
11421257
1143
- token = match_token(p, tokens, args);
1144
- switch (token) {
1145
- case Opt_uid:
1146
- if (match_int(&args[0], &option))
1147
- goto bad_val;
1148
- pconfig->uid = make_kuid(current_user_ns(), option);
1149
- if (!uid_valid(pconfig->uid))
1150
- goto bad_val;
1151
- break;
1258
+ case Opt_mode:
1259
+ ctx->mode = result.uint_32 & 01777U;
1260
+ return 0;
11521261
1153
- case Opt_gid:
1154
- if (match_int(&args[0], &option))
1155
- goto bad_val;
1156
- pconfig->gid = make_kgid(current_user_ns(), option);
1157
- if (!gid_valid(pconfig->gid))
1158
- goto bad_val;
1159
- break;
1262
+ case Opt_size:
1263
+ /* memparse() will accept a K/M/G without a digit */
1264
+ if (!param->string || !isdigit(param->string[0]))
1265
+ goto bad_val;
1266
+ ctx->max_size_opt = memparse(param->string, &rest);
1267
+ ctx->max_val_type = SIZE_STD;
1268
+ if (*rest == '%')
1269
+ ctx->max_val_type = SIZE_PERCENT;
1270
+ return 0;
11601271
1161
- case Opt_mode:
1162
- if (match_octal(&args[0], &option))
1163
- goto bad_val;
1164
- pconfig->mode = option & 01777U;
1165
- break;
1272
+ case Opt_nr_inodes:
1273
+ /* memparse() will accept a K/M/G without a digit */
1274
+ if (!param->string || !isdigit(param->string[0]))
1275
+ goto bad_val;
1276
+ ctx->nr_inodes = memparse(param->string, &rest);
1277
+ return 0;
11661278
1167
- case Opt_size: {
1168
- /* memparse() will accept a K/M/G without a digit */
1169
- if (!isdigit(*args[0].from))
1170
- goto bad_val;
1171
- max_size_opt = memparse(args[0].from, &rest);
1172
- max_val_type = SIZE_STD;
1173
- if (*rest == '%')
1174
- max_val_type = SIZE_PERCENT;
1175
- break;
1176
- }
1177
-
1178
- case Opt_nr_inodes:
1179
- /* memparse() will accept a K/M/G without a digit */
1180
- if (!isdigit(*args[0].from))
1181
- goto bad_val;
1182
- pconfig->nr_inodes = memparse(args[0].from, &rest);
1183
- break;
1184
-
1185
- case Opt_pagesize: {
1186
- unsigned long ps;
1187
- ps = memparse(args[0].from, &rest);
1188
- pconfig->hstate = size_to_hstate(ps);
1189
- if (!pconfig->hstate) {
1190
- pr_err("Unsupported page size %lu MB\n",
1191
- ps >> 20);
1192
- return -EINVAL;
1193
- }
1194
- break;
1195
- }
1196
-
1197
- case Opt_min_size: {
1198
- /* memparse() will accept a K/M/G without a digit */
1199
- if (!isdigit(*args[0].from))
1200
- goto bad_val;
1201
- min_size_opt = memparse(args[0].from, &rest);
1202
- min_val_type = SIZE_STD;
1203
- if (*rest == '%')
1204
- min_val_type = SIZE_PERCENT;
1205
- break;
1206
- }
1207
-
1208
- default:
1209
- pr_err("Bad mount option: \"%s\"\n", p);
1279
+ case Opt_pagesize:
1280
+ ps = memparse(param->string, &rest);
1281
+ ctx->hstate = size_to_hstate(ps);
1282
+ if (!ctx->hstate) {
1283
+ pr_err("Unsupported page size %lu MB\n", ps >> 20);
12101284 return -EINVAL;
1211
- break;
12121285 }
1286
+ return 0;
1287
+
1288
+ case Opt_min_size:
1289
+ /* memparse() will accept a K/M/G without a digit */
1290
+ if (!param->string || !isdigit(param->string[0]))
1291
+ goto bad_val;
1292
+ ctx->min_size_opt = memparse(param->string, &rest);
1293
+ ctx->min_val_type = SIZE_STD;
1294
+ if (*rest == '%')
1295
+ ctx->min_val_type = SIZE_PERCENT;
1296
+ return 0;
1297
+
1298
+ default:
1299
+ return -EINVAL;
12131300 }
1301
+
1302
+bad_val:
1303
+ return invalfc(fc, "Bad value '%s' for mount option '%s'\n",
1304
+ param->string, param->key);
1305
+}
1306
+
1307
+/*
1308
+ * Validate the parsed options.
1309
+ */
1310
+static int hugetlbfs_validate(struct fs_context *fc)
1311
+{
1312
+ struct hugetlbfs_fs_context *ctx = fc->fs_private;
12141313
12151314 /*
12161315 * Use huge page pool size (in hstate) to convert the size
12171316 * options to number of huge pages. If NO_SIZE, -1 is returned.
12181317 */
1219
- pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
1220
- max_size_opt, max_val_type);
1221
- pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
1222
- min_size_opt, min_val_type);
1318
+ ctx->max_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
1319
+ ctx->max_size_opt,
1320
+ ctx->max_val_type);
1321
+ ctx->min_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
1322
+ ctx->min_size_opt,
1323
+ ctx->min_val_type);
12231324
12241325 /*
12251326 * If max_size was specified, then min_size must be smaller
12261327 */
1227
- if (max_val_type > NO_SIZE &&
1228
- pconfig->min_hpages > pconfig->max_hpages) {
1229
- pr_err("minimum size can not be greater than maximum size\n");
1328
+ if (ctx->max_val_type > NO_SIZE &&
1329
+ ctx->min_hpages > ctx->max_hpages) {
1330
+ pr_err("Minimum size can not be greater than maximum size\n");
12301331 return -EINVAL;
12311332 }
12321333
12331334 return 0;
1234
-
1235
-bad_val:
1236
- pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p);
1237
- return -EINVAL;
12381335 }
12391336
12401337 static int
1241
-hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
1338
+hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
12421339 {
1243
- int ret;
1244
- struct hugetlbfs_config config;
1340
+ struct hugetlbfs_fs_context *ctx = fc->fs_private;
12451341 struct hugetlbfs_sb_info *sbinfo;
1246
-
1247
- config.max_hpages = -1; /* No limit on size by default */
1248
- config.nr_inodes = -1; /* No limit on number of inodes by default */
1249
- config.uid = current_fsuid();
1250
- config.gid = current_fsgid();
1251
- config.mode = 0755;
1252
- config.hstate = &default_hstate;
1253
- config.min_hpages = -1; /* No default minimum size */
1254
- ret = hugetlbfs_parse_options(data, &config);
1255
- if (ret)
1256
- return ret;
12571342
12581343 sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
12591344 if (!sbinfo)
12601345 return -ENOMEM;
12611346 sb->s_fs_info = sbinfo;
1262
- sbinfo->hstate = config.hstate;
12631347 spin_lock_init(&sbinfo->stat_lock);
1264
- sbinfo->max_inodes = config.nr_inodes;
1265
- sbinfo->free_inodes = config.nr_inodes;
1266
- sbinfo->spool = NULL;
1267
- sbinfo->uid = config.uid;
1268
- sbinfo->gid = config.gid;
1269
- sbinfo->mode = config.mode;
1348
+ sbinfo->hstate = ctx->hstate;
1349
+ sbinfo->max_inodes = ctx->nr_inodes;
1350
+ sbinfo->free_inodes = ctx->nr_inodes;
1351
+ sbinfo->spool = NULL;
1352
+ sbinfo->uid = ctx->uid;
1353
+ sbinfo->gid = ctx->gid;
1354
+ sbinfo->mode = ctx->mode;
12701355
12711356 /*
12721357 * Allocate and initialize subpool if maximum or minimum size is
12731358 * specified. Any needed reservations (for minimum size) are taken
12741359 * when the subpool is created.
12751360 */
1276
- if (config.max_hpages != -1 || config.min_hpages != -1) {
1277
- sbinfo->spool = hugepage_new_subpool(config.hstate,
1278
- config.max_hpages,
1279
- config.min_hpages);
1361
+ if (ctx->max_hpages != -1 || ctx->min_hpages != -1) {
1362
+ sbinfo->spool = hugepage_new_subpool(ctx->hstate,
1363
+ ctx->max_hpages,
1364
+ ctx->min_hpages);
12801365 if (!sbinfo->spool)
12811366 goto out_free;
12821367 }
12831368 sb->s_maxbytes = MAX_LFS_FILESIZE;
1284
- sb->s_blocksize = huge_page_size(config.hstate);
1285
- sb->s_blocksize_bits = huge_page_shift(config.hstate);
1369
+ sb->s_blocksize = huge_page_size(ctx->hstate);
1370
+ sb->s_blocksize_bits = huge_page_shift(ctx->hstate);
12861371 sb->s_magic = HUGETLBFS_MAGIC;
12871372 sb->s_op = &hugetlbfs_ops;
12881373 sb->s_time_gran = 1;
1289
- sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
1374
+
1375
+ /*
1376
+ * Due to the special and limited functionality of hugetlbfs, it does
1377
+ * not work well as a stacking filesystem.
1378
+ */
1379
+ sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
1380
+ sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx));
12901381 if (!sb->s_root)
12911382 goto out_free;
12921383 return 0;
....@@ -1296,16 +1387,52 @@
12961387 return -ENOMEM;
12971388 }
12981389
1299
-static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
1300
- int flags, const char *dev_name, void *data)
1390
+static int hugetlbfs_get_tree(struct fs_context *fc)
13011391 {
1302
- return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
1392
+ int err = hugetlbfs_validate(fc);
1393
+ if (err)
1394
+ return err;
1395
+ return get_tree_nodev(fc, hugetlbfs_fill_super);
1396
+}
1397
+
1398
+static void hugetlbfs_fs_context_free(struct fs_context *fc)
1399
+{
1400
+ kfree(fc->fs_private);
1401
+}
1402
+
1403
+static const struct fs_context_operations hugetlbfs_fs_context_ops = {
1404
+ .free = hugetlbfs_fs_context_free,
1405
+ .parse_param = hugetlbfs_parse_param,
1406
+ .get_tree = hugetlbfs_get_tree,
1407
+};
1408
+
1409
+static int hugetlbfs_init_fs_context(struct fs_context *fc)
1410
+{
1411
+ struct hugetlbfs_fs_context *ctx;
1412
+
1413
+ ctx = kzalloc(sizeof(struct hugetlbfs_fs_context), GFP_KERNEL);
1414
+ if (!ctx)
1415
+ return -ENOMEM;
1416
+
1417
+ ctx->max_hpages = -1; /* No limit on size by default */
1418
+ ctx->nr_inodes = -1; /* No limit on number of inodes by default */
1419
+ ctx->uid = current_fsuid();
1420
+ ctx->gid = current_fsgid();
1421
+ ctx->mode = 0755;
1422
+ ctx->hstate = &default_hstate;
1423
+ ctx->min_hpages = -1; /* No default minimum size */
1424
+ ctx->max_val_type = NO_SIZE;
1425
+ ctx->min_val_type = NO_SIZE;
1426
+ fc->fs_private = ctx;
1427
+ fc->ops = &hugetlbfs_fs_context_ops;
1428
+ return 0;
13031429 }
13041430
13051431 static struct file_system_type hugetlbfs_fs_type = {
1306
- .name = "hugetlbfs",
1307
- .mount = hugetlbfs_mount,
1308
- .kill_sb = kill_litter_super,
1432
+ .name = "hugetlbfs",
1433
+ .init_fs_context = hugetlbfs_init_fs_context,
1434
+ .parameters = hugetlb_fs_parameters,
1435
+ .kill_sb = kill_litter_super,
13091436 };
13101437
13111438 static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
....@@ -1390,8 +1517,29 @@
13901517 return file;
13911518 }
13921519
1520
+static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
1521
+{
1522
+ struct fs_context *fc;
1523
+ struct vfsmount *mnt;
1524
+
1525
+ fc = fs_context_for_mount(&hugetlbfs_fs_type, SB_KERNMOUNT);
1526
+ if (IS_ERR(fc)) {
1527
+ mnt = ERR_CAST(fc);
1528
+ } else {
1529
+ struct hugetlbfs_fs_context *ctx = fc->fs_private;
1530
+ ctx->hstate = h;
1531
+ mnt = fc_mount(fc);
1532
+ put_fs_context(fc);
1533
+ }
1534
+ if (IS_ERR(mnt))
1535
+ pr_err("Cannot mount internal hugetlbfs for page size %uK",
1536
+ 1U << (h->order + PAGE_SHIFT - 10));
1537
+ return mnt;
1538
+}
1539
+
13931540 static int __init init_hugetlbfs_fs(void)
13941541 {
1542
+ struct vfsmount *mnt;
13951543 struct hstate *h;
13961544 int error;
13971545 int i;
....@@ -1406,36 +1554,43 @@
14061554 sizeof(struct hugetlbfs_inode_info),
14071555 0, SLAB_ACCOUNT, init_once);
14081556 if (hugetlbfs_inode_cachep == NULL)
1409
- goto out2;
1557
+ goto out;
14101558
14111559 error = register_filesystem(&hugetlbfs_fs_type);
14121560 if (error)
1413
- goto out;
1561
+ goto out_free;
14141562
1563
+ /* default hstate mount is required */
1564
+ mnt = mount_one_hugetlbfs(&hstates[default_hstate_idx]);
1565
+ if (IS_ERR(mnt)) {
1566
+ error = PTR_ERR(mnt);
1567
+ goto out_unreg;
1568
+ }
1569
+ hugetlbfs_vfsmount[default_hstate_idx] = mnt;
1570
+
1571
+ /* other hstates are optional */
14151572 i = 0;
14161573 for_each_hstate(h) {
1417
- char buf[50];
1418
- unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10);
1419
-
1420
- snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb);
1421
- hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type,
1422
- buf);
1423
-
1424
- if (IS_ERR(hugetlbfs_vfsmount[i])) {
1425
- pr_err("Cannot mount internal hugetlbfs for "
1426
- "page size %uK", ps_kb);
1427
- error = PTR_ERR(hugetlbfs_vfsmount[i]);
1428
- hugetlbfs_vfsmount[i] = NULL;
1574
+ if (i == default_hstate_idx) {
1575
+ i++;
1576
+ continue;
14291577 }
1578
+
1579
+ mnt = mount_one_hugetlbfs(h);
1580
+ if (IS_ERR(mnt))
1581
+ hugetlbfs_vfsmount[i] = NULL;
1582
+ else
1583
+ hugetlbfs_vfsmount[i] = mnt;
14301584 i++;
14311585 }
1432
- /* Non default hstates are optional */
1433
- if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx]))
1434
- return 0;
14351586
1436
- out:
1587
+ return 0;
1588
+
1589
+ out_unreg:
1590
+ (void)unregister_filesystem(&hugetlbfs_fs_type);
1591
+ out_free:
14371592 kmem_cache_destroy(hugetlbfs_inode_cachep);
1438
- out2:
1593
+ out:
14391594 return error;
14401595 }
14411596 fs_initcall(init_hugetlbfs_fs)