.. | ..
27 | 27 | #include <linux/backing-dev.h>
28 | 28 | #include <linux/hugetlb.h>
29 | 29 | #include <linux/pagevec.h>
30 | | -#include <linux/parser.h>
| 30 | +#include <linux/fs_parser.h>
31 | 31 | #include <linux/mman.h>
32 | 32 | #include <linux/slab.h>
33 | 33 | #include <linux/dnotify.h>
.. | ..
38 | 38 | #include <linux/uio.h>
39 | 39 |
40 | 40 | #include <linux/uaccess.h>
| 41 | +#include <linux/sched/mm.h>
41 | 42 |
42 | 43 | static const struct super_operations hugetlbfs_ops;
43 | 44 | static const struct address_space_operations hugetlbfs_aops;
.. | ..
45 | 46 | static const struct inode_operations hugetlbfs_dir_inode_operations;
46 | 47 | static const struct inode_operations hugetlbfs_inode_operations;
47 | 48 |
48 | | -struct hugetlbfs_config {
| 49 | +enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
| 50 | +
| 51 | +struct hugetlbfs_fs_context {
49 | 52 | struct hstate *hstate;
| 53 | + unsigned long long max_size_opt;
| 54 | + unsigned long long min_size_opt;
50 | 55 | long max_hpages;
51 | 56 | long nr_inodes;
52 | 57 | long min_hpages;
| 58 | + enum hugetlbfs_size_type max_val_type;
| 59 | + enum hugetlbfs_size_type min_val_type;
53 | 60 | kuid_t uid;
54 | 61 | kgid_t gid;
55 | 62 | umode_t mode;
.. | ..
57 | 64 |
58 | 65 | int sysctl_hugetlb_shm_group;
59 | 66 |
60 | | -enum {
61 | | - Opt_size, Opt_nr_inodes,
62 | | - Opt_mode, Opt_uid, Opt_gid,
63 | | - Opt_pagesize, Opt_min_size,
64 | | - Opt_err,
| 67 | +enum hugetlb_param {
| 68 | + Opt_gid,
| 69 | + Opt_min_size,
| 70 | + Opt_mode,
| 71 | + Opt_nr_inodes,
| 72 | + Opt_pagesize,
| 73 | + Opt_size,
| 74 | + Opt_uid,
65 | 75 | };
66 | 76 |
67 | | -static const match_table_t tokens = {
68 | | - {Opt_size, "size=%s"},
69 | | - {Opt_nr_inodes, "nr_inodes=%s"},
70 | | - {Opt_mode, "mode=%o"},
71 | | - {Opt_uid, "uid=%u"},
72 | | - {Opt_gid, "gid=%u"},
73 | | - {Opt_pagesize, "pagesize=%s"},
74 | | - {Opt_min_size, "min_size=%s"},
75 | | - {Opt_err, NULL},
| 77 | +static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
| 78 | + fsparam_u32 ("gid", Opt_gid),
| 79 | + fsparam_string("min_size", Opt_min_size),
| 80 | + fsparam_u32oct("mode", Opt_mode),
| 81 | + fsparam_string("nr_inodes", Opt_nr_inodes),
| 82 | + fsparam_string("pagesize", Opt_pagesize),
| 83 | + fsparam_string("size", Opt_size),
| 84 | + fsparam_u32 ("uid", Opt_uid),
| 85 | + {}
76 | 86 | };
77 | 87 |
78 | 88 | #ifdef CONFIG_NUMA
.. | ..
121 | 131 | static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
122 | 132 | {
123 | 133 | struct inode *inode = file_inode(file);
| 134 | + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
124 | 135 | loff_t len, vma_len;
125 | 136 | int ret;
126 | 137 | struct hstate *h = hstate_file(file);
.. | ..
130 | 141 | * already been checked by prepare_hugepage_range. If you add
131 | 142 | * any error returns here, do so after setting VM_HUGETLB, so
132 | 143 | * is_vm_hugetlb_page tests below unmap_region go the right
133 | | - * way when do_mmap_pgoff unwinds (may be important on powerpc
| 144 | + * way when do_mmap unwinds (may be important on powerpc
134 | 145 | * and ia64).
135 | 146 | */
136 | 147 | vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
137 | 148 | vma->vm_ops = &hugetlb_vm_ops;
| 149 | +
| 150 | + ret = seal_check_future_write(info->seals, vma);
| 151 | + if (ret)
| 152 | + return ret;
138 | 153 |
139 | 154 | /*
140 | 155 | * page based offset in vm_pgoff could be sufficiently large to
.. | ..
177 | 192 | }
178 | 193 |
179 | 194 | /*
180 | | - * Called under down_write(mmap_sem).
| 195 | + * Called under mmap_write_lock(mm).
181 | 196 | */
182 | 197 |
183 | 198 | #ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
| 199 | +static unsigned long
| 200 | +hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
| 201 | + unsigned long len, unsigned long pgoff, unsigned long flags)
| 202 | +{
| 203 | + struct hstate *h = hstate_file(file);
| 204 | + struct vm_unmapped_area_info info;
| 205 | +
| 206 | + info.flags = 0;
| 207 | + info.length = len;
| 208 | + info.low_limit = current->mm->mmap_base;
| 209 | + info.high_limit = arch_get_mmap_end(addr);
| 210 | + info.align_mask = PAGE_MASK & ~huge_page_mask(h);
| 211 | + info.align_offset = 0;
| 212 | + return vm_unmapped_area(&info);
| 213 | +}
| 214 | +
| 215 | +static unsigned long
| 216 | +hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
| 217 | + unsigned long len, unsigned long pgoff, unsigned long flags)
| 218 | +{
| 219 | + struct hstate *h = hstate_file(file);
| 220 | + struct vm_unmapped_area_info info;
| 221 | +
| 222 | + info.flags = VM_UNMAPPED_AREA_TOPDOWN;
| 223 | + info.length = len;
| 224 | + info.low_limit = max(PAGE_SIZE, mmap_min_addr);
| 225 | + info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
| 226 | + info.align_mask = PAGE_MASK & ~huge_page_mask(h);
| 227 | + info.align_offset = 0;
| 228 | + addr = vm_unmapped_area(&info);
| 229 | +
| 230 | + /*
| 231 | + * A failed mmap() very likely causes application failure,
| 232 | + * so fall back to the bottom-up function here. This scenario
| 233 | + * can happen with large stack limits and large mmap()
| 234 | + * allocations.
| 235 | + */
| 236 | + if (unlikely(offset_in_page(addr))) {
| 237 | + VM_BUG_ON(addr != -ENOMEM);
| 238 | + info.flags = 0;
| 239 | + info.low_limit = current->mm->mmap_base;
| 240 | + info.high_limit = arch_get_mmap_end(addr);
| 241 | + addr = vm_unmapped_area(&info);
| 242 | + }
| 243 | +
| 244 | + return addr;
| 245 | +}
| 246 | +
184 | 247 | static unsigned long
185 | 248 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
186 | 249 | unsigned long len, unsigned long pgoff, unsigned long flags)
.. | ..
188 | 251 | struct mm_struct *mm = current->mm;
189 | 252 | struct vm_area_struct *vma;
190 | 253 | struct hstate *h = hstate_file(file);
191 | | - struct vm_unmapped_area_info info;
| 254 | + const unsigned long mmap_end = arch_get_mmap_end(addr);
192 | 255 |
193 | 256 | if (len & ~huge_page_mask(h))
194 | 257 | return -EINVAL;
.. | ..
204 | 267 | if (addr) {
205 | 268 | addr = ALIGN(addr, huge_page_size(h));
206 | 269 | vma = find_vma(mm, addr);
207 | | - if (TASK_SIZE - len >= addr &&
| 270 | + if (mmap_end - len >= addr &&
208 | 271 | (!vma || addr + len <= vm_start_gap(vma)))
209 | 272 | return addr;
210 | 273 | }
211 | 274 |
212 | | - info.flags = 0;
213 | | - info.length = len;
214 | | - info.low_limit = TASK_UNMAPPED_BASE;
215 | | - info.high_limit = TASK_SIZE;
216 | | - info.align_mask = PAGE_MASK & ~huge_page_mask(h);
217 | | - info.align_offset = 0;
218 | | - return vm_unmapped_area(&info);
| 275 | + /*
| 276 | + * Use mm->get_unmapped_area value as a hint to use topdown routine.
| 277 | + * If architectures have special needs, they should define their own
| 278 | + * version of hugetlb_get_unmapped_area.
| 279 | + */
| 280 | + if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
| 281 | + return hugetlb_get_unmapped_area_topdown(file, addr, len,
| 282 | + pgoff, flags);
| 283 | + return hugetlb_get_unmapped_area_bottomup(file, addr, len,
| 284 | + pgoff, flags);
219 | 285 | }
220 | 286 | #endif
221 | 287 |
.. | ..
384 | 450 | * In this case, we first scan the range and release found pages.
385 | 451 | * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
386 | 452 | * maps and global counts. Page faults can not race with truncation
387 | | - * in this routine. hugetlb_no_page() prevents page faults in the
388 | | - * truncated range. It checks i_size before allocation, and again after
389 | | - * with the page table lock for the page held. The same lock must be
390 | | - * acquired to unmap a page.
| 453 | + * in this routine. hugetlb_no_page() holds i_mmap_rwsem and prevents
| 454 | + * page faults in the truncated range by checking i_size. i_size is
| 455 | + * modified while holding i_mmap_rwsem.
391 | 456 | * hole punch is indicated if end is not LLONG_MAX
392 | 457 | * In the hole punch case we scan the range and release found pages.
393 | 458 | * Only when releasing a page is the associated region/reserv map
.. | ..
426 | 491 | u32 hash;
427 | 492 |
428 | 493 | index = page->index;
429 | | - hash = hugetlb_fault_mutex_hash(h, mapping, index);
430 | | - mutex_lock(&hugetlb_fault_mutex_table[hash]);
| 494 | + hash = hugetlb_fault_mutex_hash(mapping, index);
| 495 | + if (!truncate_op) {
| 496 | + /*
| 497 | + * Only need to hold the fault mutex in the
| 498 | + * hole punch case. This prevents races with
| 499 | + * page faults. Races are not possible in the
| 500 | + * case of truncation.
| 501 | + */
| 502 | + mutex_lock(&hugetlb_fault_mutex_table[hash]);
| 503 | + }
431 | 504 |
432 | 505 | /*
433 | 506 | * If page is mapped, it was faulted in after being
.. | ..
441 | 514 | if (unlikely(page_mapped(page))) {
442 | 515 | BUG_ON(truncate_op);
443 | 516 |
| 517 | + mutex_unlock(&hugetlb_fault_mutex_table[hash]);
444 | 518 | i_mmap_lock_write(mapping);
| 519 | + mutex_lock(&hugetlb_fault_mutex_table[hash]);
445 | 520 | hugetlb_vmdelete_list(&mapping->i_mmap,
446 | 521 | index * pages_per_huge_page(h),
447 | 522 | (index + 1) * pages_per_huge_page(h));
.. | ..
468 | 543 | }
469 | 544 |
470 | 545 | unlock_page(page);
471 | | - mutex_unlock(&hugetlb_fault_mutex_table[hash]);
| 546 | + if (!truncate_op)
| 547 | + mutex_unlock(&hugetlb_fault_mutex_table[hash]);
472 | 548 | }
473 | 549 | huge_pagevec_release(&pvec);
474 | 550 | cond_resched();
.. | ..
483 | 559 | struct resv_map *resv_map;
484 | 560 |
485 | 561 | remove_inode_hugepages(inode, 0, LLONG_MAX);
486 | | - resv_map = (struct resv_map *)inode->i_mapping->private_data;
487 | | - /* root inode doesn't have the resv_map, so we should check it */
| 562 | +
| 563 | + /*
| 564 | + * Get the resv_map from the address space embedded in the inode.
| 565 | + * This is the address space which points to any resv_map allocated
| 566 | + * at inode creation time. If this is a device special inode,
| 567 | + * i_mapping may not point to the original address space.
| 568 | + */
| 569 | + resv_map = (struct resv_map *)(&inode->i_data)->private_data;
| 570 | + /* Only regular and link inodes have associated reserve maps */
488 | 571 | if (resv_map)
489 | 572 | resv_map_release(&resv_map->refs);
490 | 573 | clear_inode(inode);
.. | ..
499 | 582 | BUG_ON(offset & ~huge_page_mask(h));
500 | 583 | pgoff = offset >> PAGE_SHIFT;
501 | 584 |
502 | | - i_size_write(inode, offset);
503 | 585 | i_mmap_lock_write(mapping);
| 586 | + i_size_write(inode, offset);
504 | 587 | if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
505 | 588 | hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
506 | 589 | i_mmap_unlock_write(mapping);
.. | ..
622 | 705 | /* addr is the offset within the file (zero based) */
623 | 706 | addr = index * hpage_size;
624 | 707 |
625 | | - /* mutex taken here, fault path and hole punch */
626 | | - hash = hugetlb_fault_mutex_hash(h, mapping, index);
| 708 | + /*
| 709 | + * fault mutex taken here, protects against fault path
| 710 | + * and hole punch. inode_lock previously taken protects
| 711 | + * against truncation.
| 712 | + */
| 713 | + hash = hugetlb_fault_mutex_hash(mapping, index);
627 | 714 | mutex_lock(&hugetlb_fault_mutex_table[hash]);
628 | 715 |
629 | 716 | /* See if already present in mapping to avoid alloc/free */
.. | ..
706 | 793 | }
707 | 794 |
708 | 795 | static struct inode *hugetlbfs_get_root(struct super_block *sb,
709 | | - struct hugetlbfs_config *config)
| 796 | + struct hugetlbfs_fs_context *ctx)
710 | 797 | {
711 | 798 | struct inode *inode;
712 | 799 |
713 | 800 | inode = new_inode(sb);
714 | 801 | if (inode) {
715 | 802 | inode->i_ino = get_next_ino();
716 | | - inode->i_mode = S_IFDIR | config->mode;
717 | | - inode->i_uid = config->uid;
718 | | - inode->i_gid = config->gid;
| 803 | + inode->i_mode = S_IFDIR | ctx->mode;
| 804 | + inode->i_uid = ctx->uid;
| 805 | + inode->i_gid = ctx->gid;
719 | 806 | inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
720 | 807 | inode->i_op = &hugetlbfs_dir_inode_operations;
721 | 808 | inode->i_fop = &simple_dir_operations;
.. | ..
795 | 882 | /*
796 | 883 | * File creation. Allocate an inode, and we're done..
797 | 884 | */
798 | | -static int hugetlbfs_mknod(struct inode *dir,
799 | | - struct dentry *dentry, umode_t mode, dev_t dev)
| 885 | +static int do_hugetlbfs_mknod(struct inode *dir,
| 886 | + struct dentry *dentry,
| 887 | + umode_t mode,
| 888 | + dev_t dev,
| 889 | + bool tmpfile)
800 | 890 | {
801 | 891 | struct inode *inode;
802 | 892 | int error = -ENOSPC;
.. | ..
804 | 894 | inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
805 | 895 | if (inode) {
806 | 896 | dir->i_ctime = dir->i_mtime = current_time(dir);
807 | | - d_instantiate(dentry, inode);
808 | | - dget(dentry); /* Extra count - pin the dentry in core */
| 897 | + if (tmpfile) {
| 898 | + d_tmpfile(dentry, inode);
| 899 | + } else {
| 900 | + d_instantiate(dentry, inode);
| 901 | + dget(dentry);/* Extra count - pin the dentry in core */
| 902 | + }
809 | 903 | error = 0;
810 | 904 | }
811 | 905 | return error;
| 906 | +}
| 907 | +
| 908 | +static int hugetlbfs_mknod(struct inode *dir,
| 909 | + struct dentry *dentry, umode_t mode, dev_t dev)
| 910 | +{
| 911 | + return do_hugetlbfs_mknod(dir, dentry, mode, dev, false);
812 | 912 | }
813 | 913 |
814 | 914 | static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
.. | ..
822 | 922 | static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
823 | 923 | {
824 | 924 | return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
| 925 | +}
| 926 | +
| 927 | +static int hugetlbfs_tmpfile(struct inode *dir,
| 928 | + struct dentry *dentry, umode_t mode)
| 929 | +{
| 930 | + return do_hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0, true);
825 | 931 | }
826 | 932 |
827 | 933 | static int hugetlbfs_symlink(struct inode *dir,
.. | ..
1035 | 1141 | return &p->vfs_inode;
1036 | 1142 | }
1037 | 1143 |
1038 | | -static void hugetlbfs_i_callback(struct rcu_head *head)
| 1144 | +static void hugetlbfs_free_inode(struct inode *inode)
1039 | 1145 | {
1040 | | - struct inode *inode = container_of(head, struct inode, i_rcu);
1041 | 1146 | kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
1042 | 1147 | }
1043 | 1148 |
.. | ..
1045 | 1150 | {
1046 | 1151 | hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
1047 | 1152 | mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
1048 | | - call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
1049 | 1153 | }
1050 | 1154 |
1051 | 1155 | static const struct address_space_operations hugetlbfs_aops = {
.. | ..
1084 | 1188 | .mknod = hugetlbfs_mknod,
1085 | 1189 | .rename = simple_rename,
1086 | 1190 | .setattr = hugetlbfs_setattr,
| 1191 | + .tmpfile = hugetlbfs_tmpfile,
1087 | 1192 | };
1088 | 1193 |
1089 | 1194 | static const struct inode_operations hugetlbfs_inode_operations = {
.. | ..
1092 | 1197 |
1093 | 1198 | static const struct super_operations hugetlbfs_ops = {
1094 | 1199 | .alloc_inode = hugetlbfs_alloc_inode,
| 1200 | + .free_inode = hugetlbfs_free_inode,
1095 | 1201 | .destroy_inode = hugetlbfs_destroy_inode,
1096 | 1202 | .evict_inode = hugetlbfs_evict_inode,
1097 | 1203 | .statfs = hugetlbfs_statfs,
1098 | 1204 | .put_super = hugetlbfs_put_super,
1099 | 1205 | .show_options = hugetlbfs_show_options,
1100 | 1206 | };
1101 | | -
1102 | | -enum hugetlbfs_size_type { NO_SIZE, SIZE_STD, SIZE_PERCENT };
1103 | 1207 |
1104 | 1208 | /*
1105 | 1209 | * Convert size option passed from command line to number of huge pages
.. | ..
1123 | 1227 | return size_opt;
1124 | 1228 | }
1125 | 1229 |
1126 | | -static int
1127 | | -hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
| 1230 | +/*
| 1231 | + * Parse one mount parameter.
| 1232 | + */
| 1233 | +static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
1128 | 1234 | {
1129 | | - char *p, *rest;
1130 | | - substring_t args[MAX_OPT_ARGS];
1131 | | - int option;
1132 | | - unsigned long long max_size_opt = 0, min_size_opt = 0;
1133 | | - enum hugetlbfs_size_type max_val_type = NO_SIZE, min_val_type = NO_SIZE;
| 1235 | + struct hugetlbfs_fs_context *ctx = fc->fs_private;
| 1236 | + struct fs_parse_result result;
| 1237 | + char *rest;
| 1238 | + unsigned long ps;
| 1239 | + int opt;
1134 | 1240 |
1135 | | - if (!options)
| 1241 | + opt = fs_parse(fc, hugetlb_fs_parameters, param, &result);
| 1242 | + if (opt < 0)
| 1243 | + return opt;
| 1244 | +
| 1245 | + switch (opt) {
| 1246 | + case Opt_uid:
| 1247 | + ctx->uid = make_kuid(current_user_ns(), result.uint_32);
| 1248 | + if (!uid_valid(ctx->uid))
| 1249 | + goto bad_val;
1136 | 1250 | return 0;
1137 | 1251 |
1138 | | - while ((p = strsep(&options, ",")) != NULL) {
1139 | | - int token;
1140 | | - if (!*p)
1141 | | - continue;
| 1252 | + case Opt_gid:
| 1253 | + ctx->gid = make_kgid(current_user_ns(), result.uint_32);
| 1254 | + if (!gid_valid(ctx->gid))
| 1255 | + goto bad_val;
| 1256 | + return 0;
1142 | 1257 |
1143 | | - token = match_token(p, tokens, args);
1144 | | - switch (token) {
1145 | | - case Opt_uid:
1146 | | - if (match_int(&args[0], &option))
1147 | | - goto bad_val;
1148 | | - pconfig->uid = make_kuid(current_user_ns(), option);
1149 | | - if (!uid_valid(pconfig->uid))
1150 | | - goto bad_val;
1151 | | - break;
| 1258 | + case Opt_mode:
| 1259 | + ctx->mode = result.uint_32 & 01777U;
| 1260 | + return 0;
1152 | 1261 |
1153 | | - case Opt_gid:
1154 | | - if (match_int(&args[0], &option))
1155 | | - goto bad_val;
1156 | | - pconfig->gid = make_kgid(current_user_ns(), option);
1157 | | - if (!gid_valid(pconfig->gid))
1158 | | - goto bad_val;
1159 | | - break;
| 1262 | + case Opt_size:
| 1263 | + /* memparse() will accept a K/M/G without a digit */
| 1264 | + if (!param->string || !isdigit(param->string[0]))
| 1265 | + goto bad_val;
| 1266 | + ctx->max_size_opt = memparse(param->string, &rest);
| 1267 | + ctx->max_val_type = SIZE_STD;
| 1268 | + if (*rest == '%')
| 1269 | + ctx->max_val_type = SIZE_PERCENT;
| 1270 | + return 0;
1160 | 1271 |
1161 | | - case Opt_mode:
1162 | | - if (match_octal(&args[0], &option))
1163 | | - goto bad_val;
1164 | | - pconfig->mode = option & 01777U;
1165 | | - break;
| 1272 | + case Opt_nr_inodes:
| 1273 | + /* memparse() will accept a K/M/G without a digit */
| 1274 | + if (!param->string || !isdigit(param->string[0]))
| 1275 | + goto bad_val;
| 1276 | + ctx->nr_inodes = memparse(param->string, &rest);
| 1277 | + return 0;
1166 | 1278 |
1167 | | - case Opt_size: {
1168 | | - /* memparse() will accept a K/M/G without a digit */
1169 | | - if (!isdigit(*args[0].from))
1170 | | - goto bad_val;
1171 | | - max_size_opt = memparse(args[0].from, &rest);
1172 | | - max_val_type = SIZE_STD;
1173 | | - if (*rest == '%')
1174 | | - max_val_type = SIZE_PERCENT;
1175 | | - break;
1176 | | - }
1177 | | -
1178 | | - case Opt_nr_inodes:
1179 | | - /* memparse() will accept a K/M/G without a digit */
1180 | | - if (!isdigit(*args[0].from))
1181 | | - goto bad_val;
1182 | | - pconfig->nr_inodes = memparse(args[0].from, &rest);
1183 | | - break;
1184 | | -
1185 | | - case Opt_pagesize: {
1186 | | - unsigned long ps;
1187 | | - ps = memparse(args[0].from, &rest);
1188 | | - pconfig->hstate = size_to_hstate(ps);
1189 | | - if (!pconfig->hstate) {
1190 | | - pr_err("Unsupported page size %lu MB\n",
1191 | | - ps >> 20);
1192 | | - return -EINVAL;
1193 | | - }
1194 | | - break;
1195 | | - }
1196 | | -
1197 | | - case Opt_min_size: {
1198 | | - /* memparse() will accept a K/M/G without a digit */
1199 | | - if (!isdigit(*args[0].from))
1200 | | - goto bad_val;
1201 | | - min_size_opt = memparse(args[0].from, &rest);
1202 | | - min_val_type = SIZE_STD;
1203 | | - if (*rest == '%')
1204 | | - min_val_type = SIZE_PERCENT;
1205 | | - break;
1206 | | - }
1207 | | -
1208 | | - default:
1209 | | - pr_err("Bad mount option: \"%s\"\n", p);
| 1279 | + case Opt_pagesize:
| 1280 | + ps = memparse(param->string, &rest);
| 1281 | + ctx->hstate = size_to_hstate(ps);
| 1282 | + if (!ctx->hstate) {
| 1283 | + pr_err("Unsupported page size %lu MB\n", ps >> 20);
1210 | 1284 | return -EINVAL;
1211 | | - break;
1212 | 1285 | }
| 1286 | + return 0;
| 1287 | +
| 1288 | + case Opt_min_size:
| 1289 | + /* memparse() will accept a K/M/G without a digit */
| 1290 | + if (!param->string || !isdigit(param->string[0]))
| 1291 | + goto bad_val;
| 1292 | + ctx->min_size_opt = memparse(param->string, &rest);
| 1293 | + ctx->min_val_type = SIZE_STD;
| 1294 | + if (*rest == '%')
| 1295 | + ctx->min_val_type = SIZE_PERCENT;
| 1296 | + return 0;
| 1297 | +
| 1298 | + default:
| 1299 | + return -EINVAL;
1213 | 1300 | }
| 1301 | +
| 1302 | +bad_val:
| 1303 | + return invalfc(fc, "Bad value '%s' for mount option '%s'\n",
| 1304 | + param->string, param->key);
| 1305 | +}
| 1306 | +
| 1307 | +/*
| 1308 | + * Validate the parsed options.
| 1309 | + */
| 1310 | +static int hugetlbfs_validate(struct fs_context *fc)
| 1311 | +{
| 1312 | + struct hugetlbfs_fs_context *ctx = fc->fs_private;
1214 | 1313 |
1215 | 1314 | /*
1216 | 1315 | * Use huge page pool size (in hstate) to convert the size
1217 | 1316 | * options to number of huge pages. If NO_SIZE, -1 is returned.
1218 | 1317 | */
1219 | | - pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
1220 | | - max_size_opt, max_val_type);
1221 | | - pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
1222 | | - min_size_opt, min_val_type);
| 1318 | + ctx->max_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
| 1319 | + ctx->max_size_opt,
| 1320 | + ctx->max_val_type);
| 1321 | + ctx->min_hpages = hugetlbfs_size_to_hpages(ctx->hstate,
| 1322 | + ctx->min_size_opt,
| 1323 | + ctx->min_val_type);
1223 | 1324 |
1224 | 1325 | /*
1225 | 1326 | * If max_size was specified, then min_size must be smaller
1226 | 1327 | */
1227 | | - if (max_val_type > NO_SIZE &&
1228 | | - pconfig->min_hpages > pconfig->max_hpages) {
1229 | | - pr_err("minimum size can not be greater than maximum size\n");
| 1328 | + if (ctx->max_val_type > NO_SIZE &&
| 1329 | + ctx->min_hpages > ctx->max_hpages) {
| 1330 | + pr_err("Minimum size can not be greater than maximum size\n");
1230 | 1331 | return -EINVAL;
1231 | 1332 | }
1232 | 1333 |
1233 | 1334 | return 0;
1234 | | -
1235 | | -bad_val:
1236 | | - pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p);
1237 | | - return -EINVAL;
1238 | 1335 | }
1239 | 1336 |
1240 | 1337 | static int
1241 | | -hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
| 1338 | +hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
1242 | 1339 | {
1243 | | - int ret;
1244 | | - struct hugetlbfs_config config;
| 1340 | + struct hugetlbfs_fs_context *ctx = fc->fs_private;
1245 | 1341 | struct hugetlbfs_sb_info *sbinfo;
1246 | | -
1247 | | - config.max_hpages = -1; /* No limit on size by default */
1248 | | - config.nr_inodes = -1; /* No limit on number of inodes by default */
1249 | | - config.uid = current_fsuid();
1250 | | - config.gid = current_fsgid();
1251 | | - config.mode = 0755;
1252 | | - config.hstate = &default_hstate;
1253 | | - config.min_hpages = -1; /* No default minimum size */
1254 | | - ret = hugetlbfs_parse_options(data, &config);
1255 | | - if (ret)
1256 | | - return ret;
1257 | 1342 |
1258 | 1343 | sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
1259 | 1344 | if (!sbinfo)
1260 | 1345 | return -ENOMEM;
1261 | 1346 | sb->s_fs_info = sbinfo;
1262 | | - sbinfo->hstate = config.hstate;
1263 | 1347 | spin_lock_init(&sbinfo->stat_lock);
1264 | | - sbinfo->max_inodes = config.nr_inodes;
1265 | | - sbinfo->free_inodes = config.nr_inodes;
1266 | | - sbinfo->spool = NULL;
1267 | | - sbinfo->uid = config.uid;
1268 | | - sbinfo->gid = config.gid;
1269 | | - sbinfo->mode = config.mode;
| 1348 | + sbinfo->hstate = ctx->hstate;
| 1349 | + sbinfo->max_inodes = ctx->nr_inodes;
| 1350 | + sbinfo->free_inodes = ctx->nr_inodes;
| 1351 | + sbinfo->spool = NULL;
| 1352 | + sbinfo->uid = ctx->uid;
| 1353 | + sbinfo->gid = ctx->gid;
| 1354 | + sbinfo->mode = ctx->mode;
1270 | 1355 |
1271 | 1356 | /*
1272 | 1357 | * Allocate and initialize subpool if maximum or minimum size is
1273 | 1358 | * specified. Any needed reservations (for minimim size) are taken
1274 | 1359 | * taken when the subpool is created.
1275 | 1360 | */
1276 | | - if (config.max_hpages != -1 || config.min_hpages != -1) {
1277 | | - sbinfo->spool = hugepage_new_subpool(config.hstate,
1278 | | - config.max_hpages,
1279 | | - config.min_hpages);
| 1361 | + if (ctx->max_hpages != -1 || ctx->min_hpages != -1) {
| 1362 | + sbinfo->spool = hugepage_new_subpool(ctx->hstate,
| 1363 | + ctx->max_hpages,
| 1364 | + ctx->min_hpages);
1280 | 1365 | if (!sbinfo->spool)
1281 | 1366 | goto out_free;
1282 | 1367 | }
1283 | 1368 | sb->s_maxbytes = MAX_LFS_FILESIZE;
1284 | | - sb->s_blocksize = huge_page_size(config.hstate);
1285 | | - sb->s_blocksize_bits = huge_page_shift(config.hstate);
| 1369 | + sb->s_blocksize = huge_page_size(ctx->hstate);
| 1370 | + sb->s_blocksize_bits = huge_page_shift(ctx->hstate);
1286 | 1371 | sb->s_magic = HUGETLBFS_MAGIC;
1287 | 1372 | sb->s_op = &hugetlbfs_ops;
1288 | 1373 | sb->s_time_gran = 1;
1289 | | - sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
| 1374 | +
| 1375 | + /*
| 1376 | + * Due to the special and limited functionality of hugetlbfs, it does
| 1377 | + * not work well as a stacking filesystem.
| 1378 | + */
| 1379 | + sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
| 1380 | + sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx));
1290 | 1381 | if (!sb->s_root)
1291 | 1382 | goto out_free;
1292 | 1383 | return 0;
.. | ..
1296 | 1387 | return -ENOMEM;
1297 | 1388 | }
1298 | 1389 |
1299 | | -static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
1300 | | - int flags, const char *dev_name, void *data)
| 1390 | +static int hugetlbfs_get_tree(struct fs_context *fc)
1301 | 1391 | {
1302 | | - return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
| 1392 | + int err = hugetlbfs_validate(fc);
| 1393 | + if (err)
| 1394 | + return err;
| 1395 | + return get_tree_nodev(fc, hugetlbfs_fill_super);
| 1396 | +}
| 1397 | +
| 1398 | +static void hugetlbfs_fs_context_free(struct fs_context *fc)
| 1399 | +{
| 1400 | + kfree(fc->fs_private);
| 1401 | +}
| 1402 | +
| 1403 | +static const struct fs_context_operations hugetlbfs_fs_context_ops = {
| 1404 | + .free = hugetlbfs_fs_context_free,
| 1405 | + .parse_param = hugetlbfs_parse_param,
| 1406 | + .get_tree = hugetlbfs_get_tree,
| 1407 | +};
| 1408 | +
| 1409 | +static int hugetlbfs_init_fs_context(struct fs_context *fc)
| 1410 | +{
| 1411 | + struct hugetlbfs_fs_context *ctx;
| 1412 | +
| 1413 | + ctx = kzalloc(sizeof(struct hugetlbfs_fs_context), GFP_KERNEL);
| 1414 | + if (!ctx)
| 1415 | + return -ENOMEM;
| 1416 | +
| 1417 | + ctx->max_hpages = -1; /* No limit on size by default */
| 1418 | + ctx->nr_inodes = -1; /* No limit on number of inodes by default */
| 1419 | + ctx->uid = current_fsuid();
| 1420 | + ctx->gid = current_fsgid();
| 1421 | + ctx->mode = 0755;
| 1422 | + ctx->hstate = &default_hstate;
| 1423 | + ctx->min_hpages = -1; /* No default minimum size */
| 1424 | + ctx->max_val_type = NO_SIZE;
| 1425 | + ctx->min_val_type = NO_SIZE;
| 1426 | + fc->fs_private = ctx;
| 1427 | + fc->ops = &hugetlbfs_fs_context_ops;
| 1428 | + return 0;
1303 | 1429 | }
1304 | 1430 |
1305 | 1431 | static struct file_system_type hugetlbfs_fs_type = {
1306 | | - .name = "hugetlbfs",
1307 | | - .mount = hugetlbfs_mount,
1308 | | - .kill_sb = kill_litter_super,
| 1432 | + .name = "hugetlbfs",
| 1433 | + .init_fs_context = hugetlbfs_init_fs_context,
| 1434 | + .parameters = hugetlb_fs_parameters,
| 1435 | + .kill_sb = kill_litter_super,
1309 | 1436 | };
1310 | 1437 |
1311 | 1438 | static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
.. | ..
1390 | 1517 | return file;
1391 | 1518 | }
1392 | 1519 |
| 1520 | +static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
| 1521 | +{
| 1522 | + struct fs_context *fc;
| 1523 | + struct vfsmount *mnt;
| 1524 | +
| 1525 | + fc = fs_context_for_mount(&hugetlbfs_fs_type, SB_KERNMOUNT);
| 1526 | + if (IS_ERR(fc)) {
| 1527 | + mnt = ERR_CAST(fc);
| 1528 | + } else {
| 1529 | + struct hugetlbfs_fs_context *ctx = fc->fs_private;
| 1530 | + ctx->hstate = h;
| 1531 | + mnt = fc_mount(fc);
| 1532 | + put_fs_context(fc);
| 1533 | + }
| 1534 | + if (IS_ERR(mnt))
| 1535 | + pr_err("Cannot mount internal hugetlbfs for page size %uK",
| 1536 | + 1U << (h->order + PAGE_SHIFT - 10));
| 1537 | + return mnt;
| 1538 | +}
| 1539 | +
1393 | 1540 | static int __init init_hugetlbfs_fs(void)
1394 | 1541 | {
| 1542 | + struct vfsmount *mnt;
1395 | 1543 | struct hstate *h;
1396 | 1544 | int error;
1397 | 1545 | int i;
.. | ..
1406 | 1554 | sizeof(struct hugetlbfs_inode_info),
1407 | 1555 | 0, SLAB_ACCOUNT, init_once);
1408 | 1556 | if (hugetlbfs_inode_cachep == NULL)
1409 | | - goto out2;
| 1557 | + goto out;
1410 | 1558 |
1411 | 1559 | error = register_filesystem(&hugetlbfs_fs_type);
1412 | 1560 | if (error)
1413 | | - goto out;
| 1561 | + goto out_free;
1414 | 1562 |
| 1563 | + /* default hstate mount is required */
| 1564 | + mnt = mount_one_hugetlbfs(&hstates[default_hstate_idx]);
| 1565 | + if (IS_ERR(mnt)) {
| 1566 | + error = PTR_ERR(mnt);
| 1567 | + goto out_unreg;
| 1568 | + }
| 1569 | + hugetlbfs_vfsmount[default_hstate_idx] = mnt;
| 1570 | +
| 1571 | + /* other hstates are optional */
1415 | 1572 | i = 0;
1416 | 1573 | for_each_hstate(h) {
1417 | | - char buf[50];
1418 | | - unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10);
1419 | | -
1420 | | - snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb);
1421 | | - hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type,
1422 | | - buf);
1423 | | -
1424 | | - if (IS_ERR(hugetlbfs_vfsmount[i])) {
1425 | | - pr_err("Cannot mount internal hugetlbfs for "
1426 | | - "page size %uK", ps_kb);
1427 | | - error = PTR_ERR(hugetlbfs_vfsmount[i]);
1428 | | - hugetlbfs_vfsmount[i] = NULL;
| 1574 | + if (i == default_hstate_idx) {
| 1575 | + i++;
| 1576 | + continue;
1429 | 1577 | }
| 1578 | +
| 1579 | + mnt = mount_one_hugetlbfs(h);
| 1580 | + if (IS_ERR(mnt))
| 1581 | + hugetlbfs_vfsmount[i] = NULL;
| 1582 | + else
| 1583 | + hugetlbfs_vfsmount[i] = mnt;
1430 | 1584 | i++;
1431 | 1585 | }
1432 | | - /* Non default hstates are optional */
1433 | | - if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx]))
1434 | | - return 0;
1435 | 1586 |
1436 | | - out:
| 1587 | + return 0;
| 1588 | +
| 1589 | + out_unreg:
| 1590 | + (void)unregister_filesystem(&hugetlbfs_fs_type);
| 1591 | + out_free:
1437 | 1592 | kmem_cache_destroy(hugetlbfs_inode_cachep);
1438 | | - out2:
| 1593 | + out:
1439 | 1594 | return error;
1440 | 1595 | }
1441 | 1596 | fs_initcall(init_hugetlbfs_fs)
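The hunks above add a .tmpfile inode operation (hugetlbfs_tmpfile via do_hugetlbfs_mknod/d_tmpfile), so unnamed hugetlbfs-backed files can now be created with O_TMPFILE. Below is a minimal userspace sketch of that usage; the /dev/hugepages mount point and the 2 MB default huge page size are assumptions for illustration, not part of the patch.

```c
/*
 * Minimal sketch exercising the new hugetlbfs .tmpfile support.
 * Assumes hugetlbfs is mounted at /dev/hugepages (hypothetical path)
 * and that the default huge page size is 2 MB.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 2UL * 1024 * 1024;		/* one default huge page */

	/* Create an unnamed file in the hugetlbfs mount. */
	int fd = open("/dev/hugepages", O_TMPFILE | O_RDWR, 0600);
	if (fd < 0) {
		perror("open(O_TMPFILE)");
		return EXIT_FAILURE;
	}

	/* File size must be a multiple of the huge page size. */
	if (ftruncate(fd, len) < 0) {
		perror("ftruncate");
		return EXIT_FAILURE;
	}

	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	memset(p, 0, len);			/* fault in the huge page */
	munmap(p, len);
	close(fd);
	return EXIT_SUCCESS;
}
```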