From 23fa18eaa71266feff7ba8d83022d9e1cc83c65a Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Fri, 10 May 2024 07:42:03 +0000 Subject: [PATCH] disable pwm7 --- kernel/fs/xfs/xfs_super.c | 2106 ++++++++++++++++++++++++++++++---------------------------- 1 files changed, 1,092 insertions(+), 1,014 deletions(-) diff --git a/kernel/fs/xfs/xfs_super.c b/kernel/fs/xfs/xfs_super.c index dce8114..a21abc4 100644 --- a/kernel/fs/xfs/xfs_super.c +++ b/kernel/fs/xfs/xfs_super.c @@ -11,18 +11,15 @@ #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_alloc.h" -#include "xfs_error.h" #include "xfs_fsops.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" @@ -39,417 +36,119 @@ #include "xfs_bmap_item.h" #include "xfs_reflink.h" -#include <linux/namei.h> -#include <linux/dax.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/mount.h> -#include <linux/mempool.h> -#include <linux/writeback.h> -#include <linux/kthread.h> -#include <linux/freezer.h> -#include <linux/parser.h> +#include <linux/magic.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> static const struct super_operations xfs_super_operations; -struct bio_set xfs_ioend_bioset; static struct kset *xfs_kset; /* top-level xfs sysfs dir */ #ifdef DEBUG static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ #endif +enum xfs_dax_mode { + XFS_DAX_INODE = 0, + XFS_DAX_ALWAYS = 1, + XFS_DAX_NEVER = 2, +}; + +static void +xfs_mount_set_dax_mode( + struct xfs_mount *mp, + enum xfs_dax_mode mode) +{ + switch (mode) { + case XFS_DAX_INODE: + mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER); + break; + case XFS_DAX_ALWAYS: + mp->m_flags |= XFS_MOUNT_DAX_ALWAYS; + mp->m_flags &= ~XFS_MOUNT_DAX_NEVER; + break; + case XFS_DAX_NEVER: + mp->m_flags |= XFS_MOUNT_DAX_NEVER; + mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS; + break; + } +} + +static const struct constant_table dax_param_enums[] = { + {"inode", XFS_DAX_INODE }, + {"always", XFS_DAX_ALWAYS }, + {"never", XFS_DAX_NEVER }, + {} +}; + /* * Table driven mount option parser. */ enum { - Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize, + Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, - Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, + Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep, Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, - Opt_discard, Opt_nodiscard, Opt_dax, Opt_err, + Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, }; -static const match_table_t tokens = { - {Opt_logbufs, "logbufs=%u"}, /* number of XFS log buffers */ - {Opt_logbsize, "logbsize=%s"}, /* size of XFS log buffers */ - {Opt_logdev, "logdev=%s"}, /* log device */ - {Opt_rtdev, "rtdev=%s"}, /* realtime I/O device */ - {Opt_biosize, "biosize=%u"}, /* log2 of preferred buffered io size */ - {Opt_wsync, "wsync"}, /* safe-mode nfs compatible mount */ - {Opt_noalign, "noalign"}, /* turn off stripe alignment */ - {Opt_swalloc, "swalloc"}, /* turn on stripe width allocation */ - {Opt_sunit, "sunit=%u"}, /* data volume stripe unit */ - {Opt_swidth, "swidth=%u"}, /* data volume stripe width */ - {Opt_nouuid, "nouuid"}, /* ignore filesystem UUID */ - {Opt_mtpt, "mtpt"}, /* filesystem mount point */ - {Opt_grpid, "grpid"}, /* group-ID from parent directory */ - {Opt_nogrpid, "nogrpid"}, /* group-ID from current process */ - {Opt_bsdgroups, "bsdgroups"}, /* group-ID from parent directory */ - {Opt_sysvgroups,"sysvgroups"}, /* group-ID from current process */ - {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */ - {Opt_norecovery,"norecovery"}, /* don't run XFS recovery */ - {Opt_inode64, "inode64"}, /* inodes can be allocated anywhere */ - {Opt_inode32, "inode32"}, /* inode allocation limited to - * XFS_MAXINUMBER_32 */ - {Opt_ikeep, "ikeep"}, /* do not free empty inode clusters */ - {Opt_noikeep, "noikeep"}, /* free empty inode clusters */ - {Opt_largeio, "largeio"}, /* report large I/O sizes in stat() */ - {Opt_nolargeio, "nolargeio"}, /* do not report large I/O sizes - * in stat(). */ - {Opt_attr2, "attr2"}, /* do use attr2 attribute format */ - {Opt_noattr2, "noattr2"}, /* do not use attr2 attribute format */ - {Opt_filestreams,"filestreams"},/* use filestreams allocator */ - {Opt_quota, "quota"}, /* disk quotas (user) */ - {Opt_noquota, "noquota"}, /* no quotas */ - {Opt_usrquota, "usrquota"}, /* user quota enabled */ - {Opt_grpquota, "grpquota"}, /* group quota enabled */ - {Opt_prjquota, "prjquota"}, /* project quota enabled */ - {Opt_uquota, "uquota"}, /* user quota (IRIX variant) */ - {Opt_gquota, "gquota"}, /* group quota (IRIX variant) */ - {Opt_pquota, "pquota"}, /* project quota (IRIX variant) */ - {Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */ - {Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */ - {Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */ - {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */ - {Opt_discard, "discard"}, /* Discard unused blocks */ - {Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */ - {Opt_dax, "dax"}, /* Enable direct access to bdev pages */ - {Opt_err, NULL}, +static const struct fs_parameter_spec xfs_fs_parameters[] = { + fsparam_u32("logbufs", Opt_logbufs), + fsparam_string("logbsize", Opt_logbsize), + fsparam_string("logdev", Opt_logdev), + fsparam_string("rtdev", Opt_rtdev), + fsparam_flag("wsync", Opt_wsync), + fsparam_flag("noalign", Opt_noalign), + fsparam_flag("swalloc", Opt_swalloc), + fsparam_u32("sunit", Opt_sunit), + fsparam_u32("swidth", Opt_swidth), + fsparam_flag("nouuid", Opt_nouuid), + fsparam_flag("grpid", Opt_grpid), + fsparam_flag("nogrpid", Opt_nogrpid), + fsparam_flag("bsdgroups", Opt_bsdgroups), + fsparam_flag("sysvgroups", Opt_sysvgroups), + fsparam_string("allocsize", Opt_allocsize), + fsparam_flag("norecovery", Opt_norecovery), + fsparam_flag("inode64", Opt_inode64), + fsparam_flag("inode32", Opt_inode32), + fsparam_flag("ikeep", Opt_ikeep), + fsparam_flag("noikeep", Opt_noikeep), + fsparam_flag("largeio", Opt_largeio), + fsparam_flag("nolargeio", Opt_nolargeio), + fsparam_flag("attr2", Opt_attr2), + fsparam_flag("noattr2", Opt_noattr2), + fsparam_flag("filestreams", Opt_filestreams), + fsparam_flag("quota", Opt_quota), + fsparam_flag("noquota", Opt_noquota), + fsparam_flag("usrquota", Opt_usrquota), + fsparam_flag("grpquota", Opt_grpquota), + fsparam_flag("prjquota", Opt_prjquota), + fsparam_flag("uquota", Opt_uquota), + fsparam_flag("gquota", Opt_gquota), + fsparam_flag("pquota", Opt_pquota), + fsparam_flag("uqnoenforce", Opt_uqnoenforce), + fsparam_flag("gqnoenforce", Opt_gqnoenforce), + fsparam_flag("pqnoenforce", Opt_pqnoenforce), + fsparam_flag("qnoenforce", Opt_qnoenforce), + fsparam_flag("discard", Opt_discard), + fsparam_flag("nodiscard", Opt_nodiscard), + fsparam_flag("dax", Opt_dax), + fsparam_enum("dax", Opt_dax_enum, dax_param_enums), + {} }; - - -STATIC int -suffix_kstrtoint(const substring_t *s, unsigned int base, int *res) -{ - int last, shift_left_factor = 0, _res; - char *value; - int ret = 0; - - value = match_strdup(s); - if (!value) - return -ENOMEM; - - last = strlen(value) - 1; - if (value[last] == 'K' || value[last] == 'k') { - shift_left_factor = 10; - value[last] = '\0'; - } - if (value[last] == 'M' || value[last] == 'm') { - shift_left_factor = 20; - value[last] = '\0'; - } - if (value[last] == 'G' || value[last] == 'g') { - shift_left_factor = 30; - value[last] = '\0'; - } - - if (kstrtoint(value, base, &_res)) - ret = -EINVAL; - kfree(value); - *res = _res << shift_left_factor; - return ret; -} - -/* - * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock has _not_ yet been read in. - * - * Note that this function leaks the various device name allocations on - * failure. The caller takes care of them. - * - * *sb is const because this is also used to test options on the remount - * path, and we don't want this to have any side effects at remount time. - * Today this function does not change *sb, but just to future-proof... - */ -STATIC int -xfs_parseargs( - struct xfs_mount *mp, - char *options) -{ - const struct super_block *sb = mp->m_super; - char *p; - substring_t args[MAX_OPT_ARGS]; - int dsunit = 0; - int dswidth = 0; - int iosize = 0; - uint8_t iosizelog = 0; - - /* - * set up the mount name first so all the errors will refer to the - * correct device. - */ - mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); - if (!mp->m_fsname) - return -ENOMEM; - mp->m_fsname_len = strlen(mp->m_fsname) + 1; - - /* - * Copy binary VFS mount flags we are interested in. - */ - if (sb_rdonly(sb)) - mp->m_flags |= XFS_MOUNT_RDONLY; - if (sb->s_flags & SB_DIRSYNC) - mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (sb->s_flags & SB_SYNCHRONOUS) - mp->m_flags |= XFS_MOUNT_WSYNC; - - /* - * Set some default flags that could be cleared by the mount option - * parsing. - */ - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - - /* - * These can be overridden by the mount option parsing. - */ - mp->m_logbufs = -1; - mp->m_logbsize = -1; - - if (!options) - goto done; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_logbufs: - if (match_int(args, &mp->m_logbufs)) - return -EINVAL; - break; - case Opt_logbsize: - if (suffix_kstrtoint(args, 10, &mp->m_logbsize)) - return -EINVAL; - break; - case Opt_logdev: - kfree(mp->m_logname); - mp->m_logname = match_strdup(args); - if (!mp->m_logname) - return -ENOMEM; - break; - case Opt_mtpt: - xfs_warn(mp, "%s option not allowed on this system", p); - return -EINVAL; - case Opt_rtdev: - kfree(mp->m_rtname); - mp->m_rtname = match_strdup(args); - if (!mp->m_rtname) - return -ENOMEM; - break; - case Opt_allocsize: - case Opt_biosize: - if (suffix_kstrtoint(args, 10, &iosize)) - return -EINVAL; - iosizelog = ffs(iosize) - 1; - break; - case Opt_grpid: - case Opt_bsdgroups: - mp->m_flags |= XFS_MOUNT_GRPID; - break; - case Opt_nogrpid: - case Opt_sysvgroups: - mp->m_flags &= ~XFS_MOUNT_GRPID; - break; - case Opt_wsync: - mp->m_flags |= XFS_MOUNT_WSYNC; - break; - case Opt_norecovery: - mp->m_flags |= XFS_MOUNT_NORECOVERY; - break; - case Opt_noalign: - mp->m_flags |= XFS_MOUNT_NOALIGN; - break; - case Opt_swalloc: - mp->m_flags |= XFS_MOUNT_SWALLOC; - break; - case Opt_sunit: - if (match_int(args, &dsunit)) - return -EINVAL; - break; - case Opt_swidth: - if (match_int(args, &dswidth)) - return -EINVAL; - break; - case Opt_inode32: - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - break; - case Opt_inode64: - mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; - break; - case Opt_nouuid: - mp->m_flags |= XFS_MOUNT_NOUUID; - break; - case Opt_ikeep: - mp->m_flags |= XFS_MOUNT_IKEEP; - break; - case Opt_noikeep: - mp->m_flags &= ~XFS_MOUNT_IKEEP; - break; - case Opt_largeio: - mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; - break; - case Opt_nolargeio: - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - break; - case Opt_attr2: - mp->m_flags |= XFS_MOUNT_ATTR2; - break; - case Opt_noattr2: - mp->m_flags &= ~XFS_MOUNT_ATTR2; - mp->m_flags |= XFS_MOUNT_NOATTR2; - break; - case Opt_filestreams: - mp->m_flags |= XFS_MOUNT_FILESTREAMS; - break; - case Opt_noquota: - mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; - mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; - mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; - break; - case Opt_quota: - case Opt_uquota: - case Opt_usrquota: - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | - XFS_UQUOTA_ENFD); - break; - case Opt_qnoenforce: - case Opt_uqnoenforce: - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_UQUOTA_ENFD; - break; - case Opt_pquota: - case Opt_prjquota: - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | - XFS_PQUOTA_ENFD); - break; - case Opt_pqnoenforce: - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_PQUOTA_ENFD; - break; - case Opt_gquota: - case Opt_grpquota: - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | - XFS_GQUOTA_ENFD); - break; - case Opt_gqnoenforce: - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_GQUOTA_ENFD; - break; - case Opt_discard: - mp->m_flags |= XFS_MOUNT_DISCARD; - break; - case Opt_nodiscard: - mp->m_flags &= ~XFS_MOUNT_DISCARD; - break; -#ifdef CONFIG_FS_DAX - case Opt_dax: - mp->m_flags |= XFS_MOUNT_DAX; - break; -#endif - default: - xfs_warn(mp, "unknown mount option [%s].", p); - return -EINVAL; - } - } - - /* - * no recovery flag requires a read-only mount - */ - if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && - !(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_warn(mp, "no-recovery mounts must be read-only."); - return -EINVAL; - } - - if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { - xfs_warn(mp, - "sunit and swidth options incompatible with the noalign option"); - return -EINVAL; - } - -#ifndef CONFIG_XFS_QUOTA - if (XFS_IS_QUOTA_RUNNING(mp)) { - xfs_warn(mp, "quota support not available in this kernel."); - return -EINVAL; - } -#endif - - if ((dsunit && !dswidth) || (!dsunit && dswidth)) { - xfs_warn(mp, "sunit and swidth must be specified together"); - return -EINVAL; - } - - if (dsunit && (dswidth % dsunit != 0)) { - xfs_warn(mp, - "stripe width (%d) must be a multiple of the stripe unit (%d)", - dswidth, dsunit); - return -EINVAL; - } - -done: - if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) { - /* - * At this point the superblock has not been read - * in, therefore we do not know the block size. - * Before the mount call ends we will convert - * these to FSBs. - */ - mp->m_dalign = dsunit; - mp->m_swidth = dswidth; - } - - if (mp->m_logbufs != -1 && - mp->m_logbufs != 0 && - (mp->m_logbufs < XLOG_MIN_ICLOGS || - mp->m_logbufs > XLOG_MAX_ICLOGS)) { - xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", - mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); - return -EINVAL; - } - if (mp->m_logbsize != -1 && - mp->m_logbsize != 0 && - (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || - mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || - !is_power_of_2(mp->m_logbsize))) { - xfs_warn(mp, - "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", - mp->m_logbsize); - return -EINVAL; - } - - if (iosizelog) { - if (iosizelog > XFS_MAX_IO_LOG || - iosizelog < XFS_MIN_IO_LOG) { - xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", - iosizelog, XFS_MIN_IO_LOG, - XFS_MAX_IO_LOG); - return -EINVAL; - } - - mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; - mp->m_readio_log = iosizelog; - mp->m_writeio_log = iosizelog; - } - - return 0; -} struct proc_xfs_info { uint64_t flag; char *str; }; -STATIC int -xfs_showargs( - struct xfs_mount *mp, - struct seq_file *m) +static int +xfs_fs_show_options( + struct seq_file *m, + struct dentry *root) { static struct proc_xfs_info xfs_info_set[] = { /* the few simple ones we can get from the mount struct */ @@ -463,30 +162,25 @@ { XFS_MOUNT_FILESTREAMS, ",filestreams" }, { XFS_MOUNT_GRPID, ",grpid" }, { XFS_MOUNT_DISCARD, ",discard" }, - { XFS_MOUNT_SMALL_INUMS, ",inode32" }, - { XFS_MOUNT_DAX, ",dax" }, + { XFS_MOUNT_LARGEIO, ",largeio" }, + { XFS_MOUNT_DAX_ALWAYS, ",dax=always" }, + { XFS_MOUNT_DAX_NEVER, ",dax=never" }, { 0, NULL } }; - static struct proc_xfs_info xfs_info_unset[] = { - /* the few simple ones we can get from the mount struct */ - { XFS_MOUNT_COMPAT_IOSIZE, ",largeio" }, - { XFS_MOUNT_SMALL_INUMS, ",inode64" }, - { 0, NULL } - }; + struct xfs_mount *mp = XFS_M(root->d_sb); struct proc_xfs_info *xfs_infop; for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { if (mp->m_flags & xfs_infop->flag) seq_puts(m, xfs_infop->str); } - for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) { - if (!(mp->m_flags & xfs_infop->flag)) - seq_puts(m, xfs_infop->str); - } - if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) + seq_printf(m, ",inode%d", + (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64); + + if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) seq_printf(m, ",allocsize=%dk", - (int)(1 << mp->m_writeio_log) >> 10); + (1 << mp->m_allocsize_log) >> 10); if (mp->m_logbufs > 0) seq_printf(m, ",logbufs=%d", mp->m_logbufs); @@ -505,10 +199,12 @@ seq_printf(m, ",swidth=%d", (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) - seq_puts(m, ",usrquota"); - else if (mp->m_qflags & XFS_UQUOTA_ACCT) - seq_puts(m, ",uqnoenforce"); + if (mp->m_qflags & XFS_UQUOTA_ACCT) { + if (mp->m_qflags & XFS_UQUOTA_ENFD) + seq_puts(m, ",usrquota"); + else + seq_puts(m, ",uqnoenforce"); + } if (mp->m_qflags & XFS_PQUOTA_ACCT) { if (mp->m_qflags & XFS_PQUOTA_ENFD) @@ -527,39 +223,6 @@ seq_puts(m, ",noquota"); return 0; -} -static uint64_t -xfs_max_file_offset( - unsigned int blockshift) -{ - unsigned int pagefactor = 1; - unsigned int bitshift = BITS_PER_LONG - 1; - - /* Figure out maximum filesize, on Linux this can depend on - * the filesystem blocksize (on 32 bit platforms). - * __block_write_begin does this in an [unsigned] long... - * page->index << (PAGE_SHIFT - bbits) - * So, for page sized blocks (4K on 32 bit platforms), - * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is - * (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1) - * but for smaller blocksizes it is less (bbits = log2 bsize). - * Note1: get_block_t takes a long (implicit cast from above) - * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch - * can optionally convert the [unsigned] long from above into - * an [unsigned] long long. - */ - -#if BITS_PER_LONG == 32 -# if defined(CONFIG_LBDAF) - ASSERT(sizeof(sector_t) == 8); - pagefactor = PAGE_SIZE; - bitshift = BITS_PER_LONG; -# else - pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift); -# endif -#endif - - return (((uint64_t)pagefactor) << bitshift) - 1; } /* @@ -593,7 +256,7 @@ * Calculate how much should be reserved for inodes to meet * the max inode percentage. Used only for inode32. */ - if (mp->m_maxicount) { + if (M_IGEO(mp)->maxicount) { uint64_t icount; icount = sbp->sb_dblocks * sbp->sb_imax_pct; @@ -606,7 +269,7 @@ } /* Get the last possible inode in the filesystem */ - agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); + agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1); ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); /* @@ -679,7 +342,7 @@ xfs_blkdev_issue_flush( xfs_buftarg_t *buftarg) { - blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL); + blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS); } STATIC void @@ -832,43 +495,33 @@ struct xfs_mount *mp) { mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_super->s_id); if (!mp->m_buf_workqueue) goto out; - mp->m_data_workqueue = alloc_workqueue("xfs-data/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); - if (!mp->m_data_workqueue) + mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id); + if (!mp->m_unwritten_workqueue) goto out_destroy_buf; - mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); - if (!mp->m_unwritten_workqueue) - goto out_destroy_data_iodone_queue; - mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND, + 0, mp->m_super->s_id); if (!mp->m_cil_workqueue) goto out_destroy_unwritten; mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id); if (!mp->m_reclaim_workqueue) goto out_destroy_cil; - mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, - mp->m_fsname); - if (!mp->m_log_workqueue) + mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id); + if (!mp->m_eofblocks_workqueue) goto out_destroy_reclaim; - mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); - if (!mp->m_eofblocks_workqueue) - goto out_destroy_log; - mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, - mp->m_fsname); + mp->m_super->s_id); if (!mp->m_sync_workqueue) goto out_destroy_eofb; @@ -876,16 +529,12 @@ out_destroy_eofb: destroy_workqueue(mp->m_eofblocks_workqueue); -out_destroy_log: - destroy_workqueue(mp->m_log_workqueue); out_destroy_reclaim: destroy_workqueue(mp->m_reclaim_workqueue); out_destroy_cil: destroy_workqueue(mp->m_cil_workqueue); out_destroy_unwritten: destroy_workqueue(mp->m_unwritten_workqueue); -out_destroy_data_iodone_queue: - destroy_workqueue(mp->m_data_workqueue); out_destroy_buf: destroy_workqueue(mp->m_buf_workqueue); out: @@ -898,12 +547,24 @@ { destroy_workqueue(mp->m_sync_workqueue); destroy_workqueue(mp->m_eofblocks_workqueue); - destroy_workqueue(mp->m_log_workqueue); destroy_workqueue(mp->m_reclaim_workqueue); destroy_workqueue(mp->m_cil_workqueue); - destroy_workqueue(mp->m_data_workqueue); destroy_workqueue(mp->m_unwritten_workqueue); destroy_workqueue(mp->m_buf_workqueue); +} + +static void +xfs_flush_inodes_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(work, struct xfs_mount, + m_flush_inodes_work); + struct super_block *sb = mp->m_super; + + if (down_read_trylock(&sb->s_umount)) { + sync_inodes_sb(sb); + up_read(&sb->s_umount); + } } /* @@ -916,12 +577,15 @@ xfs_flush_inodes( struct xfs_mount *mp) { - struct super_block *sb = mp->m_super; + /* + * If flush_work() returns true then that means we waited for a flush + * which was already in progress. Don't bother running another scan. + */ + if (flush_work(&mp->m_flush_inodes_work)) + return; - if (down_read_trylock(&sb->s_umount)) { - sync_inodes_sb(sb); - up_read(&sb->s_umount); - } + queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work); + flush_work(&mp->m_flush_inodes_work); } /* Catch misguided souls that try to use this interface on XFS */ @@ -932,6 +596,32 @@ BUG(); return NULL; } + +#ifdef DEBUG +static void +xfs_check_delalloc( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_bmbt_irec got; + struct xfs_iext_cursor icur; + + if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got)) + return; + do { + if (isnullstartblock(got.br_startblock)) { + xfs_warn(ip->i_mount, + "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]", + ip->i_ino, + whichfork == XFS_DATA_FORK ? "data" : "cow", + got.br_startoff, got.br_blockcount); + } + } while (xfs_iext_next_extent(ifp, &icur, &got)); +} +#else +#define xfs_check_delalloc(ip, whichfork) do { } while (0) +#endif /* * Now that the generic code is guaranteed not to be accessing @@ -951,7 +641,12 @@ xfs_inactive(ip); - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); + if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) { + xfs_check_delalloc(ip, XFS_DATA_FORK); + xfs_check_delalloc(ip, XFS_COW_FORK); + ASSERT(0); + } + XFS_STATS_INC(ip->i_mount, vn_reclaim); /* @@ -961,11 +656,11 @@ ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); /* - * We always use background reclaim here because even if the - * inode is clean, it still may be under IO and hence we have - * to take the flush lock. The background reclaim path handles - * this more efficiently than we can here, so simply let background - * reclaim tear down all inodes. + * We always use background reclaim here because even if the inode is + * clean, it still may be under IO and hence we have wait for IO + * completion to occur before we can reclaim the inode. The background + * reclaim path handles this more efficiently than we can here, so + * simply let background reclaim tear down all inodes. */ xfs_inode_set_reclaim_tag(ip); } @@ -1044,16 +739,16 @@ return 0; } - return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE); + return generic_drop_inode(inode); } -STATIC void -xfs_free_fsname( +static void +xfs_mount_free( struct xfs_mount *mp) { - kfree(mp->m_fsname); kfree(mp->m_rtname); kfree(mp->m_logname); + kmem_free(mp); } STATIC int @@ -1062,6 +757,7 @@ int wait) { struct xfs_mount *mp = XFS_M(sb); + int error; /* * Doing anything during the async pass would be counterproductive. @@ -1069,7 +765,10 @@ if (!wait) return 0; - xfs_log_force(mp, XFS_LOG_SYNC); + error = xfs_log_force(mp, XFS_LOG_SYNC); + if (error) + return error; + if (laptop_mode) { /* * The disk must be active because we're syncing. @@ -1097,12 +796,11 @@ xfs_extlen_t lsize; int64_t ffree; - statp->f_type = XFS_SB_MAGIC; + statp->f_type = XFS_SUPER_MAGIC; statp->f_namelen = MAXNAMELEN - 1; id = huge_encode_dev(mp->m_ddev_targp->bt_dev); - statp->f_fsid.val[0] = (u32)id; - statp->f_fsid.val[1] = (u32)(id >> 32); + statp->f_fsid = u64_to_fsid(id); icount = percpu_counter_sum(&mp->m_icount); ifree = percpu_counter_sum(&mp->m_ifree); @@ -1114,15 +812,16 @@ statp->f_blocks = sbp->sb_dblocks - lsize; spin_unlock(&mp->m_sb_lock); - statp->f_bfree = fdblocks - mp->m_alloc_set_aside; + /* make sure statp->f_bfree does not underflow */ + statp->f_bfree = max_t(int64_t, fdblocks - mp->m_alloc_set_aside, 0); statp->f_bavail = statp->f_bfree; - fakeinos = statp->f_bfree << sbp->sb_inopblog; + fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree); statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); - if (mp->m_maxicount) + if (M_IGEO(mp)->maxicount) statp->f_files = min_t(typeof(statp->f_files), statp->f_files, - mp->m_maxicount); + M_IGEO(mp)->maxicount); /* If sb_icount overshot maxicount, report actual allocation */ statp->f_files = max_t(typeof(statp->f_files), @@ -1180,8 +879,10 @@ * there is no log replay required to write the inodes to disk - this is the * primary difference between a sync and a quiesce. * - * Note: xfs_log_quiesce() stops background log work - the callers must ensure - * it is started again when appropriate. + * We cancel log work early here to ensure all transactions the log worker may + * run have finished before we clean up and log the superblock and write an + * unmount record. The unfreeze process is responsible for restarting the log + * worker correctly. */ void xfs_quiesce_attr( @@ -1189,204 +890,18 @@ { int error = 0; - /* wait for all modifications to complete */ - while (atomic_read(&mp->m_active_trans) > 0) - delay(100); + cancel_delayed_work_sync(&mp->m_log->l_work); /* force the log to unpin objects from the now complete transactions */ xfs_log_force(mp, XFS_LOG_SYNC); - /* reclaim inodes to do any IO before the freeze completes */ - xfs_reclaim_inodes(mp, 0); - xfs_reclaim_inodes(mp, SYNC_WAIT); /* Push the superblock and write an unmount record */ error = xfs_log_sbcount(mp); if (error) xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " "Frozen image may not be consistent."); - /* - * Just warn here till VFS can correctly support - * read-only remount without racing. - */ - WARN_ON(atomic_read(&mp->m_active_trans) != 0); - xfs_log_quiesce(mp); -} - -STATIC int -xfs_test_remount_options( - struct super_block *sb, - char *options) -{ - int error = 0; - struct xfs_mount *tmp_mp; - - tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL); - if (!tmp_mp) - return -ENOMEM; - - tmp_mp->m_super = sb; - error = xfs_parseargs(tmp_mp, options); - xfs_free_fsname(tmp_mp); - kmem_free(tmp_mp); - - return error; -} - -STATIC int -xfs_fs_remount( - struct super_block *sb, - int *flags, - char *options) -{ - struct xfs_mount *mp = XFS_M(sb); - xfs_sb_t *sbp = &mp->m_sb; - substring_t args[MAX_OPT_ARGS]; - char *p; - int error; - - /* First, check for complete junk; i.e. invalid options */ - error = xfs_test_remount_options(sb, options); - if (error) - return error; - - sync_filesystem(sb); - while ((p = strsep(&options, ",")) != NULL) { - int token; - - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_inode64: - mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; - mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); - break; - case Opt_inode32: - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); - break; - default: - /* - * Logically we would return an error here to prevent - * users from believing they might have changed - * mount options using remount which can't be changed. - * - * But unfortunately mount(8) adds all options from - * mtab and fstab to the mount arguments in some cases - * so we can't blindly reject options, but have to - * check for each specified option if it actually - * differs from the currently set option and only - * reject it if that's the case. - * - * Until that is implemented we return success for - * every remount request, and silently ignore all - * options that we can't actually change. - */ -#if 0 - xfs_info(mp, - "mount option \"%s\" not supported for remount", p); - return -EINVAL; -#else - break; -#endif - } - } - - /* ro -> rw */ - if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) { - if (mp->m_flags & XFS_MOUNT_NORECOVERY) { - xfs_warn(mp, - "ro->rw transition prohibited on norecovery mount"); - return -EINVAL; - } - - if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && - xfs_sb_has_ro_compat_feature(sbp, - XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { - xfs_warn(mp, -"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", - (sbp->sb_features_ro_compat & - XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); - return -EINVAL; - } - - mp->m_flags &= ~XFS_MOUNT_RDONLY; - - /* - * If this is the first remount to writeable state we - * might have some superblock changes to update. - */ - if (mp->m_update_sb) { - error = xfs_sync_sb(mp, false); - if (error) { - xfs_warn(mp, "failed to write sb changes"); - return error; - } - mp->m_update_sb = false; - } - - /* - * Fill out the reserve pool if it is empty. Use the stashed - * value if it is non-zero, otherwise go with the default. - */ - xfs_restore_resvblks(mp); - xfs_log_work_queue(mp); - - /* Recover any CoW blocks that never got remapped. */ - error = xfs_reflink_recover_cow(mp); - if (error) { - xfs_err(mp, - "Error %d recovering leftover CoW allocations.", error); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return error; - } - xfs_icache_enable_reclaim(mp); - - /* Create the per-AG metadata reservation pool .*/ - error = xfs_fs_reserve_ag_blocks(mp); - if (error && error != -ENOSPC) - return error; - } - - /* rw -> ro */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { - /* - * Cancel background eofb scanning so it cannot race with the - * final log force+buftarg wait and deadlock the remount. - */ - xfs_icache_disable_reclaim(mp); - - /* Get rid of any leftover CoW reservations... */ - error = xfs_icache_free_cowblocks(mp, NULL); - if (error) { - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return error; - } - - /* Free the per-AG metadata reservation pool. */ - error = xfs_fs_unreserve_ag_blocks(mp); - if (error) { - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return error; - } - - /* - * Before we sync the metadata, we need to free up the reserve - * block pool so that the used block count in the superblock on - * disk is correct at the end of the remount. Stash the current - * reserve pool size so that if we get remounted rw, we can - * return it to the same size. - */ - xfs_save_resvblks(mp); - - xfs_quiesce_attr(mp); - mp->m_flags |= XFS_MOUNT_RDONLY; - } - - return 0; } /* @@ -1400,11 +915,21 @@ struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); + unsigned int flags; + int ret; - xfs_icache_disable_reclaim(mp); + /* + * The filesystem is now frozen far enough that memory reclaim + * cannot safely operate on the filesystem. Hence we need to + * set a GFP_NOFS context here to avoid recursion deadlocks. + */ + flags = memalloc_nofs_save(); + xfs_stop_block_reaping(mp); xfs_save_resvblks(mp); xfs_quiesce_attr(mp); - return xfs_sync_sb(mp, true); + ret = xfs_sync_sb(mp, true); + memalloc_nofs_restore(flags); + return ret; } STATIC int @@ -1415,16 +940,8 @@ xfs_restore_resvblks(mp); xfs_log_work_queue(mp); - xfs_icache_enable_reclaim(mp); + xfs_start_block_reaping(mp); return 0; -} - -STATIC int -xfs_fs_show_options( - struct seq_file *m, - struct dentry *root) -{ - return xfs_showargs(XFS_M(root->d_sb), m); } /* @@ -1513,8 +1030,14 @@ if (error) goto free_ifree; + error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL); + if (error) + goto free_fdblocks; + return 0; +free_fdblocks: + percpu_counter_destroy(&mp->m_fdblocks); free_ifree: percpu_counter_destroy(&mp->m_ifree); free_icount: @@ -1538,228 +1061,12 @@ percpu_counter_destroy(&mp->m_icount); percpu_counter_destroy(&mp->m_ifree); percpu_counter_destroy(&mp->m_fdblocks); + ASSERT(XFS_FORCED_SHUTDOWN(mp) || + percpu_counter_sum(&mp->m_delalloc_blks) == 0); + percpu_counter_destroy(&mp->m_delalloc_blks); } -static struct xfs_mount * -xfs_mount_alloc( - struct super_block *sb) -{ - struct xfs_mount *mp; - - mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); - if (!mp) - return NULL; - - mp->m_super = sb; - spin_lock_init(&mp->m_sb_lock); - spin_lock_init(&mp->m_agirotor_lock); - INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); - spin_lock_init(&mp->m_perag_lock); - mutex_init(&mp->m_growlock); - atomic_set(&mp->m_active_trans, 0); - INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); - INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); - INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); - mp->m_kobj.kobject.kset = xfs_kset; - /* - * We don't create the finobt per-ag space reservation until after log - * recovery, so we must set this to true so that an ifree transaction - * started during log recovery will not depend on space reservations - * for finobt expansion. - */ - mp->m_finobt_nores = true; - return mp; -} - - -STATIC int -xfs_fs_fill_super( - struct super_block *sb, - void *data, - int silent) -{ - struct inode *root; - struct xfs_mount *mp = NULL; - int flags = 0, error = -ENOMEM; - - /* - * allocate mp and do all low-level struct initializations before we - * attach it to the super - */ - mp = xfs_mount_alloc(sb); - if (!mp) - goto out; - sb->s_fs_info = mp; - - error = xfs_parseargs(mp, (char *)data); - if (error) - goto out_free_fsname; - - sb_min_blocksize(sb, BBSIZE); - sb->s_xattr = xfs_xattr_handlers; - sb->s_export_op = &xfs_export_operations; -#ifdef CONFIG_XFS_QUOTA - sb->s_qcop = &xfs_quotactl_operations; - sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; -#endif - sb->s_op = &xfs_super_operations; - - /* - * Delay mount work if the debug hook is set. This is debug - * instrumention to coordinate simulation of xfs mount failures with - * VFS superblock operations - */ - if (xfs_globals.mount_delay) { - xfs_notice(mp, "Delaying mount for %d seconds.", - xfs_globals.mount_delay); - msleep(xfs_globals.mount_delay * 1000); - } - - if (silent) - flags |= XFS_MFSI_QUIET; - - error = xfs_open_devices(mp); - if (error) - goto out_free_fsname; - - error = xfs_init_mount_workqueues(mp); - if (error) - goto out_close_devices; - - error = xfs_init_percpu_counters(mp); - if (error) - goto out_destroy_workqueues; - - /* Allocate stats memory before we do operations that might use it */ - mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); - if (!mp->m_stats.xs_stats) { - error = -ENOMEM; - goto out_destroy_counters; - } - - error = xfs_readsb(mp, flags); - if (error) - goto out_free_stats; - - error = xfs_finish_flags(mp); - if (error) - goto out_free_sb; - - error = xfs_setup_devices(mp); - if (error) - goto out_free_sb; - - error = xfs_filestream_mount(mp); - if (error) - goto out_free_sb; - - /* - * we must configure the block size in the superblock before we run the - * full mount process as the mount process can lookup and cache inodes. - */ - sb->s_magic = XFS_SB_MAGIC; - sb->s_blocksize = mp->m_sb.sb_blocksize; - sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; - sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); - sb->s_max_links = XFS_MAXLINK; - sb->s_time_gran = 1; - set_posix_acl_flag(sb); - - /* version 5 superblocks support inode version counters. */ - if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) - sb->s_flags |= SB_I_VERSION; - - if (mp->m_flags & XFS_MOUNT_DAX) { - bool rtdev_is_dax = false, datadev_is_dax; - - xfs_warn(mp, - "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); - - datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev, - sb->s_blocksize); - if (mp->m_rtdev_targp) - rtdev_is_dax = bdev_dax_supported( - mp->m_rtdev_targp->bt_bdev, sb->s_blocksize); - if (!rtdev_is_dax && !datadev_is_dax) { - xfs_alert(mp, - "DAX unsupported by block device. Turning off DAX."); - mp->m_flags &= ~XFS_MOUNT_DAX; - } - if (xfs_sb_version_hasreflink(&mp->m_sb)) { - xfs_alert(mp, - "DAX and reflink cannot be used together!"); - error = -EINVAL; - goto out_filestream_unmount; - } - } - - if (mp->m_flags & XFS_MOUNT_DISCARD) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); - - if (!blk_queue_discard(q)) { - xfs_warn(mp, "mounting with \"discard\" option, but " - "the device does not support discard"); - mp->m_flags &= ~XFS_MOUNT_DISCARD; - } - } - - if (xfs_sb_version_hasreflink(&mp->m_sb) && mp->m_sb.sb_rblocks) { - xfs_alert(mp, - "reflink not compatible with realtime device!"); - error = -EINVAL; - goto out_filestream_unmount; - } - - if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) { - xfs_alert(mp, - "reverse mapping btree not compatible with realtime device!"); - error = -EINVAL; - goto out_filestream_unmount; - } - - error = xfs_mountfs(mp); - if (error) - goto out_filestream_unmount; - - root = igrab(VFS_I(mp->m_rootip)); - if (!root) { - error = -ENOENT; - goto out_unmount; - } - sb->s_root = d_make_root(root); - if (!sb->s_root) { - error = -ENOMEM; - goto out_unmount; - } - - return 0; - - out_filestream_unmount: - xfs_filestream_unmount(mp); - out_free_sb: - xfs_freesb(mp); - out_free_stats: - free_percpu(mp->m_stats.xs_stats); - out_destroy_counters: - xfs_destroy_percpu_counters(mp); - out_destroy_workqueues: - xfs_destroy_mount_workqueues(mp); - out_close_devices: - xfs_close_devices(mp); - out_free_fsname: - sb->s_fs_info = NULL; - xfs_free_fsname(mp); - kfree(mp); - out: - return error; - - out_unmount: - xfs_filestream_unmount(mp); - xfs_unmountfs(mp); - goto out_free_sb; -} - -STATIC void +static void xfs_fs_put_super( struct super_block *sb) { @@ -1780,18 +1087,7 @@ xfs_close_devices(mp); sb->s_fs_info = NULL; - xfs_free_fsname(mp); - kfree(mp); -} - -STATIC struct dentry * -xfs_fs_mount( - struct file_system_type *fs_type, - int flags, - const char *dev_name, - void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); + xfs_mount_free(mp); } static long @@ -1823,16 +1119,790 @@ .freeze_fs = xfs_fs_freeze, .unfreeze_fs = xfs_fs_unfreeze, .statfs = xfs_fs_statfs, - .remount_fs = xfs_fs_remount, .show_options = xfs_fs_show_options, .nr_cached_objects = xfs_fs_nr_cached_objects, .free_cached_objects = xfs_fs_free_cached_objects, }; +static int +suffix_kstrtoint( + const char *s, + unsigned int base, + int *res) +{ + int last, shift_left_factor = 0, _res; + char *value; + int ret = 0; + + value = kstrdup(s, GFP_KERNEL); + if (!value) + return -ENOMEM; + + last = strlen(value) - 1; + if (value[last] == 'K' || value[last] == 'k') { + shift_left_factor = 10; + value[last] = '\0'; + } + if (value[last] == 'M' || value[last] == 'm') { + shift_left_factor = 20; + value[last] = '\0'; + } + if (value[last] == 'G' || value[last] == 'g') { + shift_left_factor = 30; + value[last] = '\0'; + } + + if (kstrtoint(value, base, &_res)) + ret = -EINVAL; + kfree(value); + *res = _res << shift_left_factor; + return ret; +} + +static inline void +xfs_fs_warn_deprecated( + struct fs_context *fc, + struct fs_parameter *param, + uint64_t flag, + bool value) +{ + /* Don't print the warning if reconfiguring and current mount point + * already had the flag set + */ + if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && + !!(XFS_M(fc->root->d_sb)->m_flags & flag) == value) + return; + xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); +} + +/* + * Set mount state from a mount option. + * + * NOTE: mp->m_super is NULL here! + */ +static int +xfs_fc_parse_param( + struct fs_context *fc, + struct fs_parameter *param) +{ + struct xfs_mount *parsing_mp = fc->s_fs_info; + struct fs_parse_result result; + int size = 0; + int opt; + + opt = fs_parse(fc, xfs_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_logbufs: + parsing_mp->m_logbufs = result.uint_32; + return 0; + case Opt_logbsize: + if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize)) + return -EINVAL; + return 0; + case Opt_logdev: + kfree(parsing_mp->m_logname); + parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL); + if (!parsing_mp->m_logname) + return -ENOMEM; + return 0; + case Opt_rtdev: + kfree(parsing_mp->m_rtname); + parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL); + if (!parsing_mp->m_rtname) + return -ENOMEM; + return 0; + case Opt_allocsize: + if (suffix_kstrtoint(param->string, 10, &size)) + return -EINVAL; + parsing_mp->m_allocsize_log = ffs(size) - 1; + parsing_mp->m_flags |= XFS_MOUNT_ALLOCSIZE; + return 0; + case Opt_grpid: + case Opt_bsdgroups: + parsing_mp->m_flags |= XFS_MOUNT_GRPID; + return 0; + case Opt_nogrpid: + case Opt_sysvgroups: + parsing_mp->m_flags &= ~XFS_MOUNT_GRPID; + return 0; + case Opt_wsync: + parsing_mp->m_flags |= XFS_MOUNT_WSYNC; + return 0; + case Opt_norecovery: + parsing_mp->m_flags |= XFS_MOUNT_NORECOVERY; + return 0; + case Opt_noalign: + parsing_mp->m_flags |= XFS_MOUNT_NOALIGN; + return 0; + case Opt_swalloc: + parsing_mp->m_flags |= XFS_MOUNT_SWALLOC; + return 0; + case Opt_sunit: + parsing_mp->m_dalign = result.uint_32; + return 0; + case Opt_swidth: + parsing_mp->m_swidth = result.uint_32; + return 0; + case Opt_inode32: + parsing_mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + return 0; + case Opt_inode64: + parsing_mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; + return 0; + case Opt_nouuid: + parsing_mp->m_flags |= XFS_MOUNT_NOUUID; + return 0; + case Opt_largeio: + parsing_mp->m_flags |= XFS_MOUNT_LARGEIO; + return 0; + case Opt_nolargeio: + parsing_mp->m_flags &= ~XFS_MOUNT_LARGEIO; + return 0; + case Opt_filestreams: + parsing_mp->m_flags |= XFS_MOUNT_FILESTREAMS; + return 0; + case Opt_noquota: + parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; + parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; + parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; + return 0; + case Opt_quota: + case Opt_uquota: + case Opt_usrquota: + parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_UQUOTA_ENFD); + return 0; + case Opt_qnoenforce: + case Opt_uqnoenforce: + parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD; + return 0; + case Opt_pquota: + case Opt_prjquota: + parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_PQUOTA_ENFD); + return 0; + case Opt_pqnoenforce: + parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD; + return 0; + case Opt_gquota: + case Opt_grpquota: + parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_GQUOTA_ENFD); + return 0; + case Opt_gqnoenforce: + parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD; + return 0; + case Opt_discard: + parsing_mp->m_flags |= XFS_MOUNT_DISCARD; + return 0; + case Opt_nodiscard: + parsing_mp->m_flags &= ~XFS_MOUNT_DISCARD; + return 0; +#ifdef CONFIG_FS_DAX + case Opt_dax: + xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS); + return 0; + case Opt_dax_enum: + xfs_mount_set_dax_mode(parsing_mp, result.uint_32); + return 0; +#endif + /* Following mount options will be removed in September 2025 */ + case Opt_ikeep: + xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, true); + parsing_mp->m_flags |= XFS_MOUNT_IKEEP; + return 0; + case Opt_noikeep: + xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, false); + parsing_mp->m_flags &= ~XFS_MOUNT_IKEEP; + return 0; + case Opt_attr2: + xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_ATTR2, true); + parsing_mp->m_flags |= XFS_MOUNT_ATTR2; + return 0; + case Opt_noattr2: + xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_NOATTR2, true); + parsing_mp->m_flags &= ~XFS_MOUNT_ATTR2; + parsing_mp->m_flags |= XFS_MOUNT_NOATTR2; + return 0; + default: + xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); + return -EINVAL; + } + + return 0; +} + +static int +xfs_fc_validate_params( + struct xfs_mount *mp) +{ + /* + * no recovery flag requires a read-only mount + */ + if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && + !(mp->m_flags & XFS_MOUNT_RDONLY)) { + xfs_warn(mp, "no-recovery mounts must be read-only."); + return -EINVAL; + } + + if ((mp->m_flags & XFS_MOUNT_NOALIGN) && + (mp->m_dalign || mp->m_swidth)) { + xfs_warn(mp, + "sunit and swidth options incompatible with the noalign option"); + return -EINVAL; + } + + if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) { + xfs_warn(mp, "quota support not available in this kernel."); + return -EINVAL; + } + + if ((mp->m_dalign && !mp->m_swidth) || + (!mp->m_dalign && mp->m_swidth)) { + xfs_warn(mp, "sunit and swidth must be specified together"); + return -EINVAL; + } + + if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) { + xfs_warn(mp, + "stripe width (%d) must be a multiple of the stripe unit (%d)", + mp->m_swidth, mp->m_dalign); + return -EINVAL; + } + + if (mp->m_logbufs != -1 && + mp->m_logbufs != 0 && + (mp->m_logbufs < XLOG_MIN_ICLOGS || + mp->m_logbufs > XLOG_MAX_ICLOGS)) { + xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", + mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); + return -EINVAL; + } + + if (mp->m_logbsize != -1 && + mp->m_logbsize != 0 && + (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || + mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || + !is_power_of_2(mp->m_logbsize))) { + xfs_warn(mp, + "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", + mp->m_logbsize); + return -EINVAL; + } + + if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) && + (mp->m_allocsize_log > XFS_MAX_IO_LOG || + mp->m_allocsize_log < XFS_MIN_IO_LOG)) { + xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", + mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); + return -EINVAL; + } + + return 0; +} + +static int +xfs_fc_fill_super( + struct super_block *sb, + struct fs_context *fc) +{ + struct xfs_mount *mp = sb->s_fs_info; + struct inode *root; + int flags = 0, error; + + mp->m_super = sb; + + error = xfs_fc_validate_params(mp); + if (error) + goto out_free_names; + + sb_min_blocksize(sb, BBSIZE); + sb->s_xattr = xfs_xattr_handlers; + sb->s_export_op = &xfs_export_operations; +#ifdef CONFIG_XFS_QUOTA + sb->s_qcop = &xfs_quotactl_operations; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; +#endif + sb->s_op = &xfs_super_operations; + + /* + * Delay mount work if the debug hook is set. This is debug + * instrumention to coordinate simulation of xfs mount failures with + * VFS superblock operations + */ + if (xfs_globals.mount_delay) { + xfs_notice(mp, "Delaying mount for %d seconds.", + xfs_globals.mount_delay); + msleep(xfs_globals.mount_delay * 1000); + } + + if (fc->sb_flags & SB_SILENT) + flags |= XFS_MFSI_QUIET; + + error = xfs_open_devices(mp); + if (error) + goto out_free_names; + + error = xfs_init_mount_workqueues(mp); + if (error) + goto out_close_devices; + + error = xfs_init_percpu_counters(mp); + if (error) + goto out_destroy_workqueues; + + /* Allocate stats memory before we do operations that might use it */ + mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); + if (!mp->m_stats.xs_stats) { + error = -ENOMEM; + goto out_destroy_counters; + } + + error = xfs_readsb(mp, flags); + if (error) + goto out_free_stats; + + error = xfs_finish_flags(mp); + if (error) + goto out_free_sb; + + error = xfs_setup_devices(mp); + if (error) + goto out_free_sb; + + /* V4 support is undergoing deprecation. */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) { +#ifdef CONFIG_XFS_SUPPORT_V4 + xfs_warn_once(mp, + "Deprecated V4 format (crc=0) will not be supported after September 2030."); +#else + xfs_warn(mp, + "Deprecated V4 format (crc=0) not supported by kernel."); + error = -EINVAL; + goto out_free_sb; +#endif + } + + /* + * XFS block mappings use 54 bits to store the logical block offset. + * This should suffice to handle the maximum file size that the VFS + * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT + * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes + * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON + * to check this assertion. + * + * Avoid integer overflow by comparing the maximum bmbt offset to the + * maximum pagecache offset in units of fs blocks. + */ + if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) { + xfs_warn(mp, +"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", + XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), + XFS_MAX_FILEOFF); + error = -EINVAL; + goto out_free_sb; + } + + error = xfs_filestream_mount(mp); + if (error) + goto out_free_sb; + + /* + * we must configure the block size in the superblock before we run the + * full mount process as the mount process can lookup and cache inodes. + */ + sb->s_magic = XFS_SUPER_MAGIC; + sb->s_blocksize = mp->m_sb.sb_blocksize; + sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_max_links = XFS_MAXLINK; + sb->s_time_gran = 1; + if (xfs_sb_version_hasbigtime(&mp->m_sb)) { + sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN); + sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX); + } else { + sb->s_time_min = XFS_LEGACY_TIME_MIN; + sb->s_time_max = XFS_LEGACY_TIME_MAX; + } + trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max); + sb->s_iflags |= SB_I_CGROUPWB; + + set_posix_acl_flag(sb); + + /* version 5 superblocks support inode version counters. */ + if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) + sb->s_flags |= SB_I_VERSION; + + if (xfs_sb_version_hasbigtime(&mp->m_sb)) + xfs_warn(mp, + "EXPERIMENTAL big timestamp feature in use. Use at your own risk!"); + + if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) { + bool rtdev_is_dax = false, datadev_is_dax; + + xfs_warn(mp, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + + datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev, + sb->s_blocksize); + if (mp->m_rtdev_targp) + rtdev_is_dax = bdev_dax_supported( + mp->m_rtdev_targp->bt_bdev, sb->s_blocksize); + if (!rtdev_is_dax && !datadev_is_dax) { + xfs_alert(mp, + "DAX unsupported by block device. Turning off DAX."); + xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); + } + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + xfs_alert(mp, + "DAX and reflink cannot be used together!"); + error = -EINVAL; + goto out_filestream_unmount; + } + } + + if (mp->m_flags & XFS_MOUNT_DISCARD) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + + if (!blk_queue_discard(q)) { + xfs_warn(mp, "mounting with \"discard\" option, but " + "the device does not support discard"); + mp->m_flags &= ~XFS_MOUNT_DISCARD; + } + } + + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + if (mp->m_sb.sb_rblocks) { + xfs_alert(mp, + "reflink not compatible with realtime device!"); + error = -EINVAL; + goto out_filestream_unmount; + } + + if (xfs_globals.always_cow) { + xfs_info(mp, "using DEBUG-only always_cow mode."); + mp->m_always_cow = true; + } + } + + if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) { + xfs_alert(mp, + "reverse mapping btree not compatible with realtime device!"); + error = -EINVAL; + goto out_filestream_unmount; + } + + if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) + xfs_warn(mp, + "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!"); + + error = xfs_mountfs(mp); + if (error) + goto out_filestream_unmount; + + root = igrab(VFS_I(mp->m_rootip)); + if (!root) { + error = -ENOENT; + goto out_unmount; + } + sb->s_root = d_make_root(root); + if (!sb->s_root) { + error = -ENOMEM; + goto out_unmount; + } + + return 0; + + out_filestream_unmount: + xfs_filestream_unmount(mp); + out_free_sb: + xfs_freesb(mp); + out_free_stats: + free_percpu(mp->m_stats.xs_stats); + out_destroy_counters: + xfs_destroy_percpu_counters(mp); + out_destroy_workqueues: + xfs_destroy_mount_workqueues(mp); + out_close_devices: + xfs_close_devices(mp); + out_free_names: + sb->s_fs_info = NULL; + xfs_mount_free(mp); + return error; + + out_unmount: + xfs_filestream_unmount(mp); + xfs_unmountfs(mp); + goto out_free_sb; +} + +static int +xfs_fc_get_tree( + struct fs_context *fc) +{ + return get_tree_bdev(fc, xfs_fc_fill_super); +} + +static int +xfs_remount_rw( + struct xfs_mount *mp) +{ + struct xfs_sb *sbp = &mp->m_sb; + int error; + + if (mp->m_flags & XFS_MOUNT_NORECOVERY) { + xfs_warn(mp, + "ro->rw transition prohibited on norecovery mount"); + return -EINVAL; + } + + if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { + xfs_warn(mp, + "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", + (sbp->sb_features_ro_compat & + XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); + return -EINVAL; + } + + mp->m_flags &= ~XFS_MOUNT_RDONLY; + + /* + * If this is the first remount to writeable state we might have some + * superblock changes to update. + */ + if (mp->m_update_sb) { + error = xfs_sync_sb(mp, false); + if (error) { + xfs_warn(mp, "failed to write sb changes"); + return error; + } + mp->m_update_sb = false; + } + + /* + * Fill out the reserve pool if it is empty. Use the stashed value if + * it is non-zero, otherwise go with the default. + */ + xfs_restore_resvblks(mp); + xfs_log_work_queue(mp); + + /* Recover any CoW blocks that never got remapped. */ + error = xfs_reflink_recover_cow(mp); + if (error) { + xfs_err(mp, + "Error %d recovering leftover CoW allocations.", error); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + xfs_start_block_reaping(mp); + + /* Create the per-AG metadata reservation pool .*/ + error = xfs_fs_reserve_ag_blocks(mp); + if (error && error != -ENOSPC) + return error; + + return 0; +} + +static int +xfs_remount_ro( + struct xfs_mount *mp) +{ + struct xfs_eofblocks eofb = { + .eof_flags = XFS_EOF_FLAGS_SYNC, + }; + int error; + + /* Flush all the dirty data to disk. */ + error = sync_filesystem(mp->m_super); + if (error) + return error; + + /* + * Cancel background eofb scanning so it cannot race with the final + * log force+buftarg wait and deadlock the remount. + */ + xfs_stop_block_reaping(mp); + + /* + * Clear out all remaining COW staging extents and speculative post-EOF + * preallocations so that we don't leave inodes requiring inactivation + * cleanups during reclaim on a read-only mount. We must process every + * cached inode, so this requires a synchronous cache scan. + */ + error = xfs_icache_free_cowblocks(mp, &eofb); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + + /* Free the per-AG metadata reservation pool. */ + error = xfs_fs_unreserve_ag_blocks(mp); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + + /* + * Before we sync the metadata, we need to free up the reserve block + * pool so that the used block count in the superblock on disk is + * correct at the end of the remount. Stash the current* reserve pool + * size so that if we get remounted rw, we can return it to the same + * size. + */ + xfs_save_resvblks(mp); + + xfs_quiesce_attr(mp); + mp->m_flags |= XFS_MOUNT_RDONLY; + + return 0; +} + +/* + * Logically we would return an error here to prevent users from believing + * they might have changed mount options using remount which can't be changed. + * + * But unfortunately mount(8) adds all options from mtab and fstab to the mount + * arguments in some cases so we can't blindly reject options, but have to + * check for each specified option if it actually differs from the currently + * set option and only reject it if that's the case. + * + * Until that is implemented we return success for every remount request, and + * silently ignore all options that we can't actually change. + */ +static int +xfs_fc_reconfigure( + struct fs_context *fc) +{ + struct xfs_mount *mp = XFS_M(fc->root->d_sb); + struct xfs_mount *new_mp = fc->s_fs_info; + xfs_sb_t *sbp = &mp->m_sb; + int flags = fc->sb_flags; + int error; + + /* version 5 superblocks always support version counters. */ + if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) + fc->sb_flags |= SB_I_VERSION; + + error = xfs_fc_validate_params(new_mp); + if (error) + return error; + + /* inode32 -> inode64 */ + if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && + !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) { + mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; + mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); + } + + /* inode64 -> inode32 */ + if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) && + (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) { + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); + } + + /* ro -> rw */ + if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) { + error = xfs_remount_rw(mp); + if (error) + return error; + } + + /* rw -> ro */ + if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) { + error = xfs_remount_ro(mp); + if (error) + return error; + } + + return 0; +} + +static void xfs_fc_free( + struct fs_context *fc) +{ + struct xfs_mount *mp = fc->s_fs_info; + + /* + * mp is stored in the fs_context when it is initialized. + * mp is transferred to the superblock on a successful mount, + * but if an error occurs before the transfer we have to free + * it here. + */ + if (mp) + xfs_mount_free(mp); +} + +static const struct fs_context_operations xfs_context_ops = { + .parse_param = xfs_fc_parse_param, + .get_tree = xfs_fc_get_tree, + .reconfigure = xfs_fc_reconfigure, + .free = xfs_fc_free, +}; + +static int xfs_init_fs_context( + struct fs_context *fc) +{ + struct xfs_mount *mp; + + mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO); + if (!mp) + return -ENOMEM; + + spin_lock_init(&mp->m_sb_lock); + spin_lock_init(&mp->m_agirotor_lock); + INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); + spin_lock_init(&mp->m_perag_lock); + mutex_init(&mp->m_growlock); + INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); + INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); + mp->m_kobj.kobject.kset = xfs_kset; + /* + * We don't create the finobt per-ag space reservation until after log + * recovery, so we must set this to true so that an ifree transaction + * started during log recovery will not depend on space reservations + * for finobt expansion. + */ + mp->m_finobt_nores = true; + + /* + * These can be overridden by the mount option parsing. + */ + mp->m_logbufs = -1; + mp->m_logbsize = -1; + mp->m_allocsize_log = 16; /* 64k */ + + /* + * Copy binary VFS mount flags we are interested in. + */ + if (fc->sb_flags & SB_RDONLY) + mp->m_flags |= XFS_MOUNT_RDONLY; + if (fc->sb_flags & SB_DIRSYNC) + mp->m_flags |= XFS_MOUNT_DIRSYNC; + if (fc->sb_flags & SB_SYNCHRONOUS) + mp->m_flags |= XFS_MOUNT_WSYNC; + + fc->s_fs_info = mp; + fc->ops = &xfs_context_ops; + + return 0; +} + static struct file_system_type xfs_fs_type = { .owner = THIS_MODULE, .name = "xfs", - .mount = xfs_fs_mount, + .init_fs_context = xfs_init_fs_context, + .parameters = xfs_fs_parameters, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; @@ -1841,37 +1911,39 @@ STATIC int __init xfs_init_zones(void) { - if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE), - offsetof(struct xfs_ioend, io_inline_bio), - BIOSET_NEED_BVECS)) + xfs_log_ticket_zone = kmem_cache_create("xfs_log_ticket", + sizeof(struct xlog_ticket), + 0, 0, NULL); + if (!xfs_log_ticket_zone) goto out; - xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), - "xfs_log_ticket"); - if (!xfs_log_ticket_zone) - goto out_free_ioend_bioset; - - xfs_bmap_free_item_zone = kmem_zone_init( - sizeof(struct xfs_extent_free_item), - "xfs_bmap_free_item"); + xfs_bmap_free_item_zone = kmem_cache_create("xfs_bmap_free_item", + sizeof(struct xfs_extent_free_item), + 0, 0, NULL); if (!xfs_bmap_free_item_zone) goto out_destroy_log_ticket_zone; - xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), - "xfs_btree_cur"); + xfs_btree_cur_zone = kmem_cache_create("xfs_btree_cur", + sizeof(struct xfs_btree_cur), + 0, 0, NULL); if (!xfs_btree_cur_zone) goto out_destroy_bmap_free_item_zone; - xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), - "xfs_da_state"); + xfs_da_state_zone = kmem_cache_create("xfs_da_state", + sizeof(struct xfs_da_state), + 0, 0, NULL); if (!xfs_da_state_zone) goto out_destroy_btree_cur_zone; - xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork"); + xfs_ifork_zone = kmem_cache_create("xfs_ifork", + sizeof(struct xfs_ifork), + 0, 0, NULL); if (!xfs_ifork_zone) goto out_destroy_da_state_zone; - xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); + xfs_trans_zone = kmem_cache_create("xfs_trans", + sizeof(struct xfs_trans), + 0, 0, NULL); if (!xfs_trans_zone) goto out_destroy_ifork_zone; @@ -1881,111 +1953,122 @@ * size possible under XFS. This wastes a little bit of memory, * but it is much faster. */ - xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item), - "xfs_buf_item"); + xfs_buf_item_zone = kmem_cache_create("xfs_buf_item", + sizeof(struct xfs_buf_log_item), + 0, 0, NULL); if (!xfs_buf_item_zone) goto out_destroy_trans_zone; - xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + - ((XFS_EFD_MAX_FAST_EXTENTS - 1) * - sizeof(xfs_extent_t))), "xfs_efd_item"); + xfs_efd_zone = kmem_cache_create("xfs_efd_item", + (sizeof(struct xfs_efd_log_item) + + (XFS_EFD_MAX_FAST_EXTENTS - 1) * + sizeof(struct xfs_extent)), + 0, 0, NULL); if (!xfs_efd_zone) goto out_destroy_buf_item_zone; - xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) + - ((XFS_EFI_MAX_FAST_EXTENTS - 1) * - sizeof(xfs_extent_t))), "xfs_efi_item"); + xfs_efi_zone = kmem_cache_create("xfs_efi_item", + (sizeof(struct xfs_efi_log_item) + + (XFS_EFI_MAX_FAST_EXTENTS - 1) * + sizeof(struct xfs_extent)), + 0, 0, NULL); if (!xfs_efi_zone) goto out_destroy_efd_zone; - xfs_inode_zone = - kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD | - KM_ZONE_ACCOUNT, xfs_fs_inode_init_once); + xfs_inode_zone = kmem_cache_create("xfs_inode", + sizeof(struct xfs_inode), 0, + (SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD | SLAB_ACCOUNT), + xfs_fs_inode_init_once); if (!xfs_inode_zone) goto out_destroy_efi_zone; - xfs_ili_zone = - kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", - KM_ZONE_SPREAD, NULL); + xfs_ili_zone = kmem_cache_create("xfs_ili", + sizeof(struct xfs_inode_log_item), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); if (!xfs_ili_zone) goto out_destroy_inode_zone; - xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item), - "xfs_icr"); + + xfs_icreate_zone = kmem_cache_create("xfs_icr", + sizeof(struct xfs_icreate_item), + 0, 0, NULL); if (!xfs_icreate_zone) goto out_destroy_ili_zone; - xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item), - "xfs_rud_item"); + xfs_rud_zone = kmem_cache_create("xfs_rud_item", + sizeof(struct xfs_rud_log_item), + 0, 0, NULL); if (!xfs_rud_zone) goto out_destroy_icreate_zone; - xfs_rui_zone = kmem_zone_init( + xfs_rui_zone = kmem_cache_create("xfs_rui_item", xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), - "xfs_rui_item"); + 0, 0, NULL); if (!xfs_rui_zone) goto out_destroy_rud_zone; - xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item), - "xfs_cud_item"); + xfs_cud_zone = kmem_cache_create("xfs_cud_item", + sizeof(struct xfs_cud_log_item), + 0, 0, NULL); if (!xfs_cud_zone) goto out_destroy_rui_zone; - xfs_cui_zone = kmem_zone_init( + xfs_cui_zone = kmem_cache_create("xfs_cui_item", xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), - "xfs_cui_item"); + 0, 0, NULL); if (!xfs_cui_zone) goto out_destroy_cud_zone; - xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item), - "xfs_bud_item"); + xfs_bud_zone = kmem_cache_create("xfs_bud_item", + sizeof(struct xfs_bud_log_item), + 0, 0, NULL); if (!xfs_bud_zone) goto out_destroy_cui_zone; - xfs_bui_zone = kmem_zone_init( + xfs_bui_zone = kmem_cache_create("xfs_bui_item", xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), - "xfs_bui_item"); + 0, 0, NULL); if (!xfs_bui_zone) goto out_destroy_bud_zone; return 0; out_destroy_bud_zone: - kmem_zone_destroy(xfs_bud_zone); + kmem_cache_destroy(xfs_bud_zone); out_destroy_cui_zone: - kmem_zone_destroy(xfs_cui_zone); + kmem_cache_destroy(xfs_cui_zone); out_destroy_cud_zone: - kmem_zone_destroy(xfs_cud_zone); + kmem_cache_destroy(xfs_cud_zone); out_destroy_rui_zone: - kmem_zone_destroy(xfs_rui_zone); + kmem_cache_destroy(xfs_rui_zone); out_destroy_rud_zone: - kmem_zone_destroy(xfs_rud_zone); + kmem_cache_destroy(xfs_rud_zone); out_destroy_icreate_zone: - kmem_zone_destroy(xfs_icreate_zone); + kmem_cache_destroy(xfs_icreate_zone); out_destroy_ili_zone: - kmem_zone_destroy(xfs_ili_zone); + kmem_cache_destroy(xfs_ili_zone); out_destroy_inode_zone: - kmem_zone_destroy(xfs_inode_zone); + kmem_cache_destroy(xfs_inode_zone); out_destroy_efi_zone: - kmem_zone_destroy(xfs_efi_zone); + kmem_cache_destroy(xfs_efi_zone); out_destroy_efd_zone: - kmem_zone_destroy(xfs_efd_zone); + kmem_cache_destroy(xfs_efd_zone); out_destroy_buf_item_zone: - kmem_zone_destroy(xfs_buf_item_zone); + kmem_cache_destroy(xfs_buf_item_zone); out_destroy_trans_zone: - kmem_zone_destroy(xfs_trans_zone); + kmem_cache_destroy(xfs_trans_zone); out_destroy_ifork_zone: - kmem_zone_destroy(xfs_ifork_zone); + kmem_cache_destroy(xfs_ifork_zone); out_destroy_da_state_zone: - kmem_zone_destroy(xfs_da_state_zone); + kmem_cache_destroy(xfs_da_state_zone); out_destroy_btree_cur_zone: - kmem_zone_destroy(xfs_btree_cur_zone); + kmem_cache_destroy(xfs_btree_cur_zone); out_destroy_bmap_free_item_zone: - kmem_zone_destroy(xfs_bmap_free_item_zone); + kmem_cache_destroy(xfs_bmap_free_item_zone); out_destroy_log_ticket_zone: - kmem_zone_destroy(xfs_log_ticket_zone); - out_free_ioend_bioset: - bioset_exit(&xfs_ioend_bioset); + kmem_cache_destroy(xfs_log_ticket_zone); out: return -ENOMEM; } @@ -1998,25 +2081,24 @@ * destroy caches. */ rcu_barrier(); - kmem_zone_destroy(xfs_bui_zone); - kmem_zone_destroy(xfs_bud_zone); - kmem_zone_destroy(xfs_cui_zone); - kmem_zone_destroy(xfs_cud_zone); - kmem_zone_destroy(xfs_rui_zone); - kmem_zone_destroy(xfs_rud_zone); - kmem_zone_destroy(xfs_icreate_zone); - kmem_zone_destroy(xfs_ili_zone); - kmem_zone_destroy(xfs_inode_zone); - kmem_zone_destroy(xfs_efi_zone); - kmem_zone_destroy(xfs_efd_zone); - kmem_zone_destroy(xfs_buf_item_zone); - kmem_zone_destroy(xfs_trans_zone); - kmem_zone_destroy(xfs_ifork_zone); - kmem_zone_destroy(xfs_da_state_zone); - kmem_zone_destroy(xfs_btree_cur_zone); - kmem_zone_destroy(xfs_bmap_free_item_zone); - kmem_zone_destroy(xfs_log_ticket_zone); - bioset_exit(&xfs_ioend_bioset); + kmem_cache_destroy(xfs_bui_zone); + kmem_cache_destroy(xfs_bud_zone); + kmem_cache_destroy(xfs_cui_zone); + kmem_cache_destroy(xfs_cud_zone); + kmem_cache_destroy(xfs_rui_zone); + kmem_cache_destroy(xfs_rud_zone); + kmem_cache_destroy(xfs_icreate_zone); + kmem_cache_destroy(xfs_ili_zone); + kmem_cache_destroy(xfs_inode_zone); + kmem_cache_destroy(xfs_efi_zone); + kmem_cache_destroy(xfs_efd_zone); + kmem_cache_destroy(xfs_buf_item_zone); + kmem_cache_destroy(xfs_trans_zone); + kmem_cache_destroy(xfs_ifork_zone); + kmem_cache_destroy(xfs_da_state_zone); + kmem_cache_destroy(xfs_btree_cur_zone); + kmem_cache_destroy(xfs_bmap_free_item_zone); + kmem_cache_destroy(xfs_log_ticket_zone); } STATIC int __init @@ -2059,11 +2141,6 @@ printk(KERN_INFO XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"); - - xfs_extent_free_init_defer_op(); - xfs_rmap_update_init_defer_op(); - xfs_refcount_update_init_defer_op(); - xfs_bmap_update_init_defer_op(); xfs_dir_startup(); @@ -2180,3 +2257,4 @@ MODULE_AUTHOR("Silicon Graphics, Inc."); MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); -- Gitblit v1.6.2