From 95099d4622f8cb224d94e314c7a8e0df60b13f87 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 08:38:01 +0000
Subject: [PATCH] enable docker ppp
---
kernel/fs/xfs/xfs_mount.c | 523 ++++++++++++++++++++++++---------------------------------
 1 file changed, 223 insertions(+), 300 deletions(-)
diff --git a/kernel/fs/xfs/xfs_mount.c b/kernel/fs/xfs/xfs_mount.c
index 02d1509..a2a5a0f 100644
--- a/kernel/fs/xfs/xfs_mount.c
+++ b/kernel/fs/xfs/xfs_mount.c
@@ -12,9 +12,6 @@
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_ialloc.h"
@@ -27,14 +24,14 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_sysfs.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_reflink.h"
#include "xfs_extent_busy.h"
-
+#include "xfs_health.h"
+#include "xfs_trace.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
@@ -83,9 +80,9 @@
}
if (hole < 0) {
- xfs_uuid_table = kmem_realloc(xfs_uuid_table,
+ xfs_uuid_table = krealloc(xfs_uuid_table,
(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
- KM_SLEEP);
+ GFP_KERNEL | __GFP_NOFAIL);
hole = xfs_uuid_table_size++;
}
xfs_uuid_table[hole] = *uuid;
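
The hunk above switches the UUID table growth from XFS's kmem_realloc()/KM_SLEEP wrapper to the generic krealloc() with GFP_KERNEL | __GFP_NOFAIL, which sleeps until the allocation succeeds instead of returning NULL. A minimal sketch of that non-failing grow-by-one pattern, using hypothetical demo_* names and not part of the patch itself:

/*
 * Grow a small table by one slot. With __GFP_NOFAIL the allocation cannot
 * fail, so the caller needs no error path; this is only appropriate for
 * tiny, rare allocations such as the mount-time UUID table.
 */
#include <linux/slab.h>
#include <linux/uuid.h>

static uuid_t *demo_table;
static int demo_table_size;

static int demo_table_add(const uuid_t *uuid)
{
	demo_table = krealloc(demo_table,
			(demo_table_size + 1) * sizeof(*demo_table),
			GFP_KERNEL | __GFP_NOFAIL);
	demo_table[demo_table_size] = *uuid;
	return demo_table_size++;
}
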
@@ -149,8 +146,8 @@
spin_unlock(&mp->m_perag_lock);
ASSERT(pag);
ASSERT(atomic_read(&pag->pag_ref) == 0);
+ xfs_iunlink_destroy(pag);
xfs_buf_hash_destroy(pag);
- mutex_destroy(&pag->pag_ici_reclaim_lock);
call_rcu(&pag->rcu_head, __xfs_free_perag);
}
}
@@ -197,26 +194,30 @@
}
pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
- if (!pag)
+ if (!pag) {
+ error = -ENOMEM;
goto out_unwind_new_pags;
+ }
pag->pag_agno = index;
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
- mutex_init(&pag->pag_ici_reclaim_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
- if (xfs_buf_hash_init(pag))
+
+ error = xfs_buf_hash_init(pag);
+ if (error)
goto out_free_pag;
init_waitqueue_head(&pag->pagb_wait);
spin_lock_init(&pag->pagb_lock);
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
- if (radix_tree_preload(GFP_NOFS))
+ error = radix_tree_preload(GFP_NOFS);
+ if (error)
goto out_hash_destroy;
spin_lock(&mp->m_perag_lock);
if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
- BUG();
+ WARN_ON_ONCE(1);
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
error = -EEXIST;
@@ -227,6 +228,10 @@
/* first new pag is fully initialized */
if (first_initialised == NULLAGNUMBER)
first_initialised = index;
+ error = xfs_iunlink_init(pag);
+ if (error)
+ goto out_hash_destroy;
+ spin_lock_init(&pag->pag_state_lock);
}
index = xfs_set_inode_alloc(mp, agcount);
@@ -240,7 +245,6 @@
out_hash_destroy:
xfs_buf_hash_destroy(pag);
out_free_pag:
- mutex_destroy(&pag->pag_ici_reclaim_lock);
kmem_free(pag);
out_unwind_new_pags:
/* unwind any prior newly initialized pags */
@@ -249,7 +253,7 @@
if (!pag)
break;
xfs_buf_hash_destroy(pag);
- mutex_destroy(&pag->pag_ici_reclaim_lock);
+ xfs_iunlink_destroy(pag);
kmem_free(pag);
}
return error;
@@ -307,7 +311,7 @@
/*
* Initialize the mount structure from the superblock.
*/
- xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
+ xfs_sb_from_disk(sbp, bp->b_addr);
/*
* If we haven't validated the superblock, do so now before we try
@@ -357,132 +361,122 @@
}
/*
- * Update alignment values based on mount options and sb values
+ * If the sunit/swidth change would move the precomputed root inode value, we
+ * must reject the ondisk change because repair will stumble over that.
+ * However, we allow the mount to proceed because we never rejected this
+ * combination before. Returns true to update the sb, false otherwise.
+ */
+static inline int
+xfs_check_new_dalign(
+ struct xfs_mount *mp,
+ int new_dalign,
+ bool *update_sb)
+{
+ struct xfs_sb *sbp = &mp->m_sb;
+ xfs_ino_t calc_ino;
+
+ calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
+ trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
+
+ if (sbp->sb_rootino == calc_ino) {
+ *update_sb = true;
+ return 0;
+ }
+
+ xfs_warn(mp,
+"Cannot change stripe alignment; would require moving root inode.");
+
+ /*
+ * XXX: Next time we add a new incompat feature, this should start
+ * returning -EINVAL to fail the mount. Until then, spit out a warning
+ * that we're ignoring the administrator's instructions.
+ */
+ xfs_warn(mp, "Skipping superblock stripe alignment update.");
+ *update_sb = false;
+ return 0;
+}
+
+/*
+ * If we were provided with new sunit/swidth values as mount options, make sure
+ * that they pass basic alignment and superblock feature checks, and convert
+ * them into the same units (FSB) that everything else expects. This step
+ * /must/ be done before computing the inode geometry.
*/
STATIC int
-xfs_update_alignment(xfs_mount_t *mp)
+xfs_validate_new_dalign(
+ struct xfs_mount *mp)
{
- xfs_sb_t *sbp = &(mp->m_sb);
+ if (mp->m_dalign == 0)
+ return 0;
- if (mp->m_dalign) {
+ /*
+ * If stripe unit and stripe width are not multiples
+ * of the fs blocksize turn off alignment.
+ */
+ if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
+ (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+ xfs_warn(mp,
+ "alignment check failed: sunit/swidth vs. blocksize(%d)",
+ mp->m_sb.sb_blocksize);
+ return -EINVAL;
+ } else {
/*
- * If stripe unit and stripe width are not multiples
- * of the fs blocksize turn off alignment.
+ * Convert the stripe unit and width to FSBs.
*/
- if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
- (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+ mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
+ if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
xfs_warn(mp,
- "alignment check failed: sunit/swidth vs. blocksize(%d)",
- sbp->sb_blocksize);
+ "alignment check failed: sunit/swidth vs. agsize(%d)",
+ mp->m_sb.sb_agblocks);
return -EINVAL;
- } else {
- /*
- * Convert the stripe unit and width to FSBs.
- */
- mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
- if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
- xfs_warn(mp,
- "alignment check failed: sunit/swidth vs. agsize(%d)",
- sbp->sb_agblocks);
- return -EINVAL;
- } else if (mp->m_dalign) {
- mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
- } else {
- xfs_warn(mp,
- "alignment check failed: sunit(%d) less than bsize(%d)",
- mp->m_dalign, sbp->sb_blocksize);
- return -EINVAL;
- }
- }
-
- /*
- * Update superblock with new values
- * and log changes
- */
- if (xfs_sb_version_hasdalign(sbp)) {
- if (sbp->sb_unit != mp->m_dalign) {
- sbp->sb_unit = mp->m_dalign;
- mp->m_update_sb = true;
- }
- if (sbp->sb_width != mp->m_swidth) {
- sbp->sb_width = mp->m_swidth;
- mp->m_update_sb = true;
- }
+ } else if (mp->m_dalign) {
+ mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
} else {
xfs_warn(mp,
- "cannot change alignment: superblock does not support data alignment");
+ "alignment check failed: sunit(%d) less than bsize(%d)",
+ mp->m_dalign, mp->m_sb.sb_blocksize);
return -EINVAL;
}
- } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
- xfs_sb_version_hasdalign(&mp->m_sb)) {
- mp->m_dalign = sbp->sb_unit;
- mp->m_swidth = sbp->sb_width;
+ }
+
+ if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
+ xfs_warn(mp,
+"cannot change alignment: superblock does not support data alignment");
+ return -EINVAL;
}
return 0;
}
-/*
- * Set the maximum inode count for this filesystem
- */
-STATIC void
-xfs_set_maxicount(xfs_mount_t *mp)
+/* Update alignment values based on mount options and sb values. */
+STATIC int
+xfs_update_alignment(
+ struct xfs_mount *mp)
{
- xfs_sb_t *sbp = &(mp->m_sb);
- uint64_t icount;
+ struct xfs_sb *sbp = &mp->m_sb;
- if (sbp->sb_imax_pct) {
- /*
- * Make sure the maximum inode count is a multiple
- * of the units we allocate inodes in.
- */
- icount = sbp->sb_dblocks * sbp->sb_imax_pct;
- do_div(icount, 100);
- do_div(icount, mp->m_ialloc_blks);
- mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
- sbp->sb_inopblog;
- } else {
- mp->m_maxicount = 0;
- }
-}
+ if (mp->m_dalign) {
+ bool update_sb;
+ int error;
-/*
- * Set the default minimum read and write sizes unless
- * already specified in a mount option.
- * We use smaller I/O sizes when the file system
- * is being used for NFS service (wsync mount option).
- */
-STATIC void
-xfs_set_rw_sizes(xfs_mount_t *mp)
-{
- xfs_sb_t *sbp = &(mp->m_sb);
- int readio_log, writeio_log;
+ if (sbp->sb_unit == mp->m_dalign &&
+ sbp->sb_width == mp->m_swidth)
+ return 0;
- if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
- if (mp->m_flags & XFS_MOUNT_WSYNC) {
- readio_log = XFS_WSYNC_READIO_LOG;
- writeio_log = XFS_WSYNC_WRITEIO_LOG;
- } else {
- readio_log = XFS_READIO_LOG_LARGE;
- writeio_log = XFS_WRITEIO_LOG_LARGE;
- }
- } else {
- readio_log = mp->m_readio_log;
- writeio_log = mp->m_writeio_log;
+ error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
+ if (error || !update_sb)
+ return error;
+
+ sbp->sb_unit = mp->m_dalign;
+ sbp->sb_width = mp->m_swidth;
+ mp->m_update_sb = true;
+ } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
+ xfs_sb_version_hasdalign(&mp->m_sb)) {
+ mp->m_dalign = sbp->sb_unit;
+ mp->m_swidth = sbp->sb_width;
}
- if (sbp->sb_blocklog > readio_log) {
- mp->m_readio_log = sbp->sb_blocklog;
- } else {
- mp->m_readio_log = readio_log;
- }
- mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
- if (sbp->sb_blocklog > writeio_log) {
- mp->m_writeio_log = sbp->sb_blocklog;
- } else {
- mp->m_writeio_log = writeio_log;
- }
- mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
+ return 0;
}
/*
@@ -500,29 +494,6 @@
do_div(space, 100);
mp->m_low_space[i] = space * (i + 1);
}
-}
-
-
-/*
- * Set whether we're using inode alignment.
- */
-STATIC void
-xfs_set_inoalignment(xfs_mount_t *mp)
-{
- if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
- mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
- else
- mp->m_inoalign_mask = 0;
- /*
- * If we are using stripe alignment, check whether
- * the stripe unit is a multiple of the inode alignment
- */
- if (mp->m_dalign && mp->m_inoalign_mask &&
- !(mp->m_dalign & mp->m_inoalign_mask))
- mp->m_sinoalign = mp->m_dalign;
- else
- mp->m_sinoalign = 0;
}
/*
@@ -639,7 +610,7 @@
(mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
!xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
- mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
/*
* We can safely re-initialise incore superblock counters from the
@@ -654,10 +625,51 @@
*/
if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) ||
XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
- !(mp->m_flags & XFS_MOUNT_BAD_SUMMARY))
+ !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS))
return 0;
return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
+}
+
+/*
+ * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
+ * internal inode structures can be sitting in the CIL and AIL at this point,
+ * so we need to unpin them, write them back and/or reclaim them before unmount
+ * can proceed.
+ *
+ * An inode cluster that has been freed can have its buffer still pinned in
+ * memory because the transaction is still sitting in an iclog. The stale inodes
+ * on that buffer will be pinned to the buffer until the transaction hits the
+ * disk and the callbacks run. Pushing the AIL will skip the stale inodes and
+ * may never see the pinned buffer, so nothing will push out the iclog and
+ * unpin the buffer.
+ *
+ * Hence we need to force the log to unpin everything first. However, log
+ * forces don't wait for the discards they issue to complete, so we have to
+ * explicitly wait for them to complete here as well.
+ *
+ * Then we can tell the world we are unmounting so that error handling knows
+ * that the filesystem is going away and we should error out anything that we
+ * have been retrying in the background. This will prevent never-ending
+ * retries in AIL pushing from hanging the unmount.
+ *
+ * Finally, we can push the AIL to clean all the remaining dirty objects, then
+ * reclaim the remaining inodes that are still in memory at this point in time.
+ */
+static void
+xfs_unmount_flush_inodes(
+ struct xfs_mount *mp)
+{
+ xfs_log_force(mp, XFS_LOG_SYNC);
+ xfs_extent_busy_wait_all(mp);
+ flush_workqueue(xfs_discard_wq);
+
+ mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+
+ xfs_ail_push_all_sync(mp->m_ail);
+ cancel_delayed_work_sync(&mp->m_reclaim_work);
+ xfs_reclaim_inodes(mp);
+ xfs_health_unmount(mp);
}
/*
@@ -676,6 +688,7 @@
{
struct xfs_sb *sbp = &(mp->m_sb);
struct xfs_inode *rip;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
uint64_t resblks;
uint quotamount = 0;
uint quotaflags = 0;
@@ -730,28 +743,38 @@
}
/*
- * Check if sb_agblocks is aligned at stripe boundary
- * If sb_agblocks is NOT aligned turn off m_dalign since
- * allocator alignment is within an ag, therefore ag has
- * to be aligned at stripe boundary.
+ * If we were given new sunit/swidth options, do some basic validation
+ * checks and convert the incore dalign and swidth values to the
+ * same units (FSB) that everything else uses. This /must/ happen
+ * before computing the inode geometry.
*/
- error = xfs_update_alignment(mp);
+ error = xfs_validate_new_dalign(mp);
if (error)
goto out;
xfs_alloc_compute_maxlevels(mp);
xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
- xfs_ialloc_compute_maxlevels(mp);
+ xfs_ialloc_setup_geometry(mp);
xfs_rmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp);
- xfs_set_maxicount(mp);
+ /*
+ * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks
+ * is NOT aligned turn off m_dalign since allocator alignment is within
+ * an ag, therefore ag has to be aligned at stripe boundary. Note that
+ * we must compute the free space and rmap btree geometry before doing
+ * this.
+ */
+ error = xfs_update_alignment(mp);
+ if (error)
+ goto out;
/* enable fail_at_unmount as default */
mp->m_fail_unmount = true;
- error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
+ error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype,
+ NULL, mp->m_super->s_id);
if (error)
goto out;
@@ -773,31 +796,15 @@
goto out_remove_errortag;
/*
- * Set the minimum read and write sizes
+ * Update the preferred write size based on the information from the
+ * on-disk superblock.
*/
- xfs_set_rw_sizes(mp);
+ mp->m_allocsize_log =
+ max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log);
+ mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog);
/* set the low space thresholds for dynamic preallocation */
xfs_set_low_space_thresholds(mp);
-
- /*
- * Set the inode cluster size.
- * This may still be overridden by the file system
- * block size if it is larger than the chosen cluster size.
- *
- * For v5 filesystems, scale the cluster size with the inode size to
- * keep a constant ratio of inode per cluster buffer, but only if mkfs
- * has set the inode alignment value appropriately for larger cluster
- * sizes.
- */
- mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
- int new_size = mp->m_inode_cluster_size;
-
- new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
- if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
- mp->m_inode_cluster_size = new_size;
- }
/*
* If enabled, sparse inode chunk alignment is expected to match the
@@ -806,19 +813,14 @@
*/
if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
mp->m_sb.sb_spino_align !=
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
+ XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
xfs_warn(mp,
"Sparse inode block alignment (%u) must match cluster size (%llu).",
mp->m_sb.sb_spino_align,
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
+ XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
error = -EINVAL;
goto out_remove_uuid;
}
-
- /*
- * Set inode alignment fields
- */
- xfs_set_inoalignment(mp);
/*
* Check that the data (and log if separate) is an ok size.
@@ -865,9 +867,8 @@
goto out_free_dir;
}
- if (!sbp->sb_logblocks) {
+ if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
xfs_warn(mp, "no log defined");
- XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
error = -EFSCORRUPTED;
goto out_free_perag;
}
@@ -905,12 +906,10 @@
ASSERT(rip != NULL);
- if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) {
+ if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) {
xfs_warn(mp, "corrupted root inode %llu: not a directory",
(unsigned long long)rip->i_ino);
xfs_iunlock(rip, XFS_ILOCK_EXCL);
- XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
- mp);
error = -EFSCORRUPTED;
goto out_rele_rip;
}
@@ -969,9 +968,17 @@
/*
* Finish recovering the file system. This part needed to be delayed
* until after the root and real-time bitmap inodes were consistently
- * read in.
+ * read in. Temporarily create per-AG space reservations for metadata
+ * btree shape changes because space freeing transactions (for inode
+ * inactivation) require the per-AG reservation in lieu of reserving
+ * blocks.
*/
+ error = xfs_fs_reserve_ag_blocks(mp);
+ if (error && error == -ENOSPC)
+ xfs_warn(mp,
+ "ENOSPC reserving per-AG metadata pool, log recovery may fail.");
error = xfs_log_mount_finish(mp);
+ xfs_fs_unreserve_ag_blocks(mp);
if (error) {
xfs_warn(mp, "log mount finish failed");
goto out_rtunmount;
@@ -1047,7 +1054,7 @@
/* Clean out dquots that might be in memory after quotacheck. */
xfs_qm_unmount(mp);
/*
- * Cancel all delayed reclaim work and reclaim the inodes directly.
+ * Flush all inode reclamation work and flush the log.
* We have to do this /after/ rtunmount and qm_unmount because those
* two will have scheduled delayed reclaim for the rt/quota inodes.
*
@@ -1057,10 +1064,8 @@
* qm_unmount_quotas and therefore rely on qm_unmount to release the
* quota inodes.
*/
- cancel_delayed_work_sync(&mp->m_reclaim_work);
- xfs_reclaim_inodes(mp, SYNC_WAIT);
+ xfs_unmount_flush_inodes(mp);
out_log_dealloc:
- mp->m_flags |= XFS_MOUNT_UNMOUNTING;
xfs_log_mount_cancel(mp);
out_fail_wait:
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@@ -1095,52 +1100,13 @@
uint64_t resblks;
int error;
- xfs_icache_disable_reclaim(mp);
+ xfs_stop_block_reaping(mp);
xfs_fs_unreserve_ag_blocks(mp);
xfs_qm_unmount_quotas(mp);
xfs_rtunmount_inodes(mp);
xfs_irele(mp->m_rootip);
- /*
- * We can potentially deadlock here if we have an inode cluster
- * that has been freed has its buffer still pinned in memory because
- * the transaction is still sitting in a iclog. The stale inodes
- * on that buffer will have their flush locks held until the
- * transaction hits the disk and the callbacks run. the inode
- * flush takes the flush lock unconditionally and with nothing to
- * push out the iclog we will never get that unlocked. hence we
- * need to force the log first.
- */
- xfs_log_force(mp, XFS_LOG_SYNC);
-
- /*
- * Wait for all busy extents to be freed, including completion of
- * any discard operation.
- */
- xfs_extent_busy_wait_all(mp);
- flush_workqueue(xfs_discard_wq);
-
- /*
- * We now need to tell the world we are unmounting. This will allow
- * us to detect that the filesystem is going away and we should error
- * out anything that we have been retrying in the background. This will
- * prevent neverending retries in AIL pushing from hanging the unmount.
- */
- mp->m_flags |= XFS_MOUNT_UNMOUNTING;
-
- /*
- * Flush all pending changes from the AIL.
- */
- xfs_ail_push_all_sync(mp->m_ail);
-
- /*
- * And reclaim all inodes. At this point there should be no dirty
- * inodes and none should be pinned or locked, but use synchronous
- * reclaim just to be sure. We can stop background inode reclaim
- * here as well if it is still running.
- */
- cancel_delayed_work_sync(&mp->m_reclaim_work);
- xfs_reclaim_inodes(mp, SYNC_WAIT);
+ xfs_unmount_flush_inodes(mp);
xfs_qm_unmount(mp);
@@ -1216,8 +1182,7 @@
int
xfs_log_sbcount(xfs_mount_t *mp)
{
- /* allow this to proceed during the freeze sequence... */
- if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
+ if (!xfs_log_writable(mp))
return 0;
/*
@@ -1228,39 +1193,6 @@
return 0;
return xfs_sync_sb(mp, true);
-}
-
-/*
- * Deltas for the inode count are +/-64, hence we use a large batch size
- * of 128 so we don't need to take the counter lock on every update.
- */
-#define XFS_ICOUNT_BATCH 128
-int
-xfs_mod_icount(
- struct xfs_mount *mp,
- int64_t delta)
-{
- percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
- if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
- ASSERT(0);
- percpu_counter_add(&mp->m_icount, -delta);
- return -EINVAL;
- }
- return 0;
-}
-
-int
-xfs_mod_ifree(
- struct xfs_mount *mp,
- int64_t delta)
-{
- percpu_counter_add(&mp->m_ifree, delta);
- if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
- ASSERT(0);
- percpu_counter_add(&mp->m_ifree, -delta);
- return -EINVAL;
- }
- return 0;
}
/*
@@ -1341,10 +1273,9 @@
spin_unlock(&mp->m_sb_lock);
return 0;
}
- printk_once(KERN_WARNING
- "Filesystem \"%s\": reserve blocks depleted! "
- "Consider increasing reserve pool size.",
- mp->m_fsname);
+ xfs_warn_once(mp,
+"Reserve blocks depleted! Consider increasing reserve pool size.");
+
fdblocks_enospc:
spin_unlock(&mp->m_sb_lock);
return -ENOSPC;
@@ -1366,33 +1297,6 @@
mp->m_sb.sb_frextents = lcounter;
spin_unlock(&mp->m_sb_lock);
return ret;
-}
-
-/*
- * xfs_getsb() is called to obtain the buffer for the superblock.
- * The buffer is returned locked and read in from disk.
- * The buffer should be released with a call to xfs_brelse().
- *
- * If the flags parameter is BUF_TRYLOCK, then we'll only return
- * the superblock buffer if it can be locked without sleeping.
- * If it can't then we'll return NULL.
- */
-struct xfs_buf *
-xfs_getsb(
- struct xfs_mount *mp,
- int flags)
-{
- struct xfs_buf *bp = mp->m_sb_bp;
-
- if (!xfs_buf_trylock(bp)) {
- if (flags & XBF_TRYLOCK)
- return NULL;
- xfs_buf_lock(bp);
- }
-
- xfs_buf_hold(bp);
- ASSERT(bp->b_flags & XBF_DONE);
- return bp;
}
/*
@@ -1436,7 +1340,26 @@
if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
return;
- spin_lock(&mp->m_sb_lock);
- mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
- spin_unlock(&mp->m_sb_lock);
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
+}
+
+/*
+ * Update the in-core delayed block counter.
+ *
+ * We prefer to update the counter without having to take a spinlock for every
+ * counter update (i.e. batching). Each change to delayed allocation
+ * reservations can easily exceed the default percpu counter
+ * batching, so we use a larger batch factor here.
+ *
+ * Note that we don't currently have any callers requiring fast summation
+ * (e.g. percpu_counter_read) so we can use a big batch value here.
+ */
+#define XFS_DELALLOC_BATCH (4096)
+void
+xfs_mod_delalloc(
+ struct xfs_mount *mp,
+ int64_t delta)
+{
+ percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
+ XFS_DELALLOC_BATCH);
}
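
The comment above explains why xfs_mod_delalloc() uses percpu_counter_add_batch() with a large batch: each CPU accumulates small deltas locally and only folds them into the shared count once they exceed the batch threshold, so the hot path takes no lock. A self-contained sketch of that pattern as a hypothetical demo module (the demo_* names are assumptions, not part of this patch):

/*
 * Batched percpu counter demo: deltas smaller than DEMO_BATCH stay in the
 * per-CPU slot; percpu_counter_sum() folds all slots for an accurate total.
 */
#include <linux/module.h>
#include <linux/percpu_counter.h>

#define DEMO_BATCH	4096	/* mirrors XFS_DELALLOC_BATCH above */

static struct percpu_counter demo_delalloc;

static int __init demo_init(void)
{
	int error;

	error = percpu_counter_init(&demo_delalloc, 0, GFP_KERNEL);
	if (error)
		return error;

	/* Lock-free in the common case: the delta stays in the per-CPU slot. */
	percpu_counter_add_batch(&demo_delalloc, 8, DEMO_BATCH);
	percpu_counter_add_batch(&demo_delalloc, -8, DEMO_BATCH);

	/* Accurate but slower: sums every CPU's slot plus the global count. */
	pr_info("delalloc blocks: %lld\n", percpu_counter_sum(&demo_delalloc));
	return 0;
}

static void __exit demo_exit(void)
{
	percpu_counter_destroy(&demo_delalloc);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
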
--
Gitblit v1.6.2
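
As a side note on the xfs_validate_new_dalign() hunk earlier in the patch: the sunit/swidth mount options arrive in 512-byte basic blocks (BB) and are converted to filesystem blocks (FSB) once they pass the blocksize-multiple check. A standalone userspace sketch with assumed values (not part of the patch) shows the arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int blocklog  = 12;			/* 4096-byte fs blocks */
	unsigned int blockmask = (1u << blocklog) - 1;
	unsigned int sunit_bb  = 64;			/* 64 BB  = 32 KiB stripe unit   */
	unsigned int swidth_bb = 256;			/* 256 BB = 128 KiB stripe width */

	/* BBTOB(): 512-byte basic blocks to bytes. */
	unsigned int sunit_bytes  = sunit_bb << 9;
	unsigned int swidth_bytes = swidth_bb << 9;

	if ((sunit_bytes & blockmask) || (swidth_bytes & blockmask)) {
		printf("sunit/swidth not a multiple of the block size\n");
		return 1;
	}

	/* XFS_BB_TO_FSBT(): basic blocks to whole filesystem blocks. */
	printf("m_dalign = %u FSB, m_swidth = %u FSB\n",
	       sunit_bb >> (blocklog - 9), swidth_bb >> (blocklog - 9));
	return 0;
}
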