2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/fs/xfs/xfs_mount.c
@@ -12,9 +12,6 @@
 #include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_dir2.h"
 #include "xfs_ialloc.h"
@@ -27,14 +24,14 @@
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_fsops.h"
-#include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_sysfs.h"
 #include "xfs_rmap_btree.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_reflink.h"
 #include "xfs_extent_busy.h"
-
+#include "xfs_health.h"
+#include "xfs_trace.h"
 
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
@@ -83,9 +80,9 @@
         }
 
         if (hole < 0) {
-                xfs_uuid_table = kmem_realloc(xfs_uuid_table,
+                xfs_uuid_table = krealloc(xfs_uuid_table,
                         (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
-                        KM_SLEEP);
+                        GFP_KERNEL | __GFP_NOFAIL);
                 hole = xfs_uuid_table_size++;
         }
         xfs_uuid_table[hole] = *uuid;
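
Note on the hunk above: kmem_realloc()/KM_SLEEP was XFS's private wrapper; krealloc() with GFP_KERNEL | __GFP_NOFAIL expresses the same "sleep until the allocation succeeds" contract through the generic allocator. A minimal userspace sketch of the same grow-by-one table pattern, with plain realloc() standing in for krealloc() (realloc() can fail, so the sketch has to report an error where the kernel path cannot):

#include <stdlib.h>

struct uuid_entry { unsigned char b[16]; };

static struct uuid_entry *uuid_table;
static int uuid_table_size;

/* Grow the table by one slot and store *uuid, as xfs_uuid_mount() does
 * when no existing hole is found.  realloc() preserves the old contents;
 * the new tail slot becomes the hole. */
static int uuid_table_add(const struct uuid_entry *uuid)
{
        struct uuid_entry *tmp;

        tmp = realloc(uuid_table,
                      (uuid_table_size + 1) * sizeof(*uuid_table));
        if (!tmp)
                return -1;      /* __GFP_NOFAIL has no userspace analogue */
        uuid_table = tmp;
        uuid_table[uuid_table_size++] = *uuid;
        return 0;
}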
@@ -149,8 +146,8 @@
                 spin_unlock(&mp->m_perag_lock);
                 ASSERT(pag);
                 ASSERT(atomic_read(&pag->pag_ref) == 0);
+                xfs_iunlink_destroy(pag);
                 xfs_buf_hash_destroy(pag);
-                mutex_destroy(&pag->pag_ici_reclaim_lock);
                 call_rcu(&pag->rcu_head, __xfs_free_perag);
         }
 }
@@ -197,26 +194,30 @@
                 }
 
                 pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
-                if (!pag)
+                if (!pag) {
+                        error = -ENOMEM;
                         goto out_unwind_new_pags;
+                }
                 pag->pag_agno = index;
                 pag->pag_mount = mp;
                 spin_lock_init(&pag->pag_ici_lock);
-                mutex_init(&pag->pag_ici_reclaim_lock);
                 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
-                if (xfs_buf_hash_init(pag))
+
+                error = xfs_buf_hash_init(pag);
+                if (error)
                         goto out_free_pag;
                 init_waitqueue_head(&pag->pagb_wait);
                 spin_lock_init(&pag->pagb_lock);
                 pag->pagb_count = 0;
                 pag->pagb_tree = RB_ROOT;
 
-                if (radix_tree_preload(GFP_NOFS))
+                error = radix_tree_preload(GFP_NOFS);
+                if (error)
                         goto out_hash_destroy;
 
                 spin_lock(&mp->m_perag_lock);
                 if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
-                        BUG();
+                        WARN_ON_ONCE(1);
                         spin_unlock(&mp->m_perag_lock);
                         radix_tree_preload_end();
                         error = -EEXIST;
@@ -227,6 +228,10 @@
                 /* first new pag is fully initialized */
                 if (first_initialised == NULLAGNUMBER)
                         first_initialised = index;
+                error = xfs_iunlink_init(pag);
+                if (error)
+                        goto out_hash_destroy;
+                spin_lock_init(&pag->pag_state_lock);
         }
 
         index = xfs_set_inode_alloc(mp, agcount);
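
The hunks above make xfs_initialize_perag() return explicit errnos and extend its unwind ladder for the new iunlink state. The shape is the kernel's standard goto-based unwinding: each acquired resource adds a label, and a failure jumps to the label that releases only what was already set up, in reverse order. A stripped-down sketch of the idiom (setup_hash()/setup_iunlink() are hypothetical stand-ins, not the XFS functions):

#include <errno.h>
#include <stdlib.h>

struct pag_like {
        void *hash;
        void *iunlink;
};

static int setup_hash(struct pag_like *p)
{
        p->hash = malloc(64);
        return p->hash ? 0 : -ENOMEM;
}

static int setup_iunlink(struct pag_like *p)
{
        p->iunlink = malloc(64);
        return p->iunlink ? 0 : -ENOMEM;
}

static int pag_init(struct pag_like **out)
{
        struct pag_like *p;
        int error;

        p = calloc(1, sizeof(*p));
        if (!p)
                return -ENOMEM;         /* nothing to unwind yet */
        error = setup_hash(p);
        if (error)
                goto out_free_pag;
        error = setup_iunlink(p);
        if (error)
                goto out_hash_destroy;  /* undo in reverse order */
        *out = p;
        return 0;

out_hash_destroy:
        free(p->hash);
out_free_pag:
        free(p);
        return error;
}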
@@ -240,7 +245,6 @@
 out_hash_destroy:
         xfs_buf_hash_destroy(pag);
 out_free_pag:
-        mutex_destroy(&pag->pag_ici_reclaim_lock);
         kmem_free(pag);
 out_unwind_new_pags:
         /* unwind any prior newly initialized pags */
@@ -249,7 +253,7 @@
                 if (!pag)
                         break;
                 xfs_buf_hash_destroy(pag);
-                mutex_destroy(&pag->pag_ici_reclaim_lock);
+                xfs_iunlink_destroy(pag);
                 kmem_free(pag);
         }
         return error;
@@ -307,7 +311,7 @@
         /*
          * Initialize the mount structure from the superblock.
          */
-        xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
+        xfs_sb_from_disk(sbp, bp->b_addr);
 
         /*
          * If we haven't validated the superblock, do so now before we try
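
XFS_BUF_TO_SBP() was only a cast of the buffer's data pointer to the on-disk superblock type, so passing bp->b_addr is equivalent and more direct; xfs_sb_from_disk() still performs the big-endian decode. A hedged userspace sketch of that decode step, with an invented two-field superblock standing in for the real structure:

#include <endian.h>
#include <stdint.h>
#include <string.h>

/* Miniature stand-ins: the real xfs_dsb/xfs_sb have many more fields,
 * but the decode pattern is identical.  Packed so the struct matches
 * the byte-exact on-disk layout. */
struct disk_sb {
        uint32_t sb_blocksize;
        uint64_t sb_dblocks;
} __attribute__((packed));

struct core_sb {
        uint32_t sb_blocksize;
        uint64_t sb_dblocks;
};

/* Decode the on-disk (big-endian) superblock image at b_addr into the
 * native-endian in-core form, as xfs_sb_from_disk() does. */
static void sb_from_disk(struct core_sb *to, const void *b_addr)
{
        struct disk_sb from;

        memcpy(&from, b_addr, sizeof(from));    /* b_addr may be unaligned */
        to->sb_blocksize = be32toh(from.sb_blocksize);
        to->sb_dblocks = be64toh(from.sb_dblocks);
}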
@@ -357,132 +361,122 @@
 }
 
 /*
- * Update alignment values based on mount options and sb values
+ * If the sunit/swidth change would move the precomputed root inode value, we
+ * must reject the ondisk change because repair will stumble over that.
+ * However, we allow the mount to proceed because we never rejected this
+ * combination before.  Sets *update_sb to true if the superblock should be
+ * updated, false otherwise.
+ */
+static inline int
+xfs_check_new_dalign(
+        struct xfs_mount        *mp,
+        int                     new_dalign,
+        bool                    *update_sb)
+{
+        struct xfs_sb           *sbp = &mp->m_sb;
+        xfs_ino_t               calc_ino;
+
+        calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
+        trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
+
+        if (sbp->sb_rootino == calc_ino) {
+                *update_sb = true;
+                return 0;
+        }
+
+        xfs_warn(mp,
+"Cannot change stripe alignment; would require moving root inode.");
+
+        /*
+         * XXX: Next time we add a new incompat feature, this should start
+         * returning -EINVAL to fail the mount.  Until then, spit out a warning
+         * that we're ignoring the administrator's instructions.
+         */
+        xfs_warn(mp, "Skipping superblock stripe alignment update.");
+        *update_sb = false;
+        return 0;
+}
+
+/*
+ * If we were provided with new sunit/swidth values as mount options, make sure
+ * that they pass basic alignment and superblock feature checks, and convert
+ * them into the same units (FSB) that everything else expects.  This step
+ * /must/ be done before computing the inode geometry.
  */
 STATIC int
-xfs_update_alignment(xfs_mount_t *mp)
+xfs_validate_new_dalign(
+        struct xfs_mount        *mp)
 {
-        xfs_sb_t        *sbp = &(mp->m_sb);
+        if (mp->m_dalign == 0)
+                return 0;
 
-        if (mp->m_dalign) {
+        /*
+         * If stripe unit and stripe width are not multiples
+         * of the fs blocksize turn off alignment.
+         */
+        if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
+            (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+                xfs_warn(mp,
+        "alignment check failed: sunit/swidth vs. blocksize(%d)",
+                        mp->m_sb.sb_blocksize);
+                return -EINVAL;
+        } else {
                 /*
-                 * If stripe unit and stripe width are not multiples
-                 * of the fs blocksize turn off alignment.
+                 * Convert the stripe unit and width to FSBs.
                  */
-                if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
-                    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+                mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
+                if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
                         xfs_warn(mp,
-                "alignment check failed: sunit/swidth vs. blocksize(%d)",
-                                sbp->sb_blocksize);
+                "alignment check failed: sunit/swidth vs. agsize(%d)",
+                                mp->m_sb.sb_agblocks);
                         return -EINVAL;
-                } else {
-                        /*
-                         * Convert the stripe unit and width to FSBs.
-                         */
-                        mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
-                        if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
-                                xfs_warn(mp,
-                        "alignment check failed: sunit/swidth vs. agsize(%d)",
-                                        sbp->sb_agblocks);
-                                return -EINVAL;
-                        } else if (mp->m_dalign) {
-                                mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
-                        } else {
-                                xfs_warn(mp,
-                        "alignment check failed: sunit(%d) less than bsize(%d)",
-                                        mp->m_dalign, sbp->sb_blocksize);
-                                return -EINVAL;
-                        }
-                }
-
-                /*
-                 * Update superblock with new values
-                 * and log changes
-                 */
-                if (xfs_sb_version_hasdalign(sbp)) {
-                        if (sbp->sb_unit != mp->m_dalign) {
-                                sbp->sb_unit = mp->m_dalign;
-                                mp->m_update_sb = true;
-                        }
-                        if (sbp->sb_width != mp->m_swidth) {
-                                sbp->sb_width = mp->m_swidth;
-                                mp->m_update_sb = true;
-                        }
+                } else if (mp->m_dalign) {
+                        mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
                 } else {
                         xfs_warn(mp,
-        "cannot change alignment: superblock does not support data alignment");
+                "alignment check failed: sunit(%d) less than bsize(%d)",
+                                mp->m_dalign, mp->m_sb.sb_blocksize);
                         return -EINVAL;
                 }
-        } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
-                    xfs_sb_version_hasdalign(&mp->m_sb)) {
-                mp->m_dalign = sbp->sb_unit;
-                mp->m_swidth = sbp->sb_width;
+        }
+
+        if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
+                xfs_warn(mp,
+"cannot change alignment: superblock does not support data alignment");
+                return -EINVAL;
         }
 
         return 0;
 }
 
-/*
- * Set the maximum inode count for this filesystem
- */
-STATIC void
-xfs_set_maxicount(xfs_mount_t *mp)
+/* Update alignment values based on mount options and sb values. */
+STATIC int
+xfs_update_alignment(
+        struct xfs_mount        *mp)
 {
-        xfs_sb_t        *sbp = &(mp->m_sb);
-        uint64_t        icount;
+        struct xfs_sb           *sbp = &mp->m_sb;
 
-        if (sbp->sb_imax_pct) {
-                /*
-                 * Make sure the maximum inode count is a multiple
-                 * of the units we allocate inodes in.
-                 */
-                icount = sbp->sb_dblocks * sbp->sb_imax_pct;
-                do_div(icount, 100);
-                do_div(icount, mp->m_ialloc_blks);
-                mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
-                                   sbp->sb_inopblog;
-        } else {
-                mp->m_maxicount = 0;
-        }
-}
+        if (mp->m_dalign) {
+                bool            update_sb;
+                int             error;
 
-/*
- * Set the default minimum read and write sizes unless
- * already specified in a mount option.
- * We use smaller I/O sizes when the file system
- * is being used for NFS service (wsync mount option).
- */
-STATIC void
-xfs_set_rw_sizes(xfs_mount_t *mp)
-{
-        xfs_sb_t        *sbp = &(mp->m_sb);
-        int             readio_log, writeio_log;
+                if (sbp->sb_unit == mp->m_dalign &&
+                    sbp->sb_width == mp->m_swidth)
+                        return 0;
 
-        if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
-                if (mp->m_flags & XFS_MOUNT_WSYNC) {
-                        readio_log = XFS_WSYNC_READIO_LOG;
-                        writeio_log = XFS_WSYNC_WRITEIO_LOG;
-                } else {
-                        readio_log = XFS_READIO_LOG_LARGE;
-                        writeio_log = XFS_WRITEIO_LOG_LARGE;
-                }
-        } else {
-                readio_log = mp->m_readio_log;
-                writeio_log = mp->m_writeio_log;
+                error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
+                if (error || !update_sb)
+                        return error;
+
+                sbp->sb_unit = mp->m_dalign;
+                sbp->sb_width = mp->m_swidth;
+                mp->m_update_sb = true;
+        } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
+                   xfs_sb_version_hasdalign(&mp->m_sb)) {
+                mp->m_dalign = sbp->sb_unit;
+                mp->m_swidth = sbp->sb_width;
         }
 
-        if (sbp->sb_blocklog > readio_log) {
-                mp->m_readio_log = sbp->sb_blocklog;
-        } else {
-                mp->m_readio_log = readio_log;
-        }
-        mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
-        if (sbp->sb_blocklog > writeio_log) {
-                mp->m_writeio_log = sbp->sb_blocklog;
-        } else {
-                mp->m_writeio_log = writeio_log;
-        }
-        mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
+        return 0;
 }
 
 /*
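
The refactor above splits option validation (xfs_validate_new_dalign(), run before inode geometry is computed) from the decision to commit new values to the superblock (xfs_check_new_dalign() via xfs_update_alignment()). The validation itself is unit arithmetic: sunit/swidth arrive in 512-byte basic blocks (BBs), must be whole multiples of the filesystem block size, and are then converted to filesystem blocks (FSBs). A self-contained sketch with the BBTOB()/XFS_BB_TO_FSBT() macros expanded (assumes blocklog >= 9, which XFS guarantees; warnings omitted):

#include <stdint.h>

#define BBSHIFT 9       /* 512-byte "basic blocks", as in XFS */

/* Mirror of the checks in xfs_validate_new_dalign(): dalign/swidth
 * arrive in BBs and leave in filesystem blocks. */
static int validate_dalign(uint32_t *dalign, uint32_t *swidth,
                           uint32_t blocklog, uint32_t agblocks)
{
        uint32_t blockmask = (1U << blocklog) - 1;

        if (*dalign == 0)
                return 0;               /* no alignment requested */
        if (((*dalign << BBSHIFT) & blockmask) ||
            ((*swidth << BBSHIFT) & blockmask))
                return -1;      /* not multiples of the fs block size */

        *dalign >>= blocklog - BBSHIFT; /* BB -> FSB */
        if (*dalign == 0)
                return -1;      /* sunit smaller than one fs block */
        if (agblocks % *dalign)
                return -1;      /* AG size not stripe aligned */
        *swidth >>= blocklog - BBSHIFT;
        return 0;
}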
@@ -500,29 +494,6 @@
                 do_div(space, 100);
                 mp->m_low_space[i] = space * (i + 1);
         }
-}
-
-
-/*
- * Set whether we're using inode alignment.
- */
-STATIC void
-xfs_set_inoalignment(xfs_mount_t *mp)
-{
-        if (xfs_sb_version_hasalign(&mp->m_sb) &&
-            mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
-                mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
-        else
-                mp->m_inoalign_mask = 0;
-        /*
-         * If we are using stripe alignment, check whether
-         * the stripe unit is a multiple of the inode alignment
-         */
-        if (mp->m_dalign && mp->m_inoalign_mask &&
-            !(mp->m_dalign & mp->m_inoalign_mask))
-                mp->m_sinoalign = mp->m_dalign;
-        else
-                mp->m_sinoalign = 0;
 }
 
 /*
@@ -639,7 +610,7 @@
             (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
              !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
              mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
-                mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
+                xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
 
         /*
          * We can safely re-initialise incore superblock counters from the
@@ -654,10 +625,51 @@
          */
         if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) ||
              XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
-            !(mp->m_flags & XFS_MOUNT_BAD_SUMMARY))
+            !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS))
                 return 0;
 
         return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
+}
+
+/*
+ * Flush and reclaim dirty inodes in preparation for unmount.  Inodes and
+ * internal inode structures can be sitting in the CIL and AIL at this point,
+ * so we need to unpin them, write them back and/or reclaim them before unmount
+ * can proceed.
+ *
+ * An inode cluster that has been freed can have its buffer still pinned in
+ * memory because the transaction is still sitting in an iclog.  The stale
+ * inodes on that buffer will be pinned to the buffer until the transaction
+ * hits the disk and the callbacks run.  Pushing the AIL will skip the stale
+ * inodes and may never see the pinned buffer, so nothing will push out the
+ * iclog and unpin the buffer.
+ *
+ * Hence we need to force the log to unpin everything first.  However, log
+ * forces don't wait for the discards they issue to complete, so we have to
+ * explicitly wait for them to complete here as well.
+ *
+ * Then we can tell the world we are unmounting so that error handling knows
+ * that the filesystem is going away and we should error out anything that we
+ * have been retrying in the background.  This will prevent never-ending
+ * retries in AIL pushing from hanging the unmount.
+ *
+ * Finally, we can push the AIL to clean all the remaining dirty objects, then
+ * reclaim the remaining inodes that are still in memory at this point in time.
+ */
+static void
+xfs_unmount_flush_inodes(
+        struct xfs_mount        *mp)
+{
+        xfs_log_force(mp, XFS_LOG_SYNC);
+        xfs_extent_busy_wait_all(mp);
+        flush_workqueue(xfs_discard_wq);
+
+        mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+
+        xfs_ail_push_all_sync(mp->m_ail);
+        cancel_delayed_work_sync(&mp->m_reclaim_work);
+        xfs_reclaim_inodes(mp);
+        xfs_health_unmount(mp);
 }
 
 /*
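
The long comment above pins down why xfs_unmount_flush_inodes() runs in this exact order: the log force unpins stale inode buffers that an AIL push alone would never reach, the discard wait closes the gap the log force leaves, and the UNMOUNTING flag makes background retries fail instead of hanging the unmount. A schematic userspace rendering of that sequence with stubbed steps, purely to make the ordering constraints visible (every function below is a stand-in, not a kernel API):

#include <stdbool.h>
#include <stdio.h>

static bool unmounting;

/* Stubs for the log, busy-extent, AIL and inode-reclaim machinery. */
static void log_force_sync(void)     { puts("1. force log: unpin stale inode buffers"); }
static void wait_busy_extents(void)  { puts("2. wait for busy extents and discards"); }
static void ail_push_all_sync(void)  { puts("4. write back everything left in the AIL"); }
static void reclaim_all_inodes(void) { puts("5. reclaim remaining in-core inodes"); }

static void unmount_flush_inodes(void)
{
        /* The log force must come before the AIL push; otherwise buffers
         * pinned by an in-memory iclog would never become writable. */
        log_force_sync();
        wait_busy_extents();
        unmounting = true;      /* 3. error out background retries from now on */
        ail_push_all_sync();
        reclaim_all_inodes();
}

int main(void)
{
        unmount_flush_inodes();
        return unmounting ? 0 : 1;
}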
@@ -676,6 +688,7 @@
 {
         struct xfs_sb           *sbp = &(mp->m_sb);
         struct xfs_inode        *rip;
+        struct xfs_ino_geometry *igeo = M_IGEO(mp);
         uint64_t                resblks;
         uint                    quotamount = 0;
         uint                    quotaflags = 0;
@@ -730,28 +743,38 @@
         }
 
         /*
-         * Check if sb_agblocks is aligned at stripe boundary
-         * If sb_agblocks is NOT aligned turn off m_dalign since
-         * allocator alignment is within an ag, therefore ag has
-         * to be aligned at stripe boundary.
+         * If we were given new sunit/swidth options, do some basic validation
+         * checks and convert the incore dalign and swidth values to the
+         * same units (FSB) that everything else uses.  This /must/ happen
+         * before computing the inode geometry.
          */
-        error = xfs_update_alignment(mp);
+        error = xfs_validate_new_dalign(mp);
         if (error)
                 goto out;
 
         xfs_alloc_compute_maxlevels(mp);
         xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
         xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
-        xfs_ialloc_compute_maxlevels(mp);
+        xfs_ialloc_setup_geometry(mp);
         xfs_rmapbt_compute_maxlevels(mp);
         xfs_refcountbt_compute_maxlevels(mp);
 
-        xfs_set_maxicount(mp);
+        /*
+         * Check if sb_agblocks is aligned at stripe boundary.  If sb_agblocks
+         * is NOT aligned turn off m_dalign since allocator alignment is within
+         * an ag, therefore ag has to be aligned at stripe boundary.  Note that
+         * we must compute the free space and rmap btree geometry before doing
+         * this.
+         */
+        error = xfs_update_alignment(mp);
+        if (error)
+                goto out;
 
         /* enable fail_at_unmount as default */
         mp->m_fail_unmount = true;
 
-        error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
+        error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype,
+                               NULL, mp->m_super->s_id);
         if (error)
                 goto out;
 
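
Later in this patch (see the @@ -773,31 @@ hunk below) the open-coded replacement for xfs_set_rw_sizes() keeps only the write-size half: clamp m_allocsize_log to at least sb_blocklog, then derive the block count from the difference of the two base-2 logs. With 4 KiB blocks (sb_blocklog = 12) and a 64 KiB preferred allocation size (m_allocsize_log = 16), that is 1 << (16 - 12) = 16 blocks. The same arithmetic as a tiny checked sketch:

#include <assert.h>
#include <stdint.h>

/* Preferred allocation size in filesystem blocks, from two base-2 logs,
 * exactly as the hunk computes m_allocsize_blocks. */
static uint32_t allocsize_blocks(uint32_t allocsize_log, uint32_t sb_blocklog)
{
        if (allocsize_log < sb_blocklog)        /* the max_t() clamp */
                allocsize_log = sb_blocklog;
        return 1U << (allocsize_log - sb_blocklog);
}

int main(void)
{
        assert(allocsize_blocks(16, 12) == 16); /* 64 KiB of 4 KiB blocks */
        assert(allocsize_blocks(10, 12) == 1);  /* clamped to one block */
        return 0;
}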
@@ -773,31 +796,15 @@
                 goto out_remove_errortag;
 
         /*
-         * Set the minimum read and write sizes
+         * Update the preferred write size based on the information from the
+         * on-disk superblock.
          */
-        xfs_set_rw_sizes(mp);
+        mp->m_allocsize_log =
+                max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log);
+        mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog);
 
         /* set the low space thresholds for dynamic preallocation */
         xfs_set_low_space_thresholds(mp);
-
-        /*
-         * Set the inode cluster size.
-         * This may still be overridden by the file system
-         * block size if it is larger than the chosen cluster size.
-         *
-         * For v5 filesystems, scale the cluster size with the inode size to
-         * keep a constant ratio of inode per cluster buffer, but only if mkfs
-         * has set the inode alignment value appropriately for larger cluster
-         * sizes.
-         */
-        mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
-        if (xfs_sb_version_hascrc(&mp->m_sb)) {
-                int     new_size = mp->m_inode_cluster_size;
-
-                new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
-                if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
-                        mp->m_inode_cluster_size = new_size;
-        }
 
         /*
          * If enabled, sparse inode chunk alignment is expected to match the
@@ -806,19 +813,14 @@
          */
         if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
             mp->m_sb.sb_spino_align !=
-                        XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
+                        XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
                 xfs_warn(mp,
         "Sparse inode block alignment (%u) must match cluster size (%llu).",
                          mp->m_sb.sb_spino_align,
-                         XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
+                         XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
                 error = -EINVAL;
                 goto out_remove_uuid;
         }
-
-        /*
-         * Set inode alignment fields
-         */
-        xfs_set_inoalignment(mp);
 
         /*
          * Check that the data (and log if separate) is an ok size.
@@ -865,9 +867,8 @@
                 goto out_free_dir;
         }
 
-        if (!sbp->sb_logblocks) {
+        if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
                 xfs_warn(mp, "no log defined");
-                XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
                 error = -EFSCORRUPTED;
                 goto out_free_perag;
         }
@@ -905,12 +906,10 @@
 
         ASSERT(rip != NULL);
 
-        if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) {
+        if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) {
                 xfs_warn(mp, "corrupted root inode %llu: not a directory",
                         (unsigned long long)rip->i_ino);
                 xfs_iunlock(rip, XFS_ILOCK_EXCL);
-                XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
-                                 mp);
                 error = -EFSCORRUPTED;
                 goto out_rele_rip;
         }
@@ -969,9 +968,17 @@
         /*
          * Finish recovering the file system.  This part needed to be delayed
          * until after the root and real-time bitmap inodes were consistently
-         * read in.
+         * read in.  Temporarily create per-AG space reservations for metadata
+         * btree shape changes because space freeing transactions (for inode
+         * inactivation) require the per-AG reservation in lieu of reserving
+         * blocks.
          */
+        error = xfs_fs_reserve_ag_blocks(mp);
+        if (error && error == -ENOSPC)
+                xfs_warn(mp,
+        "ENOSPC reserving per-AG metadata pool, log recovery may fail.");
         error = xfs_log_mount_finish(mp);
+        xfs_fs_unreserve_ag_blocks(mp);
         if (error) {
                 xfs_warn(mp, "log mount finish failed");
                 goto out_rtunmount;
@@ -1047,7 +1054,7 @@
         /* Clean out dquots that might be in memory after quotacheck. */
         xfs_qm_unmount(mp);
         /*
-         * Cancel all delayed reclaim work and reclaim the inodes directly.
+         * Flush all inode reclamation work and flush the log.
          * We have to do this /after/ rtunmount and qm_unmount because those
          * two will have scheduled delayed reclaim for the rt/quota inodes.
          *
@@ -1057,10 +1064,8 @@
          * qm_unmount_quotas and therefore rely on qm_unmount to release the
          * quota inodes.
          */
-        cancel_delayed_work_sync(&mp->m_reclaim_work);
-        xfs_reclaim_inodes(mp, SYNC_WAIT);
+        xfs_unmount_flush_inodes(mp);
 out_log_dealloc:
-        mp->m_flags |= XFS_MOUNT_UNMOUNTING;
         xfs_log_mount_cancel(mp);
 out_fail_wait:
         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@@ -1095,52 +1100,13 @@
         uint64_t                resblks;
         int                     error;
 
-        xfs_icache_disable_reclaim(mp);
+        xfs_stop_block_reaping(mp);
         xfs_fs_unreserve_ag_blocks(mp);
         xfs_qm_unmount_quotas(mp);
         xfs_rtunmount_inodes(mp);
         xfs_irele(mp->m_rootip);
 
-        /*
-         * We can potentially deadlock here if we have an inode cluster
-         * that has been freed has its buffer still pinned in memory because
-         * the transaction is still sitting in a iclog. The stale inodes
-         * on that buffer will have their flush locks held until the
-         * transaction hits the disk and the callbacks run. the inode
-         * flush takes the flush lock unconditionally and with nothing to
-         * push out the iclog we will never get that unlocked. hence we
-         * need to force the log first.
-         */
-        xfs_log_force(mp, XFS_LOG_SYNC);
-
-        /*
-         * Wait for all busy extents to be freed, including completion of
-         * any discard operation.
-         */
-        xfs_extent_busy_wait_all(mp);
-        flush_workqueue(xfs_discard_wq);
-
-        /*
-         * We now need to tell the world we are unmounting. This will allow
-         * us to detect that the filesystem is going away and we should error
-         * out anything that we have been retrying in the background. This will
-         * prevent neverending retries in AIL pushing from hanging the unmount.
-         */
-        mp->m_flags |= XFS_MOUNT_UNMOUNTING;
-
-        /*
-         * Flush all pending changes from the AIL.
-         */
-        xfs_ail_push_all_sync(mp->m_ail);
-
-        /*
-         * And reclaim all inodes. At this point there should be no dirty
-         * inodes and none should be pinned or locked, but use synchronous
-         * reclaim just to be sure. We can stop background inode reclaim
-         * here as well if it is still running.
-         */
-        cancel_delayed_work_sync(&mp->m_reclaim_work);
-        xfs_reclaim_inodes(mp, SYNC_WAIT);
+        xfs_unmount_flush_inodes(mp);
 
         xfs_qm_unmount(mp);
 
@@ -1216,8 +1182,7 @@
 int
 xfs_log_sbcount(xfs_mount_t *mp)
 {
-        /* allow this to proceed during the freeze sequence... */
-        if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
+        if (!xfs_log_writable(mp))
                 return 0;
 
         /*
@@ -1228,39 +1193,6 @@
                 return 0;
 
         return xfs_sync_sb(mp, true);
-}
-
-/*
- * Deltas for the inode count are +/-64, hence we use a large batch size
- * of 128 so we don't need to take the counter lock on every update.
- */
-#define XFS_ICOUNT_BATCH        128
-int
-xfs_mod_icount(
-        struct xfs_mount        *mp,
-        int64_t                 delta)
-{
-        percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
-        if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
-                ASSERT(0);
-                percpu_counter_add(&mp->m_icount, -delta);
-                return -EINVAL;
-        }
-        return 0;
-}
-
-int
-xfs_mod_ifree(
-        struct xfs_mount        *mp,
-        int64_t                 delta)
-{
-        percpu_counter_add(&mp->m_ifree, delta);
-        if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
-                ASSERT(0);
-                percpu_counter_add(&mp->m_ifree, -delta);
-                return -EINVAL;
-        }
-        return 0;
 }
 
 /*
@@ -1341,10 +1273,9 @@
                 spin_unlock(&mp->m_sb_lock);
                 return 0;
         }
-        printk_once(KERN_WARNING
-                "Filesystem \"%s\": reserve blocks depleted! "
-                "Consider increasing reserve pool size.",
-                mp->m_fsname);
+        xfs_warn_once(mp,
+"Reserve blocks depleted! Consider increasing reserve pool size.");
+
 fdblocks_enospc:
         spin_unlock(&mp->m_sb_lock);
         return -ENOSPC;
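
xfs_warn_once() is the printk_once() pattern with the device name prefixed for free: a static flag suppresses every emission after the first, so a hot path can warn without flooding the log. A minimal userspace equivalent (single-threaded sketch; the kernel macro uses the same static-flag trick):

#include <stdio.h>

/* Print the message the first time this call site is reached; drop all
 * later calls.  Each macro expansion gets its own static flag. */
#define warn_once(...)                                  \
        do {                                            \
                static int warned;                      \
                if (!warned) {                          \
                        warned = 1;                     \
                        fprintf(stderr, __VA_ARGS__);   \
                }                                       \
        } while (0)

int main(void)
{
        for (int i = 0; i < 3; i++)
                warn_once("Reserve blocks depleted! (printed once)\n");
        return 0;
}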
@@ -1366,33 +1297,6 @@
         mp->m_sb.sb_frextents = lcounter;
         spin_unlock(&mp->m_sb_lock);
         return ret;
-}
-
-/*
- * xfs_getsb() is called to obtain the buffer for the superblock.
- * The buffer is returned locked and read in from disk.
- * The buffer should be released with a call to xfs_brelse().
- *
- * If the flags parameter is BUF_TRYLOCK, then we'll only return
- * the superblock buffer if it can be locked without sleeping.
- * If it can't then we'll return NULL.
- */
-struct xfs_buf *
-xfs_getsb(
-        struct xfs_mount        *mp,
-        int                     flags)
-{
-        struct xfs_buf          *bp = mp->m_sb_bp;
-
-        if (!xfs_buf_trylock(bp)) {
-                if (flags & XBF_TRYLOCK)
-                        return NULL;
-                xfs_buf_lock(bp);
-        }
-
-        xfs_buf_hold(bp);
-        ASSERT(bp->b_flags & XBF_DONE);
-        return bp;
 }
 
 /*
13981302 /*
....@@ -1436,7 +1340,26 @@
14361340 if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
14371341 return;
14381342
1439
- spin_lock(&mp->m_sb_lock);
1440
- mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
1441
- spin_unlock(&mp->m_sb_lock);
1343
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
1344
+}
1345
+
1346
+/*
1347
+ * Update the in-core delayed block counter.
1348
+ *
1349
+ * We prefer to update the counter without having to take a spinlock for every
1350
+ * counter update (i.e. batching). Each change to delayed allocation
1351
+ * reservations can change can easily exceed the default percpu counter
1352
+ * batching, so we use a larger batch factor here.
1353
+ *
1354
+ * Note that we don't currently have any callers requiring fast summation
1355
+ * (e.g. percpu_counter_read) so we can use a big batch value here.
1356
+ */
1357
+#define XFS_DELALLOC_BATCH (4096)
1358
+void
1359
+xfs_mod_delalloc(
1360
+ struct xfs_mount *mp,
1361
+ int64_t delta)
1362
+{
1363
+ percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
1364
+ XFS_DELALLOC_BATCH);
14421365 }
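
The batch factor in the new xfs_mod_delalloc() trades summation accuracy for update cheapness: each CPU folds up to XFS_DELALLOC_BATCH worth of deltas into its local counter before touching shared state, which is fine because nothing needs a fast precise read of m_delalloc_blks. A sketch of the same idea with a thread-local accumulator (percpu_counter is kernel-only, so this is an analogy rather than the kernel API):

#include <stdatomic.h>
#include <stdint.h>

#define DELALLOC_BATCH  4096    /* mirrors XFS_DELALLOC_BATCH */

static atomic_int_fast64_t global_delalloc;     /* shared total */
static _Thread_local int64_t local_delalloc;    /* per-thread slack */

/* Batched update in the spirit of percpu_counter_add_batch(): deltas
 * accumulate thread-locally and spill into the shared counter only once
 * they exceed the batch, so most updates touch no shared cacheline.  The
 * global value may lag by up to DELALLOC_BATCH per thread. */
static void mod_delalloc(int64_t delta)
{
        local_delalloc += delta;
        if (local_delalloc >= DELALLOC_BATCH ||
            local_delalloc <= -DELALLOC_BATCH) {
                atomic_fetch_add(&global_delalloc, local_delalloc);
                local_delalloc = 0;
        }
}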