2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/xfs/xfs_mount.c
@@ -12,9 +12,6 @@
 #include "xfs_bit.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
 #include "xfs_inode.h"
 #include "xfs_dir2.h"
 #include "xfs_ialloc.h"
@@ -27,14 +24,14 @@
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_fsops.h"
-#include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_sysfs.h"
 #include "xfs_rmap_btree.h"
 #include "xfs_refcount_btree.h"
 #include "xfs_reflink.h"
 #include "xfs_extent_busy.h"
-
+#include "xfs_health.h"
+#include "xfs_trace.h"
 
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
@@ -83,9 +80,9 @@
 	}
 
 	if (hole < 0) {
-		xfs_uuid_table = kmem_realloc(xfs_uuid_table,
+		xfs_uuid_table = krealloc(xfs_uuid_table,
 			(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
-			KM_SLEEP);
+			GFP_KERNEL | __GFP_NOFAIL);
 		hole = xfs_uuid_table_size++;
 	}
 	xfs_uuid_table[hole] = *uuid;
@@ -129,7 +126,6 @@
 {
 	struct xfs_perag	*pag = container_of(head, struct xfs_perag, rcu_head);
 
-	ASSERT(atomic_read(&pag->pag_ref) == 0);
 	kmem_free(pag);
 }
 
@@ -148,9 +144,9 @@
 		pag = radix_tree_delete(&mp->m_perag_tree, agno);
 		spin_unlock(&mp->m_perag_lock);
 		ASSERT(pag);
-		ASSERT(atomic_read(&pag->pag_ref) == 0);
+		XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
+		xfs_iunlink_destroy(pag);
 		xfs_buf_hash_destroy(pag);
-		mutex_destroy(&pag->pag_ici_reclaim_lock);
 		call_rcu(&pag->rcu_head, __xfs_free_perag);
 	}
 }
@@ -197,26 +193,30 @@
 		}
 
 		pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
-		if (!pag)
+		if (!pag) {
+			error = -ENOMEM;
 			goto out_unwind_new_pags;
+		}
 		pag->pag_agno = index;
 		pag->pag_mount = mp;
 		spin_lock_init(&pag->pag_ici_lock);
-		mutex_init(&pag->pag_ici_reclaim_lock);
 		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
-		if (xfs_buf_hash_init(pag))
+
+		error = xfs_buf_hash_init(pag);
+		if (error)
 			goto out_free_pag;
 		init_waitqueue_head(&pag->pagb_wait);
 		spin_lock_init(&pag->pagb_lock);
 		pag->pagb_count = 0;
 		pag->pagb_tree = RB_ROOT;
 
-		if (radix_tree_preload(GFP_NOFS))
+		error = radix_tree_preload(GFP_NOFS);
+		if (error)
 			goto out_hash_destroy;
 
 		spin_lock(&mp->m_perag_lock);
 		if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
-			BUG();
+			WARN_ON_ONCE(1);
 			spin_unlock(&mp->m_perag_lock);
 			radix_tree_preload_end();
 			error = -EEXIST;
@@ -227,6 +227,10 @@
 		/* first new pag is fully initialized */
 		if (first_initialised == NULLAGNUMBER)
 			first_initialised = index;
+		error = xfs_iunlink_init(pag);
+		if (error)
+			goto out_hash_destroy;
+		spin_lock_init(&pag->pag_state_lock);
 	}
 
 	index = xfs_set_inode_alloc(mp, agcount);
@@ -240,7 +244,6 @@
 out_hash_destroy:
 	xfs_buf_hash_destroy(pag);
 out_free_pag:
-	mutex_destroy(&pag->pag_ici_reclaim_lock);
 	kmem_free(pag);
 out_unwind_new_pags:
 	/* unwind any prior newly initialized pags */
@@ -249,7 +252,7 @@
 		if (!pag)
 			break;
 		xfs_buf_hash_destroy(pag);
-		mutex_destroy(&pag->pag_ici_reclaim_lock);
+		xfs_iunlink_destroy(pag);
 		kmem_free(pag);
 	}
 	return error;
@@ -307,7 +310,7 @@
 	/*
 	 * Initialize the mount structure from the superblock.
 	 */
-	xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
+	xfs_sb_from_disk(sbp, bp->b_addr);
 
 	/*
 	 * If we haven't validated the superblock, do so now before we try
@@ -357,132 +360,122 @@
 }
 
 /*
- * Update alignment values based on mount options and sb values
+ * If the sunit/swidth change would move the precomputed root inode value, we
+ * must reject the ondisk change because repair will stumble over that.
+ * However, we allow the mount to proceed because we never rejected this
+ * combination before.  Returns true to update the sb, false otherwise.
+ */
+static inline int
+xfs_check_new_dalign(
+	struct xfs_mount	*mp,
+	int			new_dalign,
+	bool			*update_sb)
+{
+	struct xfs_sb		*sbp = &mp->m_sb;
+	xfs_ino_t		calc_ino;
+
+	calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
+	trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
+
+	if (sbp->sb_rootino == calc_ino) {
+		*update_sb = true;
+		return 0;
+	}
+
+	xfs_warn(mp,
+"Cannot change stripe alignment; would require moving root inode.");
+
+	/*
+	 * XXX: Next time we add a new incompat feature, this should start
+	 * returning -EINVAL to fail the mount.  Until then, spit out a warning
+	 * that we're ignoring the administrator's instructions.
+	 */
+	xfs_warn(mp, "Skipping superblock stripe alignment update.");
+	*update_sb = false;
+	return 0;
+}
+
+/*
+ * If we were provided with new sunit/swidth values as mount options, make sure
+ * that they pass basic alignment and superblock feature checks, and convert
+ * them into the same units (FSB) that everything else expects.  This step
+ * /must/ be done before computing the inode geometry.
  */
 STATIC int
-xfs_update_alignment(xfs_mount_t *mp)
+xfs_validate_new_dalign(
+	struct xfs_mount	*mp)
 {
-	xfs_sb_t	*sbp = &(mp->m_sb);
+	if (mp->m_dalign == 0)
+		return 0;
 
-	if (mp->m_dalign) {
+	/*
+	 * If stripe unit and stripe width are not multiples
+	 * of the fs blocksize turn off alignment.
+	 */
+	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
+	    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+		xfs_warn(mp,
+	"alignment check failed: sunit/swidth vs. blocksize(%d)",
+			mp->m_sb.sb_blocksize);
+		return -EINVAL;
+	} else {
 		/*
-		 * If stripe unit and stripe width are not multiples
-		 * of the fs blocksize turn off alignment.
+		 * Convert the stripe unit and width to FSBs.
 		 */
-		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
-		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+		mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
+		if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
 			xfs_warn(mp,
-	"alignment check failed: sunit/swidth vs. blocksize(%d)",
-				sbp->sb_blocksize);
+		"alignment check failed: sunit/swidth vs. agsize(%d)",
+				mp->m_sb.sb_agblocks);
 			return -EINVAL;
-		} else {
-			/*
-			 * Convert the stripe unit and width to FSBs.
-			 */
-			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
-			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
-				xfs_warn(mp,
-			"alignment check failed: sunit/swidth vs. agsize(%d)",
-					sbp->sb_agblocks);
-				return -EINVAL;
-			} else if (mp->m_dalign) {
-				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
-			} else {
-				xfs_warn(mp,
-		"alignment check failed: sunit(%d) less than bsize(%d)",
-					mp->m_dalign, sbp->sb_blocksize);
-				return -EINVAL;
-			}
-		}
-
-		/*
-		 * Update superblock with new values
-		 * and log changes
-		 */
-		if (xfs_sb_version_hasdalign(sbp)) {
-			if (sbp->sb_unit != mp->m_dalign) {
-				sbp->sb_unit = mp->m_dalign;
-				mp->m_update_sb = true;
-			}
-			if (sbp->sb_width != mp->m_swidth) {
-				sbp->sb_width = mp->m_swidth;
-				mp->m_update_sb = true;
-			}
+		} else if (mp->m_dalign) {
+			mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
 		} else {
 			xfs_warn(mp,
-	"cannot change alignment: superblock does not support data alignment");
+		"alignment check failed: sunit(%d) less than bsize(%d)",
+				mp->m_dalign, mp->m_sb.sb_blocksize);
 			return -EINVAL;
 		}
-	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
-		    xfs_sb_version_hasdalign(&mp->m_sb)) {
-		mp->m_dalign = sbp->sb_unit;
-		mp->m_swidth = sbp->sb_width;
+	}
+
+	if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
+		xfs_warn(mp,
+"cannot change alignment: superblock does not support data alignment");
+		return -EINVAL;
 	}
 
 	return 0;
 }
 
-/*
- * Set the maximum inode count for this filesystem
- */
-STATIC void
-xfs_set_maxicount(xfs_mount_t *mp)
+/* Update alignment values based on mount options and sb values. */
+STATIC int
+xfs_update_alignment(
+	struct xfs_mount	*mp)
 {
-	xfs_sb_t	*sbp = &(mp->m_sb);
-	uint64_t	icount;
+	struct xfs_sb		*sbp = &mp->m_sb;
 
-	if (sbp->sb_imax_pct) {
-		/*
-		 * Make sure the maximum inode count is a multiple
-		 * of the units we allocate inodes in.
-		 */
-		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
-		do_div(icount, 100);
-		do_div(icount, mp->m_ialloc_blks);
-		mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
-				   sbp->sb_inopblog;
-	} else {
-		mp->m_maxicount = 0;
-	}
-}
+	if (mp->m_dalign) {
+		bool		update_sb;
+		int		error;
 
-/*
- * Set the default minimum read and write sizes unless
- * already specified in a mount option.
- * We use smaller I/O sizes when the file system
- * is being used for NFS service (wsync mount option).
- */
-STATIC void
-xfs_set_rw_sizes(xfs_mount_t *mp)
-{
-	xfs_sb_t	*sbp = &(mp->m_sb);
-	int		readio_log, writeio_log;
+		if (sbp->sb_unit == mp->m_dalign &&
+		    sbp->sb_width == mp->m_swidth)
+			return 0;
 
-	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
-		if (mp->m_flags & XFS_MOUNT_WSYNC) {
-			readio_log = XFS_WSYNC_READIO_LOG;
-			writeio_log = XFS_WSYNC_WRITEIO_LOG;
-		} else {
-			readio_log = XFS_READIO_LOG_LARGE;
-			writeio_log = XFS_WRITEIO_LOG_LARGE;
-		}
-	} else {
-		readio_log = mp->m_readio_log;
-		writeio_log = mp->m_writeio_log;
+		error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
+		if (error || !update_sb)
+			return error;
+
+		sbp->sb_unit = mp->m_dalign;
+		sbp->sb_width = mp->m_swidth;
+		mp->m_update_sb = true;
+	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
+		    xfs_sb_version_hasdalign(&mp->m_sb)) {
+		mp->m_dalign = sbp->sb_unit;
+		mp->m_swidth = sbp->sb_width;
 	}
 
-	if (sbp->sb_blocklog > readio_log) {
-		mp->m_readio_log = sbp->sb_blocklog;
-	} else {
-		mp->m_readio_log = readio_log;
-	}
-	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
-	if (sbp->sb_blocklog > writeio_log) {
-		mp->m_writeio_log = sbp->sb_blocklog;
-	} else {
-		mp->m_writeio_log = writeio_log;
-	}
-	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
+	return 0;
 }
 
 /*
@@ -500,29 +493,6 @@
 		do_div(space, 100);
 		mp->m_low_space[i] = space * (i + 1);
 	}
-}
-
-
-/*
- * Set whether we're using inode alignment.
- */
-STATIC void
-xfs_set_inoalignment(xfs_mount_t *mp)
-{
-	if (xfs_sb_version_hasalign(&mp->m_sb) &&
-	    mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
-		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
-	else
-		mp->m_inoalign_mask = 0;
-	/*
-	 * If we are using stripe alignment, check whether
-	 * the stripe unit is a multiple of the inode alignment
-	 */
-	if (mp->m_dalign && mp->m_inoalign_mask &&
-	    !(mp->m_dalign & mp->m_inoalign_mask))
-		mp->m_sinoalign = mp->m_dalign;
-	else
-		mp->m_sinoalign = 0;
 }
 
 /*
@@ -639,7 +609,7 @@
 	    (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks ||
 	     !xfs_verify_icount(mp, mp->m_sb.sb_icount) ||
 	     mp->m_sb.sb_ifree > mp->m_sb.sb_icount))
-		mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
+		xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
 
 	/*
 	 * We can safely re-initialise incore superblock counters from the
@@ -654,10 +624,51 @@
 	 */
 	if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) ||
 	     XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) &&
-	    !(mp->m_flags & XFS_MOUNT_BAD_SUMMARY))
+	    !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS))
 		return 0;
 
 	return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
+}
+
+/*
+ * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
+ * internal inode structures can be sitting in the CIL and AIL at this point,
+ * so we need to unpin them, write them back and/or reclaim them before unmount
+ * can proceed.
+ *
+ * An inode cluster that has been freed can have its buffer still pinned in
+ * memory because the transaction is still sitting in a iclog. The stale inodes
+ * on that buffer will be pinned to the buffer until the transaction hits the
+ * disk and the callbacks run. Pushing the AIL will skip the stale inodes and
+ * may never see the pinned buffer, so nothing will push out the iclog and
+ * unpin the buffer.
+ *
+ * Hence we need to force the log to unpin everything first. However, log
+ * forces don't wait for the discards they issue to complete, so we have to
+ * explicitly wait for them to complete here as well.
+ *
+ * Then we can tell the world we are unmounting so that error handling knows
+ * that the filesystem is going away and we should error out anything that we
+ * have been retrying in the background. This will prevent never-ending
+ * retries in AIL pushing from hanging the unmount.
+ *
+ * Finally, we can push the AIL to clean all the remaining dirty objects, then
+ * reclaim the remaining inodes that are still in memory at this point in time.
+ */
+static void
+xfs_unmount_flush_inodes(
+	struct xfs_mount	*mp)
+{
+	xfs_log_force(mp, XFS_LOG_SYNC);
+	xfs_extent_busy_wait_all(mp);
+	flush_workqueue(xfs_discard_wq);
+
+	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+
+	xfs_ail_push_all_sync(mp->m_ail);
+	cancel_delayed_work_sync(&mp->m_reclaim_work);
+	xfs_reclaim_inodes(mp);
+	xfs_health_unmount(mp);
 }
 
 /*
@@ -676,6 +687,7 @@
 {
 	struct xfs_sb		*sbp = &(mp->m_sb);
 	struct xfs_inode	*rip;
+	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
 	uint64_t		resblks;
 	uint			quotamount = 0;
 	uint			quotaflags = 0;
@@ -730,28 +742,38 @@
 	}
 
 	/*
-	 * Check if sb_agblocks is aligned at stripe boundary
-	 * If sb_agblocks is NOT aligned turn off m_dalign since
-	 * allocator alignment is within an ag, therefore ag has
-	 * to be aligned at stripe boundary.
+	 * If we were given new sunit/swidth options, do some basic validation
+	 * checks and convert the incore dalign and swidth values to the
+	 * same units (FSB) that everything else uses.  This /must/ happen
+	 * before computing the inode geometry.
 	 */
-	error = xfs_update_alignment(mp);
+	error = xfs_validate_new_dalign(mp);
 	if (error)
 		goto out;
 
 	xfs_alloc_compute_maxlevels(mp);
 	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
 	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
-	xfs_ialloc_compute_maxlevels(mp);
+	xfs_ialloc_setup_geometry(mp);
 	xfs_rmapbt_compute_maxlevels(mp);
 	xfs_refcountbt_compute_maxlevels(mp);
 
-	xfs_set_maxicount(mp);
+	/*
+	 * Check if sb_agblocks is aligned at stripe boundary.  If sb_agblocks
+	 * is NOT aligned turn off m_dalign since allocator alignment is within
+	 * an ag, therefore ag has to be aligned at stripe boundary.  Note that
+	 * we must compute the free space and rmap btree geometry before doing
+	 * this.
+	 */
+	error = xfs_update_alignment(mp);
+	if (error)
+		goto out;
 
 	/* enable fail_at_unmount as default */
 	mp->m_fail_unmount = true;
 
-	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
+	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype,
+			       NULL, mp->m_super->s_id);
 	if (error)
 		goto out;
 
@@ -773,31 +795,15 @@
 		goto out_remove_errortag;
 
 	/*
-	 * Set the minimum read and write sizes
+	 * Update the preferred write size based on the information from the
+	 * on-disk superblock.
 	 */
-	xfs_set_rw_sizes(mp);
+	mp->m_allocsize_log =
+		max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log);
+	mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog);
 
 	/* set the low space thresholds for dynamic preallocation */
 	xfs_set_low_space_thresholds(mp);
-
-	/*
-	 * Set the inode cluster size.
-	 * This may still be overridden by the file system
-	 * block size if it is larger than the chosen cluster size.
-	 *
-	 * For v5 filesystems, scale the cluster size with the inode size to
-	 * keep a constant ratio of inode per cluster buffer, but only if mkfs
-	 * has set the inode alignment value appropriately for larger cluster
-	 * sizes.
-	 */
-	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
-	if (xfs_sb_version_hascrc(&mp->m_sb)) {
-		int	new_size = mp->m_inode_cluster_size;
-
-		new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
-		if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
-			mp->m_inode_cluster_size = new_size;
-	}
 
 	/*
 	 * If enabled, sparse inode chunk alignment is expected to match the
@@ -806,19 +812,14 @@
 	 */
 	if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
	    mp->m_sb.sb_spino_align !=
-			XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
+			XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
 		xfs_warn(mp,
 	"Sparse inode block alignment (%u) must match cluster size (%llu).",
 			 mp->m_sb.sb_spino_align,
-			 XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
+			 XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
 		error = -EINVAL;
 		goto out_remove_uuid;
 	}
-
-	/*
-	 * Set inode alignment fields
-	 */
-	xfs_set_inoalignment(mp);
 
 	/*
 	 * Check that the data (and log if separate) is an ok size.
@@ -865,9 +866,8 @@
 		goto out_free_dir;
 	}
 
-	if (!sbp->sb_logblocks) {
+	if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
 		xfs_warn(mp, "no log defined");
-		XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
 		error = -EFSCORRUPTED;
 		goto out_free_perag;
 	}
@@ -905,12 +905,10 @@
 
 	ASSERT(rip != NULL);
 
-	if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) {
+	if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) {
 		xfs_warn(mp, "corrupted root inode %llu: not a directory",
 			(unsigned long long)rip->i_ino);
 		xfs_iunlock(rip, XFS_ILOCK_EXCL);
-		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
-				 mp);
 		error = -EFSCORRUPTED;
 		goto out_rele_rip;
 	}
@@ -969,9 +967,17 @@
 	/*
 	 * Finish recovering the file system.  This part needed to be delayed
 	 * until after the root and real-time bitmap inodes were consistently
-	 * read in.
+	 * read in.  Temporarily create per-AG space reservations for metadata
+	 * btree shape changes because space freeing transactions (for inode
+	 * inactivation) require the per-AG reservation in lieu of reserving
+	 * blocks.
 	 */
+	error = xfs_fs_reserve_ag_blocks(mp);
+	if (error && error == -ENOSPC)
+		xfs_warn(mp,
+	"ENOSPC reserving per-AG metadata pool, log recovery may fail.");
 	error = xfs_log_mount_finish(mp);
+	xfs_fs_unreserve_ag_blocks(mp);
 	if (error) {
 		xfs_warn(mp, "log mount finish failed");
 		goto out_rtunmount;
@@ -1047,7 +1053,7 @@
 	/* Clean out dquots that might be in memory after quotacheck. */
 	xfs_qm_unmount(mp);
 	/*
-	 * Cancel all delayed reclaim work and reclaim the inodes directly.
+	 * Flush all inode reclamation work and flush the log.
 	 * We have to do this /after/ rtunmount and qm_unmount because those
 	 * two will have scheduled delayed reclaim for the rt/quota inodes.
 	 *
@@ -1057,10 +1063,8 @@
 	 * qm_unmount_quotas and therefore rely on qm_unmount to release the
 	 * quota inodes.
 	 */
-	cancel_delayed_work_sync(&mp->m_reclaim_work);
-	xfs_reclaim_inodes(mp, SYNC_WAIT);
+	xfs_unmount_flush_inodes(mp);
  out_log_dealloc:
-	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
 	xfs_log_mount_cancel(mp);
  out_fail_wait:
 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@@ -1095,52 +1099,13 @@
 	uint64_t		resblks;
 	int			error;
 
-	xfs_icache_disable_reclaim(mp);
+	xfs_stop_block_reaping(mp);
 	xfs_fs_unreserve_ag_blocks(mp);
 	xfs_qm_unmount_quotas(mp);
 	xfs_rtunmount_inodes(mp);
 	xfs_irele(mp->m_rootip);
 
-	/*
-	 * We can potentially deadlock here if we have an inode cluster
-	 * that has been freed has its buffer still pinned in memory because
-	 * the transaction is still sitting in a iclog. The stale inodes
-	 * on that buffer will have their flush locks held until the
-	 * transaction hits the disk and the callbacks run. the inode
-	 * flush takes the flush lock unconditionally and with nothing to
-	 * push out the iclog we will never get that unlocked. hence we
-	 * need to force the log first.
-	 */
-	xfs_log_force(mp, XFS_LOG_SYNC);
-
-	/*
-	 * Wait for all busy extents to be freed, including completion of
-	 * any discard operation.
-	 */
-	xfs_extent_busy_wait_all(mp);
-	flush_workqueue(xfs_discard_wq);
-
-	/*
-	 * We now need to tell the world we are unmounting. This will allow
-	 * us to detect that the filesystem is going away and we should error
-	 * out anything that we have been retrying in the background. This will
-	 * prevent neverending retries in AIL pushing from hanging the unmount.
-	 */
-	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
-
-	/*
-	 * Flush all pending changes from the AIL.
-	 */
-	xfs_ail_push_all_sync(mp->m_ail);
-
-	/*
-	 * And reclaim all inodes. At this point there should be no dirty
-	 * inodes and none should be pinned or locked, but use synchronous
-	 * reclaim just to be sure. We can stop background inode reclaim
-	 * here as well if it is still running.
-	 */
-	cancel_delayed_work_sync(&mp->m_reclaim_work);
-	xfs_reclaim_inodes(mp, SYNC_WAIT);
+	xfs_unmount_flush_inodes(mp);
 
 	xfs_qm_unmount(mp);
 
@@ -1216,8 +1181,7 @@
 int
 xfs_log_sbcount(xfs_mount_t *mp)
 {
-	/* allow this to proceed during the freeze sequence... */
-	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
+	if (!xfs_log_writable(mp))
 		return 0;
 
 	/*
@@ -1228,39 +1192,6 @@
 		return 0;
 
 	return xfs_sync_sb(mp, true);
-}
-
-/*
- * Deltas for the inode count are +/-64, hence we use a large batch size
- * of 128 so we don't need to take the counter lock on every update.
- */
-#define XFS_ICOUNT_BATCH	128
-int
-xfs_mod_icount(
-	struct xfs_mount	*mp,
-	int64_t			delta)
-{
-	percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
-	if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
-		ASSERT(0);
-		percpu_counter_add(&mp->m_icount, -delta);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-int
-xfs_mod_ifree(
-	struct xfs_mount	*mp,
-	int64_t			delta)
-{
-	percpu_counter_add(&mp->m_ifree, delta);
-	if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
-		ASSERT(0);
-		percpu_counter_add(&mp->m_ifree, -delta);
-		return -EINVAL;
-	}
-	return 0;
 }
 
 /*
@@ -1341,10 +1272,9 @@
 		spin_unlock(&mp->m_sb_lock);
 		return 0;
 	}
-	printk_once(KERN_WARNING
-		"Filesystem \"%s\": reserve blocks depleted! "
-		"Consider increasing reserve pool size.",
-		mp->m_fsname);
+	xfs_warn_once(mp,
+"Reserve blocks depleted! Consider increasing reserve pool size.");
+
 fdblocks_enospc:
 	spin_unlock(&mp->m_sb_lock);
 	return -ENOSPC;
@@ -1366,33 +1296,6 @@
 	mp->m_sb.sb_frextents = lcounter;
 	spin_unlock(&mp->m_sb_lock);
 	return ret;
-}
-
-/*
- * xfs_getsb() is called to obtain the buffer for the superblock.
- * The buffer is returned locked and read in from disk.
- * The buffer should be released with a call to xfs_brelse().
- *
- * If the flags parameter is BUF_TRYLOCK, then we'll only return
- * the superblock buffer if it can be locked without sleeping.
- * If it can't then we'll return NULL.
- */
-struct xfs_buf *
-xfs_getsb(
-	struct xfs_mount	*mp,
-	int			flags)
-{
-	struct xfs_buf		*bp = mp->m_sb_bp;
-
-	if (!xfs_buf_trylock(bp)) {
-		if (flags & XBF_TRYLOCK)
-			return NULL;
-		xfs_buf_lock(bp);
-	}
-
-	xfs_buf_hold(bp);
-	ASSERT(bp->b_flags & XBF_DONE);
-	return bp;
 }
 
 /*
@@ -1436,7 +1339,26 @@
 	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
 		return;
 
-	spin_lock(&mp->m_sb_lock);
-	mp->m_flags |= XFS_MOUNT_BAD_SUMMARY;
-	spin_unlock(&mp->m_sb_lock);
+	xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
+}
+
+/*
+ * Update the in-core delayed block counter.
+ *
+ * We prefer to update the counter without having to take a spinlock for every
+ * counter update (i.e. batching).  Each change to delayed allocation
+ * reservations can easily exceed the default percpu counter
+ * batching, so we use a larger batch factor here.
+ *
+ * Note that we don't currently have any callers requiring fast summation
+ * (e.g. percpu_counter_read) so we can use a big batch value here.
+ */
+#define XFS_DELALLOC_BATCH	(4096)
+void
+xfs_mod_delalloc(
+	struct xfs_mount	*mp,
+	int64_t			delta)
+{
+	percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
+			XFS_DELALLOC_BATCH);
 }