~hc/RK356X_SDK_RELEASE.git

..	..	@@ -5,13 +5,13 @@
5	5	*/
6	6	#include "xfs.h"
7	7	#include "xfs_fs.h"
	8	+#include "xfs_shared.h"
8	9	#include "xfs_format.h"
9	10	#include "xfs_log_format.h"
10	11	#include "xfs_trans_resv.h"
11	12	#include "xfs_sb.h"
12	13	#include "xfs_mount.h"
13	14	#include "xfs_inode.h"
14		-#include "xfs_error.h"
15	15	#include "xfs_trans.h"
16	16	#include "xfs_trans_priv.h"
17	17	#include "xfs_inode_item.h"
..	..	@@ -22,9 +22,8 @@
22	22	#include "xfs_dquot_item.h"
23	23	#include "xfs_dquot.h"
24	24	#include "xfs_reflink.h"
	25	+#include "xfs_ialloc.h"
25	26
26		-#include <linux/kthread.h>
27		-#include <linux/freezer.h>
28	27	#include <linux/iversion.h>
29	28
30	29	/*
..	..	@@ -38,24 +37,22 @@
38	37	struct xfs_inode *ip;
39	38
40	39	/*
41		- * if this didn't occur in transactions, we could use
42		- * KM_MAYFAIL and return NULL here on ENOMEM. Set the
43		- * code up to do this anyway.
	40	+ * XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL
	41	+ * and return NULL here on ENOMEM.
44	42	*/
45		- ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
46		- if (!ip)
47		- return NULL;
	43	+ ip = kmem_cache_alloc(xfs_inode_zone, GFP_KERNEL \| __GFP_NOFAIL);
	44	+
48	45	if (inode_init_always(mp->m_super, VFS_I(ip))) {
49		- kmem_zone_free(xfs_inode_zone, ip);
	46	+ kmem_cache_free(xfs_inode_zone, ip);
50	47	return NULL;
51	48	}
52	49
53		- /* VFS doesn't initialise i_mode! */
	50	+ /* VFS doesn't initialise i_mode or i_state! */
54	51	VFS_I(ip)->i_mode = 0;
	52	+ VFS_I(ip)->i_state = 0;
55	53
56	54	XFS_STATS_INC(mp, vn_active);
57	55	ASSERT(atomic_read(&ip->i_pincount) == 0);
58		- ASSERT(!xfs_isiflocked(ip));
59	56	ASSERT(ip->i_ino == 0);
60	57
61	58	/* initialise the xfs inode */
..	..	@@ -64,12 +61,15 @@
64	61	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
65	62	ip->i_afp = NULL;
66	63	ip->i_cowfp = NULL;
67		- ip->i_cnextents = 0;
68		- ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
69	64	memset(&ip->i_df, 0, sizeof(ip->i_df));
70	65	ip->i_flags = 0;
71	66	ip->i_delayed_blks = 0;
72	67	memset(&ip->i_d, 0, sizeof(ip->i_d));
	68	+ ip->i_sick = 0;
	69	+ ip->i_checked = 0;
	70	+ INIT_WORK(&ip->i_ioend_work, xfs_end_io);
	71	+ INIT_LIST_HEAD(&ip->i_ioend_list);
	72	+ spin_lock_init(&ip->i_ioend_lock);
73	73
74	74	return ip;
75	75	}
..	..	@@ -85,15 +85,18 @@
85	85	case S_IFREG:
86	86	case S_IFDIR:
87	87	case S_IFLNK:
88		- xfs_idestroy_fork(ip, XFS_DATA_FORK);
	88	+ xfs_idestroy_fork(&ip->i_df);
89	89	break;
90	90	}
91	91
92		- if (ip->i_afp)
93		- xfs_idestroy_fork(ip, XFS_ATTR_FORK);
94		- if (ip->i_cowfp)
95		- xfs_idestroy_fork(ip, XFS_COW_FORK);
96		-
	92	+ if (ip->i_afp) {
	93	+ xfs_idestroy_fork(ip->i_afp);
	94	+ kmem_cache_free(xfs_ifork_zone, ip->i_afp);
	95	+ }
	96	+ if (ip->i_cowfp) {
	97	+ xfs_idestroy_fork(ip->i_cowfp);
	98	+ kmem_cache_free(xfs_ifork_zone, ip->i_cowfp);
	99	+ }
97	100	if (ip->i_itemp) {
98	101	ASSERT(!test_bit(XFS_LI_IN_AIL,
99	102	&ip->i_itemp->ili_item.li_flags));
..	..	@@ -101,7 +104,7 @@
101	104	ip->i_itemp = NULL;
102	105	}
103	106
104		- kmem_zone_free(xfs_inode_zone, ip);
	107	+ kmem_cache_free(xfs_inode_zone, ip);
105	108	}
106	109
107	110	static void
..	..	@@ -110,6 +113,7 @@
110	113	{
111	114	/* asserts to verify all state is correct here */
112	115	ASSERT(atomic_read(&ip->i_pincount) == 0);
	116	+ ASSERT(!ip->i_itemp \|\| list_empty(&ip->i_itemp->ili_item.li_bio_list));
113	117	XFS_STATS_DEC(ip->i_mount, vn_active);
114	118
115	119	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
..	..	@@ -119,7 +123,7 @@
119	123	xfs_inode_free(
120	124	struct xfs_inode *ip)
121	125	{
122		- ASSERT(!xfs_isiflocked(ip));
	126	+ ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING));
123	127
124	128	/*
125	129	* Because we use RCU freeing we need to ensure the inode always
..	..	@@ -136,11 +140,8 @@
136	140	}
137	141
138	142	/*
139		- * Queue a new inode reclaim pass if there are reclaimable inodes and there
140		- * isn't a reclaim pass already in progress. By default it runs every 5s based
141		- * on the xfs periodic sync default of 30s. Perhaps this should have it's own
142		- * tunable, but that can be done if this method proves to be ineffective or too
143		- * aggressive.
	143	+ * Queue background inode reclaim work if there are reclaimable inodes and there
	144	+ * isn't reclaim work already scheduled or in progress.
144	145	*/
145	146	static void
146	147	xfs_reclaim_work_queue(
..	..	@@ -153,24 +154,6 @@
153	154	msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
154	155	}
155	156	rcu_read_unlock();
156		-}
157		-
158		-/*
159		- * This is a fast pass over the inode cache to try to get reclaim moving on as
160		- * many inodes as possible in a short period of time. It kicks itself every few
161		- * seconds, as well as being kicked by the inode cache shrinker when memory
162		- * goes low. It scans as quickly as possible avoiding locked inodes or those
163		- * already being flushed, and once done schedules a future pass.
164		- */
165		-void
166		-xfs_reclaim_worker(
167		- struct work_struct *work)
168		-{
169		- struct xfs_mount *mp = container_of(to_delayed_work(work),
170		- struct xfs_mount, m_reclaim_work);
171		-
172		- xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
173		- xfs_reclaim_work_queue(mp);
174	157	}
175	158
176	159	static void
..	..	@@ -286,6 +269,8 @@
286	269	uint64_t version = inode_peek_iversion(inode);
287	270	umode_t mode = inode->i_mode;
288	271	dev_t dev = inode->i_rdev;
	272	+ kuid_t uid = inode->i_uid;
	273	+ kgid_t gid = inode->i_gid;
289	274
290	275	error = inode_init_always(mp->m_super, inode);
291	276
..	..	@@ -294,6 +279,8 @@
294	279	inode_set_iversion_queried(inode, version);
295	280	inode->i_mode = mode;
296	281	inode->i_rdev = dev;
	282	+ inode->i_uid = uid;
	283	+ inode->i_gid = gid;
297	284	return error;
298	285	}
299	286
..	..	@@ -416,6 +403,7 @@
416	403	spin_unlock(&ip->i_flags_lock);
417	404	rcu_read_unlock();
418	405
	406	+ ASSERT(!rwsem_is_locked(&inode->i_rwsem));
419	407	error = xfs_reinit_inode(mp, inode);
420	408	if (error) {
421	409	bool wake;
..	..	@@ -446,9 +434,8 @@
446	434	ip->i_flags \|= XFS_INEW;
447	435	xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
448	436	inode->i_state = I_NEW;
449		-
450		- ASSERT(!rwsem_is_locked(&inode->i_rwsem));
451		- init_rwsem(&inode->i_rwsem);
	437	+ ip->i_sick = 0;
	438	+ ip->i_checked = 0;
452	439
453	440	spin_unlock(&ip->i_flags_lock);
454	441	spin_unlock(&pag->pag_ici_lock);
..	..	@@ -470,7 +457,7 @@
470	457	xfs_ilock(ip, lock_flags);
471	458
472	459	if (!(flags & XFS_IGET_INCORE))
473		- xfs_iflags_clear(ip, XFS_ISTALE \| XFS_IDONTCACHE);
	460	+ xfs_iflags_clear(ip, XFS_ISTALE);
474	461	XFS_STATS_INC(mp, xs_ig_found);
475	462
476	463	return 0;
..	..	@@ -501,17 +488,41 @@
501	488	if (!ip)
502	489	return -ENOMEM;
503	490
504		- error = xfs_iread(mp, tp, ip, flags);
	491	+ error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, flags);
505	492	if (error)
506	493	goto out_destroy;
507	494
508		- if (!xfs_inode_verify_forks(ip)) {
509		- error = -EFSCORRUPTED;
510		- goto out_destroy;
	495	+ /*
	496	+ * For version 5 superblocks, if we are initialising a new inode and we
	497	+ * are not utilising the XFS_MOUNT_IKEEP inode cluster mode, we can
	498	+ * simply build the new inode core with a random generation number.
	499	+ *
	500	+ * For version 4 (and older) superblocks, log recovery is dependent on
	501	+ * the di_flushiter field being initialised from the current on-disk
	502	+ * value and hence we must also read the inode off disk even when
	503	+ * initializing new inodes.
	504	+ */
	505	+ if (xfs_sb_version_has_v3inode(&mp->m_sb) &&
	506	+ (flags & XFS_IGET_CREATE) && !(mp->m_flags & XFS_MOUNT_IKEEP)) {
	507	+ VFS_I(ip)->i_generation = prandom_u32();
	508	+ } else {
	509	+ struct xfs_dinode *dip;
	510	+ struct xfs_buf *bp;
	511	+
	512	+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0);
	513	+ if (error)
	514	+ goto out_destroy;
	515	+
	516	+ error = xfs_inode_from_disk(ip, dip);
	517	+ if (!error)
	518	+ xfs_buf_set_ref(bp, XFS_INO_REF);
	519	+ xfs_trans_brelse(tp, bp);
	520	+
	521	+ if (error)
	522	+ goto out_destroy;
511	523	}
512	524
513	525	trace_xfs_iget_miss(ip);
514		-
515	526
516	527	/*
517	528	* Check the inode free state is valid. This also detects lookup
..	..	@@ -552,7 +563,7 @@
552	563	*/
553	564	iflags = XFS_INEW;
554	565	if (flags & XFS_IGET_DONTCACHE)
555		- iflags \|= XFS_IDONTCACHE;
	566	+ d_mark_dontcache(VFS_I(ip));
556	567	ip->i_udquot = NULL;
557	568	ip->i_gdquot = NULL;
558	569	ip->i_pdquot = NULL;
..	..	@@ -585,48 +596,31 @@
585	596	}
586	597
587	598	/*
588		- * Look up an inode by number in the given file system.
589		- * The inode is looked up in the cache held in each AG.
590		- * If the inode is found in the cache, initialise the vfs inode
591		- * if necessary.
	599	+ * Look up an inode by number in the given file system. The inode is looked up
	600	+ * in the cache held in each AG. If the inode is found in the cache, initialise
	601	+ * the vfs inode if necessary.
592	602	*
593		- * If it is not in core, read it in from the file system's device,
594		- * add it to the cache and initialise the vfs inode.
	603	+ * If it is not in core, read it in from the file system's device, add it to the
	604	+ * cache and initialise the vfs inode.
595	605	*
596	606	* The inode is locked according to the value of the lock_flags parameter.
597		- * This flag parameter indicates how and if the inode's IO lock and inode lock
598		- * should be taken.
599		- *
600		- * mp -- the mount point structure for the current file system. It points
601		- * to the inode hash table.
602		- * tp -- a pointer to the current transaction if there is one. This is
603		- * simply passed through to the xfs_iread() call.
604		- * ino -- the number of the inode desired. This is the unique identifier
605		- * within the file system for the inode being requested.
606		- * lock_flags -- flags indicating how to lock the inode. See the comment
607		- * for xfs_ilock() for a list of valid values.
	607	+ * Inode lookup is only done during metadata operations and not as part of the
	608	+ * data IO path. Hence we only allow locking of the XFS_ILOCK during lookup.
608	609	*/
609	610	int
610	611	xfs_iget(
611		- xfs_mount_t *mp,
612		- xfs_trans_t *tp,
613		- xfs_ino_t ino,
614		- uint flags,
615		- uint lock_flags,
616		- xfs_inode_t **ipp)
	612	+ struct xfs_mount *mp,
	613	+ struct xfs_trans *tp,
	614	+ xfs_ino_t ino,
	615	+ uint flags,
	616	+ uint lock_flags,
	617	+ struct xfs_inode **ipp)
617	618	{
618		- xfs_inode_t *ip;
619		- int error;
620		- xfs_perag_t *pag;
621		- xfs_agino_t agino;
	619	+ struct xfs_inode *ip;
	620	+ struct xfs_perag *pag;
	621	+ xfs_agino_t agino;
	622	+ int error;
622	623
623		- /*
624		- * xfs_reclaim_inode() uses the ILOCK to ensure an inode
625		- * doesn't get freed while it's being referenced during a
626		- * radix tree traversal here. It assumes this function
627		- * aqcuires only the ILOCK (and therefore it has no need to
628		- * involve the IOLOCK in this synchronization).
629		- */
630	624	ASSERT((lock_flags & (XFS_IOLOCK_EXCL \| XFS_IOLOCK_SHARED)) == 0);
631	625
632	626	/* reject inode numbers outside existing AGs */
..	..	@@ -728,25 +722,22 @@
728	722	*/
729	723	#define XFS_LOOKUP_BATCH 32
730	724
731		-STATIC int
732		-xfs_inode_ag_walk_grab(
	725	+/*
	726	+ * Decide if the given @ip is eligible to be a part of the inode walk, and
	727	+ * grab it if so. Returns true if it's ready to go or false if we should just
	728	+ * ignore it.
	729	+ */
	730	+STATIC bool
	731	+xfs_inode_walk_ag_grab(
733	732	struct xfs_inode *ip,
734	733	int flags)
735	734	{
736	735	struct inode *inode = VFS_I(ip);
737		- bool newinos = !!(flags & XFS_AGITER_INEW_WAIT);
	736	+ bool newinos = !!(flags & XFS_INODE_WALK_INEW_WAIT);
738	737
739	738	ASSERT(rcu_read_lock_held());
740	739
741		- /*
742		- * check for stale RCU freed inode
743		- *
744		- * If the inode has been reallocated, it doesn't matter if it's not in
745		- * the AG we are walking - we are walking for writeback, so if it
746		- * passes all the "valid inode" checks and is dirty, then we'll write
747		- * it back anyway. If it has been reallocated and still being
748		- * initialised, the XFS_INEW check below will catch it.
749		- */
	740	+ /* Check for stale RCU freed inode */
750	741	spin_lock(&ip->i_flags_lock);
751	742	if (!ip->i_ino)
752	743	goto out_unlock_noent;
..	..	@@ -759,39 +750,41 @@
759	750
760	751	/* nothing to sync during shutdown */
761	752	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
762		- return -EFSCORRUPTED;
	753	+ return false;
763	754
764	755	/* If we can't grab the inode, it must be on its way to reclaim. */
765	756	if (!igrab(inode))
766		- return -ENOENT;
	757	+ return false;
767	758
768	759	/* inode is valid */
769		- return 0;
	760	+ return true;
770	761
771	762	out_unlock_noent:
772	763	spin_unlock(&ip->i_flags_lock);
773		- return -ENOENT;
	764	+ return false;
774	765	}
775	766
	767	+/*
	768	+ * For a given per-AG structure @pag, grab, @execute, and rele all incore
	769	+ * inodes with the given radix tree @tag.
	770	+ */
776	771	STATIC int
777		-xfs_inode_ag_walk(
778		- struct xfs_mount *mp,
	772	+xfs_inode_walk_ag(
779	773	struct xfs_perag *pag,
780		- int (execute)(struct xfs_inode ip, int flags,
781		- void *args),
782		- int flags,
	774	+ int iter_flags,
	775	+ int (execute)(struct xfs_inode ip, void *args),
783	776	void *args,
784		- int tag,
785		- int iter_flags)
	777	+ int tag)
786	778	{
	779	+ struct xfs_mount *mp = pag->pag_mount;
787	780	uint32_t first_index;
788	781	int last_error = 0;
789	782	int skipped;
790		- int done;
	783	+ bool done;
791	784	int nr_found;
792	785
793	786	restart:
794		- done = 0;
	787	+ done = false;
795	788	skipped = 0;
796	789	first_index = 0;
797	790	nr_found = 0;
..	..	@@ -802,7 +795,7 @@
802	795
803	796	rcu_read_lock();
804	797
805		- if (tag == -1)
	798	+ if (tag == XFS_ICI_NO_TAG)
806	799	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
807	800	(void **)batch, first_index,
808	801	XFS_LOOKUP_BATCH);
..	..	@@ -824,7 +817,7 @@
824	817	for (i = 0; i < nr_found; i++) {
825	818	struct xfs_inode *ip = batch[i];
826	819
827		- if (done \|\| xfs_inode_ag_walk_grab(ip, iter_flags))
	820	+ if (done \|\| !xfs_inode_walk_ag_grab(ip, iter_flags))
828	821	batch[i] = NULL;
829	822
830	823	/*
..	..	@@ -843,7 +836,7 @@
843	836	continue;
844	837	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
845	838	if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
846		- done = 1;
	839	+ done = true;
847	840	}
848	841
849	842	/* unlock now we've grabbed the inodes. */
..	..	@@ -852,10 +845,10 @@
852	845	for (i = 0; i < nr_found; i++) {
853	846	if (!batch[i])
854	847	continue;
855		- if ((iter_flags & XFS_AGITER_INEW_WAIT) &&
	848	+ if ((iter_flags & XFS_INODE_WALK_INEW_WAIT) &&
856	849	xfs_iflags_test(batch[i], XFS_INEW))
857	850	xfs_inew_wait(batch[i]);
858		- error = execute(batch[i], flags, args);
	851	+ error = execute(batch[i], args);
859	852	xfs_irele(batch[i]);
860	853	if (error == -EAGAIN) {
861	854	skipped++;
..	..	@@ -876,6 +869,49 @@
876	869	if (skipped) {
877	870	delay(1);
878	871	goto restart;
	872	+ }
	873	+ return last_error;
	874	+}
	875	+
	876	+/* Fetch the next (possibly tagged) per-AG structure. */
	877	+static inline struct xfs_perag *
	878	+xfs_inode_walk_get_perag(
	879	+ struct xfs_mount *mp,
	880	+ xfs_agnumber_t agno,
	881	+ int tag)
	882	+{
	883	+ if (tag == XFS_ICI_NO_TAG)
	884	+ return xfs_perag_get(mp, agno);
	885	+ return xfs_perag_get_tag(mp, agno, tag);
	886	+}
	887	+
	888	+/*
	889	+ * Call the @execute function on all incore inodes matching the radix tree
	890	+ * @tag.
	891	+ */
	892	+int
	893	+xfs_inode_walk(
	894	+ struct xfs_mount *mp,
	895	+ int iter_flags,
	896	+ int (execute)(struct xfs_inode ip, void *args),
	897	+ void *args,
	898	+ int tag)
	899	+{
	900	+ struct xfs_perag *pag;
	901	+ int error = 0;
	902	+ int last_error = 0;
	903	+ xfs_agnumber_t ag;
	904	+
	905	+ ag = 0;
	906	+ while ((pag = xfs_inode_walk_get_perag(mp, ag, tag))) {
	907	+ ag = pag->pag_agno + 1;
	908	+ error = xfs_inode_walk_ag(pag, iter_flags, execute, args, tag);
	909	+ xfs_perag_put(pag);
	910	+ if (error) {
	911	+ last_error = error;
	912	+ if (error == -EFSCORRUPTED)
	913	+ break;
	914	+ }
879	915	}
880	916	return last_error;
881	917	}
..	..	@@ -943,233 +979,77 @@
943	979	xfs_queue_cowblocks(mp);
944	980	}
945	981
946		-int
947		-xfs_inode_ag_iterator_flags(
948		- struct xfs_mount *mp,
949		- int (execute)(struct xfs_inode ip, int flags,
950		- void *args),
951		- int flags,
952		- void *args,
953		- int iter_flags)
954		-{
955		- struct xfs_perag *pag;
956		- int error = 0;
957		- int last_error = 0;
958		- xfs_agnumber_t ag;
959		-
960		- ag = 0;
961		- while ((pag = xfs_perag_get(mp, ag))) {
962		- ag = pag->pag_agno + 1;
963		- error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1,
964		- iter_flags);
965		- xfs_perag_put(pag);
966		- if (error) {
967		- last_error = error;
968		- if (error == -EFSCORRUPTED)
969		- break;
970		- }
971		- }
972		- return last_error;
973		-}
974		-
975		-int
976		-xfs_inode_ag_iterator(
977		- struct xfs_mount *mp,
978		- int (execute)(struct xfs_inode ip, int flags,
979		- void *args),
980		- int flags,
981		- void *args)
982		-{
983		- return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0);
984		-}
985		-
986		-int
987		-xfs_inode_ag_iterator_tag(
988		- struct xfs_mount *mp,
989		- int (execute)(struct xfs_inode ip, int flags,
990		- void *args),
991		- int flags,
992		- void *args,
993		- int tag)
994		-{
995		- struct xfs_perag *pag;
996		- int error = 0;
997		- int last_error = 0;
998		- xfs_agnumber_t ag;
999		-
1000		- ag = 0;
1001		- while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
1002		- ag = pag->pag_agno + 1;
1003		- error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag,
1004		- 0);
1005		- xfs_perag_put(pag);
1006		- if (error) {
1007		- last_error = error;
1008		- if (error == -EFSCORRUPTED)
1009		- break;
1010		- }
1011		- }
1012		- return last_error;
1013		-}
1014		-
1015	982	/*
1016	983	* Grab the inode for reclaim exclusively.
1017		- * Return 0 if we grabbed it, non-zero otherwise.
	984	+ *
	985	+ * We have found this inode via a lookup under RCU, so the inode may have
	986	+ * already been freed, or it may be in the process of being recycled by
	987	+ * xfs_iget(). In both cases, the inode will have XFS_IRECLAIM set. If the inode
	988	+ * has been fully recycled by the time we get the i_flags_lock, XFS_IRECLAIMABLE
	989	+ * will not be set. Hence we need to check for both these flag conditions to
	990	+ * avoid inodes that are no longer reclaim candidates.
	991	+ *
	992	+ * Note: checking for other state flags here, under the i_flags_lock or not, is
	993	+ * racy and should be avoided. Those races should be resolved only after we have
	994	+ * ensured that we are able to reclaim this inode and the world can see that we
	995	+ * are going to reclaim it.
	996	+ *
	997	+ * Return true if we grabbed it, false otherwise.
1018	998	*/
1019		-STATIC int
	999	+static bool
1020	1000	xfs_reclaim_inode_grab(
1021		- struct xfs_inode *ip,
1022		- int flags)
	1001	+ struct xfs_inode *ip)
1023	1002	{
1024	1003	ASSERT(rcu_read_lock_held());
1025	1004
1026		- /* quick check for stale RCU freed inode */
1027		- if (!ip->i_ino)
1028		- return 1;
1029		-
1030		- /*
1031		- * If we are asked for non-blocking operation, do unlocked checks to
1032		- * see if the inode already is being flushed or in reclaim to avoid
1033		- * lock traffic.
1034		- */
1035		- if ((flags & SYNC_TRYLOCK) &&
1036		- __xfs_iflags_test(ip, XFS_IFLOCK \| XFS_IRECLAIM))
1037		- return 1;
1038		-
1039		- /*
1040		- * The radix tree lock here protects a thread in xfs_iget from racing
1041		- * with us starting reclaim on the inode. Once we have the
1042		- * XFS_IRECLAIM flag set it will not touch us.
1043		- *
1044		- * Due to RCU lookup, we may find inodes that have been freed and only
1045		- * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
1046		- * aren't candidates for reclaim at all, so we must check the
1047		- * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
1048		- */
1049	1005	spin_lock(&ip->i_flags_lock);
1050	1006	if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) \|\|
1051	1007	__xfs_iflags_test(ip, XFS_IRECLAIM)) {
1052	1008	/* not a reclaim candidate. */
1053	1009	spin_unlock(&ip->i_flags_lock);
1054		- return 1;
	1010	+ return false;
1055	1011	}
1056	1012	__xfs_iflags_set(ip, XFS_IRECLAIM);
1057	1013	spin_unlock(&ip->i_flags_lock);
1058		- return 0;
	1014	+ return true;
1059	1015	}
1060	1016
1061	1017	/*
1062		- * Inodes in different states need to be treated differently. The following
1063		- * table lists the inode states and the reclaim actions necessary:
	1018	+ * Inode reclaim is non-blocking, so the default action if progress cannot be
	1019	+ * made is to "requeue" the inode for reclaim by unlocking it and clearing the
	1020	+ * XFS_IRECLAIM flag. If we are in a shutdown state, we don't care about
	1021	+ * blocking anymore and hence we can wait for the inode to be able to reclaim
	1022	+ * it.
1064	1023	*
1065		- * inode state iflush ret required action
1066		- * --------------- ---------- ---------------
1067		- * bad - reclaim
1068		- * shutdown EIO unpin and reclaim
1069		- * clean, unpinned 0 reclaim
1070		- * stale, unpinned 0 reclaim
1071		- * clean, pinned(*) 0 requeue
1072		- * stale, pinned EAGAIN requeue
1073		- * dirty, async - requeue
1074		- * dirty, sync 0 reclaim
1075		- *
1076		- * (*) dgc: I don't think the clean, pinned state is possible but it gets
1077		- * handled anyway given the order of checks implemented.
1078		- *
1079		- * Also, because we get the flush lock first, we know that any inode that has
1080		- * been flushed delwri has had the flush completed by the time we check that
1081		- * the inode is clean.
1082		- *
1083		- * Note that because the inode is flushed delayed write by AIL pushing, the
1084		- * flush lock may already be held here and waiting on it can result in very
1085		- * long latencies. Hence for sync reclaims, where we wait on the flush lock,
1086		- * the caller should push the AIL first before trying to reclaim inodes to
1087		- * minimise the amount of time spent waiting. For background relaim, we only
1088		- * bother to reclaim clean inodes anyway.
1089		- *
1090		- * Hence the order of actions after gaining the locks should be:
1091		- * bad => reclaim
1092		- * shutdown => unpin and reclaim
1093		- * pinned, async => requeue
1094		- * pinned, sync => unpin
1095		- * stale => reclaim
1096		- * clean => reclaim
1097		- * dirty, async => requeue
1098		- * dirty, sync => flush, wait and reclaim
	1024	+ * We do no IO here - if callers require inodes to be cleaned they must push the
	1025	+ * AIL first to trigger writeback of dirty inodes. This enables writeback to be
	1026	+ * done in the background in a non-blocking manner, and enables memory reclaim
	1027	+ * to make progress without blocking.
1099	1028	*/
1100		-STATIC int
	1029	+static void
1101	1030	xfs_reclaim_inode(
1102	1031	struct xfs_inode *ip,
1103		- struct xfs_perag *pag,
1104		- int sync_mode)
	1032	+ struct xfs_perag *pag)
1105	1033	{
1106		- struct xfs_buf *bp = NULL;
1107	1034	xfs_ino_t ino = ip->i_ino; /* for radix_tree_delete */
1108		- int error;
1109	1035
1110		-restart:
1111		- error = 0;
1112		- xfs_ilock(ip, XFS_ILOCK_EXCL);
1113		- if (!xfs_iflock_nowait(ip)) {
1114		- if (!(sync_mode & SYNC_WAIT))
1115		- goto out;
1116		- xfs_iflock(ip);
1117		- }
	1036	+ if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
	1037	+ goto out;
	1038	+ if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
	1039	+ goto out_iunlock;
1118	1040
1119	1041	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
1120	1042	xfs_iunpin_wait(ip);
1121		- /* xfs_iflush_abort() drops the flush lock */
1122		- xfs_iflush_abort(ip, false);
	1043	+ xfs_iflush_abort(ip);
1123	1044	goto reclaim;
1124	1045	}
1125		- if (xfs_ipincount(ip)) {
1126		- if (!(sync_mode & SYNC_WAIT))
1127		- goto out_ifunlock;
1128		- xfs_iunpin_wait(ip);
1129		- }
1130		- if (xfs_inode_clean(ip)) {
1131		- xfs_ifunlock(ip);
1132		- goto reclaim;
1133		- }
	1046	+ if (xfs_ipincount(ip))
	1047	+ goto out_clear_flush;
	1048	+ if (!xfs_inode_clean(ip))
	1049	+ goto out_clear_flush;
1134	1050
1135		- /*
1136		- * Never flush out dirty data during non-blocking reclaim, as it would
1137		- * just contend with AIL pushing trying to do the same job.
1138		- */
1139		- if (!(sync_mode & SYNC_WAIT))
1140		- goto out_ifunlock;
1141		-
1142		- /*
1143		- * Now we have an inode that needs flushing.
1144		- *
1145		- * Note that xfs_iflush will never block on the inode buffer lock, as
1146		- * xfs_ifree_cluster() can lock the inode buffer before it locks the
1147		- * ip->i_lock, and we are doing the exact opposite here. As a result,
1148		- * doing a blocking xfs_imap_to_bp() to get the cluster buffer would
1149		- * result in an ABBA deadlock with xfs_ifree_cluster().
1150		- *
1151		- * As xfs_ifree_cluser() must gather all inodes that are active in the
1152		- * cache to mark them stale, if we hit this case we don't actually want
1153		- * to do IO here - we want the inode marked stale so we can simply
1154		- * reclaim it. Hence if we get an EAGAIN error here, just unlock the
1155		- * inode, back off and try again. Hopefully the next pass through will
1156		- * see the stale flag set on the inode.
1157		- */
1158		- error = xfs_iflush(ip, &bp);
1159		- if (error == -EAGAIN) {
1160		- xfs_iunlock(ip, XFS_ILOCK_EXCL);
1161		- /* backoff longer than in xfs_ifree_cluster */
1162		- delay(2);
1163		- goto restart;
1164		- }
1165		-
1166		- if (!error) {
1167		- error = xfs_bwrite(bp);
1168		- xfs_buf_relse(bp);
1169		- }
1170		-
	1051	+ xfs_iflags_clear(ip, XFS_IFLUSHING);
1171	1052	reclaim:
1172		- ASSERT(!xfs_isiflocked(ip));
1173	1053
1174	1054	/*
1175	1055	* Because we use RCU freeing we need to ensure the inode always appears
..	..	@@ -1217,21 +1097,14 @@
1217	1097	ASSERT(xfs_inode_clean(ip));
1218	1098
1219	1099	__xfs_inode_free(ip);
1220		- return error;
	1100	+ return;
1221	1101
1222		-out_ifunlock:
1223		- xfs_ifunlock(ip);
	1102	+out_clear_flush:
	1103	+ xfs_iflags_clear(ip, XFS_IFLUSHING);
	1104	+out_iunlock:
	1105	+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
1224	1106	out:
1225	1107	xfs_iflags_clear(ip, XFS_IRECLAIM);
1226		- xfs_iunlock(ip, XFS_ILOCK_EXCL);
1227		- /*
1228		- * We could return -EAGAIN here to make reclaim rescan the inode tree in
1229		- * a short while. However, this just burns CPU time scanning the tree
1230		- * waiting for IO to complete and the reclaim work never goes back to
1231		- * the idle state. Instead, return 0 to let the next scheduled
1232		- * background reclaim attempt to reclaim the inode again.
1233		- */
1234		- return 0;
1235	1108	}
1236	1109
1237	1110	/*
..	..	@@ -1239,23 +1112,19 @@
1239	1112	* corrupted, we still want to try to reclaim all the inodes. If we don't,
1240	1113	* then a shut down during the filesystem unmount reclaim walk will leak all the
1241	1114	* unreclaimed inodes.
	1115	+ *
	1116	+ * Nothing is returned: AGs or inodes that were skipped in the reclaim pass
	1117	+ * are not reported. Callers that want to block until all inodes are reclaimed
	1118	+ * must loop for as long as any AG still carries XFS_ICI_RECLAIM_TAG.
1242	1119	*/
1243		-STATIC int
	1120	+static void
1244	1121	xfs_reclaim_inodes_ag(
1245	1122	struct xfs_mount *mp,
1246		- int flags,
1247	1123	int *nr_to_scan)
1248	1124	{
1249	1125	struct xfs_perag *pag;
1250		- int error = 0;
1251		- int last_error = 0;
1252		- xfs_agnumber_t ag;
1253		- int trylock = flags & SYNC_TRYLOCK;
1254		- int skipped;
	1126	+ xfs_agnumber_t ag = 0;
1255	1127
1256		-restart:
1257		- ag = 0;
1258		- skipped = 0;
1259	1128	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
1260	1129	unsigned long first_index = 0;
1261	1130	int done = 0;
..	..	@@ -1263,16 +1132,7 @@
1263	1132
1264	1133	ag = pag->pag_agno + 1;
1265	1134
1266		- if (trylock) {
1267		- if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
1268		- skipped++;
1269		- xfs_perag_put(pag);
1270		- continue;
1271		- }
1272		- first_index = pag->pag_ici_reclaim_cursor;
1273		- } else
1274		- mutex_lock(&pag->pag_ici_reclaim_lock);
1275		-
	1135	+ first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
1276	1136	do {
1277	1137	struct xfs_inode *batch[XFS_LOOKUP_BATCH];
1278	1138	int i;
..	..	@@ -1296,7 +1156,7 @@
1296	1156	for (i = 0; i < nr_found; i++) {
1297	1157	struct xfs_inode *ip = batch[i];
1298	1158
1299		- if (done \|\| xfs_reclaim_inode_grab(ip, flags))
	1159	+ if (done \|\| !xfs_reclaim_inode_grab(ip))
1300	1160	batch[i] = NULL;
1301	1161
1302	1162	/*
..	..	@@ -1325,59 +1185,39 @@
1325	1185	rcu_read_unlock();
1326	1186
1327	1187	for (i = 0; i < nr_found; i++) {
1328		- if (!batch[i])
1329		- continue;
1330		- error = xfs_reclaim_inode(batch[i], pag, flags);
1331		- if (error && last_error != -EFSCORRUPTED)
1332		- last_error = error;
	1188	+ if (batch[i])
	1189	+ xfs_reclaim_inode(batch[i], pag);
1333	1190	}
1334	1191
1335	1192	*nr_to_scan -= XFS_LOOKUP_BATCH;
1336		-
1337	1193	cond_resched();
1338		-
1339	1194	} while (nr_found && !done && *nr_to_scan > 0);
1340	1195
1341		- if (trylock && !done)
1342		- pag->pag_ici_reclaim_cursor = first_index;
1343		- else
1344		- pag->pag_ici_reclaim_cursor = 0;
1345		- mutex_unlock(&pag->pag_ici_reclaim_lock);
	1196	+ if (done)
	1197	+ first_index = 0;
	1198	+ WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
1346	1199	xfs_perag_put(pag);
1347	1200	}
1348		-
1349		- /*
1350		- * if we skipped any AG, and we still have scan count remaining, do
1351		- * another pass this time using blocking reclaim semantics (i.e
1352		- * waiting on the reclaim locks and ignoring the reclaim cursors). This
1353		- * ensure that when we get more reclaimers than AGs we block rather
1354		- * than spin trying to execute reclaim.
1355		- */
1356		- if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
1357		- trylock = 0;
1358		- goto restart;
1359		- }
1360		- return last_error;
1361	1201	}
1362	1202
1363		-int
	1203	+void
1364	1204	xfs_reclaim_inodes(
1365		- xfs_mount_t *mp,
1366		- int mode)
	1205	+ struct xfs_mount *mp)
1367	1206	{
1368	1207	int nr_to_scan = INT_MAX;
1369	1208
1370		- return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
	1209	+ while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
	1210	+ xfs_ail_push_all_sync(mp->m_ail);
	1211	+ xfs_reclaim_inodes_ag(mp, &nr_to_scan);
	1212	+ }
1371	1213	}
1372	1214
1373	1215	/*
1374		- * Scan a certain number of inodes for reclaim.
1375		- *
1376		- * When called we make sure that there is a background (fast) inode reclaim in
1377		- * progress, while we will throttle the speed of reclaim via doing synchronous
1378		- * reclaim of inodes. That means if we come across dirty inodes, we wait for
1379		- * them to be cleaned, which we hope will not be very long due to the
1380		- * background walker having already kicked the IO off on those dirty inodes.
	1216	+ * The shrinker infrastructure determines how many inodes we should scan for
	1217	+ * reclaim. We want as many clean inodes ready to reclaim as possible, so we
	1218	+ * push the AIL here. We also want to proactively free up memory if we can to
	1219	+ * minimise the amount of work memory reclaim has to do so we kick the
	1220	+ * background reclaim if it isn't already scheduled.
1381	1221	*/
1382	1222	long
1383	1223	xfs_reclaim_inodes_nr(
..	..	@@ -1388,7 +1228,8 @@
1388	1228	xfs_reclaim_work_queue(mp);
1389	1229	xfs_ail_push_all(mp->m_ail);
1390	1230
1391		- return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK \| SYNC_WAIT, &nr_to_scan);
	1231	+ xfs_reclaim_inodes_ag(mp, &nr_to_scan);
	1232	+ return 0;
1392	1233	}
1393	1234
1394	1235	/*
..	..	@@ -1411,59 +1252,108 @@
1411	1252	return reclaimable;
1412	1253	}
1413	1254
1414		-STATIC int
	1255	+STATIC bool
1415	1256	xfs_inode_match_id(
1416	1257	struct xfs_inode *ip,
1417	1258	struct xfs_eofblocks *eofb)
1418	1259	{
1419	1260	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
1420	1261	!uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
1421		- return 0;
	1262	+ return false;
1422	1263
1423	1264	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
1424	1265	!gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
1425		- return 0;
	1266	+ return false;
1426	1267
1427	1268	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
1428		- xfs_get_projid(ip) != eofb->eof_prid)
1429		- return 0;
	1269	+ ip->i_d.di_projid != eofb->eof_prid)
	1270	+ return false;
1430	1271
1431		- return 1;
	1272	+ return true;
1432	1273	}
1433	1274
1434	1275	/*
1435	1276	* A union-based inode filtering algorithm. Process the inode if any of the
1436	1277	* criteria match. This is for global/internal scans only.
1437	1278	*/
1438		-STATIC int
	1279	+STATIC bool
1439	1280	xfs_inode_match_id_union(
1440	1281	struct xfs_inode *ip,
1441	1282	struct xfs_eofblocks *eofb)
1442	1283	{
1443	1284	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
1444	1285	uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
1445		- return 1;
	1286	+ return true;
1446	1287
1447	1288	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
1448	1289	gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
1449		- return 1;
	1290	+ return true;
1450	1291
1451	1292	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
1452		- xfs_get_projid(ip) == eofb->eof_prid)
1453		- return 1;
	1293	+ ip->i_d.di_projid == eofb->eof_prid)
	1294	+ return true;
1454	1295
1455		- return 0;
	1296	+ return false;
	1297	+}
	1298	+
	1299	+/*
	1300	+ * Is this inode @ip eligible for eof/cow block reclamation, given some
	1301	+ * filtering parameters @eofb? The inode is eligible if @eofb is null or
	1302	+ * if the predicate functions match.
	1303	+ */
	1304	+static bool
	1305	+xfs_inode_matches_eofb(
	1306	+ struct xfs_inode *ip,
	1307	+ struct xfs_eofblocks *eofb)
	1308	+{
	1309	+ bool match;
	1310	+
	1311	+ if (!eofb)
	1312	+ return true;
	1313	+
	1314	+ if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
	1315	+ match = xfs_inode_match_id_union(ip, eofb);
	1316	+ else
	1317	+ match = xfs_inode_match_id(ip, eofb);
	1318	+ if (!match)
	1319	+ return false;
	1320	+
	1321	+ /* skip the inode if the file size is too small */
	1322	+ if ((eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) &&
	1323	+ XFS_ISIZE(ip) < eofb->eof_min_file_size)
	1324	+ return false;
	1325	+
	1326	+ return true;
	1327	+}
	1328	+
	1329	+/*
	1330	+ * This is a fast pass over the inode cache to try to get reclaim moving on as
	1331	+ * many inodes as possible in a short period of time. It kicks itself every few
	1332	+ * seconds, as well as being kicked by the inode cache shrinker when memory
	1333	+ * goes low.
	1334	+ */
	1335	+void
	1336	+xfs_reclaim_worker(
	1337	+ struct work_struct *work)
	1338	+{
	1339	+ struct xfs_mount *mp = container_of(to_delayed_work(work),
	1340	+ struct xfs_mount, m_reclaim_work);
	1341	+ int nr_to_scan = INT_MAX;
	1342	+
	1343	+ xfs_reclaim_inodes_ag(mp, &nr_to_scan);
	1344	+ xfs_reclaim_work_queue(mp);
1456	1345	}
1457	1346
1458	1347	STATIC int
1459	1348	xfs_inode_free_eofblocks(
1460	1349	struct xfs_inode *ip,
1461		- int flags,
1462	1350	void *args)
1463	1351	{
1464		- int ret = 0;
1465		- struct xfs_eofblocks *eofb = args;
1466		- int match;
	1352	+ struct xfs_eofblocks *eofb = args;
	1353	+ bool wait;
	1354	+ int ret;
	1355	+
	1356	+ wait = eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC);
1467	1357
1468	1358	if (!xfs_can_free_eofblocks(ip, false)) {
1469	1359	/* inode could be preallocated or append-only */
..	..	@@ -1476,54 +1366,26 @@
1476	1366	* If the mapping is dirty the operation can block and wait for some
1477	1367	* time. Unless we are waiting, skip it.
1478	1368	*/
1479		- if (!(flags & SYNC_WAIT) &&
1480		- mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
	1369	+ if (!wait && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
1481	1370	return 0;
1482	1371
1483		- if (eofb) {
1484		- if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
1485		- match = xfs_inode_match_id_union(ip, eofb);
1486		- else
1487		- match = xfs_inode_match_id(ip, eofb);
1488		- if (!match)
1489		- return 0;
1490		-
1491		- /* skip the inode if the file size is too small */
1492		- if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
1493		- XFS_ISIZE(ip) < eofb->eof_min_file_size)
1494		- return 0;
1495		- }
	1372	+ if (!xfs_inode_matches_eofb(ip, eofb))
	1373	+ return 0;
1496	1374
1497	1375	/*
1498	1376	* If the caller is waiting, return -EAGAIN to keep the background
1499	1377	* scanner moving and revisit the inode in a subsequent pass.
1500	1378	*/
1501	1379	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
1502		- if (flags & SYNC_WAIT)
1503		- ret = -EAGAIN;
1504		- return ret;
	1380	+ if (wait)
	1381	+ return -EAGAIN;
	1382	+ return 0;
1505	1383	}
	1384	+
1506	1385	ret = xfs_free_eofblocks(ip);
1507	1386	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1508	1387
1509	1388	return ret;
1510		-}
1511		-
1512		-static int
1513		-__xfs_icache_free_eofblocks(
1514		- struct xfs_mount *mp,
1515		- struct xfs_eofblocks *eofb,
1516		- int (*execute)(struct xfs_inode *ip, int flags,
1517		- void *args),
1518		- int tag)
1519		-{
1520		- int flags = SYNC_TRYLOCK;
1521		-
1522		- if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC))
1523		- flags = SYNC_WAIT;
1524		-
1525		- return xfs_inode_ag_iterator_tag(mp, execute, flags,
1526		- eofb, tag);
1527	1389	}
1528	1390
1529	1391	int
..	..	@@ -1531,7 +1393,7 @@
1531	1393	struct xfs_mount *mp,
1532	1394	struct xfs_eofblocks *eofb)
1533	1395	{
1534		- return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_eofblocks,
	1396	+ return xfs_inode_walk(mp, 0, xfs_inode_free_eofblocks, eofb,
1535	1397	XFS_ICI_EOFBLOCKS_TAG);
1536	1398	}
1537	1399
..	..	@@ -1558,7 +1420,7 @@
1558	1420	eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
1559	1421
1560	1422	if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
1561		- dq = xfs_inode_dquot(ip, XFS_DQ_USER);
	1423	+ dq = xfs_inode_dquot(ip, XFS_DQTYPE_USER);
1562	1424	if (dq && xfs_dquot_lowsp(dq)) {
1563	1425	eofb.eof_uid = VFS_I(ip)->i_uid;
1564	1426	eofb.eof_flags |= XFS_EOF_FLAGS_UID;
..	..	@@ -1567,7 +1429,7 @@
1567	1429	}
1568	1430
1569	1431	if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
1570		- dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
	1432	+ dq = xfs_inode_dquot(ip, XFS_DQTYPE_GROUP);
1571	1433	if (dq && xfs_dquot_lowsp(dq)) {
1572	1434	eofb.eof_gid = VFS_I(ip)->i_gid;
1573	1435	eofb.eof_flags |= XFS_EOF_FLAGS_GID;
..	..	@@ -1748,29 +1610,16 @@
1748	1610	STATIC int
1749	1611	xfs_inode_free_cowblocks(
1750	1612	struct xfs_inode *ip,
1751		- int flags,
1752	1613	void *args)
1753	1614	{
1754	1615	struct xfs_eofblocks *eofb = args;
1755		- int match;
1756	1616	int ret = 0;
1757	1617
1758	1618	if (!xfs_prep_free_cowblocks(ip))
1759	1619	return 0;
1760	1620
1761		- if (eofb) {
1762		- if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
1763		- match = xfs_inode_match_id_union(ip, eofb);
1764		- else
1765		- match = xfs_inode_match_id(ip, eofb);
1766		- if (!match)
1767		- return 0;
1768		-
1769		- /* skip the inode if the file size is too small */
1770		- if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
1771		- XFS_ISIZE(ip) < eofb->eof_min_file_size)
1772		- return 0;
1773		- }
	1621	+ if (!xfs_inode_matches_eofb(ip, eofb))
	1622	+ return 0;
1774	1623
1775	1624	/* Free the CoW blocks */
1776	1625	xfs_ilock(ip, XFS_IOLOCK_EXCL);
..	..	@@ -1794,7 +1643,7 @@
1794	1643	struct xfs_mount *mp,
1795	1644	struct xfs_eofblocks *eofb)
1796	1645	{
1797		- return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_cowblocks,
	1646	+ return xfs_inode_walk(mp, 0, xfs_inode_free_cowblocks, eofb,
1798	1647	XFS_ICI_COWBLOCKS_TAG);
1799	1648	}
1800	1649
..	..	@@ -1826,7 +1675,7 @@
1826	1675
1827	1676	/* Disable post-EOF and CoW block auto-reclamation. */
1828	1677	void
1829		-xfs_icache_disable_reclaim(
	1678	+xfs_stop_block_reaping(
1830	1679	struct xfs_mount *mp)
1831	1680	{
1832	1681	cancel_delayed_work_sync(&mp->m_eofblocks_work);
..	..	@@ -1835,7 +1684,7 @@
1835	1684
1836	1685	/* Enable post-EOF and CoW block auto-reclamation. */
1837	1686	void
1838		-xfs_icache_enable_reclaim(
	1687	+xfs_start_block_reaping(
1839	1688	struct xfs_mount *mp)
1840	1689	{
1841	1690	xfs_queue_eofblocks(mp);