2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/fs/ext4/inode.c
@@ -49,8 +49,6 @@
 #include <trace/events/ext4.h>
 #include <trace/events/android_fs.h>
 
-#define MPAGE_DA_EXTENT_TAIL 0x01
-
 static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
                              struct ext4_inode_info *ei)
 {
@@ -104,8 +102,8 @@
         return provided == calculated;
 }
 
-static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
-                                struct ext4_inode_info *ei)
+void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
+                         struct ext4_inode_info *ei)
 {
         __u32 csum;
 
@@ -162,32 +160,6 @@
         }
         return S_ISLNK(inode->i_mode) && inode->i_size &&
                (inode->i_size < EXT4_N_BLOCKS * 4);
-}
-
-/*
- * Restart the transaction associated with *handle. This does a commit,
- * so before we call here everything must be consistently dirtied against
- * this transaction.
- */
-int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
-                                int nblocks)
-{
-        int ret;
-
-        /*
-         * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
-         * moment, get_block can be called only for blocks inside i_size since
-         * page cache has been already dropped and writes are blocked by
-         * i_mutex. So we can safely drop the i_data_sem here.
-         */
-        BUG_ON(EXT4_JOURNAL(inode) == NULL);
-        jbd_debug(2, "restarting handle %p\n", handle);
-        up_write(&EXT4_I(inode)->i_data_sem);
-        ret = ext4_journal_restart(handle, nblocks);
-        down_write(&EXT4_I(inode)->i_data_sem);
-        ext4_discard_preallocations(inode);
-
-        return ret;
 }
 
 /*
@@ -251,6 +223,16 @@
         truncate_inode_pages_final(&inode->i_data);
 
         /*
+         * For inodes with journalled data, transaction commit could have
+         * dirtied the inode. Flush worker is ignoring it because of I_FREEING
+         * flag but we still need to remove the inode from the writeback lists.
+         */
+        if (!list_empty_careful(&inode->i_io_list)) {
+                WARN_ON_ONCE(!ext4_should_journal_data(inode));
+                inode_io_list_del(inode);
+        }
+
+        /*
          * Protect us against freezing - iput() caller didn't have to have any
          * protection against it. When we are in a running transaction though,
         * we are already protected against freezing and we cannot grab further
@@ -305,9 +287,9 @@
         if (inode->i_blocks) {
                 err = ext4_truncate(inode);
                 if (err) {
-                        ext4_error(inode->i_sb,
-                                   "couldn't truncate inode %lu (err %d)",
-                                   inode->i_ino, err);
+                        ext4_error_err(inode->i_sb, -err,
+                                       "couldn't truncate inode %lu (err %d)",
+                                       inode->i_ino, err);
                         goto stop_handle;
                 }
         }
@@ -355,6 +337,8 @@
         ext4_xattr_inode_array_free(ea_inode_array);
         return;
 no_delete:
+        if (!list_empty(&EXT4_I(inode)->i_fc_list))
+                ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
         ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
 }
 
@@ -410,8 +394,8 @@
          * inode's preallocations.
          */
         if ((ei->i_reserved_data_blocks == 0) &&
-            (atomic_read(&inode->i_writecount) == 0))
-                ext4_discard_preallocations(inode);
+            !inode_is_open_for_write(inode))
+                ext4_discard_preallocations(inode, 0);
 }
 
 static int __check_block_validity(struct inode *inode, const char *func,
@@ -437,7 +421,7 @@
 {
         int ret;
 
-        if (IS_ENCRYPTED(inode))
+        if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
                 return fscrypt_zeroout_range(inode, lblk, pblk, len);
 
         ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
@@ -469,11 +453,9 @@
          */
         down_read(&EXT4_I(inode)->i_data_sem);
         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-                retval = ext4_ext_map_blocks(handle, inode, map, flags &
-                                             EXT4_GET_BLOCKS_KEEP_SIZE);
+                retval = ext4_ext_map_blocks(handle, inode, map, 0);
         } else {
-                retval = ext4_ind_map_blocks(handle, inode, map, flags &
-                                             EXT4_GET_BLOCKS_KEEP_SIZE);
+                retval = ext4_ind_map_blocks(handle, inode, map, 0);
         }
         up_read((&EXT4_I(inode)->i_data_sem));
 
@@ -530,9 +512,8 @@
 #endif
 
         map->m_flags = 0;
-        ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
-                  "logical block %lu\n", inode->i_ino, flags, map->m_len,
-                  (unsigned long) map->m_lblk);
+        ext_debug(inode, "flag 0x%x, max_blocks %u, logical block %lu\n",
+                  flags, map->m_len, (unsigned long) map->m_lblk);
 
         /*
          * ext4_map_blocks returns an int, and m_len is an unsigned int
@@ -545,7 +526,8 @@
                 return -EFSCORRUPTED;
 
         /* Lookup extent status tree firstly */
-        if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+        if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) &&
+            ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
                 if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
                         map->m_pblk = ext4_es_pblock(&es) +
                                         map->m_lblk - es.es_lblk;
@@ -563,7 +545,7 @@
                         map->m_len = retval;
                         retval = 0;
                 } else {
-                        BUG_ON(1);
+                        BUG();
                 }
 #ifdef ES_AGGRESSIVE_TEST
                 ext4_map_blocks_es_recheck(handle, inode, map,
@@ -578,11 +560,9 @@
          */
         down_read(&EXT4_I(inode)->i_data_sem);
         if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
-                retval = ext4_ext_map_blocks(handle, inode, map, flags &
-                                             EXT4_GET_BLOCKS_KEEP_SIZE);
+                retval = ext4_ext_map_blocks(handle, inode, map, 0);
         } else {
-                retval = ext4_ind_map_blocks(handle, inode, map, flags &
-                                             EXT4_GET_BLOCKS_KEEP_SIZE);
+                retval = ext4_ind_map_blocks(handle, inode, map, 0);
         }
         if (retval > 0) {
                 unsigned int status;
@@ -599,8 +579,8 @@
                                 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
                 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
                     !(status & EXTENT_STATUS_WRITTEN) &&
-                    ext4_find_delalloc_range(inode, map->m_lblk,
-                                             map->m_lblk + map->m_len - 1))
+                    ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+                                       map->m_lblk + map->m_len - 1))
                         status |= EXTENT_STATUS_DELAYED;
                 ret = ext4_es_insert_extent(inode, map->m_lblk,
                                             map->m_len, map->m_pblk, status);
@@ -700,8 +680,6 @@
         if (flags & EXT4_GET_BLOCKS_ZERO &&
             map->m_flags & EXT4_MAP_MAPPED &&
             map->m_flags & EXT4_MAP_NEW) {
-                clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
-                                   map->m_len);
                 ret = ext4_issue_zeroout(inode, map->m_lblk,
                                          map->m_pblk, map->m_len);
                 if (ret) {
@@ -715,7 +693,7 @@
                  * extent status tree.
                  */
                 if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
-                    ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+                    ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
                         if (ext4_es_is_written(&es))
                                 goto out_sem;
                 }
@@ -723,8 +701,8 @@
                                 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
                 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
                     !(status & EXTENT_STATUS_WRITTEN) &&
-                    ext4_find_delalloc_range(inode, map->m_lblk,
-                                             map->m_lblk + map->m_len - 1))
+                    ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
+                                       map->m_lblk + map->m_len - 1))
                         status |= EXTENT_STATUS_DELAYED;
                 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
                                             map->m_pblk, status);
@@ -765,6 +743,12 @@
                         return ret;
                 }
         }
+        if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN ||
+                                map->m_flags & EXT4_MAP_MAPPED))
+                ext4_fc_track_range(handle, inode, map->m_lblk,
+                                        map->m_lblk + map->m_len - 1);
+        if (retval < 0)
+                ext_debug(inode, "failed with err %d\n", retval);
         return retval;
 }
 
@@ -847,136 +831,6 @@
 #define DIO_MAX_BLOCKS 4096
 
 /*
- * Get blocks function for the cases that need to start a transaction -
- * generally difference cases of direct IO and DAX IO. It also handles retries
- * in case of ENOSPC.
- */
-static int ext4_get_block_trans(struct inode *inode, sector_t iblock,
-                                struct buffer_head *bh_result, int flags)
-{
-        int dio_credits;
-        handle_t *handle;
-        int retries = 0;
-        int ret;
-
-        /* Trim mapping request to maximum we can map at once for DIO */
-        if (bh_result->b_size >> inode->i_blkbits > DIO_MAX_BLOCKS)
-                bh_result->b_size = DIO_MAX_BLOCKS << inode->i_blkbits;
-        dio_credits = ext4_chunk_trans_blocks(inode,
-                                      bh_result->b_size >> inode->i_blkbits);
-retry:
-        handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
-        if (IS_ERR(handle))
-                return PTR_ERR(handle);
-
-        ret = _ext4_get_block(inode, iblock, bh_result, flags);
-        ext4_journal_stop(handle);
-
-        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-                goto retry;
-        return ret;
-}
-
-/* Get block function for DIO reads and writes to inodes without extents */
-int ext4_dio_get_block(struct inode *inode, sector_t iblock,
-                       struct buffer_head *bh, int create)
-{
-        /* We don't expect handle for direct IO */
-        WARN_ON_ONCE(ext4_journal_current_handle());
-
-        if (!create)
-                return _ext4_get_block(inode, iblock, bh, 0);
-        return ext4_get_block_trans(inode, iblock, bh, EXT4_GET_BLOCKS_CREATE);
-}
-
-/*
- * Get block function for AIO DIO writes when we create unwritten extent if
- * blocks are not allocated yet. The extent will be converted to written
- * after IO is complete.
- */
-static int ext4_dio_get_block_unwritten_async(struct inode *inode,
-                sector_t iblock, struct buffer_head *bh_result, int create)
-{
-        int ret;
-
-        /* We don't expect handle for direct IO */
-        WARN_ON_ONCE(ext4_journal_current_handle());
-
-        ret = ext4_get_block_trans(inode, iblock, bh_result,
-                                   EXT4_GET_BLOCKS_IO_CREATE_EXT);
-
-        /*
-         * When doing DIO using unwritten extents, we need io_end to convert
-         * unwritten extents to written on IO completion. We allocate io_end
-         * once we spot unwritten extent and store it in b_private. Generic
-         * DIO code keeps b_private set and furthermore passes the value to
-         * our completion callback in 'private' argument.
-         */
-        if (!ret && buffer_unwritten(bh_result)) {
-                if (!bh_result->b_private) {
-                        ext4_io_end_t *io_end;
-
-                        io_end = ext4_init_io_end(inode, GFP_KERNEL);
-                        if (!io_end)
-                                return -ENOMEM;
-                        bh_result->b_private = io_end;
-                        ext4_set_io_unwritten_flag(inode, io_end);
-                }
-                set_buffer_defer_completion(bh_result);
-        }
-
-        return ret;
-}
-
-/*
- * Get block function for non-AIO DIO writes when we create unwritten extent if
- * blocks are not allocated yet. The extent will be converted to written
- * after IO is complete by ext4_direct_IO_write().
- */
-static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
-                sector_t iblock, struct buffer_head *bh_result, int create)
-{
-        int ret;
-
-        /* We don't expect handle for direct IO */
-        WARN_ON_ONCE(ext4_journal_current_handle());
-
-        ret = ext4_get_block_trans(inode, iblock, bh_result,
-                                   EXT4_GET_BLOCKS_IO_CREATE_EXT);
-
-        /*
-         * Mark inode as having pending DIO writes to unwritten extents.
-         * ext4_direct_IO_write() checks this flag and converts extents to
-         * written.
-         */
-        if (!ret && buffer_unwritten(bh_result))
-                ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
-
-        return ret;
-}
-
-static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock,
-                struct buffer_head *bh_result, int create)
-{
-        int ret;
-
-        ext4_debug("ext4_dio_get_block_overwrite: inode %lu, create flag %d\n",
-                   inode->i_ino, create);
-        /* We don't expect handle for direct IO */
-        WARN_ON_ONCE(ext4_journal_current_handle());
-
-        ret = _ext4_get_block(inode, iblock, bh_result, 0);
-        /*
-         * Blocks should have been preallocated! ext4_file_write_iter() checks
-         * that.
-         */
-        WARN_ON_ONCE(!buffer_mapped(bh_result) || buffer_unwritten(bh_result));
-
-        return ret;
-}
-
-
-/*
  * `handle' can be NULL if create is zero
  */
 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
@@ -987,7 +841,8 @@
         int create = map_flags & EXT4_GET_BLOCKS_CREATE;
         int err;
 
-        J_ASSERT(handle != NULL || create == 0);
+        J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+                    || handle != NULL || create == 0);
 
         map.m_lblk = block;
         map.m_len = 1;
@@ -1003,7 +858,8 @@
                 return ERR_PTR(-ENOMEM);
         if (map.m_flags & EXT4_MAP_NEW) {
                 J_ASSERT(create != 0);
-                J_ASSERT(handle != NULL);
+                J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
+                         || (handle != NULL));
 
                 /*
                  * Now that we do not always journal data, we should
@@ -1040,18 +896,20 @@
                                ext4_lblk_t block, int map_flags)
 {
         struct buffer_head *bh;
+        int ret;
 
         bh = ext4_getblk(handle, inode, block, map_flags);
         if (IS_ERR(bh))
                 return bh;
-        if (!bh || buffer_uptodate(bh))
+        if (!bh || ext4_buffer_uptodate(bh))
                 return bh;
-        ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh);
-        wait_on_buffer(bh);
-        if (buffer_uptodate(bh))
-                return bh;
-        put_bh(bh);
-        return ERR_PTR(-EIO);
+
+        ret = ext4_read_bh_lock(bh, REQ_META | REQ_PRIO, true);
+        if (ret) {
+                put_bh(bh);
+                return ERR_PTR(ret);
+        }
+        return bh;
 }
 
 /* Read a contiguous batch of blocks. */
@@ -1071,9 +929,8 @@
 
         for (i = 0; i < bh_count; i++)
                 /* Note that NULL bhs[i] is valid because of holes. */
-                if (bhs[i] && !buffer_uptodate(bhs[i]))
-                        ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
-                                    &bhs[i]);
+                if (bhs[i] && !ext4_buffer_uptodate(bhs[i]))
+                        ext4_read_bh_lock(bhs[i], REQ_META | REQ_PRIO, false);
 
         if (!wait)
                 return 0;
@@ -1190,8 +1047,9 @@
         int err = 0;
         unsigned blocksize = inode->i_sb->s_blocksize;
         unsigned bbits;
-        struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
-        bool decrypt = false;
+        struct buffer_head *bh, *head, *wait[2];
+        int nr_wait = 0;
+        int i;
 
         BUG_ON(!PageLocked(page));
         BUG_ON(from > PAGE_SIZE);
@@ -1222,7 +1080,6 @@
                 if (err)
                         break;
                 if (buffer_new(bh)) {
-                        clean_bdev_bh_alias(bh);
                         if (PageUptodate(page)) {
                                 clear_buffer_new(bh);
                                 set_buffer_uptodate(bh);
@@ -1243,23 +1100,33 @@
                 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
                     !buffer_unwritten(bh) &&
                     (block_start < from || block_end > to)) {
-                        ll_rw_block(REQ_OP_READ, 0, 1, &bh);
-                        *wait_bh++ = bh;
-                        decrypt = fscrypt_inode_uses_fs_layer_crypto(inode);
+                        ext4_read_bh_lock(bh, 0, false);
+                        wait[nr_wait++] = bh;
                 }
         }
         /*
          * If we issued read requests, let them complete.
          */
-        while (wait_bh > wait) {
-                wait_on_buffer(*--wait_bh);
-                if (!buffer_uptodate(*wait_bh))
+        for (i = 0; i < nr_wait; i++) {
+                wait_on_buffer(wait[i]);
+                if (!buffer_uptodate(wait[i]))
                         err = -EIO;
         }
-        if (unlikely(err))
+        if (unlikely(err)) {
                 page_zero_new_buffers(page, from, to);
-        else if (decrypt)
-                err = fscrypt_decrypt_pagecache_blocks(page, PAGE_SIZE, 0);
+        } else if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
+                for (i = 0; i < nr_wait; i++) {
+                        int err2;
+
+                        err2 = fscrypt_decrypt_pagecache_blocks(page, blocksize,
+                                                                bh_offset(wait[i]));
+                        if (err2) {
+                                clear_buffer_uptodate(wait[i]);
+                                err = err2;
+                        }
+                }
+        }
+
         return err;
 }
 #endif
@@ -1319,6 +1186,13 @@
         page = grab_cache_page_write_begin(mapping, index, flags);
         if (!page)
                 return -ENOMEM;
+        /*
+         * The same as page allocation, we prealloc buffer heads before
+         * starting the handle.
+         */
+        if (!page_has_buffers(page))
+                create_empty_buffers(page, inode->i_sb->s_blocksize, 0);
+
         unlock_page(page);
 
 retry_journal:
@@ -1442,6 +1316,7 @@
                         goto errout;
                 }
                 copied = ret;
+                ret = 0;
         } else
                 copied = block_write_end(file, mapping, pos,
                                          len, copied, page, fsdata);
@@ -1466,15 +1341,16 @@
          * filesystems.
          */
         if (i_size_changed || inline_data)
-                ext4_mark_inode_dirty(handle, inode);
+                ret = ext4_mark_inode_dirty(handle, inode);
 
+errout:
         if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
                 /* if we have allocated more blocks and copied
                  * less. We will have blocks allocated outside
                  * inode->i_size. So truncate them
                  */
                 ext4_orphan_add(handle, inode);
-errout:
+
         ret2 = ext4_journal_stop(handle);
         if (!ret)
                 ret = ret2;
@@ -1558,6 +1434,7 @@
                         goto errout;
                 }
                 copied = ret;
+                ret = 0;
         } else if (unlikely(copied < len) && !PageUptodate(page)) {
                 copied = 0;
                 ext4_journalled_zero_new_buffers(handle, page, from, to);
@@ -1587,6 +1464,7 @@
                         ret = ret2;
         }
 
+errout:
         if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
                 /* if we have allocated more blocks and copied
                  * less. We will have blocks allocated outside
@@ -1594,7 +1472,6 @@
                  */
                 ext4_orphan_add(handle, inode);
 
-errout:
         ret2 = ext4_journal_stop(handle);
         if (!ret)
                 ret = ret2;
@@ -1643,7 +1520,7 @@
         return 0;       /* success */
 }
 
-static void ext4_da_release_space(struct inode *inode, int to_free)
+void ext4_da_release_space(struct inode *inode, int to_free)
 {
         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
         struct ext4_inode_info *ei = EXT4_I(inode);
@@ -1678,64 +1555,6 @@
         dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
 }
 
-static void ext4_da_page_release_reservation(struct page *page,
-                                             unsigned int offset,
-                                             unsigned int length)
-{
-        int to_release = 0, contiguous_blks = 0;
-        struct buffer_head *head, *bh;
-        unsigned int curr_off = 0;
-        struct inode *inode = page->mapping->host;
-        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-        unsigned int stop = offset + length;
-        int num_clusters;
-        ext4_fsblk_t lblk;
-
-        BUG_ON(stop > PAGE_SIZE || stop < length);
-
-        head = page_buffers(page);
-        bh = head;
-        do {
-                unsigned int next_off = curr_off + bh->b_size;
-
-                if (next_off > stop)
-                        break;
-
-                if ((offset <= curr_off) && (buffer_delay(bh))) {
-                        to_release++;
-                        contiguous_blks++;
-                        clear_buffer_delay(bh);
-                } else if (contiguous_blks) {
-                        lblk = page->index <<
-                               (PAGE_SHIFT - inode->i_blkbits);
-                        lblk += (curr_off >> inode->i_blkbits) -
-                                contiguous_blks;
-                        ext4_es_remove_extent(inode, lblk, contiguous_blks);
-                        contiguous_blks = 0;
-                }
-                curr_off = next_off;
-        } while ((bh = bh->b_this_page) != head);
-
-        if (contiguous_blks) {
-                lblk = page->index << (PAGE_SHIFT - inode->i_blkbits);
-                lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
-                ext4_es_remove_extent(inode, lblk, contiguous_blks);
-        }
-
-        /* If we have released all the blocks belonging to a cluster, then we
-         * need to release the reserved space for that cluster. */
-        num_clusters = EXT4_NUM_B2C(sbi, to_release);
-        while (num_clusters > 0) {
-                lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) +
-                        ((num_clusters - 1) << sbi->s_cluster_bits);
-                if (sbi->s_cluster_ratio == 1 ||
-                    !ext4_find_delalloc_cluster(inode, lblk))
-                        ext4_da_release_space(inode, 1);
-
-                num_clusters--;
-        }
-}
-
 /*
  * Delayed allocation stuff
  */
@@ -1755,6 +1574,7 @@
         struct ext4_map_blocks map;
         struct ext4_io_submit io_submit;        /* IO submission data */
         unsigned int do_map:1;
+        unsigned int scanned_until_end:1;
 };
 
 static void mpage_release_unused_pages(struct mpage_da_data *mpd,
@@ -1770,13 +1590,21 @@
         if (mpd->first_page >= mpd->next_page)
                 return;
 
+        mpd->scanned_until_end = 0;
         index = mpd->first_page;
         end   = mpd->next_page - 1;
         if (invalidate) {
                 ext4_lblk_t start, last;
                 start = index << (PAGE_SHIFT - inode->i_blkbits);
                 last = end << (PAGE_SHIFT - inode->i_blkbits);
+
+                /*
+                 * avoid racing with extent status tree scans made by
+                 * ext4_insert_delayed_block()
+                 */
+                down_write(&EXT4_I(inode)->i_data_sem);
                 ext4_es_remove_extent(inode, start, last - start + 1);
+                up_write(&EXT4_I(inode)->i_data_sem);
         }
 
         pagevec_init(&pvec);
@@ -1829,6 +1657,70 @@
 }
 
 /*
+ * ext4_insert_delayed_block - adds a delayed block to the extents status
+ *                             tree, incrementing the reserved cluster/block
+ *                             count or making a pending reservation
+ *                             where needed
+ *
+ * @inode - file containing the newly added block
+ * @lblk - logical block to be added
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
+{
+        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+        int ret;
+        bool allocated = false;
+        bool reserved = false;
+
+        /*
+         * If the cluster containing lblk is shared with a delayed,
+         * written, or unwritten extent in a bigalloc file system, it's
+         * already been accounted for and does not need to be reserved.
+         * A pending reservation must be made for the cluster if it's
+         * shared with a written or unwritten extent and doesn't already
+         * have one. Written and unwritten extents can be purged from the
+         * extents status tree if the system is under memory pressure, so
+         * it's necessary to examine the extent tree if a search of the
+         * extents status tree doesn't get a match.
+         */
+        if (sbi->s_cluster_ratio == 1) {
+                ret = ext4_da_reserve_space(inode);
+                if (ret != 0)   /* ENOSPC */
+                        goto errout;
+                reserved = true;
+        } else {   /* bigalloc */
+                if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
+                        if (!ext4_es_scan_clu(inode,
+                                              &ext4_es_is_mapped, lblk)) {
+                                ret = ext4_clu_mapped(inode,
+                                                      EXT4_B2C(sbi, lblk));
+                                if (ret < 0)
+                                        goto errout;
+                                if (ret == 0) {
+                                        ret = ext4_da_reserve_space(inode);
+                                        if (ret != 0)   /* ENOSPC */
+                                                goto errout;
+                                        reserved = true;
+                                } else {
+                                        allocated = true;
+                                }
+                        } else {
+                                allocated = true;
+                        }
+                }
+        }
+
+        ret = ext4_es_insert_delayed_block(inode, lblk, allocated);
+        if (ret && reserved)
+                ext4_da_release_space(inode, 1);
+
+errout:
+        return ret;
+}
+
+/*
  * This function is grabs code from the very beginning of
  * ext4_map_blocks, but assumes that the caller is from delayed write
  * time. This function looks up the requested blocks and sets the
@@ -1851,12 +1743,11 @@
                 invalid_block = ~0;
 
         map->m_flags = 0;
-        ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
-                  "logical block %lu\n", inode->i_ino, map->m_len,
+        ext_debug(inode, "max_blocks %u, logical block %lu\n", map->m_len,
                   (unsigned long) map->m_lblk);
 
         /* Lookup extent status tree firstly */
-        if (ext4_es_lookup_extent(inode, iblock, &es)) {
+        if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
                 if (ext4_es_is_hole(&es)) {
                         retval = 0;
                         down_read(&EXT4_I(inode)->i_data_sem);
@@ -1884,7 +1775,7 @@
                 else if (ext4_es_is_unwritten(&es))
                         map->m_flags |= EXT4_MAP_UNWRITTEN;
                 else
-                        BUG_ON(1);
+                        BUG();
 
 #ifdef ES_AGGRESSIVE_TEST
                 ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
@@ -1907,28 +1798,14 @@
 add_delayed:
         if (retval == 0) {
                 int ret;
+
                 /*
                  * XXX: __block_prepare_write() unmaps passed block,
                  * is it OK?
                  */
-                /*
-                 * If the block was allocated from previously allocated cluster,
-                 * then we don't need to reserve it again. However we still need
-                 * to reserve metadata for every block we're going to write.
-                 */
-                if (EXT4_SB(inode->i_sb)->s_cluster_ratio == 1 ||
-                    !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
-                        ret = ext4_da_reserve_space(inode);
-                        if (ret) {
-                                /* not enough space to reserve */
-                                retval = ret;
-                                goto out_unlock;
-                        }
-                }
 
-                ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
-                                            ~0, EXTENT_STATUS_DELAYED);
-                if (ret) {
+                ret = ext4_insert_delayed_block(inode, map->m_lblk);
+                if (ret != 0) {
                         retval = ret;
                         goto out_unlock;
                 }
@@ -2088,6 +1965,9 @@
         }
         if (ret == 0)
                 ret = err;
+        err = ext4_jbd2_inode_add_write(handle, inode, page_offset(page), len);
+        if (ret == 0)
+                ret = err;
         EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
         err = ext4_journal_stop(handle);
         if (!ret)
@@ -2169,6 +2049,15 @@
                 len = size & ~PAGE_MASK;
         else
                 len = PAGE_SIZE;
+
+        /* Should never happen but for bugs in other kernel subsystems */
+        if (!page_has_buffers(page)) {
+                ext4_warning_inode(inode,
+                   "page %lu does not have buffers attached", page->index);
+                ClearPageDirty(page);
+                unlock_page(page);
+                return 0;
+        }
 
         page_bufs = page_buffers(page);
         /*
@@ -2262,7 +2151,7 @@
         return err;
 }
 
-#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
+#define BH_FLAGS (BIT(BH_Unwritten) | BIT(BH_Delay))
 
 /*
  * mballoc gives us at most this number of blocks...
@@ -2372,7 +2261,84 @@
                 if (err < 0)
                         return err;
         }
-        return lblk < blocks;
+        if (lblk >= blocks) {
+                mpd->scanned_until_end = 1;
+                return 0;
+        }
+        return 1;
+}
+
+/*
+ * mpage_process_page - update page buffers corresponding to changed extent and
+ *                     may submit fully mapped page for IO
+ *
+ * @mpd         - description of extent to map, on return next extent to map
+ * @m_lblk      - logical block mapping.
+ * @m_pblk      - corresponding physical mapping.
+ * @map_bh      - determines on return whether this page requires any further
+ *                mapping or not.
+ * Scan given page buffers corresponding to changed extent and update buffer
+ * state according to new extent state.
+ * We map delalloc buffers to their physical location, clear unwritten bits.
+ * If the given page is not fully mapped, we update @map to the next extent in
+ * the given page that needs mapping & return @map_bh as true.
+ */
+static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
+                              ext4_lblk_t *m_lblk, ext4_fsblk_t *m_pblk,
+                              bool *map_bh)
+{
+        struct buffer_head *head, *bh;
+        ext4_io_end_t *io_end = mpd->io_submit.io_end;
+        ext4_lblk_t lblk = *m_lblk;
+        ext4_fsblk_t pblock = *m_pblk;
+        int err = 0;
+        int blkbits = mpd->inode->i_blkbits;
+        ssize_t io_end_size = 0;
+        struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end);
+
+        bh = head = page_buffers(page);
+        do {
+                if (lblk < mpd->map.m_lblk)
+                        continue;
+                if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
+                        /*
+                         * Buffer after end of mapped extent.
+                         * Find next buffer in the page to map.
+                         */
+                        mpd->map.m_len = 0;
+                        mpd->map.m_flags = 0;
+                        io_end_vec->size += io_end_size;
+                        io_end_size = 0;
+
+                        err = mpage_process_page_bufs(mpd, head, bh, lblk);
+                        if (err > 0)
+                                err = 0;
+                        if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) {
+                                io_end_vec = ext4_alloc_io_end_vec(io_end);
+                                if (IS_ERR(io_end_vec)) {
+                                        err = PTR_ERR(io_end_vec);
+                                        goto out;
+                                }
+                                io_end_vec->offset = (loff_t)mpd->map.m_lblk << blkbits;
+                        }
+                        *map_bh = true;
+                        goto out;
+                }
+                if (buffer_delay(bh)) {
+                        clear_buffer_delay(bh);
+                        bh->b_blocknr = pblock++;
+                }
+                clear_buffer_unwritten(bh);
+                io_end_size += (1 << blkbits);
+        } while (lblk++, (bh = bh->b_this_page) != head);
+
+        io_end_vec->size += io_end_size;
+        io_end_size = 0;
+        *map_bh = false;
+out:
+        *m_lblk = lblk;
+        *m_pblk = pblock;
+        return err;
 }
 
 /*
@@ -2394,12 +2360,12 @@
         struct pagevec pvec;
         int nr_pages, i;
         struct inode *inode = mpd->inode;
-        struct buffer_head *head, *bh;
         int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
         pgoff_t start, end;
         ext4_lblk_t lblk;
-        sector_t pblock;
+        ext4_fsblk_t pblock;
         int err;
+        bool map_bh = false;
 
         start = mpd->map.m_lblk >> bpp_bits;
         end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
@@ -2415,50 +2381,19 @@
                 for (i = 0; i < nr_pages; i++) {
                         struct page *page = pvec.pages[i];
 
-                        bh = head = page_buffers(page);
-                        do {
-                                if (lblk < mpd->map.m_lblk)
-                                        continue;
-                                if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
-                                        /*
-                                         * Buffer after end of mapped extent.
-                                         * Find next buffer in the page to map.
-                                         */
-                                        mpd->map.m_len = 0;
-                                        mpd->map.m_flags = 0;
-                                        /*
-                                         * FIXME: If dioread_nolock supports
-                                         * blocksize < pagesize, we need to make
-                                         * sure we add size mapped so far to
-                                         * io_end->size as the following call
-                                         * can submit the page for IO.
-                                         */
-                                        err = mpage_process_page_bufs(mpd, head,
-                                                                      bh, lblk);
-                                        pagevec_release(&pvec);
-                                        if (err > 0)
-                                                err = 0;
-                                        return err;
-                                }
-                                if (buffer_delay(bh)) {
-                                        clear_buffer_delay(bh);
-                                        bh->b_blocknr = pblock++;
-                                }
-                                clear_buffer_unwritten(bh);
-                        } while (lblk++, (bh = bh->b_this_page) != head);
-
+                        err = mpage_process_page(mpd, page, &lblk, &pblock,
+                                                 &map_bh);
                         /*
-                         * FIXME: This is going to break if dioread_nolock
-                         * supports blocksize < pagesize as we will try to
-                         * convert potentially unmapped parts of inode.
+                         * If map_bh is true, means page may require further bh
+                         * mapping, or maybe the page was submitted for IO.
+                         * So we return to call further extent mapping.
                          */
-                        mpd->io_submit.io_end->size += PAGE_SIZE;
+                        if (err < 0 || map_bh)
+                                goto out;
                         /* Page fully mapped - let IO run! */
                         err = mpage_submit_page(mpd, page);
-                        if (err < 0) {
-                                pagevec_release(&pvec);
-                                return err;
-                        }
+                        if (err < 0)
+                                goto out;
                 }
                 pagevec_release(&pvec);
         }
@@ -2466,6 +2401,9 @@
         mpd->map.m_len = 0;
         mpd->map.m_flags = 0;
         return 0;
+out:
+        pagevec_release(&pvec);
+        return err;
 }
 
 static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
@@ -2497,7 +2435,7 @@
         dioread_nolock = ext4_should_dioread_nolock(inode);
         if (dioread_nolock)
                 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
-        if (map->m_flags & (1 << BH_Delay))
+        if (map->m_flags & BIT(BH_Delay))
                 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
 
         err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
@@ -2513,10 +2451,6 @@
         }
 
         BUG_ON(map->m_len == 0);
-        if (map->m_flags & EXT4_MAP_NEW) {
-                clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
-                                   map->m_len);
-        }
         return 0;
 }
 
@@ -2549,16 +2483,20 @@
         int err;
         loff_t disksize;
         int progress = 0;
+        ext4_io_end_t *io_end = mpd->io_submit.io_end;
+        struct ext4_io_end_vec *io_end_vec;
 
-        mpd->io_submit.io_end->offset =
-                                ((loff_t)map->m_lblk) << inode->i_blkbits;
+        io_end_vec = ext4_alloc_io_end_vec(io_end);
+        if (IS_ERR(io_end_vec))
+                return PTR_ERR(io_end_vec);
+        io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits;
         do {
                 err = mpage_map_one_extent(handle, mpd);
                 if (err < 0) {
                         struct super_block *sb = inode->i_sb;
 
                         if (ext4_forced_shutdown(EXT4_SB(sb)) ||
-                            EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+                            ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
                                 goto invalidate_dirty_pages;
                         /*
                          * Let the uper layers retry transient errors.
@@ -2615,10 +2553,11 @@
                 EXT4_I(inode)->i_disksize = disksize;
                 up_write(&EXT4_I(inode)->i_data_sem);
                 err2 = ext4_mark_inode_dirty(handle, inode);
-                if (err2)
-                        ext4_error(inode->i_sb,
-                                   "Failed to mark inode %lu dirty",
-                                   inode->i_ino);
+                if (err2) {
+                        ext4_error_err(inode->i_sb, -err2,
+                                       "Failed to mark inode %lu dirty",
+                                       inode->i_ino);
+                }
                 if (!err)
                         err = err2;
         }
@@ -2666,7 +2605,7 @@
         long left = mpd->wbc->nr_to_write;
         pgoff_t index = mpd->first_page;
         pgoff_t end = mpd->last_page;
-        int tag;
+        xa_mark_t tag;
         int i, err = 0;
         int blkbits = mpd->inode->i_blkbits;
         ext4_lblk_t lblk;
@@ -2684,7 +2623,7 @@
                 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
                                 tag);
                 if (nr_pages == 0)
-                        goto out;
+                        break;
 
                 for (i = 0; i < nr_pages; i++) {
                         struct page *page = pvec.pages[i];
@@ -2723,6 +2662,22 @@
                         wait_on_page_writeback(page);
                         BUG_ON(PageWriteback(page));
 
+                        /*
+                         * Should never happen but for buggy code in
+                         * other subsystems that call
+                         * set_page_dirty() without properly warning
+                         * the file system first.  See [1] for more
+                         * information.
+                         *
+                         * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz
+                         */
+                        if (!page_has_buffers(page)) {
+                                ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index);
+                                ClearPageDirty(page);
+                                unlock_page(page);
+                                continue;
+                        }
+
                         if (mpd->map.m_len == 0)
                                 mpd->first_page = page->index;
                         mpd->next_page = page->index + 1;
@@ -2739,6 +2694,7 @@
                 pagevec_release(&pvec);
                 cond_resched();
         }
+        mpd->scanned_until_end = 1;
         return 0;
 out:
         pagevec_release(&pvec);
@@ -2757,7 +2713,6 @@
         struct inode *inode = mapping->host;
         int needed_blocks, rsv_blocks = 0, ret = 0;
         struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
-        bool done;
         struct blk_plug plug;
         bool give_up_on_write = false;
 
@@ -2791,18 +2746,9 @@
          * the stack trace.
         */
         if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
-                     sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+                     ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
                 ret = -EROFS;
                 goto out_writepages;
-        }
-
-        if (ext4_should_dioread_nolock(inode)) {
-                /*
-                 * We may need to convert up to one extent per block in
-                 * the page and we may dirty the inode.
-                 */
-                rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
-                                                PAGE_SIZE >> inode->i_blkbits);
         }
 
         /*
@@ -2821,6 +2767,15 @@
                                 EXT4_STATE_MAY_INLINE_DATA));
                 ext4_destroy_inline_data(handle, inode);
                 ext4_journal_stop(handle);
+        }
+
+        if (ext4_should_dioread_nolock(inode)) {
+                /*
+                 * We may need to convert up to one extent per block in
+                 * the page and we may dirty the inode.
+                 */
+                rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
+                                                PAGE_SIZE >> inode->i_blkbits);
         }
 
         if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
@@ -2843,7 +2798,6 @@
 retry:
         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                 tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
-        done = false;
         blk_start_plug(&plug);
 
@@ -2853,22 +2807,23 @@
          * started.
          */
         mpd.do_map = 0;
+        mpd.scanned_until_end = 0;
         mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
         if (!mpd.io_submit.io_end) {
                 ret = -ENOMEM;
                 goto unplug;
         }
         ret = mpage_prepare_extent_to_map(&mpd);
+        /* Unlock pages we didn't use */
+        mpage_release_unused_pages(&mpd, false);
         /* Submit prepared bio */
         ext4_io_submit(&mpd.io_submit);
         ext4_put_io_end_defer(mpd.io_submit.io_end);
         mpd.io_submit.io_end = NULL;
-        /* Unlock pages we didn't use */
-        mpage_release_unused_pages(&mpd, false);
         if (ret < 0)
                 goto unplug;
 
-        while (!done && mpd.first_page <= mpd.last_page) {
+        while (!mpd.scanned_until_end && wbc->nr_to_write > 0) {
                 /* For each extent of pages we use new io_end */
                 mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
                 if (!mpd.io_submit.io_end) {
@@ -2903,26 +2858,15 @@
 
                 trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
                 ret = mpage_prepare_extent_to_map(&mpd);
-                if (!ret) {
-                        if (mpd.map.m_len)
-                                ret = mpage_map_and_submit_extent(handle, &mpd,
+                if (!ret && mpd.map.m_len)
+                        ret = mpage_map_and_submit_extent(handle, &mpd,
                                         &give_up_on_write);
-                        else {
-                                /*
-                                 * We scanned the whole range (or exhausted
-                                 * nr_to_write), submitted what was mapped and
-                                 * didn't find anything needing mapping. We are
-                                 * done.
-                                 */
-                                done = true;
-                        }
-                }
                 /*
                  * Caution: If the handle is synchronous,
                  * ext4_journal_stop() can wait for transaction commit
                  * to finish which may depend on writeback of pages to
                  * complete or on page lock to be released. In that
-                 * case, we have to wait until after after we have
+                 * case, we have to wait until after we have
                  * submitted all the IO, released page locks we hold,
                  * and dropped io_end reference (for extent conversion
                  * to be able to complete) before stopping the handle.
@@ -2932,10 +2876,11 @@
                         handle = NULL;
                         mpd.do_map = 0;
                 }
-                /* Submit prepared bio */
-                ext4_io_submit(&mpd.io_submit);
                 /* Unlock pages we didn't use */
                 mpage_release_unused_pages(&mpd, give_up_on_write);
+                /* Submit prepared bio */
+                ext4_io_submit(&mpd.io_submit);
+
                 /*
                  * Drop our io_end reference we got from init. We have
                  * to be careful and use deferred io_end finishing if
@@ -3002,7 +2947,7 @@
         percpu_down_read(&sbi->s_writepages_rwsem);
         trace_ext4_writepages(inode, wbc);
 
-        ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc);
+        ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
         trace_ext4_writepages_result(inode, wbc, ret,
                                      nr_to_write - wbc->nr_to_write);
         percpu_up_read(&sbi->s_writepages_rwsem);
@@ -3212,58 +3157,42 @@
         end = start + copied - 1;
 
         /*
-         * generic_write_end() will run mark_inode_dirty() if i_size
-         * changes. So let's piggyback the i_disksize mark_inode_dirty
-         * into that.
+         * Since we are holding inode lock, we are sure i_disksize <=
+         * i_size. We also know that if i_disksize < i_size, there are
+         * delalloc writes pending in the range upto i_size. If the end of
+         * the current write is <= i_size, there's no need to touch
+         * i_disksize since writeback will push i_disksize upto i_size
+         * eventually. If the end of the current write is > i_size and
+         * inside an allocated block (ext4_da_should_update_i_disksize()
+         * check), we need to update i_disksize here as neither
+         * ext4_writepage() nor certain ext4_writepages() paths not
+         * allocating blocks update i_disksize.
+         *
+         * Note that we defer inode dirtying to generic_write_end() /
+         * ext4_da_write_inline_data_end().
          */
         new_i_size = pos + copied;
-        if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
+        if (copied && new_i_size > inode->i_size) {
                 if (ext4_has_inline_data(inode) ||
-                    ext4_da_should_update_i_disksize(page, end)) {
+                    ext4_da_should_update_i_disksize(page, end))
                         ext4_update_i_disksize(inode, new_i_size);
-                        /* We need to mark inode dirty even if
-                         * new_i_size is less that inode->i_size
-                         * bu greater than i_disksize.(hint delalloc)
-                         */
-                        ext4_mark_inode_dirty(handle, inode);
-                }
         }
 
         if (write_mode != CONVERT_INLINE_DATA &&
             ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
             ext4_has_inline_data(inode))
-                ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied,
+                ret = ext4_da_write_inline_data_end(inode, pos, len, copied,
                                                      page);
         else
-                ret2 = generic_write_end(file, mapping, pos, len, copied,
+                ret = generic_write_end(file, mapping, pos, len, copied,
                                          page, fsdata);
 
-        copied = ret2;
-        if (ret2 < 0)
-                ret = ret2;
+        copied = ret;
         ret2 = ext4_journal_stop(handle);
-        if (!ret)
+        if (unlikely(ret2 && !ret))
                 ret = ret2;
 
         return ret ? ret : copied;
-}
-
-static void ext4_da_invalidatepage(struct page *page, unsigned int offset,
-                                   unsigned int length)
-{
-        /*
-         * Drop reserved blocks
-         */
-        BUG_ON(!PageLocked(page));
-        if (!page_has_buffers(page))
-                goto out;
-
-        ext4_da_page_release_reservation(page, offset, length);
-
-out:
-        ext4_invalidatepage(page, offset, length);
-
-        return;
 }
 
 /*
@@ -3328,13 +3257,15 @@
 {
         struct inode *inode = mapping->host;
         journal_t *journal;
+        sector_t ret = 0;
         int err;
 
+        inode_lock_shared(inode);
         /*
          * We can get here for an inline file via the FIBMAP ioctl
          */
         if (ext4_has_inline_data(inode))
-                return 0;
+                goto out;
 
         if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
             test_opt(inode->i_sb, DELALLOC)) {
@@ -3373,10 +3304,14 @@
                 jbd2_journal_unlock_updates(journal);
 
                 if (err)
-                        return 0;
+                        goto out;
         }
 
-        return generic_block_bmap(mapping, block, ext4_get_block);
+        ret = iomap_bmap(mapping, block, &ext4_iomap_ops);
+
+out:
+        inode_unlock_shared(inode);
+        return ret;
 }
 
 static int ext4_readpage(struct file *file, struct page *page)
@@ -3390,23 +3325,20 @@
                 ret = ext4_readpage_inline(inode, page);
 
         if (ret == -EAGAIN)
-                return ext4_mpage_readpages(page->mapping, NULL, page, 1,
-                                                false);
+                return ext4_mpage_readpages(inode, NULL, page);
 
         return ret;
 }
 
-static int
-ext4_readpages(struct file *file, struct address_space *mapping,
-                struct list_head *pages, unsigned nr_pages)
+static void ext4_readahead(struct readahead_control *rac)
 {
-        struct inode *inode = mapping->host;
+        struct inode *inode = rac->mapping->host;
 
-        /* If the file has inline data, no need to do readpages. */
+        /* If the file has inline data, no need to do readahead. */
         if (ext4_has_inline_data(inode))
-                return 0;
+                return;
 
-        return ext4_mpage_readpages(mapping, pages, NULL, nr_pages, true);
+        ext4_mpage_readpages(inode, rac, NULL);
 }
 
 static void ext4_invalidatepage(struct page *page, unsigned int offset,
@@ -3455,7 +3387,7 @@
         if (PageChecked(page))
                 return 0;
         if (journal)
-                return jbd2_journal_try_to_free_buffers(journal, page, wait);
+                return jbd2_journal_try_to_free_buffers(journal, page);
         else
                 return try_to_free_buffers(page);
 }
 
....@@ -3464,216 +3396,215 @@
34643396 {
34653397 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
34663398
3467
- if (journal)
3468
- return !jbd2_transaction_committed(journal,
3469
- EXT4_I(inode)->i_datasync_tid);
3399
+ if (journal) {
3400
+ if (jbd2_transaction_committed(journal,
3401
+ EXT4_I(inode)->i_datasync_tid))
3402
+ return false;
3403
+ if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
3404
+ return !list_empty(&EXT4_I(inode)->i_fc_list);
3405
+ return true;
3406
+ }
3407
+
34703408 /* Any metadata buffers to write? */
34713409 if (!list_empty(&inode->i_mapping->private_list))
34723410 return true;
34733411 return inode->i_state & I_DIRTY_DATASYNC;
34743412 }
34753413
3476
-static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
3477
- unsigned flags, struct iomap *iomap)
3414
+static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
3415
+ struct ext4_map_blocks *map, loff_t offset,
3416
+ loff_t length)
34783417 {
3479
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3480
- unsigned int blkbits = inode->i_blkbits;
3481
- unsigned long first_block, last_block;
3482
- struct ext4_map_blocks map;
3483
- bool delalloc = false;
3484
- int ret;
3485
-
3486
- if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
3487
- return -EINVAL;
3488
- first_block = offset >> blkbits;
3489
- last_block = min_t(loff_t, (offset + length - 1) >> blkbits,
3490
- EXT4_MAX_LOGICAL_BLOCK);
3491
-
3492
- if (flags & IOMAP_REPORT) {
3493
- if (ext4_has_inline_data(inode)) {
3494
- ret = ext4_inline_data_iomap(inode, iomap);
3495
- if (ret != -EAGAIN) {
3496
- if (ret == 0 && offset >= iomap->length)
3497
- ret = -ENOENT;
3498
- return ret;
3499
- }
3500
- }
3501
- } else {
3502
- if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
3503
- return -ERANGE;
3504
- }
3505
-
3506
- map.m_lblk = first_block;
3507
- map.m_len = last_block - first_block + 1;
3508
-
3509
- if (flags & IOMAP_REPORT) {
3510
- ret = ext4_map_blocks(NULL, inode, &map, 0);
3511
- if (ret < 0)
3512
- return ret;
3513
-
3514
- if (ret == 0) {
3515
- ext4_lblk_t end = map.m_lblk + map.m_len - 1;
3516
- struct extent_status es;
3517
-
3518
- ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es);
3519
-
3520
- if (!es.es_len || es.es_lblk > end) {
3521
- /* entire range is a hole */
3522
- } else if (es.es_lblk > map.m_lblk) {
3523
- /* range starts with a hole */
3524
- map.m_len = es.es_lblk - map.m_lblk;
3525
- } else {
3526
- ext4_lblk_t offs = 0;
3527
-
3528
- if (es.es_lblk < map.m_lblk)
3529
- offs = map.m_lblk - es.es_lblk;
3530
- map.m_lblk = es.es_lblk + offs;
3531
- map.m_len = es.es_len - offs;
3532
- delalloc = true;
3533
- }
3534
- }
3535
- } else if (flags & IOMAP_WRITE) {
3536
- int dio_credits;
3537
- handle_t *handle;
3538
- int retries = 0;
3539
-
3540
- /* Trim mapping request to maximum we can map at once for DIO */
3541
- if (map.m_len > DIO_MAX_BLOCKS)
3542
- map.m_len = DIO_MAX_BLOCKS;
3543
- dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
3544
-retry:
3545
- /*
3546
- * Either we allocate blocks and then we don't get unwritten
3547
- * extent so we have reserved enough credits, or the blocks
3548
- * are already allocated and unwritten and in that case
3549
- * extent conversion fits in the credits as well.
3550
- */
3551
- handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
3552
- dio_credits);
3553
- if (IS_ERR(handle))
3554
- return PTR_ERR(handle);
3555
-
3556
- ret = ext4_map_blocks(handle, inode, &map,
3557
- EXT4_GET_BLOCKS_CREATE_ZERO);
3558
- if (ret < 0) {
3559
- ext4_journal_stop(handle);
3560
- if (ret == -ENOSPC &&
3561
- ext4_should_retry_alloc(inode->i_sb, &retries))
3562
- goto retry;
3563
- return ret;
3564
- }
3565
-
3566
- /*
3567
- * If we added blocks beyond i_size, we need to make sure they
3568
- * will get truncated if we crash before updating i_size in
3569
- * ext4_iomap_end(). For faults we don't need to do that (and
3570
- * even cannot because for orphan list operations inode_lock is
3571
- * required) - if we happen to instantiate block beyond i_size,
3572
- * it is because we race with truncate which has already added
3573
- * the inode to the orphan list.
3574
- */
3575
- if (!(flags & IOMAP_FAULT) && first_block + map.m_len >
3576
- (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) {
3577
- int err;
3578
-
3579
- err = ext4_orphan_add(handle, inode);
3580
- if (err < 0) {
3581
- ext4_journal_stop(handle);
3582
- return err;
3583
- }
3584
- }
3585
- ext4_journal_stop(handle);
3586
- } else {
3587
- ret = ext4_map_blocks(NULL, inode, &map, 0);
3588
- if (ret < 0)
3589
- return ret;
3590
- }
3418
+ u8 blkbits = inode->i_blkbits;
35913419
35923420 /*
35933421 * Writes that span EOF might trigger an I/O size update on completion,
3594
- * so consider them to be dirty for the purposes of O_DSYNC, even if
3595
- * there is no other metadata changes being made or are pending here.
3422
+ * so consider them to be dirty for the purpose of O_DSYNC, even if
3423
+ * there is no other metadata changes being made or are pending.
35963424 */
35973425 iomap->flags = 0;
35983426 if (ext4_inode_datasync_dirty(inode) ||
35993427 offset + length > i_size_read(inode))
36003428 iomap->flags |= IOMAP_F_DIRTY;
3601
- iomap->bdev = inode->i_sb->s_bdev;
3602
- iomap->dax_dev = sbi->s_daxdev;
3603
- iomap->offset = (u64)first_block << blkbits;
3604
- iomap->length = (u64)map.m_len << blkbits;
36053429
3606
- if (ret == 0) {
3607
- iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
3608
- iomap->addr = IOMAP_NULL_ADDR;
3609
- } else {
3610
- if (map.m_flags & EXT4_MAP_MAPPED) {
3611
- iomap->type = IOMAP_MAPPED;
3612
- } else if (map.m_flags & EXT4_MAP_UNWRITTEN) {
3613
- iomap->type = IOMAP_UNWRITTEN;
3614
- } else {
3615
- WARN_ON_ONCE(1);
3616
- return -EIO;
3617
- }
3618
- iomap->addr = (u64)map.m_pblk << blkbits;
3619
- }
3620
-
3621
- if (map.m_flags & EXT4_MAP_NEW)
3430
+ if (map->m_flags & EXT4_MAP_NEW)
36223431 iomap->flags |= IOMAP_F_NEW;
36233432
3433
+ iomap->bdev = inode->i_sb->s_bdev;
3434
+ iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
3435
+ iomap->offset = (u64) map->m_lblk << blkbits;
3436
+ iomap->length = (u64) map->m_len << blkbits;
3437
+
3438
+ if ((map->m_flags & EXT4_MAP_MAPPED) &&
3439
+ !ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3440
+ iomap->flags |= IOMAP_F_MERGED;
3441
+
3442
+ /*
3443
+ * Flags passed to ext4_map_blocks() for direct I/O writes can result
3444
+ * in m_flags having both EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits
3445
+ * set. In order for any allocated unwritten extents to be converted
3446
+ * into written extents correctly within the ->end_io() handler, we
3447
+ * need to ensure that the iomap->type is set appropriately. Hence, the
3448
+ * reason why we need to check whether the EXT4_MAP_UNWRITTEN bit has
3449
+ * been set first.
3450
+ */
3451
+ if (map->m_flags & EXT4_MAP_UNWRITTEN) {
3452
+ iomap->type = IOMAP_UNWRITTEN;
3453
+ iomap->addr = (u64) map->m_pblk << blkbits;
3454
+ } else if (map->m_flags & EXT4_MAP_MAPPED) {
3455
+ iomap->type = IOMAP_MAPPED;
3456
+ iomap->addr = (u64) map->m_pblk << blkbits;
3457
+ } else {
3458
+ iomap->type = IOMAP_HOLE;
3459
+ iomap->addr = IOMAP_NULL_ADDR;
3460
+ }
3461
+}
3462
+
3463
+static int ext4_iomap_alloc(struct inode *inode, struct ext4_map_blocks *map,
3464
+ unsigned int flags)
3465
+{
3466
+ handle_t *handle;
3467
+ u8 blkbits = inode->i_blkbits;
3468
+ int ret, dio_credits, m_flags = 0, retries = 0;
3469
+
3470
+ /*
3471
+ * Trim the mapping request to the maximum value that we can map at
3472
+ * once for direct I/O.
3473
+ */
3474
+ if (map->m_len > DIO_MAX_BLOCKS)
3475
+ map->m_len = DIO_MAX_BLOCKS;
3476
+ dio_credits = ext4_chunk_trans_blocks(inode, map->m_len);
3477
+
3478
+retry:
3479
+ /*
3480
+ * Either we allocate blocks and then don't get an unwritten extent, so
3481
+ * in that case we have reserved enough credits. Or, the blocks are
3482
+ * already allocated and unwritten. In that case, the extent conversion
3483
+ * fits into the credits as well.
3484
+ */
3485
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
3486
+ if (IS_ERR(handle))
3487
+ return PTR_ERR(handle);
3488
+
3489
+ /*
3490
+ * DAX and direct I/O are the only two operations that are currently
3491
+ * supported with IOMAP_WRITE.
3492
+ */
3493
+ WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT));
3494
+ if (IS_DAX(inode))
3495
+ m_flags = EXT4_GET_BLOCKS_CREATE_ZERO;
3496
+ /*
3497
+ * We use i_size instead of i_disksize here because delalloc writeback
3498
+ * can complete at any point during the I/O and subsequently push the
3499
+ * i_disksize out to i_size. This could be beyond where direct I/O is
3500
+ * happening and thus expose allocated blocks to direct I/O reads.
3501
+ */
3502
+ else if (((loff_t)map->m_lblk << blkbits) >= i_size_read(inode))
3503
+ m_flags = EXT4_GET_BLOCKS_CREATE;
3504
+ else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3505
+ m_flags = EXT4_GET_BLOCKS_IO_CREATE_EXT;
3506
+
3507
+ ret = ext4_map_blocks(handle, inode, map, m_flags);
3508
+
3509
+ /*
3510
+ * We cannot fill holes in indirect tree based inodes as that could
3511
+ * expose stale data in the case of a crash. Use the magic error code
3512
+ * to fallback to buffered I/O.
3513
+ */
3514
+ if (!m_flags && !ret)
3515
+ ret = -ENOTBLK;
3516
+
3517
+ ext4_journal_stop(handle);
3518
+ if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
3519
+ goto retry;
3520
+
3521
+ return ret;
3522
+}
3523
+
3524
+
+static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
+{
+	int ret;
+	struct ext4_map_blocks map;
+	u8 blkbits = inode->i_blkbits;
+
+	if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+		return -ERANGE;
+
+	/*
+	 * Calculate the first and last logical blocks respectively.
+	 */
+	map.m_lblk = offset >> blkbits;
+	map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits,
+			  EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1;
+
+	if (flags & IOMAP_WRITE) {
+		/*
+		 * We check here if the blocks are already allocated, then we
+		 * don't need to start a journal txn and we can directly return
+		 * the mapping information. This could boost performance
+		 * especially in multi-threaded overwrite requests.
+		 */
+		if (offset + length <= i_size_read(inode)) {
+			ret = ext4_map_blocks(NULL, inode, &map, 0);
+			if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
+				goto out;
+		}
+		ret = ext4_iomap_alloc(inode, &map, flags);
+	} else {
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+	}
+
+	if (ret < 0)
+		return ret;
+out:
+
+	/*
+	 * When inline encryption is enabled, sometimes I/O to an encrypted file
+	 * has to be broken up to guarantee DUN contiguity. Handle this by
+	 * limiting the length of the mapping returned.
+	 */
+	map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
+
+	ext4_set_iomap(inode, iomap, &map, offset, length);
+
 	return 0;
+}
+
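A worked instance of the block-range arithmetic in ext4_iomap_begin(), assuming 4 KiB blocks (blkbits == 12); the numbers are illustrative only:

/*
 *   offset = 6144, length = 10000
 *   map.m_lblk = 6144 >> 12           = 1
 *   last byte  = 6144 + 10000 - 1     = 16143
 *   last block = 16143 >> 12          = 3
 *   map.m_len  = 3 - 1 + 1            = 3 blocks
 */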
+static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
+		loff_t length, unsigned flags, struct iomap *iomap,
+		struct iomap *srcmap)
+{
+	int ret;
+
+	/*
+	 * Even for writes we don't need to allocate blocks, so just pretend
+	 * we are reading to save overhead of starting a transaction.
+	 */
+	flags &= ~IOMAP_WRITE;
+	ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap);
+	WARN_ON_ONCE(iomap->type != IOMAP_MAPPED);
+	return ret;
 }
 
 static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 			  ssize_t written, unsigned flags, struct iomap *iomap)
 {
-	int ret = 0;
-	handle_t *handle;
-	int blkbits = inode->i_blkbits;
-	bool truncate = false;
-
-	if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
-		return 0;
-
-	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto orphan_del;
-	}
-	if (ext4_update_inode_size(inode, offset + written))
-		ext4_mark_inode_dirty(handle, inode);
 	/*
-	 * We may need to truncate allocated but not written blocks beyond EOF.
+	 * Check to see whether an error occurred while writing out the data to
+	 * the allocated blocks. If so, return the magic error code so that we
+	 * fallback to buffered I/O and attempt to complete the remainder of
+	 * the I/O. Any blocks that may have been allocated in preparation for
+	 * the direct I/O will be reused during buffered I/O.
 	 */
-	if (iomap->offset + iomap->length >
-	    ALIGN(inode->i_size, 1 << blkbits)) {
-		ext4_lblk_t written_blk, end_blk;
+	if (flags & (IOMAP_WRITE | IOMAP_DIRECT) && written == 0)
+		return -ENOTBLK;
 
-		written_blk = (offset + written) >> blkbits;
-		end_blk = (offset + length) >> blkbits;
-		if (written_blk < end_blk && ext4_can_truncate(inode))
-			truncate = true;
-	}
-	/*
-	 * Remove inode from orphan list if we were extending a inode and
-	 * everything went fine.
-	 */
-	if (!truncate && inode->i_nlink &&
-	    !list_empty(&EXT4_I(inode)->i_orphan))
-		ext4_orphan_del(handle, inode);
-	ext4_journal_stop(handle);
-	if (truncate) {
-		ext4_truncate_failed_write(inode);
-orphan_del:
-		/*
-		 * If truncate failed early the inode might still be on the
-		 * orphan list; we need to make sure the inode is removed from
-		 * the orphan list in that case.
-		 */
-		if (inode->i_nlink)
-			ext4_orphan_del(NULL, inode);
-	}
-	return ret;
+	return 0;
 }
 
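The -ENOTBLK convention only works if the caller catches it. A sketch of the expected caller-side shape follows; ext4_write_checked(), ext4_do_direct_write() and ext4_do_buffered_write() are hypothetical placeholder names, not functions added by this hunk:

static ssize_t ext4_write_checked(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t ret;

	ret = ext4_do_direct_write(iocb, from);	/* hypothetical DIO path */
	if (ret == -ENOTBLK)
		/* blocks allocated for the DIO attempt are reused here */
		ret = ext4_do_buffered_write(iocb, from); /* hypothetical */
	return ret;
}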
 const struct iomap_ops ext4_iomap_ops = {
@@ -3681,310 +3612,94 @@
 	.iomap_end = ext4_iomap_end,
 };
 
-static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-			   ssize_t size, void *private)
+const struct iomap_ops ext4_iomap_overwrite_ops = {
+	.iomap_begin = ext4_iomap_overwrite_begin,
+	.iomap_end = ext4_iomap_end,
+};
+
+static bool ext4_iomap_is_delalloc(struct inode *inode,
+				   struct ext4_map_blocks *map)
 {
-	ext4_io_end_t *io_end = private;
+	struct extent_status es;
+	ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1;
 
-	/* if not async direct IO just return */
-	if (!io_end)
-		return 0;
+	ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
+				  map->m_lblk, end, &es);
 
-	ext_debug("ext4_end_io_dio(): io_end 0x%p "
-		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
-		  io_end, io_end->inode->i_ino, iocb, offset, size);
+	if (!es.es_len || es.es_lblk > end)
+		return false;
+
+	if (es.es_lblk > map->m_lblk) {
+		map->m_len = es.es_lblk - map->m_lblk;
+		return false;
+	}
+
+	offset = map->m_lblk - es.es_lblk;
+	map->m_len = es.es_len - offset;
+
+	return true;
+}
+
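Two worked cases for ext4_iomap_is_delalloc(), assuming a query for blocks 10..19 (m_lblk = 10, m_len = 10); illustrative numbers only:

/*
 * 1) Delayed extent at 14..17: es_lblk (14) > m_lblk (10), so the result
 *    is trimmed to the gap in front of it, m_len = 14 - 10 = 4, and the
 *    function returns false (blocks 10..13 are not delalloc).
 * 2) Delayed extent at 8..17 (es_len = 10): offset = 10 - 8 = 2,
 *    m_len = 10 - 2 = 8, return true (blocks 10..17 are delalloc).
 */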
+static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
+				   loff_t length, unsigned int flags,
+				   struct iomap *iomap, struct iomap *srcmap)
+{
+	int ret;
+	bool delalloc = false;
+	struct ext4_map_blocks map;
+	u8 blkbits = inode->i_blkbits;
+
+	if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
+		return -EINVAL;
+
+	if (ext4_has_inline_data(inode)) {
+		ret = ext4_inline_data_iomap(inode, iomap);
+		if (ret != -EAGAIN) {
+			if (ret == 0 && offset >= iomap->length)
+				ret = -ENOENT;
+			return ret;
+		}
+	}
 
 	/*
-	 * Error during AIO DIO. We cannot convert unwritten extents as the
-	 * data was not written. Just clear the unwritten flag and drop io_end.
+	 * Calculate the first and last logical block respectively.
 	 */
-	if (size <= 0) {
-		ext4_clear_io_unwritten_flag(io_end);
-		size = 0;
+	map.m_lblk = offset >> blkbits;
+	map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits,
+			  EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1;
+
+	/*
+	 * Fiemap callers may call for offset beyond s_bitmap_maxbytes.
+	 * So handle it here itself instead of querying ext4_map_blocks().
+	 * Since ext4_map_blocks() will warn about it and will return
+	 * -EIO error.
+	 */
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+		if (offset >= sbi->s_bitmap_maxbytes) {
+			map.m_flags = 0;
+			goto set_iomap;
+		}
 	}
-	io_end->offset = offset;
-	io_end->size = size;
-	ext4_put_io_end(io_end);
+
+	ret = ext4_map_blocks(NULL, inode, &map, 0);
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		delalloc = ext4_iomap_is_delalloc(inode, &map);
+
+set_iomap:
+	ext4_set_iomap(inode, iomap, &map, offset, length);
+	if (delalloc && iomap->type == IOMAP_HOLE)
+		iomap->type = IOMAP_DELALLOC;
 
 	return 0;
 }
 
-/*
- * Handling of direct IO writes.
- *
- * For ext4 extent files, ext4 will do direct-io write even to holes,
- * preallocated extents, and those write extend the file, no need to
- * fall back to buffered IO.
- *
- * For holes, we fallocate those blocks, mark them as unwritten
- * If those blocks were preallocated, we mark sure they are split, but
- * still keep the range to write as unwritten.
- *
- * The unwritten extents will be converted to written when DIO is completed.
- * For async direct IO, since the IO may still pending when return, we
- * set up an end_io call back function, which will do the conversion
- * when async direct IO completed.
- *
- * If the O_DIRECT write will extend the file then add this inode to the
- * orphan list. So recovery will truncate it back to the original size
- * if the machine crashes during the write.
- *
- */
-static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	ssize_t ret;
-	loff_t offset = iocb->ki_pos;
-	size_t count = iov_iter_count(iter);
-	int overwrite = 0;
-	get_block_t *get_block_func = NULL;
-	int dio_flags = 0;
-	loff_t final_size = offset + count;
-	int orphan = 0;
-	handle_t *handle;
-
-	if (final_size > inode->i_size || final_size > ei->i_disksize) {
-		/* Credits for sb + inode write */
-		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			goto out;
-		}
-		ret = ext4_orphan_add(handle, inode);
-		if (ret) {
-			ext4_journal_stop(handle);
-			goto out;
-		}
-		orphan = 1;
-		ext4_update_i_disksize(inode, inode->i_size);
-		ext4_journal_stop(handle);
-	}
-
-	BUG_ON(iocb->private == NULL);
-
-	/*
-	 * Make all waiters for direct IO properly wait also for extent
-	 * conversion. This also disallows race between truncate() and
-	 * overwrite DIO as i_dio_count needs to be incremented under i_mutex.
-	 */
-	inode_dio_begin(inode);
-
-	/* If we do a overwrite dio, i_mutex locking can be released */
-	overwrite = *((int *)iocb->private);
-
-	if (overwrite)
-		inode_unlock(inode);
-
-	/*
-	 * For extent mapped files we could direct write to holes and fallocate.
-	 *
-	 * Allocated blocks to fill the hole are marked as unwritten to prevent
-	 * parallel buffered read to expose the stale data before DIO complete
-	 * the data IO.
-	 *
-	 * As to previously fallocated extents, ext4 get_block will just simply
-	 * mark the buffer mapped but still keep the extents unwritten.
-	 *
-	 * For non AIO case, we will convert those unwritten extents to written
-	 * after return back from blockdev_direct_IO. That way we save us from
-	 * allocating io_end structure and also the overhead of offloading
-	 * the extent convertion to a workqueue.
-	 *
-	 * For async DIO, the conversion needs to be deferred when the
-	 * IO is completed. The ext4 end_io callback function will be
-	 * called to take care of the conversion work. Here for async
-	 * case, we allocate an io_end structure to hook to the iocb.
-	 */
-	iocb->private = NULL;
-	if (overwrite)
-		get_block_func = ext4_dio_get_block_overwrite;
-	else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
-		 round_down(offset, i_blocksize(inode)) >= inode->i_size) {
-		get_block_func = ext4_dio_get_block;
-		dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
-	} else if (is_sync_kiocb(iocb)) {
-		get_block_func = ext4_dio_get_block_unwritten_sync;
-		dio_flags = DIO_LOCKING;
-	} else {
-		get_block_func = ext4_dio_get_block_unwritten_async;
-		dio_flags = DIO_LOCKING;
-	}
-	ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
-				   get_block_func, ext4_end_io_dio, NULL,
-				   dio_flags);
-
-	if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
-						EXT4_STATE_DIO_UNWRITTEN)) {
-		int err;
-		/*
-		 * for non AIO case, since the IO is already
-		 * completed, we could do the conversion right here
-		 */
-		err = ext4_convert_unwritten_extents(NULL, inode,
-						     offset, ret);
-		if (err < 0)
-			ret = err;
-		ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
-	}
-
-	inode_dio_end(inode);
-	/* take i_mutex locking again if we do a ovewrite dio */
-	if (overwrite)
-		inode_lock(inode);
-
-	if (ret < 0 && final_size > inode->i_size)
-		ext4_truncate_failed_write(inode);
-
-	/* Handle extending of i_size after direct IO write */
-	if (orphan) {
-		int err;
-
-		/* Credits for sb + inode write */
-		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-		if (IS_ERR(handle)) {
-			/*
-			 * We wrote the data but cannot extend
-			 * i_size. Bail out. In async io case, we do
-			 * not return error here because we have
-			 * already submmitted the corresponding
-			 * bio. Returning error here makes the caller
-			 * think that this IO is done and failed
-			 * resulting in race with bio's completion
-			 * handler.
-			 */
-			if (!ret)
-				ret = PTR_ERR(handle);
-			if (inode->i_nlink)
-				ext4_orphan_del(NULL, inode);
-
-			goto out;
-		}
-		if (inode->i_nlink)
-			ext4_orphan_del(handle, inode);
-		if (ret > 0) {
-			loff_t end = offset + ret;
-			if (end > inode->i_size || end > ei->i_disksize) {
-				ext4_update_i_disksize(inode, end);
-				if (end > inode->i_size)
-					i_size_write(inode, end);
-				/*
-				 * We're going to return a positive `ret'
-				 * here due to non-zero-length I/O, so there's
-				 * no way of reporting error returns from
-				 * ext4_mark_inode_dirty() to userspace. So
-				 * ignore it.
-				 */
-				ext4_mark_inode_dirty(handle, inode);
-			}
-		}
-		err = ext4_journal_stop(handle);
-		if (ret == 0)
-			ret = err;
-	}
-out:
-	return ret;
-}
-
-static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct address_space *mapping = iocb->ki_filp->f_mapping;
-	struct inode *inode = mapping->host;
-	size_t count = iov_iter_count(iter);
-	ssize_t ret;
-	loff_t offset = iocb->ki_pos;
-	loff_t size = i_size_read(inode);
-
-	if (offset >= size)
-		return 0;
-
-	/*
-	 * Shared inode_lock is enough for us - it protects against concurrent
-	 * writes & truncates and since we take care of writing back page cache,
-	 * we are protected against page writeback as well.
-	 */
-	if (iocb->ki_flags & IOCB_NOWAIT) {
-		if (!inode_trylock_shared(inode))
-			return -EAGAIN;
-	} else {
-		inode_lock_shared(inode);
-	}
-
-	ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
-					   iocb->ki_pos + count - 1);
-	if (ret)
-		goto out_unlock;
-	ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
-				   iter, ext4_dio_get_block, NULL, NULL, 0);
-out_unlock:
-	inode_unlock_shared(inode);
-	return ret;
-}
-
-static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
-	size_t count = iov_iter_count(iter);
-	loff_t offset = iocb->ki_pos;
-	ssize_t ret;
-	int rw = iov_iter_rw(iter);
-
-	if (!fscrypt_dio_supported(iocb, iter))
-		return 0;
-
-	if (fsverity_active(inode))
-		return 0;
-
-	/*
-	 * If we are doing data journalling we don't support O_DIRECT
-	 */
-	if (ext4_should_journal_data(inode))
-		return 0;
-
-	/* Let buffer I/O handle the inline data case. */
-	if (ext4_has_inline_data(inode))
-		return 0;
-
-	if (trace_android_fs_dataread_start_enabled() &&
-	    (rw == READ)) {
-		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
-
-		path = android_fstrace_get_pathname(pathbuf,
-						    MAX_TRACE_PATHBUF_LEN,
-						    inode);
-		trace_android_fs_dataread_start(inode, offset, count,
-						current->pid, path,
-						current->comm);
-	}
-	if (trace_android_fs_datawrite_start_enabled() &&
-	    (rw == WRITE)) {
-		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
-
-		path = android_fstrace_get_pathname(pathbuf,
-						    MAX_TRACE_PATHBUF_LEN,
-						    inode);
-		trace_android_fs_datawrite_start(inode, offset, count,
-						 current->pid, path,
-						 current->comm);
-	}
-	trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
-	if (iov_iter_rw(iter) == READ)
-		ret = ext4_direct_IO_read(iocb, iter);
-	else
-		ret = ext4_direct_IO_write(iocb, iter);
-	trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
-
-	if (trace_android_fs_dataread_start_enabled() &&
-	    (rw == READ))
-		trace_android_fs_dataread_end(inode, offset, count);
-	if (trace_android_fs_datawrite_start_enabled() &&
-	    (rw == WRITE))
-		trace_android_fs_datawrite_end(inode, offset, count);
-
-	return ret;
-}
+const struct iomap_ops ext4_iomap_report_ops = {
+	.iomap_begin = ext4_iomap_begin_report,
+};
 
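The report ops only supply ->iomap_begin, which is all the read-only consumers need. Presumed usage elsewhere in the tree (illustrative, not part of this hunk):

/*
 *	error = iomap_fiemap(inode, fieinfo, start, len,
 *			     &ext4_iomap_report_ops);
 *	offset = iomap_seek_hole(inode, offset, &ext4_iomap_report_ops);
 */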
 /*
  * Pages can be marked dirty completely asynchronously from ext4's journalling
@@ -4012,9 +3727,16 @@
 	return __set_page_dirty_buffers(page);
 }
 
+static int ext4_iomap_swap_activate(struct swap_info_struct *sis,
+				struct file *file, sector_t *span)
+{
+	return iomap_swapfile_activate(sis, file, span,
+				       &ext4_iomap_report_ops);
+}
+
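A note on the design choice above (based on generic iomap behavior, not restated in this patch):

/*
 * swapon() must see a fully allocated, stable mapping, so activation
 * walks the file with the read-only report ops; holes, delalloc and
 * unwritten extents make iomap_swapfile_activate() fail rather than
 * letting the kernel swap to unallocated space.
 */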
 static const struct address_space_operations ext4_aops = {
 	.readpage = ext4_readpage,
-	.readpages = ext4_readpages,
+	.readahead = ext4_readahead,
 	.writepage = ext4_writepage,
 	.writepages = ext4_writepages,
 	.write_begin = ext4_write_begin,
@@ -4023,15 +3745,16 @@
 	.bmap = ext4_bmap,
 	.invalidatepage = ext4_invalidatepage,
 	.releasepage = ext4_releasepage,
-	.direct_IO = ext4_direct_IO,
+	.direct_IO = noop_direct_IO,
 	.migratepage = buffer_migrate_page,
 	.is_partially_uptodate = block_is_partially_uptodate,
 	.error_remove_page = generic_error_remove_page,
+	.swap_activate = ext4_iomap_swap_activate,
 };
 
 static const struct address_space_operations ext4_journalled_aops = {
 	.readpage = ext4_readpage,
-	.readpages = ext4_readpages,
+	.readahead = ext4_readahead,
 	.writepage = ext4_writepage,
 	.writepages = ext4_writepages,
 	.write_begin = ext4_write_begin,
@@ -4040,26 +3763,28 @@
 	.bmap = ext4_bmap,
 	.invalidatepage = ext4_journalled_invalidatepage,
 	.releasepage = ext4_releasepage,
-	.direct_IO = ext4_direct_IO,
+	.direct_IO = noop_direct_IO,
 	.is_partially_uptodate = block_is_partially_uptodate,
 	.error_remove_page = generic_error_remove_page,
+	.swap_activate = ext4_iomap_swap_activate,
 };
 
 static const struct address_space_operations ext4_da_aops = {
 	.readpage = ext4_readpage,
-	.readpages = ext4_readpages,
+	.readahead = ext4_readahead,
 	.writepage = ext4_writepage,
 	.writepages = ext4_writepages,
 	.write_begin = ext4_da_write_begin,
 	.write_end = ext4_da_write_end,
 	.set_page_dirty = ext4_set_page_dirty,
 	.bmap = ext4_bmap,
-	.invalidatepage = ext4_da_invalidatepage,
+	.invalidatepage = ext4_invalidatepage,
 	.releasepage = ext4_releasepage,
-	.direct_IO = ext4_direct_IO,
+	.direct_IO = noop_direct_IO,
 	.migratepage = buffer_migrate_page,
 	.is_partially_uptodate = block_is_partially_uptodate,
 	.error_remove_page = generic_error_remove_page,
+	.swap_activate = ext4_iomap_swap_activate,
 };
 
 static const struct address_space_operations ext4_dax_aops = {
@@ -4068,6 +3793,7 @@
 	.set_page_dirty = noop_set_page_dirty,
 	.bmap = ext4_bmap,
 	.invalidatepage = noop_invalidatepage,
+	.swap_activate = ext4_iomap_swap_activate,
 };
 
 void ext4_set_aops(struct inode *inode)
@@ -4141,18 +3867,18 @@
 			set_buffer_uptodate(bh);
 
 	if (!buffer_uptodate(bh)) {
-		err = -EIO;
-		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
-		wait_on_buffer(bh);
-		/* Uhhuh. Read error. Complain and punt. */
-		if (!buffer_uptodate(bh))
+		err = ext4_read_bh_lock(bh, 0, true);
+		if (err)
 			goto unlock;
 		if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
 			/* We expect the key to be set. */
 			BUG_ON(!fscrypt_has_encryption_key(inode));
-			BUG_ON(blocksize != PAGE_SIZE);
-			WARN_ON_ONCE(fscrypt_decrypt_pagecache_blocks(
-						page, PAGE_SIZE, 0));
+			err = fscrypt_decrypt_pagecache_blocks(page, blocksize,
+							       bh_offset(bh));
+			if (err) {
+				clear_buffer_uptodate(bh);
+				goto unlock;
+			}
 		}
 	}
 	if (ext4_should_journal_data(inode)) {
@@ -4292,6 +4018,8 @@
 				      loff_t len)
 {
 	handle_t *handle;
+	int ret;
+
 	loff_t size = i_size_read(inode);
 
 	WARN_ON(!inode_is_locked(inode));
@@ -4305,10 +4033,10 @@
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ext4_update_i_disksize(inode, size);
-	ext4_mark_inode_dirty(handle, inode);
+	ret = ext4_mark_inode_dirty(handle, inode);
 	ext4_journal_stop(handle);
 
-	return 0;
+	return ret;
 }
 
 static void ext4_wait_dax_page(struct ext4_inode_info *ei)
@@ -4352,29 +4080,19 @@
  * Returns: 0 on success or negative on failure
  */
 
-int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 {
+	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	ext4_lblk_t first_block, stop_block;
 	struct address_space *mapping = inode->i_mapping;
-	loff_t first_block_offset, last_block_offset;
+	loff_t first_block_offset, last_block_offset, max_length;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	handle_t *handle;
 	unsigned int credits;
-	int ret = 0;
-
-	if (!S_ISREG(inode->i_mode))
-		return -EOPNOTSUPP;
+	int ret = 0, ret2 = 0;
 
 	trace_ext4_punch_hole(inode, offset, length, 0);
-
-	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
-	if (ext4_has_inline_data(inode)) {
-		down_write(&EXT4_I(inode)->i_mmap_sem);
-		ret = ext4_convert_inline_data(inode);
-		up_write(&EXT4_I(inode)->i_mmap_sem);
-		if (ret)
-			return ret;
-	}
 
 	/*
 	 * Write out all dirty pages to avoid race conditions
@@ -4403,6 +4121,14 @@
 			   offset;
 	}
 
+	/*
+	 * For punch hole the length + offset needs to be within one block
+	 * before last range. Adjust the length if it goes beyond that limit.
+	 */
+	max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize;
+	if (offset + length > max_length)
+		length = max_length - offset;
+
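Worked example of the clamp above (illustrative numbers: 4 KiB blocks, s_bitmap_maxbytes = 2^32 bytes):

/*
 *   max_length = 2^32 - 4096
 *   punching offset = 2^32 - 8192 with length = 16384 overshoots, so
 *   length is clamped to max_length - offset = 4096: the hole may end
 *   no closer than one block below the bitmap-addressable limit.
 */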
 	if (offset & (sb->s_blocksize - 1) ||
 	    (offset + length) & (sb->s_blocksize - 1)) {
 		/*
@@ -4417,6 +4143,10 @@
 
 	/* Wait all existing dio workers, newcomers will block on i_mutex */
 	inode_dio_wait(inode);
+
+	ret = file_modified(file);
+	if (ret)
+		goto out_mutex;
 
 	/*
 	 * Prevent page faults from reinstantiating pages we have released from
@@ -4464,7 +4194,7 @@
 	if (stop_block > first_block) {
 
 		down_write(&EXT4_I(inode)->i_data_sem);
-		ext4_discard_preallocations(inode);
+		ext4_discard_preallocations(inode, 0);
 
 		ret = ext4_es_remove_extent(inode, first_block,
 					    stop_block - first_block);
@@ -4482,11 +4212,14 @@
 
 		up_write(&EXT4_I(inode)->i_data_sem);
 	}
+	ext4_fc_track_range(handle, inode, first_block, stop_block);
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
 
 	inode->i_mtime = inode->i_ctime = current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
+	ret2 = ext4_mark_inode_dirty(handle, inode);
+	if (unlikely(ret2))
+		ret = ret2;
 	if (ret >= 0)
 		ext4_update_inode_fsync_trans(handle, inode, 1);
 out_stop:
@@ -4555,7 +4288,7 @@
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned int credits;
-	int err = 0;
+	int err = 0, err2;
 	handle_t *handle;
 	struct address_space *mapping = inode->i_mapping;
 
@@ -4569,9 +4302,7 @@
 	trace_ext4_truncate_enter(inode);
 
 	if (!ext4_can_truncate(inode))
-		return 0;
-
-	ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+		goto out_trace;
 
 	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
 		ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
@@ -4580,16 +4311,14 @@
 		int has_inline = 1;
 
 		err = ext4_inline_data_truncate(inode, &has_inline);
-		if (err)
-			return err;
-		if (has_inline)
-			return 0;
+		if (err || has_inline)
+			goto out_trace;
 	}
 
 	/* If we zero-out tail of the page, we have to create jinode for jbd2 */
 	if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
 		if (ext4_inode_attach_jinode(inode) < 0)
-			return 0;
+			goto out_trace;
 	}
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4598,8 +4327,10 @@
 		credits = ext4_blocks_for_truncate(inode);
 
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto out_trace;
+	}
 
 	if (inode->i_size & (inode->i_sb->s_blocksize - 1))
 		ext4_block_truncate_page(handle, mapping, inode->i_size);
@@ -4619,7 +4350,7 @@
 
 	down_write(&EXT4_I(inode)->i_data_sem);
 
-	ext4_discard_preallocations(inode);
+	ext4_discard_preallocations(inode, 0);
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		err = ext4_ext_truncate(handle, inode);
@@ -4645,9 +4376,12 @@
 		ext4_orphan_del(handle, inode);
 
 	inode->i_mtime = inode->i_ctime = current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
+	err2 = ext4_mark_inode_dirty(handle, inode);
+	if (unlikely(err2 && !err))
+		err = err2;
 	ext4_journal_stop(handle);
 
+out_trace:
 	trace_ext4_truncate_exit(inode);
 	return err;
 }
@@ -4658,21 +4392,22 @@
  * data in memory that is needed to recreate the on-disk version of this
  * inode.
  */
-static int __ext4_get_inode_loc(struct inode *inode,
-				struct ext4_iloc *iloc, int in_mem)
+static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino,
+				struct ext4_iloc *iloc, int in_mem,
+				ext4_fsblk_t *ret_block)
 {
 	struct ext4_group_desc *gdp;
 	struct buffer_head *bh;
-	struct super_block *sb = inode->i_sb;
 	ext4_fsblk_t block;
+	struct blk_plug plug;
 	int inodes_per_block, inode_offset;
 
 	iloc->bh = NULL;
-	if (inode->i_ino < EXT4_ROOT_INO ||
-	    inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
+	if (ino < EXT4_ROOT_INO ||
+	    ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
 		return -EFSCORRUPTED;
 
-	iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
+	iloc->block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
 	gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
 	if (!gdp)
 		return -EIO;
@@ -4681,7 +4416,7 @@
 	 * Figure out the offset within the block group inode table
 	 */
 	inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
-	inode_offset = ((inode->i_ino - 1) %
+	inode_offset = ((ino - 1) %
 			EXT4_INODES_PER_GROUP(sb));
 	block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
 	iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
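A worked example of the inode-table arithmetic above (illustrative: 4 KiB blocks, 256-byte inodes, 8192 inodes per group, so inodes_per_block = 16):

/*
 *   ino = 35:
 *   block_group  = (35 - 1) / 8192        = 0
 *   inode_offset = (35 - 1) % 8192        = 34
 *   block        = inode_table + 34 / 16  = inode_table + 2
 *   iloc->offset = (34 % 16) * 256        = 512 bytes into that block
 */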
@@ -4689,19 +4424,12 @@
 	bh = sb_getblk(sb, block);
 	if (unlikely(!bh))
 		return -ENOMEM;
+	if (ext4_simulate_fail(sb, EXT4_SIM_INODE_EIO))
+		goto simulate_eio;
 	if (!buffer_uptodate(bh)) {
 		lock_buffer(bh);
 
-		/*
-		 * If the buffer has the write error flag, we have failed
-		 * to write out another inode in the same block. In this
-		 * case, we don't have to read the block because we may
-		 * read the old inode data successfully.
-		 */
-		if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
-			set_buffer_uptodate(bh);
-
-		if (buffer_uptodate(bh)) {
+		if (ext4_buffer_uptodate(bh)) {
 			/* someone brought it uptodate while we waited */
 			unlock_buffer(bh);
 			goto has_buffer;
@@ -4753,6 +4481,7 @@
 		 * If we need to do any I/O, try to pre-readahead extra
 		 * blocks from the inode table.
 		 */
+		blk_start_plug(&plug);
 		if (EXT4_SB(sb)->s_inode_readahead_blks) {
 			ext4_fsblk_t b, end, table;
 			unsigned num;
@@ -4771,7 +4500,7 @@
 			if (end > table)
 				end = table;
 			while (b <= end)
-				sb_breadahead_unmovable(sb, b++);
+				ext4_sb_breadahead_unmovable(sb, b++);
 		}
 
 		/*
@@ -4779,14 +4508,14 @@
 		 * has in-inode xattrs, or we don't have this inode in memory.
 		 * Read the block from disk.
 		 */
-		trace_ext4_load_inode(inode);
-		get_bh(bh);
-		bh->b_end_io = end_buffer_read_sync;
-		submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
+		trace_ext4_load_inode(sb, ino);
+		ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL);
+		blk_finish_plug(&plug);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
-			EXT4_ERROR_INODE_BLOCK(inode, block,
-					       "unable to read itable block");
+		simulate_eio:
+			if (ret_block)
+				*ret_block = block;
 			brelse(bh);
 			return -EIO;
 		}
@@ -4796,16 +4525,50 @@
 	return 0;
 }
 
-int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
+static int __ext4_get_inode_loc_noinmem(struct inode *inode,
+					struct ext4_iloc *iloc)
 {
-	/* We have all inode data except xattrs in memory here. */
-	return __ext4_get_inode_loc(inode, iloc,
-		!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
+	ext4_fsblk_t err_blk = 0;
+	int ret;
+
+	ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, 0,
+					&err_blk);
+
+	if (ret == -EIO)
+		ext4_error_inode_block(inode, err_blk, EIO,
+					"unable to read itable block");
+
+	return ret;
 }
 
-static bool ext4_should_use_dax(struct inode *inode)
+int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
 {
-	if (!test_opt(inode->i_sb, DAX))
+	ext4_fsblk_t err_blk = 0;
+	int ret;
+
+	/* We have all inode data except xattrs in memory here. */
+	ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc,
+		!ext4_test_inode_state(inode, EXT4_STATE_XATTR), &err_blk);
+
+	if (ret == -EIO)
+		ext4_error_inode_block(inode, err_blk, EIO,
+					"unable to read itable block");
+
+	return ret;
+}
+
+
+int ext4_get_fc_inode_loc(struct super_block *sb, unsigned long ino,
+			  struct ext4_iloc *iloc)
+{
+	return __ext4_get_inode_loc(sb, ino, iloc, 0, NULL);
+}
+
+static bool ext4_should_enable_dax(struct inode *inode)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+	if (test_opt2(inode->i_sb, DAX_NEVER))
 		return false;
 	if (!S_ISREG(inode->i_mode))
 		return false;
@@ -4817,13 +4580,20 @@
 		return false;
 	if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
 		return false;
-	return true;
+	if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags))
+		return false;
+	if (test_opt(inode->i_sb, DAX_ALWAYS))
+		return true;
+
+	return ext4_test_inode_flag(inode, EXT4_INODE_DAX);
 }
 
-void ext4_set_inode_flags(struct inode *inode)
+void ext4_set_inode_flags(struct inode *inode, bool init)
 {
 	unsigned int flags = EXT4_I(inode)->i_flags;
 	unsigned int new_fl = 0;
+
+	WARN_ON_ONCE(IS_DAX(inode) && init);
 
 	if (flags & EXT4_SYNC_FL)
 		new_fl |= S_SYNC;
@@ -4835,8 +4605,13 @@
 		new_fl |= S_NOATIME;
 	if (flags & EXT4_DIRSYNC_FL)
 		new_fl |= S_DIRSYNC;
-	if (ext4_should_use_dax(inode))
+
+	/* Because of the way inode_set_flags() works we must preserve S_DAX
+	 * here if already set. */
+	new_fl |= (inode->i_flags & S_DAX);
+	if (init && ext4_should_enable_dax(inode))
 		new_fl |= S_DAX;
+
 	if (flags & EXT4_ENCRYPT_FL)
 		new_fl |= S_ENCRYPTED;
 	if (flags & EXT4_CASEFOLD_FL)
@@ -4877,8 +4652,7 @@
 		__le32 *magic = (void *)raw_inode +
 				EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
 
-		if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
-		    EXT4_INODE_SIZE(inode->i_sb) &&
+		if (EXT4_INODE_HAS_XATTR_SPACE(inode) &&
 		    *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
 			ext4_set_inode_state(inode, EXT4_STATE_XATTR);
 			return ext4_find_inline_data_nolock(inode);
@@ -4937,7 +4711,7 @@
 	    (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
 		if (flags & EXT4_IGET_HANDLE)
 			return ERR_PTR(-ESTALE);
-		__ext4_error(sb, function, line,
+		__ext4_error(sb, function, line, EFSCORRUPTED, 0,
 			     "inode #%lu: comm %s: iget: illegal inode #",
 			     ino, current->comm);
 		return ERR_PTR(-EFSCORRUPTED);
@@ -4952,7 +4726,7 @@
 	ei = EXT4_I(inode);
 	iloc.bh = NULL;
 
-	ret = __ext4_get_inode_loc(inode, &iloc, 0);
+	ret = __ext4_get_inode_loc_noinmem(inode, &iloc);
 	if (ret < 0)
 		goto bad_inode;
 	raw_inode = ext4_raw_inode(&iloc);
@@ -4998,9 +4772,11 @@
 					       sizeof(gen));
 	}
 
-	if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
-		ext4_error_inode(inode, function, line, 0,
-				 "iget: checksum invalid");
+	if ((!ext4_inode_csum_verify(inode, raw_inode, ei) ||
+	    ext4_simulate_fail(sb, EXT4_SIM_INODE_CRC)) &&
+	    (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))) {
+		ext4_error_inode_err(inode, function, line, 0,
+				EFSBADCRC, "iget: checksum invalid");
 		ret = -EFSBADCRC;
 		goto bad_inode;
 	}
@@ -5049,7 +4825,7 @@
 	 * not initialized on a new filesystem. */
 	}
 	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
-	ext4_set_inode_flags(inode);
+	ext4_set_inode_flags(inode, true);
 	inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
 	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
 	if (ext4_has_feature_64bit(sb))
@@ -5088,6 +4864,7 @@
 	for (block = 0; block < EXT4_N_BLOCKS; block++)
 		ei->i_data[block] = raw_inode->i_block[block];
 	INIT_LIST_HEAD(&ei->i_orphan);
+	ext4_fc_init_inode(&ei->vfs_inode);
 
 	/*
 	 * Set transaction id's of transactions that have to be committed
@@ -5153,9 +4930,10 @@
 		goto bad_inode;
 	} else if (!ext4_has_inline_data(inode)) {
 		/* validate the block references in the inode */
-		if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-		    (S_ISLNK(inode->i_mode) &&
-		     !ext4_inode_is_fast_symlink(inode))) {
+		if (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) &&
+		    (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+		    (S_ISLNK(inode->i_mode) &&
+		     !ext4_inode_is_fast_symlink(inode)))) {
 			if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 				ret = ext4_ext_check_inode(inode);
 			else
@@ -5212,7 +4990,7 @@
 		goto bad_inode;
 	}
 	if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
-		EXT4_ERROR_INODE(inode,
+		ext4_error_inode(inode, function, line, 0,
 			 "casefold flag without casefold feature");
 	brelse(iloc.bh);
 
@@ -5264,21 +5042,22 @@
 	return 0;
 }
 
-struct other_inode {
-	unsigned long		orig_ino;
-	struct ext4_inode	*raw_inode;
-};
-
-static int other_inode_match(struct inode * inode, unsigned long ino,
-			     void *data)
+static void __ext4_update_other_inode_time(struct super_block *sb,
+					   unsigned long orig_ino,
+					   unsigned long ino,
+					   struct ext4_inode *raw_inode)
 {
-	struct other_inode *oi = (struct other_inode *) data;
+	struct inode *inode;
 
-	if ((inode->i_ino != ino) ||
-	    (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
+	inode = find_inode_by_ino_rcu(sb, ino);
+	if (!inode)
+		return;
+
+	if ((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
 			       I_DIRTY_INODE)) ||
 	    ((inode->i_state & I_DIRTY_TIME) == 0))
-		return 0;
+		return;
+
 	spin_lock(&inode->i_lock);
 	if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
 				I_DIRTY_INODE)) == 0) &&
@@ -5289,16 +5068,15 @@
 		spin_unlock(&inode->i_lock);
 
 		spin_lock(&ei->i_raw_lock);
-		EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
-		EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
-		EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
-		ext4_inode_csum_set(inode, oi->raw_inode, ei);
+		EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+		EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
+		EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
+		ext4_inode_csum_set(inode, raw_inode, ei);
 		spin_unlock(&ei->i_raw_lock);
-		trace_ext4_other_inode_update_time(inode, oi->orig_ino);
-		return -1;
+		trace_ext4_other_inode_update_time(inode, orig_ino);
+		return;
 	}
 	spin_unlock(&inode->i_lock);
-	return -1;
 }
 
 /*
@@ -5308,24 +5086,24 @@
 static void ext4_update_other_inodes_time(struct super_block *sb,
 					  unsigned long orig_ino, char *buf)
 {
-	struct other_inode oi;
 	unsigned long ino;
 	int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
 	int inode_size = EXT4_INODE_SIZE(sb);
 
-	oi.orig_ino = orig_ino;
 	/*
 	 * Calculate the first inode in the inode table block.  Inode
 	 * numbers are one-based.  That is, the first inode in a block
 	 * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1).
 	 */
 	ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
+	rcu_read_lock();
 	for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
 		if (ino == orig_ino)
 			continue;
-		oi.raw_inode = (struct ext4_inode *) buf;
-		(void) find_inode_nowait(sb, ino, other_inode_match, &oi);
+		__ext4_update_other_inode_time(sb, orig_ino, ino,
+					       (struct ext4_inode *)buf);
 	}
+	rcu_read_unlock();
 }
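Example of the rounding performed before the loop (illustrative, 16 inodes per block):

/*
 *   orig_ino = 35: ino = ((35 - 1) & ~15) + 1 = 33, so the buffer holds
 *   inodes 33..48 and the loop visits the 15 other in-core inodes that
 *   share the block with the one being written.
 */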
 
 /*
@@ -5535,12 +5313,12 @@
 		if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
 			return 0;
 
-		err = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
+		err = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
 						EXT4_I(inode)->i_sync_tid);
 	} else {
 		struct ext4_iloc iloc;
 
-		err = __ext4_get_inode_loc(inode, &iloc, 0);
+		err = __ext4_get_inode_loc_noinmem(inode, &iloc);
 		if (err)
 			return err;
 		/*
@@ -5550,8 +5328,8 @@
 		if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
 			sync_dirty_buffer(iloc.bh);
 		if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-			EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
-					       "IO error syncing inode");
+			ext4_error_inode_block(inode, iloc.bh->b_blocknr, EIO,
+					       "IO error syncing inode");
 			err = -EIO;
 		}
 		brelse(iloc.bh);
@@ -5664,6 +5442,7 @@
 		if (error)
 			return error;
 	}
+	ext4_fc_start_update(inode);
 	if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
 	    (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
 		handle_t *handle;
@@ -5687,6 +5466,7 @@
 
 		if (error) {
 			ext4_journal_stop(handle);
+			ext4_fc_stop_update(inode);
 			return error;
 		}
 		/* Update corresponding info in inode so that everything is in
@@ -5697,37 +5477,61 @@
 		inode->i_gid = attr->ia_gid;
 		error = ext4_mark_inode_dirty(handle, inode);
 		ext4_journal_stop(handle);
+		if (unlikely(error)) {
+			ext4_fc_stop_update(inode);
+			return error;
+		}
 	}
 
 	if (attr->ia_valid & ATTR_SIZE) {
 		handle_t *handle;
 		loff_t oldsize = inode->i_size;
-		int shrink = (attr->ia_size <= inode->i_size);
+		loff_t old_disksize;
+		int shrink = (attr->ia_size < inode->i_size);
 
 		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
 			struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 
-			if (attr->ia_size > sbi->s_bitmap_maxbytes)
+			if (attr->ia_size > sbi->s_bitmap_maxbytes) {
+				ext4_fc_stop_update(inode);
 				return -EFBIG;
+			}
 		}
-		if (!S_ISREG(inode->i_mode))
+		if (!S_ISREG(inode->i_mode)) {
+			ext4_fc_stop_update(inode);
 			return -EINVAL;
+		}
 
 		if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
 			inode_inc_iversion(inode);
 
-		if (ext4_should_order_data(inode) &&
-		    (attr->ia_size < inode->i_size)) {
-			error = ext4_begin_ordered_truncate(inode,
+		if (shrink) {
+			if (ext4_should_order_data(inode)) {
+				error = ext4_begin_ordered_truncate(inode,
 							    attr->ia_size);
-			if (error)
-				goto err_out;
+				if (error)
+					goto err_out;
+			}
+			/*
+			 * Blocks are going to be removed from the inode. Wait
+			 * for dio in flight.
+			 */
+			inode_dio_wait(inode);
 		}
+
+		down_write(&EXT4_I(inode)->i_mmap_sem);
+
+		rc = ext4_break_layouts(inode);
+		if (rc) {
+			up_write(&EXT4_I(inode)->i_mmap_sem);
+			goto err_out;
+		}
+
 		if (attr->ia_size != inode->i_size) {
 			handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
 			if (IS_ERR(handle)) {
 				error = PTR_ERR(handle);
-				goto err_out;
+				goto out_mmap_sem;
 			}
 			if (ext4_handle_valid(handle) && shrink) {
 				error = ext4_orphan_add(handle, inode);
@@ -5741,7 +5545,22 @@
 				inode->i_mtime = current_time(inode);
 				inode->i_ctime = inode->i_mtime;
 			}
+
+			if (shrink)
+				ext4_fc_track_range(handle, inode,
+					(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
+					inode->i_sb->s_blocksize_bits,
+					EXT_MAX_BLOCKS - 1);
+			else
+				ext4_fc_track_range(
+					handle, inode,
+					(oldsize > 0 ? oldsize - 1 : oldsize) >>
+					inode->i_sb->s_blocksize_bits,
+					(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
+					inode->i_sb->s_blocksize_bits);
+
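Worked example of the fast-commit ranges above (illustrative, s_blocksize_bits = 12):

/*
 *   shrink to ia_size = 5000:  track [4999 >> 12, EXT_MAX_BLOCKS - 1]
 *                              = [1, EXT_MAX_BLOCKS - 1]
 *   extend 5000 -> 20000:      track [4999 >> 12, 19999 >> 12] = [1, 4]
 *   i.e. a shrink invalidates everything from the new EOF block onward,
 *   while an extend only covers the blocks between old and new EOF.
 */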
 			down_write(&EXT4_I(inode)->i_data_sem);
+			old_disksize = EXT4_I(inode)->i_disksize;
 			EXT4_I(inode)->i_disksize = attr->ia_size;
 			rc = ext4_mark_inode_dirty(handle, inode);
 			if (!error)
@@ -5753,32 +5572,18 @@
 			 */
 			if (!error)
 				i_size_write(inode, attr->ia_size);
+			else
+				EXT4_I(inode)->i_disksize = old_disksize;
 			up_write(&EXT4_I(inode)->i_data_sem);
 			ext4_journal_stop(handle);
-			if (error) {
-				if (orphan && inode->i_nlink)
-					ext4_orphan_del(NULL, inode);
-				goto err_out;
+			if (error)
+				goto out_mmap_sem;
+			if (!shrink) {
+				pagecache_isize_extended(inode, oldsize,
+							 inode->i_size);
+			} else if (ext4_should_journal_data(inode)) {
+				ext4_wait_for_tail_page_commit(inode);
 			}
-		}
-		if (!shrink) {
-			pagecache_isize_extended(inode, oldsize, inode->i_size);
-		} else {
-			/*
-			 * Blocks are going to be removed from the inode. Wait
-			 * for dio in flight.
-			 */
-			inode_dio_wait(inode);
-		}
-		if (orphan && ext4_should_journal_data(inode))
-			ext4_wait_for_tail_page_commit(inode);
-		down_write(&EXT4_I(inode)->i_mmap_sem);
-
-		rc = ext4_break_layouts(inode);
-		if (rc) {
-			up_write(&EXT4_I(inode)->i_mmap_sem);
-			error = rc;
-			goto err_out;
 		}
 
 		/*
@@ -5786,11 +5591,16 @@
 		 * in data=journal mode to make pages freeable.
 		 */
 		truncate_pagecache(inode, inode->i_size);
-		if (shrink) {
+		/*
+		 * Call ext4_truncate() even if i_size didn't change to
+		 * truncate possible preallocated blocks.
+		 */
+		if (attr->ia_size <= oldsize) {
			rc = ext4_truncate(inode);
 			if (rc)
 				error = rc;
 		}
+out_mmap_sem:
 		up_write(&EXT4_I(inode)->i_mmap_sem);
 	}
 
....@@ -5824,7 +5636,8 @@
58245636 struct ext4_inode_info *ei = EXT4_I(inode);
58255637 unsigned int flags;
58265638
5827
- if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) {
5639
+ if ((request_mask & STATX_BTIME) &&
5640
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) {
58285641 stat->result_mask |= STATX_BTIME;
58295642 stat->btime.tv_sec = ei->i_crtime.tv_sec;
58305643 stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
....@@ -5993,7 +5806,14 @@
59935806 put_bh(iloc->bh);
59945807 return -EIO;
59955808 }
5996
- if (IS_I_VERSION(inode))
5809
+ ext4_fc_track_inode(handle, inode);
5810
+
5811
+ /*
5812
+ * ea_inodes are using i_version for storing reference count, don't
5813
+ * mess with it
5814
+ */
5815
+ if (IS_I_VERSION(inode) &&
5816
+ !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
59975817 inode_inc_iversion(inode);
59985818
59995819 /* the do_update_inode consumes one bh->b_count */
@@ -6107,9 +5927,8 @@
 	 * If this is felt to be critical, then e2fsck should be run to
 	 * force a large enough s_min_extra_isize.
 	 */
-	if (ext4_handle_valid(handle) &&
-	    jbd2_journal_extend(handle,
-				EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) != 0)
+	if (ext4_journal_extend(handle,
+				EXT4_DATA_TRANS_BLOCKS(inode->i_sb), 0) != 0)
 		return -ENOSPC;
 
 	if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
@@ -6178,7 +5997,8 @@
  * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
  * we start and wait on commits.
 */
-int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
+int __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode,
+			    const char *func, unsigned int line)
 {
 	struct ext4_iloc iloc;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -6188,13 +6008,18 @@
 	trace_ext4_mark_inode_dirty(inode, _RET_IP_);
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (err)
-		return err;
+		goto out;
 
 	if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize)
 		ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize,
 					       iloc, handle);
 
-	return ext4_mark_iloc_dirty(handle, inode, &iloc);
+	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+out:
+	if (unlikely(err))
+		ext4_error_inode_err(inode, func, line, 0, err,
+					"mark_inode_dirty error");
+	return err;
 }
 
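The rename to __ext4_mark_inode_dirty() implies a caller-facing wrapper that supplies the call site, presumably provided in ext4.h by the same series (an assumption; the header is not part of this hunk):

#define ext4_mark_inode_dirty(handle, inode) \
	__ext4_mark_inode_dirty((handle), (inode), __func__, __LINE__)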
 /*
@@ -6231,36 +6056,6 @@
 out:
 	return;
 }
-
-#if 0
-/*
- * Bind an inode's backing buffer_head into this transaction, to prevent
- * it from being flushed to disk early.  Unlike
- * ext4_reserve_inode_write, this leaves behind no bh reference and
- * returns no iloc structure, so the caller needs to repeat the iloc
- * lookup to mark the inode dirty later.
- */
-static int ext4_pin_inode(handle_t *handle, struct inode *inode)
-{
-	struct ext4_iloc iloc;
-
-	int err = 0;
-	if (handle) {
-		err = ext4_get_inode_loc(inode, &iloc);
-		if (!err) {
-			BUFFER_TRACE(iloc.bh, "get_write_access");
-			err = jbd2_journal_get_write_access(handle, iloc.bh);
-			if (!err)
-				err = ext4_handle_dirty_metadata(handle,
-								 NULL,
-								 iloc.bh);
-			brelse(iloc.bh);
-		}
-	}
-	ext4_std_error(inode->i_sb, err);
-	return err;
-}
-#endif
 
 int ext4_change_inode_journal_flag(struct inode *inode, int val)
 {
@@ -6341,6 +6136,8 @@
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
+	ext4_fc_mark_ineligible(inode->i_sb,
+		EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
 	err = ext4_mark_inode_dirty(handle, inode);
 	ext4_handle_sync(handle);
 	ext4_journal_stop(handle);
@@ -6354,13 +6151,14 @@
 	return !buffer_mapped(bh);
 }
 
-int ext4_page_mkwrite(struct vm_fault *vmf)
+vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = vmf->page;
 	loff_t size;
 	unsigned long len;
-	int ret;
+	int err;
+	vm_fault_t ret;
 	struct file *file = vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct address_space *mapping = inode->i_mapping;
@@ -6376,18 +6174,26 @@
 
 	down_read(&EXT4_I(inode)->i_mmap_sem);
 
-	ret = ext4_convert_inline_data(inode);
-	if (ret)
+	err = ext4_convert_inline_data(inode);
+	if (err)
 		goto out_ret;
+
+	/*
+	 * On data journalling we skip straight to the transaction handle:
+	 * there's no delalloc; page truncated will be checked later; the
+	 * early return w/ all buffers mapped (calculates size/len) can't
+	 * be used; and there's no dioread_nolock, so only ext4_get_block.
+	 */
+	if (ext4_should_journal_data(inode))
+		goto retry_alloc;
 
 	/* Delalloc case is easy... */
 	if (test_opt(inode->i_sb, DELALLOC) &&
-	    !ext4_should_journal_data(inode) &&
 	    !ext4_nonda_switch(inode->i_sb)) {
 		do {
-			ret = block_page_mkwrite(vma, vmf,
+			err = block_page_mkwrite(vma, vmf,
 						   ext4_da_get_block_prep);
-		} while (ret == -ENOSPC &&
+		} while (err == -ENOSPC &&
 			 ext4_should_retry_alloc(inode->i_sb, &retries));
 		goto out_ret;
 	}
@@ -6408,6 +6214,9 @@
 	/*
 	 * Return if we have all the buffers mapped. This avoids the need to do
 	 * journal_start/journal_stop which can block and take a long time
+	 *
+	 * This cannot be done for data journalling, as we have to add the
+	 * inode to the transaction's list to writeprotect pages on commit.
 	 */
 	if (page_has_buffers(page)) {
 		if (!ext4_walk_page_buffers(NULL, page_buffers(page),
@@ -6432,36 +6241,67 @@
 		ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
-	ret = block_page_mkwrite(vma, vmf, get_block);
-	if (!ret && ext4_should_journal_data(inode)) {
-		if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
-			  PAGE_SIZE, NULL, do_journal_get_write_access)) {
-			unlock_page(page);
-			ret = VM_FAULT_SIGBUS;
-			ext4_journal_stop(handle);
-			goto out;
+	/*
+	 * Data journalling can't use block_page_mkwrite() because it
+	 * will set_buffer_dirty() before do_journal_get_write_access()
+	 * thus might hit warning messages for dirty metadata buffers.
+	 */
+	if (!ext4_should_journal_data(inode)) {
+		err = block_page_mkwrite(vma, vmf, get_block);
+	} else {
+		lock_page(page);
+		size = i_size_read(inode);
+		/* Page got truncated from under us? */
+		if (page->mapping != mapping || page_offset(page) > size) {
+			ret = VM_FAULT_NOPAGE;
+			goto out_error;
 		}
-		ext4_set_inode_state(inode, EXT4_STATE_JDATA);
+
+		if (page->index == size >> PAGE_SHIFT)
+			len = size & ~PAGE_MASK;
+		else
+			len = PAGE_SIZE;
+
+		err = __block_write_begin(page, 0, len, ext4_get_block);
+		if (!err) {
+			ret = VM_FAULT_SIGBUS;
+			if (ext4_walk_page_buffers(handle, page_buffers(page),
+					0, len, NULL, do_journal_get_write_access))
+				goto out_error;
+			if (ext4_walk_page_buffers(handle, page_buffers(page),
+					0, len, NULL, write_end_fn))
+				goto out_error;
+			if (ext4_jbd2_inode_add_write(handle, inode,
+						      page_offset(page), len))
+				goto out_error;
+			ext4_set_inode_state(inode, EXT4_STATE_JDATA);
+		} else {
+			unlock_page(page);
+		}
 	}
 	ext4_journal_stop(handle);
-	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+	if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry_alloc;
 out_ret:
-	ret = block_page_mkwrite_return(ret);
+	ret = block_page_mkwrite_return(err);
 out:
 	up_read(&EXT4_I(inode)->i_mmap_sem);
 	sb_end_pagefault(inode->i_sb);
 	return ret;
+out_error:
+	unlock_page(page);
+	ext4_journal_stop(handle);
+	goto out;
 }
 
-int ext4_filemap_fault(struct vm_fault *vmf)
+vm_fault_t ext4_filemap_fault(struct vm_fault *vmf)
 {
 	struct inode *inode = file_inode(vmf->vma->vm_file);
-	int err;
+	vm_fault_t ret;
 
 	down_read(&EXT4_I(inode)->i_mmap_sem);
-	err = filemap_fault(vmf);
+	ret = filemap_fault(vmf);
 	up_read(&EXT4_I(inode)->i_mmap_sem);
 
-	return err;
+	return ret;
 }
64676307 }