Forked from ~ljy/RK356X_SDK_RELEASE

Author: hc
Date:   2023-12-09
Commit: 958e46acc8e900e8569dd467c1af9b8d2d019394
File:   kernel/fs/ext4/file.c
@@ -29,10 +29,60 @@
 #include <linux/pagevec.h>
 #include <linux/uio.h>
 #include <linux/mman.h>
+#include <linux/backing-dev.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "truncate.h"
+
+static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (!fscrypt_dio_supported(iocb, iter))
+		return false;
+	if (fsverity_active(inode))
+		return false;
+	if (ext4_should_journal_data(inode))
+		return false;
+	if (ext4_has_inline_data(inode))
+		return false;
+	return true;
+}
+
+static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	ssize_t ret;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!inode_trylock_shared(inode))
+			return -EAGAIN;
+	} else {
+		inode_lock_shared(inode);
+	}
+
+	if (!ext4_dio_supported(iocb, to)) {
+		inode_unlock_shared(inode);
+		/*
+		 * Fallback to buffered I/O if the operation being performed on
+		 * the inode is not supported by direct I/O. The IOCB_DIRECT
+		 * flag needs to be cleared here in order to ensure that the
+		 * direct I/O path within generic_file_read_iter() is not
+		 * taken.
+		 */
+		iocb->ki_flags &= ~IOCB_DIRECT;
+		return generic_file_read_iter(iocb, to);
+	}
+
+	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL,
+			   is_sync_kiocb(iocb));
+	inode_unlock_shared(inode);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
 
 #ifdef CONFIG_FS_DAX
 static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
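This hunk adds the iomap-based direct-read path; it matches the upstream series that moved ext4 direct I/O onto iomap_dio_rw() (merged around v5.5), as carried in this 5.10-based vendor kernel. From userspace the contract is the usual O_DIRECT one: offset, length, and buffer address should be block-aligned, or the request is rejected or forced down a slow path. A minimal, hedged sketch of a well-formed O_DIRECT read follows; the file name and the 4096-byte alignment are illustrative assumptions, not part of the patch.

/* Hedged sketch: O_DIRECT read with a properly aligned buffer.
 * "test.bin" and the 4096-byte alignment are illustrative assumptions. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	const size_t align = 4096;	/* assume 4 KiB fs block/sector size */
	const size_t len = 2 * align;	/* block-multiple length */
	void *buf;
	ssize_t n;
	int fd;

	fd = open("test.bin", O_RDONLY | O_DIRECT);
	if (fd < 0) { perror("open(O_DIRECT)"); return 1; }

	/* O_DIRECT wants an aligned user buffer; posix_memalign provides one. */
	if (posix_memalign(&buf, align, len)) {
		fprintf(stderr, "posix_memalign failed\n");
		close(fd);
		return 1;
	}

	/* Offset 0 is block-aligned, so this read can use the DIO fast path. */
	n = pread(fd, buf, len, 0);
	if (n < 0)
		perror("pread");
	else
		printf("read %zd bytes via O_DIRECT\n", n);

	free(buf);
	close(fd);
	return 0;
}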
@@ -65,16 +115,21 @@
 
 static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb))))
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
 
 	if (!iov_iter_count(to))
 		return 0; /* skip atime */
 
 #ifdef CONFIG_FS_DAX
-	if (IS_DAX(file_inode(iocb->ki_filp)))
+	if (IS_DAX(inode))
 		return ext4_dax_read_iter(iocb, to);
 #endif
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return ext4_dio_read_iter(iocb, to);
+
 	return generic_file_read_iter(iocb, to);
 }
 
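ext4_file_read_iter() now routes IOCB_DIRECT requests to the new helper. The IOCB_NOWAIT branch in ext4_dio_read_iter() corresponds to the RWF_NOWAIT flag of preadv2(2): if the shared inode lock cannot be taken immediately, userspace sees -EAGAIN instead of blocking. A hedged sketch (the file name is illustrative; RWF_NOWAIT needs glibc 2.27+ headers):

/* Hedged sketch: a read that refuses to block. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	int fd = open("test.bin", O_RDONLY);
	ssize_t n;

	if (fd < 0) { perror("open"); return 1; }

	n = preadv2(fd, &iov, 1, 0, RWF_NOWAIT);
	if (n < 0 && errno == EAGAIN)
		puts("would block: retry later or fall back to a blocking read");
	else if (n < 0)
		perror("preadv2");
	else
		printf("read %zd bytes without blocking\n", n);

	close(fd);
	return 0;
}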
@@ -92,23 +147,15 @@
 	/* if we are the last writer on the inode, drop the block reservation */
 	if ((filp->f_mode & FMODE_WRITE) &&
 	    (atomic_read(&inode->i_writecount) == 1) &&
-	    !EXT4_I(inode)->i_reserved_data_blocks)
-	{
+	    !EXT4_I(inode)->i_reserved_data_blocks) {
 		down_write(&EXT4_I(inode)->i_data_sem);
-		ext4_discard_preallocations(inode);
+		ext4_discard_preallocations(inode, 0);
 		up_write(&EXT4_I(inode)->i_data_sem);
 	}
 	if (is_dx(inode) && filp->private_data)
 		ext4_htree_free_dir_info(filp->private_data);
 
 	return 0;
-}
-
-static void ext4_unwritten_wait(struct inode *inode)
-{
-	wait_queue_head_t *wq = ext4_ioend_wq(inode);
-
-	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
 }
 
 /*
@@ -120,19 +167,25 @@
  * threads are at work on the same unwritten block, they must be synchronized
  * or one thread will zero the other's data, causing corruption.
  */
-static int
-ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
+static bool
+ext4_unaligned_io(struct inode *inode, struct iov_iter *from, loff_t pos)
 {
 	struct super_block *sb = inode->i_sb;
-	int blockmask = sb->s_blocksize - 1;
-
-	if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize))
-		return 0;
+	unsigned long blockmask = sb->s_blocksize - 1;
 
 	if ((pos | iov_iter_alignment(from)) & blockmask)
-		return 1;
+		return true;
 
-	return 0;
+	return false;
+}
+
+static bool
+ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
+{
+	if (offset + len > i_size_read(inode) ||
+	    offset + len > EXT4_I(inode)->i_disksize)
+		return true;
+	return false;
 }
 
 /* Is IO overwriting allocated and initialized blocks? */
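ext4_unaligned_io() reduces the alignment test to one bitmask: OR together the file position and iov_iter_alignment() (itself an OR of buffer addresses and lengths) and test the low blocksize bits. A standalone rendition of the same test, with assumed inputs (a 4096-byte block size stands in for sb->s_blocksize):

/* Hedged sketch mirroring ext4_unaligned_io(): an I/O is "unaligned" if the
 * file offset, buffer address, or length lands inside a filesystem block. */
#include <stdint.h>
#include <stdio.h>

static int is_unaligned(uint64_t pos, uintptr_t buf, uint64_t len,
			uint64_t blocksize)
{
	uint64_t blockmask = blocksize - 1;	/* blocksize is a power of two */

	/* Same trick as the kernel: OR everything, then test the low bits. */
	return ((pos | (uint64_t)buf | len) & blockmask) != 0;
}

int main(void)
{
	uint64_t bs = 4096;	/* illustrative block size */

	printf("pos=8192 len=4096 -> %s\n",
	       is_unaligned(8192, 0, 4096, bs) ? "unaligned" : "aligned");
	printf("pos=8700 len=4096 -> %s\n",
	       is_unaligned(8700, 0, 4096, bs) ? "unaligned" : "aligned");
	printf("pos=4096 len=512  -> %s\n",
	       is_unaligned(4096, 0, 512, bs) ? "unaligned" : "aligned");
	return 0;
}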
@@ -158,17 +211,18 @@
 	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
 }
 
-static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
+					 struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
+	if (unlikely(IS_IMMUTABLE(inode)))
+		return -EPERM;
+
 	ret = generic_write_checks(iocb, from);
 	if (ret <= 0)
 		return ret;
-
-	if (unlikely(IS_IMMUTABLE(inode)))
-		return -EPERM;
 
 	/*
 	 * If we have encountered a bitmap-format file, the size limit
@@ -181,15 +235,398 @@
 			return -EFBIG;
 		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
 	}
+
 	return iov_iter_count(from);
+}
+
+static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t ret, count;
+
+	count = ext4_generic_write_checks(iocb, from);
+	if (count <= 0)
+		return count;
+
+	ret = file_modified(iocb->ki_filp);
+	if (ret)
+		return ret;
+	return count;
+}
+
+static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
+					struct iov_iter *from)
+{
+	ssize_t ret;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		return -EOPNOTSUPP;
+
+	ext4_fc_start_update(inode);
+	inode_lock(inode);
+	ret = ext4_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
+	current->backing_dev_info = inode_to_bdi(inode);
+	ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
+	current->backing_dev_info = NULL;
+
+out:
+	inode_unlock(inode);
+	ext4_fc_stop_update(inode);
+	if (likely(ret > 0)) {
+		iocb->ki_pos += ret;
+		ret = generic_write_sync(iocb, ret);
+	}
+
+	return ret;
+}
+
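A note on the helper just added: the buffered path finishes with generic_write_sync(), so an O_SYNC/O_DSYNC write is flushed before the syscall returns. From userspace that is roughly equivalent to a plain write followed by fdatasync(); a hedged sketch with illustrative file names:

/* Hedged sketch: what generic_write_sync() means from userspace. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "hello, ext4\n";
	int fd;

	/* Variant 1: the kernel syncs for us on each write (O_DSYNC). */
	fd = open("sync-a.txt", O_WRONLY | O_CREAT | O_DSYNC, 0644);
	if (fd < 0) { perror("open O_DSYNC"); return 1; }
	if (write(fd, msg, strlen(msg)) < 0) perror("write");
	close(fd);

	/* Variant 2: plain buffered write, then an explicit fdatasync(). */
	fd = open("sync-b.txt", O_WRONLY | O_CREAT, 0644);
	if (fd < 0) { perror("open"); return 1; }
	if (write(fd, msg, strlen(msg)) < 0) perror("write");
	if (fdatasync(fd) < 0) perror("fdatasync");
	close(fd);

	return 0;
}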
+static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
+					   ssize_t written, size_t count)
+{
+	handle_t *handle;
+	bool truncate = false;
+	u8 blkbits = inode->i_blkbits;
+	ext4_lblk_t written_blk, end_blk;
+	int ret;
+
+	/*
+	 * Note that EXT4_I(inode)->i_disksize can get extended up to
+	 * inode->i_size while the I/O was running due to writeback of delalloc
+	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
+	 * zeroed/unwritten extents if this is possible; thus we won't leave
+	 * uninitialized blocks in a file even if we didn't succeed in writing
+	 * as much as we intended.
+	 */
+	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
+	if (offset + count <= EXT4_I(inode)->i_disksize) {
+		/*
+		 * We need to ensure that the inode is removed from the orphan
+		 * list if it has been added prematurely, due to writeback of
+		 * delalloc blocks.
+		 */
+		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+
+			if (IS_ERR(handle)) {
+				ext4_orphan_del(NULL, inode);
+				return PTR_ERR(handle);
+			}
+
+			ext4_orphan_del(handle, inode);
+			ext4_journal_stop(handle);
+		}
+
+		return written;
+	}
+
+	if (written < 0)
+		goto truncate;
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+	if (IS_ERR(handle)) {
+		written = PTR_ERR(handle);
+		goto truncate;
+	}
+
+	if (ext4_update_inode_size(inode, offset + written)) {
+		ret = ext4_mark_inode_dirty(handle, inode);
+		if (unlikely(ret)) {
+			written = ret;
+			ext4_journal_stop(handle);
+			goto truncate;
+		}
+	}
+
+	/*
+	 * We may need to truncate allocated but not written blocks beyond EOF.
+	 */
+	written_blk = ALIGN(offset + written, 1 << blkbits);
+	end_blk = ALIGN(offset + count, 1 << blkbits);
+	if (written_blk < end_blk && ext4_can_truncate(inode))
+		truncate = true;
+
+	/*
+	 * Remove the inode from the orphan list if it has been extended and
+	 * everything went OK.
+	 */
+	if (!truncate && inode->i_nlink)
+		ext4_orphan_del(handle, inode);
+	ext4_journal_stop(handle);
+
+	if (truncate) {
+truncate:
+		ext4_truncate_failed_write(inode);
+		/*
+		 * If the truncate operation failed early, then the inode may
+		 * still be on the orphan list. In that case, we need to try
+		 * remove the inode from the in-memory linked list.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
+	return written;
+}
+
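The truncation decision above is pure block-rounding arithmetic: round both `offset + written` and `offset + count` up to the next block boundary, and if allocation went further than the data did, trim the tail. The same computation with sample numbers (the values are illustrative):

/* Hedged sketch of the written_blk/end_blk rounding, using the kernel's
 * ALIGN() definition. */
#include <stdint.h>
#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t blocksize = 1 << 12;	/* blkbits = 12 -> 4 KiB blocks */
	uint64_t offset = 10000;	/* where the write started */
	uint64_t count = 20000;		/* how much was requested */
	uint64_t written = 8192;	/* how much actually completed */

	uint64_t written_blk = ALIGN(offset + written, blocksize);
	uint64_t end_blk = ALIGN(offset + count, blocksize);

	printf("data reaches block boundary %llu, allocation reaches %llu\n",
	       (unsigned long long)written_blk, (unsigned long long)end_blk);
	if (written_blk < end_blk)
		puts("-> blocks were allocated beyond what was written; "
		     "the kernel truncates the excess");
	return 0;
}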
+static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
+				 int error, unsigned int flags)
+{
+	loff_t pos = iocb->ki_pos;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (error)
+		return error;
+
+	if (size && flags & IOMAP_DIO_UNWRITTEN) {
+		error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
+		if (error < 0)
+			return error;
+	}
+	/*
+	 * If we are extending the file, we have to update i_size here before
+	 * page cache gets invalidated in iomap_dio_rw(). Otherwise racing
+	 * buffered reads could zero out too much from page cache pages. Update
+	 * of on-disk size will happen later in ext4_dio_write_iter() where
+	 * we have enough information to also perform orphan list handling etc.
+	 * Note that we perform all extending writes synchronously under
+	 * i_rwsem held exclusively so i_size update is safe here in that case.
+	 * If the write was not extending, we cannot see pos > i_size here
+	 * because operations reducing i_size like truncate wait for all
+	 * outstanding DIO before updating i_size.
+	 */
+	pos += size;
+	if (pos > i_size_read(inode))
+		i_size_write(inode, pos);
+
+	return 0;
+}
+
+static const struct iomap_dio_ops ext4_dio_write_ops = {
+	.end_io = ext4_dio_write_end_io,
+};
+
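ext4_dio_write_end_io() converts extents that the DIO wrote through as unwritten. That state is observable from userspace with FIEMAP: fallocate(2) creates extents flagged FIEMAP_EXTENT_UNWRITTEN, and once data actually lands (through this completion path for direct writes, or through ordinary writeback for buffered ones) the flag clears. A hedged sketch; the path, sizes, and 16-extent cap are illustrative:

/* Hedged sketch: watching the UNWRITTEN flag flip via FS_IOC_FIEMAP. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/fiemap.h>
#include <linux/fs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

static void dump_extents(int fd, const char *when)
{
	struct fiemap *fm;
	unsigned int i;

	fm = calloc(1, sizeof(*fm) + 16 * sizeof(struct fiemap_extent));
	if (!fm)
		return;
	fm->fm_length = ~0ULL;		/* map the whole file */
	fm->fm_extent_count = 16;
	fm->fm_flags = FIEMAP_FLAG_SYNC;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0)
		for (i = 0; i < fm->fm_mapped_extents; i++)
			printf("%s: extent @%llu len %llu %s\n", when,
			       (unsigned long long)fm->fm_extents[i].fe_logical,
			       (unsigned long long)fm->fm_extents[i].fe_length,
			       (fm->fm_extents[i].fe_flags &
				FIEMAP_EXTENT_UNWRITTEN) ? "UNWRITTEN" : "written");
	free(fm);
}

int main(void)
{
	char buf[4096];
	int fd = open("fiemap-demo.bin", O_RDWR | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) { perror("open"); return 1; }
	if (fallocate(fd, 0, 0, 1 << 20) < 0) perror("fallocate");
	dump_extents(fd, "after fallocate");

	memset(buf, 'x', sizeof(buf));
	if (pwrite(fd, buf, sizeof(buf), 0) < 0) perror("pwrite");
	if (fdatasync(fd) < 0) perror("fdatasync");
	dump_extents(fd, "after write+sync");

	close(fd);
	return 0;
}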
+/*
+ * The intention here is to start with shared lock acquired then see if any
+ * condition requires an exclusive inode lock. If yes, then we restart the
+ * whole operation by releasing the shared lock and acquiring exclusive lock.
+ *
+ * - For unaligned_io we never take shared lock as it may cause data corruption
+ *   when two unaligned IO tries to modify the same block e.g. while zeroing.
+ *
+ * - For extending writes case we don't take the shared lock, since it requires
+ *   updating inode i_disksize and/or orphan handling with exclusive lock.
+ *
+ * - shared locking will only be true mostly with overwrites. Otherwise we will
+ *   switch to exclusive i_rwsem lock.
+ */
+static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
+				     bool *ilock_shared, bool *extend)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
+	loff_t offset;
+	size_t count;
+	ssize_t ret;
+
+restart:
+	ret = ext4_generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
+	offset = iocb->ki_pos;
+	count = ret;
+	if (ext4_extending_io(inode, offset, count))
+		*extend = true;
+	/*
+	 * Determine whether the IO operation will overwrite allocated
+	 * and initialized blocks.
+	 * We need exclusive i_rwsem for changing security info
+	 * in file_modified().
+	 */
+	if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
+	     !ext4_overwrite_io(inode, offset, count))) {
+		if (iocb->ki_flags & IOCB_NOWAIT) {
+			ret = -EAGAIN;
+			goto out;
+		}
+		inode_unlock_shared(inode);
+		*ilock_shared = false;
+		inode_lock(inode);
+		goto restart;
+	}
+
+	ret = file_modified(file);
+	if (ret < 0)
+		goto out;
+
+	return count;
+out:
+	if (*ilock_shared)
+		inode_unlock_shared(inode);
+	else
+		inode_unlock(inode);
+	return ret;
+}
+
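Both ext4_dio_write_checks() above and ext4_dio_write_iter() below refuse to block under IOCB_NOWAIT: upgrading the lock, serializing unaligned I/O, or extending the file all return -EAGAIN instead. From userspace this surfaces through pwritev2(2) with RWF_NOWAIT. A hedged sketch; the path, sizes, and alignment are illustrative:

/* Hedged sketch: an extending direct write under RWF_NOWAIT is expected to
 * fail with EAGAIN, because extension needs orphan/journal work that cannot
 * be done without blocking; the caller then retries on a blocking path. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	void *buf;
	struct iovec iov;
	ssize_t n;
	int fd = open("nowait-demo.bin", O_WRONLY | O_CREAT | O_DIRECT, 0644);

	if (fd < 0) { perror("open"); return 1; }
	if (posix_memalign(&buf, 4096, 4096)) {
		fprintf(stderr, "posix_memalign failed\n");
		close(fd);
		return 1;
	}
	memset(buf, 0, 4096);
	iov = (struct iovec){ .iov_base = buf, .iov_len = 4096 };

	/* Writing at offset 0 of an empty file extends i_size -> EAGAIN. */
	n = pwritev2(fd, &iov, 1, 0, RWF_NOWAIT);
	if (n < 0 && errno == EAGAIN)
		puts("extending DIO write would block; falling back");
	else if (n < 0)
		perror("pwritev2");

	/* A plain blocking pwrite() completes the same (aligned) I/O. */
	if (n < 0 && pwrite(fd, buf, 4096, 0) < 0)
		perror("pwrite");

	free(buf);
	close(fd);
	return 0;
}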
+static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t ret;
+	handle_t *handle;
+	struct inode *inode = file_inode(iocb->ki_filp);
+	loff_t offset = iocb->ki_pos;
+	size_t count = iov_iter_count(from);
+	const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
+	bool extend = false, unaligned_io = false;
+	bool ilock_shared = true;
+
+	/*
+	 * We initially start with shared inode lock unless it is
+	 * unaligned IO which needs exclusive lock anyways.
+	 */
+	if (ext4_unaligned_io(inode, from, offset)) {
+		unaligned_io = true;
+		ilock_shared = false;
+	}
+	/*
+	 * Quick check here without any i_rwsem lock to see if it is extending
+	 * IO. A more reliable check is done in ext4_dio_write_checks() with
+	 * proper locking in place.
+	 */
+	if (offset + count > i_size_read(inode))
+		ilock_shared = false;
+
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (ilock_shared) {
+			if (!inode_trylock_shared(inode))
+				return -EAGAIN;
+		} else {
+			if (!inode_trylock(inode))
+				return -EAGAIN;
+		}
+	} else {
+		if (ilock_shared)
+			inode_lock_shared(inode);
+		else
+			inode_lock(inode);
+	}
+
+	/* Fallback to buffered I/O if the inode does not support direct I/O. */
+	if (!ext4_dio_supported(iocb, from)) {
+		if (ilock_shared)
+			inode_unlock_shared(inode);
+		else
+			inode_unlock(inode);
+		return ext4_buffered_write_iter(iocb, from);
+	}
+
+	ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
+	if (ret <= 0)
+		return ret;
+
+	/* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
+	if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
+		ret = -EAGAIN;
+		goto out;
+	}
+	/*
+	 * Make sure inline data cannot be created anymore since we are going
+	 * to allocate blocks for DIO. We know the inode does not have any
+	 * inline data now because ext4_dio_supported() checked for that.
+	 */
+	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+
+	offset = iocb->ki_pos;
+	count = ret;
+
+	/*
+	 * Unaligned direct IO must be serialized among each other as zeroing
+	 * of partial blocks of two competing unaligned IOs can result in data
+	 * corruption.
+	 *
+	 * So we make sure we don't allow any unaligned IO in flight.
+	 * For IOs where we need not wait (like unaligned non-AIO DIO),
+	 * below inode_dio_wait() may anyway become a no-op, since we start
+	 * with exclusive lock.
+	 */
+	if (unaligned_io)
+		inode_dio_wait(inode);
+
+	if (extend) {
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+
+		ext4_fc_start_update(inode);
+		ret = ext4_orphan_add(handle, inode);
+		ext4_fc_stop_update(inode);
+		if (ret) {
+			ext4_journal_stop(handle);
+			goto out;
+		}
+
+		ext4_journal_stop(handle);
+	}
+
+	if (ilock_shared)
+		iomap_ops = &ext4_iomap_overwrite_ops;
+	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
+			   is_sync_kiocb(iocb) || unaligned_io || extend);
+	if (ret == -ENOTBLK)
+		ret = 0;
+
+	if (extend)
+		ret = ext4_handle_inode_extension(inode, offset, ret, count);
+
+out:
+	if (ilock_shared)
+		inode_unlock_shared(inode);
+	else
+		inode_unlock(inode);
+
+	if (ret >= 0 && iov_iter_count(from)) {
+		ssize_t err;
+		loff_t endbyte;
+
+		offset = iocb->ki_pos;
+		err = ext4_buffered_write_iter(iocb, from);
+		if (err < 0)
+			return err;
+
+		/*
+		 * We need to ensure that the pages within the page cache for
+		 * the range covered by this I/O are written to disk and
+		 * invalidated. This is in attempt to preserve the expected
+		 * direct I/O semantics in the case we fallback to buffered I/O
+		 * to complete off the I/O request.
+		 */
+		ret += err;
+		endbyte = offset + err - 1;
+		err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
+						   offset, endbyte);
+		if (!err)
+			invalidate_mapping_pages(iocb->ki_filp->f_mapping,
+						 offset >> PAGE_SHIFT,
+						 endbyte >> PAGE_SHIFT);
+	}
+
+	return ret;
 }
 
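When a direct write completes through the buffered fallback, the tail of ext4_dio_write_iter() flushes and invalidates the affected page-cache range to keep O_DIRECT semantics. Userspace code that mixes buffered writes with O_DIRECT readers sometimes performs the same dance itself; a hedged analogue using sync_file_range(2) and posix_fadvise(2) (the path is illustrative):

/* Hedged sketch: userspace analogue of flush-then-invalidate. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char data[] = "buffered fallback payload";
	off_t off = 0;
	int err;
	int fd = open("fallback-demo.bin", O_WRONLY | O_CREAT, 0644);

	if (fd < 0) { perror("open"); return 1; }
	if (pwrite(fd, data, sizeof(data), off) < 0) { perror("pwrite"); return 1; }

	/* filemap_write_and_wait_range() analogue for the written range. */
	if (sync_file_range(fd, off, sizeof(data),
			    SYNC_FILE_RANGE_WAIT_BEFORE |
			    SYNC_FILE_RANGE_WRITE |
			    SYNC_FILE_RANGE_WAIT_AFTER) < 0)
		perror("sync_file_range");

	/* invalidate_mapping_pages() analogue: ask to drop the cached range. */
	err = posix_fadvise(fd, off, sizeof(data), POSIX_FADV_DONTNEED);
	if (err)
		fprintf(stderr, "posix_fadvise: %s\n", strerror(err));

	close(fd);
	return 0;
}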
 #ifdef CONFIG_FS_DAX
 static ssize_t
 ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
+	size_t count;
+	loff_t offset;
+	handle_t *handle;
+	bool extend = false;
+	struct inode *inode = file_inode(iocb->ki_filp);
 
 	if (iocb->ki_flags & IOCB_NOWAIT) {
 		if (!inode_trylock(inode))
@@ -197,17 +634,35 @@
 	} else {
 		inode_lock(inode);
 	}
+
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
-	ret = file_remove_privs(iocb->ki_filp);
-	if (ret)
-		goto out;
-	ret = file_update_time(iocb->ki_filp);
-	if (ret)
-		goto out;
+
+	offset = iocb->ki_pos;
+	count = iov_iter_count(from);
+
+	if (offset + count > EXT4_I(inode)->i_disksize) {
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+
+		ret = ext4_orphan_add(handle, inode);
+		if (ret) {
+			ext4_journal_stop(handle);
+			goto out;
+		}
+
+		extend = true;
+		ext4_journal_stop(handle);
+	}
 
 	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+
+	if (extend)
+		ret = ext4_handle_inode_extension(inode, offset, ret, count);
 out:
 	inode_unlock(inode);
 	if (ret > 0)
@@ -220,10 +675,6 @@
 ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
-	int o_direct = iocb->ki_flags & IOCB_DIRECT;
-	int unaligned_aio = 0;
-	int overwrite = 0;
-	ssize_t ret;
 
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
@@ -232,61 +683,10 @@
 	if (IS_DAX(inode))
 		return ext4_dax_write_iter(iocb, from);
 #endif
-	if (!o_direct && (iocb->ki_flags & IOCB_NOWAIT))
-		return -EOPNOTSUPP;
-
-	if (!inode_trylock(inode)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
-			return -EAGAIN;
-		inode_lock(inode);
-	}
-
-	ret = ext4_write_checks(iocb, from);
-	if (ret <= 0)
-		goto out;
-
-	/*
-	 * Unaligned direct AIO must be serialized among each other as zeroing
-	 * of partial blocks of two competing unaligned AIOs can result in data
-	 * corruption.
-	 */
-	if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
-	    !is_sync_kiocb(iocb) &&
-	    ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
-		unaligned_aio = 1;
-		ext4_unwritten_wait(inode);
-	}
-
-	iocb->private = &overwrite;
-	/* Check whether we do a DIO overwrite or not */
-	if (o_direct && !unaligned_aio) {
-		if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
-			if (ext4_should_dioread_nolock(inode))
-				overwrite = 1;
-		} else if (iocb->ki_flags & IOCB_NOWAIT) {
-			ret = -EAGAIN;
-			goto out;
-		}
-	}
-
-	ret = __generic_file_write_iter(iocb, from);
-	/*
-	 * Unaligned direct AIO must be the only IO in flight. Otherwise
-	 * overlapping aligned IO after unaligned might result in data
-	 * corruption.
-	 */
-	if (ret == -EIOCBQUEUED && unaligned_aio)
-		ext4_unwritten_wait(inode);
-	inode_unlock(inode);
-
-	if (ret > 0)
-		ret = generic_write_sync(iocb, ret);
-
-	return ret;
-
-out:
-	inode_unlock(inode);
-	return ret;
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return ext4_dio_write_iter(iocb, from);
+	else
+		return ext4_buffered_write_iter(iocb, from);
 }
 
 #ifdef CONFIG_FS_DAX
@@ -368,20 +768,25 @@
 	.fault		= ext4_filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite	= ext4_page_mkwrite,
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	.allow_speculation = filemap_allow_speculation,
+#endif
 };
 
 static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file->f_mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct dax_device *dax_dev = sbi->s_daxdev;
 
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+	if (unlikely(ext4_forced_shutdown(sbi)))
 		return -EIO;
 
 	/*
-	 * We don't support synchronous mappings for non-DAX files. At least
-	 * until someone comes with a sensible use case.
+	 * We don't support synchronous mappings for non-DAX files and
+	 * for DAX files if underneath dax_device is not synchronous.
 	 */
-	if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
+	if (!daxdev_mapping_supported(vma, dax_dev))
 		return -EOPNOTSUPP;
 
 	file_accessed(file);
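daxdev_mapping_supported() now rejects MAP_SYNC mappings unless the backing dax_device is synchronous. The failure is visible to userspace as EOPNOTSUPP from mmap(2). A hedged probe (the path is illustrative; the MAP_* fallback defines cover older headers and use the upstream values):

/* Hedged sketch: probing for MAP_SYNC support on a file. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MAP_SHARED_VALIDATE
#define MAP_SHARED_VALIDATE 0x03
#endif
#ifndef MAP_SYNC
#define MAP_SYNC 0x80000
#endif

int main(void)
{
	int fd = open("mmap-demo.bin", O_RDWR | O_CREAT, 0644);
	void *p;

	if (fd < 0) { perror("open"); return 1; }
	if (ftruncate(fd, 4096) < 0) { perror("ftruncate"); return 1; }

	/* MAP_SYNC is only valid together with MAP_SHARED_VALIDATE. */
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		 MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p == MAP_FAILED && errno == EOPNOTSUPP)
		puts("MAP_SYNC unsupported here (expected on non-DAX ext4)");
	else if (p == MAP_FAILED)
		perror("mmap");
	else
		puts("MAP_SYNC mapping established (DAX-capable)");

	close(fd);
	return 0;
}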
@@ -403,13 +808,13 @@
 	handle_t *handle;
 	int err;
 
-	if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
+	if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
 		return 0;
 
 	if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
 		return 0;
 
-	sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+	ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
 	/*
 	 * Sample where the filesystem has been mounted and
 	 * store it in the superblock for sysadmin convenience
@@ -432,7 +837,7 @@
 	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 	if (err)
 		goto out_journal;
-	strlcpy(sbi->s_es->s_last_mounted, cp,
+	strncpy(sbi->s_es->s_last_mounted, cp,
 		sizeof(sbi->s_es->s_last_mounted));
 	ext4_handle_dirty_super(handle, sb);
 out_journal:
@@ -442,7 +847,7 @@
 	return err;
 }
 
-static int ext4_file_open(struct inode * inode, struct file * filp)
+static int ext4_file_open(struct inode *inode, struct file *filp)
 {
 	int ret;
 
@@ -471,7 +876,7 @@
 		return ret;
 	}
 
-	filp->f_mode |= FMODE_NOWAIT;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 	return dquot_file_open(inode, filp);
 }
 
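FMODE_BUF_RASYNC advertises that buffered reads can be completed asynchronously, which io_uring uses to serve page-cache reads without punting to a worker thread. A hedged sketch using liburing (link with -luring; the path is illustrative):

/* Hedged sketch: an async buffered read through io_uring. */
#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char buf[4096];
	int ret;
	int fd = open("test.bin", O_RDONLY);

	if (fd < 0) { perror("open"); return 1; }
	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
	io_uring_submit(&ring);

	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		printf("async buffered read returned %d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}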
@@ -496,12 +901,14 @@
 						maxbytes, i_size_read(inode));
 	case SEEK_HOLE:
 		inode_lock_shared(inode);
-		offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
+		offset = iomap_seek_hole(inode, offset,
+					 &ext4_iomap_report_ops);
 		inode_unlock_shared(inode);
 		break;
 	case SEEK_DATA:
 		inode_lock_shared(inode);
-		offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
+		offset = iomap_seek_data(inode, offset,
+					 &ext4_iomap_report_ops);
 		inode_unlock_shared(inode);
 		break;
 	}
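SEEK_HOLE/SEEK_DATA are now served by ext4_iomap_report_ops; the behaviour visible to userspace is unchanged. A hedged sketch that creates a sparse file and walks it (path and offsets are illustrative; ext4 reports holes at block granularity):

/* Hedged sketch: locating holes and data with lseek(2). */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char data[] = "data block";
	off_t hole, next_data;
	int fd = open("sparse-demo.bin", O_RDWR | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) { perror("open"); return 1; }

	/* Data at offset 0, then a hole, then data at 1 MiB. */
	if (pwrite(fd, data, sizeof(data), 0) < 0) perror("pwrite");
	if (pwrite(fd, data, sizeof(data), 1 << 20) < 0) perror("pwrite");

	hole = lseek(fd, 0, SEEK_HOLE);		/* first hole after 0 */
	next_data = lseek(fd, hole, SEEK_DATA);	/* data after the hole */
	printf("hole at %lld, next data at %lld\n",
	       (long long)hole, (long long)next_data);

	close(fd);
	return 0;
}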
@@ -515,6 +922,7 @@
 	.llseek		= ext4_llseek,
 	.read_iter	= ext4_file_read_iter,
 	.write_iter	= ext4_file_write_iter,
+	.iopoll		= iomap_dio_iopoll,
 	.unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,