..
 #include <linux/pagevec.h>
 #include <linux/uio.h>
 #include <linux/mman.h>
+#include <linux/backing-dev.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "truncate.h"
+
+static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (!fscrypt_dio_supported(iocb, iter))
+		return false;
+	if (fsverity_active(inode))
+		return false;
+	if (ext4_should_journal_data(inode))
+		return false;
+	if (ext4_has_inline_data(inode))
+		return false;
+	return true;
+}
+
+static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	ssize_t ret;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!inode_trylock_shared(inode))
+			return -EAGAIN;
+	} else {
+		inode_lock_shared(inode);
+	}
+
+	if (!ext4_dio_supported(iocb, to)) {
+		inode_unlock_shared(inode);
+		/*
+		 * Fall back to buffered I/O if the operation being performed
+		 * on the inode is not supported by direct I/O. The IOCB_DIRECT
+		 * flag needs to be cleared here in order to ensure that the
+		 * direct I/O path within generic_file_read_iter() is not
+		 * taken.
+		 */
+		iocb->ki_flags &= ~IOCB_DIRECT;
+		return generic_file_read_iter(iocb, to);
+	}
+
+	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL,
+			   is_sync_kiocb(iocb));
+	inode_unlock_shared(inode);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
 
 #ifdef CONFIG_FS_DAX
 static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
..
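Context for reviewers: IOCB_DIRECT is set on the kiocb whenever the file was opened with O_DIRECT, so the new ext4_dio_read_iter() is what now services ordinary O_DIRECT reads through iomap. A minimal userspace sketch of such a read follows; the mount path and the 4096-byte block size are assumptions (real code should query the filesystem block size, e.g. with fstatfs()).

```c
/* Sketch: a block-aligned direct read, as ext4_dio_read_iter() sees it.
 * Assumptions: /mnt/ext4/testfile exists and the fs block size is 4096.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	const size_t blksz = 4096;	/* assumed block size */
	void *buf;
	ssize_t n;
	int fd;

	fd = open("/mnt/ext4/testfile", O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Offset, length and buffer address must all be block aligned. */
	if (posix_memalign(&buf, blksz, blksz)) {
		close(fd);
		return 1;
	}

	n = pread(fd, buf, blksz, 0);
	if (n < 0)
		perror("pread");
	else
		printf("read %zd bytes via direct I/O\n", n);

	free(buf);
	close(fd);
	return 0;
}
```

If the inode cannot do DIO (verity, inline data, data journaling), the read still succeeds: the patch clears IOCB_DIRECT and falls back to buffered I/O rather than returning an error.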
 
 static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb))))
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
 
 	if (!iov_iter_count(to))
 		return 0; /* skip atime */
 
 #ifdef CONFIG_FS_DAX
-	if (IS_DAX(file_inode(iocb->ki_filp)))
+	if (IS_DAX(inode))
 		return ext4_dax_read_iter(iocb, to);
 #endif
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return ext4_dio_read_iter(iocb, to);
+
 	return generic_file_read_iter(iocb, to);
 }
 
..
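Both new iters honor IOCB_NOWAIT by using inode_trylock*() and returning -EAGAIN instead of sleeping. Userspace requests this per call with preadv2()/pwritev2() and RWF_NOWAIT; a sketch (the path is a placeholder, and RWF_NOWAIT requires a kernel and glibc recent enough to expose it):

```c
/* Sketch: a read that fails with EAGAIN rather than blocking. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	int fd = open("/mnt/ext4/testfile", O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return 1;

	n = preadv2(fd, &iov, 1, 0, RWF_NOWAIT);
	if (n < 0 && errno == EAGAIN)
		fprintf(stderr, "would block; retry from a slower path\n");
	else if (n >= 0)
		printf("read %zd bytes without blocking\n", n);

	close(fd);
	return 0;
}
```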
 	/* if we are the last writer on the inode, drop the block reservation */
 	if ((filp->f_mode & FMODE_WRITE) &&
 			(atomic_read(&inode->i_writecount) == 1) &&
-			!EXT4_I(inode)->i_reserved_data_blocks)
-	{
+			!EXT4_I(inode)->i_reserved_data_blocks) {
 		down_write(&EXT4_I(inode)->i_data_sem);
-		ext4_discard_preallocations(inode);
+		ext4_discard_preallocations(inode, 0);
 		up_write(&EXT4_I(inode)->i_data_sem);
 	}
 	if (is_dx(inode) && filp->private_data)
 		ext4_htree_free_dir_info(filp->private_data);
 
 	return 0;
-}
-
-static void ext4_unwritten_wait(struct inode *inode)
-{
-	wait_queue_head_t *wq = ext4_ioend_wq(inode);
-
-	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
 }
 
 /*
..
  * threads are at work on the same unwritten block, they must be synchronized
  * or one thread will zero the other's data, causing corruption.
  */
-static int
-ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
+static bool
+ext4_unaligned_io(struct inode *inode, struct iov_iter *from, loff_t pos)
 {
 	struct super_block *sb = inode->i_sb;
-	int blockmask = sb->s_blocksize - 1;
-
-	if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize))
-		return 0;
+	unsigned long blockmask = sb->s_blocksize - 1;
 
 	if ((pos | iov_iter_alignment(from)) & blockmask)
-		return 1;
+		return true;
 
-	return 0;
+	return false;
+}
+
+static bool
+ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
+{
+	if (offset + len > i_size_read(inode) ||
+	    offset + len > EXT4_I(inode)->i_disksize)
+		return true;
+	return false;
 }
 
 /* Is IO overwriting allocated and initialized blocks? */
..
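The rewritten helper collapses three alignment conditions into a single test: because s_blocksize is a power of two, OR-ing the file position with the iterator's worst-case alignment (itself the OR of buffer addresses and lengths) and masking with blocksize - 1 is non-zero iff anything is misaligned. Note the rewrite also drops the old past-EOF early return, so writes past i_size now count as unaligned unless block-aligned. A standalone illustration, with a simplified stand-in for iov_iter_alignment():

```c
/* Illustration of the single-mask alignment test in ext4_unaligned_io(). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for iov_iter_alignment(): worst alignment of address and length. */
static uintptr_t iter_alignment(const void *base, size_t len)
{
	return (uintptr_t)base | len;
}

static bool unaligned_io(uint64_t pos, const void *base, size_t len,
			 uint64_t blocksize)
{
	uint64_t blockmask = blocksize - 1;	/* blocksize: power of two */

	return ((pos | iter_alignment(base, len)) & blockmask) != 0;
}

int main(void)
{
	static char buf[8192] __attribute__((aligned(4096)));

	printf("%d\n", unaligned_io(0,   buf,     4096, 4096)); /* 0: aligned */
	printf("%d\n", unaligned_io(512, buf,     4096, 4096)); /* 1: pos    */
	printf("%d\n", unaligned_io(0,   buf + 1, 4096, 4096)); /* 1: addr   */
	printf("%d\n", unaligned_io(0,   buf,     4095, 4096)); /* 1: len    */
	return 0;
}
```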
 	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
 }
 
-static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
+					 struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
+	if (unlikely(IS_IMMUTABLE(inode)))
+		return -EPERM;
+
 	ret = generic_write_checks(iocb, from);
 	if (ret <= 0)
 		return ret;
-
-	if (unlikely(IS_IMMUTABLE(inode)))
-		return -EPERM;
 
 	/*
 	 * If we have encountered a bitmap-format file, the size limit
..
 			return -EFBIG;
 		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
 	}
+
 	return iov_iter_count(from);
+}
+
+static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t ret, count;
+
+	count = ext4_generic_write_checks(iocb, from);
+	if (count <= 0)
+		return count;
+
+	ret = file_modified(iocb->ki_filp);
+	if (ret)
+		return ret;
+	return count;
+}
+
+static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
+					struct iov_iter *from)
+{
+	ssize_t ret;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		return -EOPNOTSUPP;
+
+	inode_lock(inode);
+	ret = ext4_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
+	current->backing_dev_info = inode_to_bdi(inode);
+	ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
+	current->backing_dev_info = NULL;
+
+out:
+	inode_unlock(inode);
+	if (likely(ret > 0)) {
+		iocb->ki_pos += ret;
+		ret = generic_write_sync(iocb, ret);
+	}
+
+	return ret;
+}
+
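The buffered path advances ki_pos itself and then defers to generic_write_sync(), which flushes when the file is opened O_SYNC/O_DSYNC or the write carries IOCB_DSYNC. The per-call userspace form of that flag, as a sketch (placeholder path):

```c
/* Sketch: one durable write via RWF_DSYNC, roughly write() + fdatasync()
 * in a single call; this is what makes generic_write_sync() flush after
 * ext4_buffered_write_iter() returns.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	char msg[] = "durable record\n";
	struct iovec iov = { .iov_base = msg, .iov_len = strlen(msg) };
	int fd = open("/mnt/ext4/log", O_WRONLY | O_CREAT | O_APPEND, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Offset -1: use (and update) the current file offset. */
	if (pwritev2(fd, &iov, 1, -1, RWF_DSYNC) < 0)
		perror("pwritev2");

	close(fd);
	return 0;
}
```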
+static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
+					   ssize_t written, size_t count)
+{
+	handle_t *handle;
+	bool truncate = false;
+	u8 blkbits = inode->i_blkbits;
+	ext4_lblk_t written_blk, end_blk;
+	int ret;
+
+	/*
+	 * Note that EXT4_I(inode)->i_disksize can get extended up to
+	 * inode->i_size while the I/O was running due to writeback of delalloc
+	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
+	 * zeroed/unwritten extents if this is possible; thus we won't leave
+	 * uninitialized blocks in a file even if we didn't succeed in writing
+	 * as much as we intended.
+	 */
+	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
+	if (offset + count <= EXT4_I(inode)->i_disksize) {
+		/*
+		 * We need to ensure that the inode is removed from the orphan
+		 * list if it has been added prematurely, due to writeback of
+		 * delalloc blocks.
+		 */
+		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+
+			if (IS_ERR(handle)) {
+				ext4_orphan_del(NULL, inode);
+				return PTR_ERR(handle);
+			}
+
+			ext4_orphan_del(handle, inode);
+			ext4_journal_stop(handle);
+		}
+
+		return written;
+	}
+
+	if (written < 0)
+		goto truncate;
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+	if (IS_ERR(handle)) {
+		written = PTR_ERR(handle);
+		goto truncate;
+	}
+
+	if (ext4_update_inode_size(inode, offset + written)) {
+		ret = ext4_mark_inode_dirty(handle, inode);
+		if (unlikely(ret)) {
+			written = ret;
+			ext4_journal_stop(handle);
+			goto truncate;
+		}
+	}
+
+	/*
+	 * We may need to truncate allocated but not written blocks beyond EOF.
+	 */
+	written_blk = ALIGN(offset + written, 1 << blkbits);
+	end_blk = ALIGN(offset + count, 1 << blkbits);
+	if (written_blk < end_blk && ext4_can_truncate(inode))
+		truncate = true;
+
+	/*
+	 * Remove the inode from the orphan list if it has been extended and
+	 * everything went OK.
+	 */
+	if (!truncate && inode->i_nlink)
+		ext4_orphan_del(handle, inode);
+	ext4_journal_stop(handle);
+
+	if (truncate) {
+truncate:
+		ext4_truncate_failed_write(inode);
+		/*
+		 * If the truncate operation failed early, then the inode may
+		 * still be on the orphan list. In that case, we need to try
+		 * to remove the inode from the in-memory linked list.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
+	return written;
+}
+
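The control flow above deserves a close read: the `truncate:` label sits inside the `if (truncate)` block, so failure paths `goto` straight into the cleanup code, while the success path enters it only when the condition holds. Jumping into a compound statement is legal C (only jumps into the scope of a variably modified type are forbidden); a tiny standalone demo:

```c
/* Demo of the label-inside-if pattern used by ext4_handle_inode_extension().
 * On the goto path the if-condition is simply never evaluated.
 */
#include <stdbool.h>
#include <stdio.h>

static void demo(bool fail_early, bool need_cleanup)
{
	if (fail_early)
		goto cleanup;		/* jumps into the if-body below */

	printf("normal path\n");

	if (need_cleanup) {
cleanup:
		printf("cleanup runs\n");
	}
}

int main(void)
{
	demo(true, false);	/* prints only "cleanup runs" */
	demo(false, true);	/* prints both lines */
	return 0;
}
```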
+static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
+				 int error, unsigned int flags)
+{
+	loff_t pos = iocb->ki_pos;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (error)
+		return error;
+
+	if (size && flags & IOMAP_DIO_UNWRITTEN) {
+		error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
+		if (error < 0)
+			return error;
+	}
+	/*
+	 * If we are extending the file, we have to update i_size here before
+	 * page cache gets invalidated in iomap_dio_rw(). Otherwise racing
+	 * buffered reads could zero out too much from page cache pages. Update
+	 * of on-disk size will happen later in ext4_dio_write_iter() where
+	 * we have enough information to also perform orphan list handling etc.
+	 * Note that we perform all extending writes synchronously under
+	 * i_rwsem held exclusively so i_size update is safe here in that case.
+	 * If the write was not extending, we cannot see pos > i_size here
+	 * because operations reducing i_size like truncate wait for all
+	 * outstanding DIO before updating i_size.
+	 */
+	pos += size;
+	if (pos > i_size_read(inode))
+		i_size_write(inode, pos);
+
+	return 0;
+}
+
+static const struct iomap_dio_ops ext4_dio_write_ops = {
+	.end_io = ext4_dio_write_end_io,
+};
+
+/*
+ * The intention here is to start with a shared lock acquired, then see if any
+ * condition requires an exclusive inode lock. If yes, then we restart the
+ * whole operation by releasing the shared lock and acquiring the exclusive
+ * lock.
+ *
+ * - For unaligned IO we never take the shared lock, as it may cause data
+ *   corruption when two unaligned IOs try to modify the same block, e.g.
+ *   while zeroing.
+ *
+ * - For extending writes we don't take the shared lock, since extending
+ *   requires updating inode i_disksize and/or orphan handling with an
+ *   exclusive lock.
+ *
+ * - Shared locking will mostly only apply to overwrites; otherwise we
+ *   switch to the exclusive i_rwsem lock.
+ */
+static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
+				     bool *ilock_shared, bool *extend)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
+	loff_t offset;
+	size_t count;
+	ssize_t ret;
+
+restart:
+	ret = ext4_generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
+	offset = iocb->ki_pos;
+	count = ret;
+	if (ext4_extending_io(inode, offset, count))
+		*extend = true;
+	/*
+	 * Determine whether the IO operation will overwrite allocated
+	 * and initialized blocks.
+	 * We need exclusive i_rwsem for changing security info
+	 * in file_modified().
+	 */
+	if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
+	     !ext4_overwrite_io(inode, offset, count))) {
+		if (iocb->ki_flags & IOCB_NOWAIT) {
+			ret = -EAGAIN;
+			goto out;
+		}
+		inode_unlock_shared(inode);
+		*ilock_shared = false;
+		inode_lock(inode);
+		goto restart;
+	}
+
+	ret = file_modified(file);
+	if (ret < 0)
+		goto out;
+
+	return count;
+out:
+	if (*ilock_shared)
+		inode_unlock_shared(inode);
+	else
+		inode_unlock(inode);
+	return ret;
+}
+
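The same escalate-and-restart protocol can be sketched in userspace with a pthread_rwlock_t. The essential point, and the reason for the `goto restart` above, is that all checks must be repeated after escalation because the state may have changed while no lock was held. needs_exclusive() below is a stand-in for the overwrite/extend/IS_NOSEC tests:

```c
/* Sketch of the shared-then-exclusive locking pattern from
 * ext4_dio_write_checks(). Build with -lpthread.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

/* Stand-in for "does this write extend the file / change security info?" */
static bool needs_exclusive(void)
{
	return true;
}

static void locked_write(void)
{
	bool shared = true;

	pthread_rwlock_rdlock(&lock);
restart:
	if (shared && needs_exclusive()) {
		pthread_rwlock_unlock(&lock);
		shared = false;
		pthread_rwlock_wrlock(&lock);
		/* Revalidate: the world may have changed while unlocked. */
		goto restart;
	}

	printf("writing under %s lock\n", shared ? "shared" : "exclusive");
	pthread_rwlock_unlock(&lock);
}

int main(void)
{
	locked_write();
	return 0;
}
```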
+static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t ret;
+	handle_t *handle;
+	struct inode *inode = file_inode(iocb->ki_filp);
+	loff_t offset = iocb->ki_pos;
+	size_t count = iov_iter_count(from);
+	const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
+	bool extend = false, unaligned_io = false;
+	bool ilock_shared = true;
+
+	/*
+	 * We initially start with a shared inode lock unless it is
+	 * unaligned IO, which needs an exclusive lock anyway.
+	 */
+	if (ext4_unaligned_io(inode, from, offset)) {
+		unaligned_io = true;
+		ilock_shared = false;
+	}
+	/*
+	 * Quick check here without any i_rwsem lock to see if it is extending
+	 * IO. A more reliable check is done in ext4_dio_write_checks() with
+	 * proper locking in place.
+	 */
+	if (offset + count > i_size_read(inode))
+		ilock_shared = false;
+
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (ilock_shared) {
+			if (!inode_trylock_shared(inode))
+				return -EAGAIN;
+		} else {
+			if (!inode_trylock(inode))
+				return -EAGAIN;
+		}
+	} else {
+		if (ilock_shared)
+			inode_lock_shared(inode);
+		else
+			inode_lock(inode);
+	}
+
+	/* Fall back to buffered I/O if the inode does not support direct I/O. */
+	if (!ext4_dio_supported(iocb, from)) {
+		if (ilock_shared)
+			inode_unlock_shared(inode);
+		else
+			inode_unlock(inode);
+		return ext4_buffered_write_iter(iocb, from);
+	}
+
+	ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
+	if (ret <= 0)
+		return ret;
+
+	/* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
+	if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
+		ret = -EAGAIN;
+		goto out;
+	}
+	/*
+	 * Make sure inline data cannot be created anymore since we are going
+	 * to allocate blocks for DIO. We know the inode does not have any
+	 * inline data now because ext4_dio_supported() checked for that.
+	 */
+	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+
+	offset = iocb->ki_pos;
+	count = ret;
+
+	/*
+	 * Unaligned direct IO must be serialized among each other as zeroing
+	 * of partial blocks of two competing unaligned IOs can result in data
+	 * corruption.
+	 *
+	 * So we make sure we don't allow any unaligned IO in flight.
+	 * For IOs where we need not wait (like unaligned non-AIO DIO),
+	 * below inode_dio_wait() may anyway become a no-op, since we start
+	 * with an exclusive lock.
+	 */
+	if (unaligned_io)
+		inode_dio_wait(inode);
+
+	if (extend) {
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+
+		ret = ext4_orphan_add(handle, inode);
+		if (ret) {
+			ext4_journal_stop(handle);
+			goto out;
+		}
+
+		ext4_journal_stop(handle);
+	}
+
+	if (ilock_shared)
+		iomap_ops = &ext4_iomap_overwrite_ops;
+	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
+			   is_sync_kiocb(iocb) || unaligned_io || extend);
+	if (ret == -ENOTBLK)
+		ret = 0;
+
+	if (extend)
+		ret = ext4_handle_inode_extension(inode, offset, ret, count);
+
+out:
+	if (ilock_shared)
+		inode_unlock_shared(inode);
+	else
+		inode_unlock(inode);
+
+	if (ret >= 0 && iov_iter_count(from)) {
+		ssize_t err;
+		loff_t endbyte;
+
+		offset = iocb->ki_pos;
+		err = ext4_buffered_write_iter(iocb, from);
+		if (err < 0)
+			return err;
+
+		/*
+		 * We need to ensure that the pages within the page cache for
+		 * the range covered by this I/O are written to disk and
+		 * invalidated. This is in an attempt to preserve the expected
+		 * direct I/O semantics in the case we fall back to buffered
+		 * I/O to complete the I/O request.
+		 */
+		ret += err;
+		endbyte = offset + err - 1;
+		err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
+						   offset, endbyte);
+		if (!err)
+			invalidate_mapping_pages(iocb->ki_filp->f_mapping,
+						 offset >> PAGE_SHIFT,
+						 endbyte >> PAGE_SHIFT);
+	}
+
+	return ret;
 }
 
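The flush-plus-invalidate after the buffered fallback has a rough userspace analogue: sync_file_range() plays the role of filemap_write_and_wait_range(), and posix_fadvise(POSIX_FADV_DONTNEED) approximates invalidate_mapping_pages() (it only drops clean pages, hence the flush first). A sketch with a placeholder path; the analogy is approximate:

```c
/* Sketch: write through the page cache, push the range to disk, then
 * drop the cached pages so later O_DIRECT I/O doesn't see stale cache.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	static const char data[] = "tail completed via page cache\n";
	const off_t off = 0;
	const size_t len = sizeof(data) - 1;
	int fd = open("/mnt/ext4/testfile", O_WRONLY | O_CREAT, 0644);
	int err;

	if (fd < 0)
		return 1;

	if (pwrite(fd, data, len, off) < 0)
		perror("pwrite");

	/* Userspace counterpart of filemap_write_and_wait_range(). */
	if (sync_file_range(fd, off, len,
			    SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER))
		perror("sync_file_range");

	/* Userspace counterpart of invalidate_mapping_pages(); note that
	 * posix_fadvise() returns the error number instead of setting errno. */
	err = posix_fadvise(fd, off, len, POSIX_FADV_DONTNEED);
	if (err)
		fprintf(stderr, "posix_fadvise: %s\n", strerror(err));

	close(fd);
	return 0;
}
```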
 #ifdef CONFIG_FS_DAX
 static ssize_t
 ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
+	size_t count;
+	loff_t offset;
+	handle_t *handle;
+	bool extend = false;
+	struct inode *inode = file_inode(iocb->ki_filp);
 
 	if (iocb->ki_flags & IOCB_NOWAIT) {
 		if (!inode_trylock(inode))
..
 	} else {
 		inode_lock(inode);
 	}
+
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
-	ret = file_remove_privs(iocb->ki_filp);
-	if (ret)
-		goto out;
-	ret = file_update_time(iocb->ki_filp);
-	if (ret)
-		goto out;
+
+	offset = iocb->ki_pos;
+	count = iov_iter_count(from);
+
+	if (offset + count > EXT4_I(inode)->i_disksize) {
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+
+		ret = ext4_orphan_add(handle, inode);
+		if (ret) {
+			ext4_journal_stop(handle);
+			goto out;
+		}
+
+		extend = true;
+		ext4_journal_stop(handle);
+	}
 
 	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+
+	if (extend)
+		ret = ext4_handle_inode_extension(inode, offset, ret, count);
 out:
 	inode_unlock(inode);
 	if (ret > 0)
..
 ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
-	int o_direct = iocb->ki_flags & IOCB_DIRECT;
-	int unaligned_aio = 0;
-	int overwrite = 0;
-	ssize_t ret;
 
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
..
 	if (IS_DAX(inode))
 		return ext4_dax_write_iter(iocb, from);
 #endif
-	if (!o_direct && (iocb->ki_flags & IOCB_NOWAIT))
-		return -EOPNOTSUPP;
-
-	if (!inode_trylock(inode)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
-			return -EAGAIN;
-		inode_lock(inode);
-	}
-
-	ret = ext4_write_checks(iocb, from);
-	if (ret <= 0)
-		goto out;
-
-	/*
-	 * Unaligned direct AIO must be serialized among each other as zeroing
-	 * of partial blocks of two competing unaligned AIOs can result in data
-	 * corruption.
-	 */
-	if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
-	    !is_sync_kiocb(iocb) &&
-	    ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
-		unaligned_aio = 1;
-		ext4_unwritten_wait(inode);
-	}
-
-	iocb->private = &overwrite;
-	/* Check whether we do a DIO overwrite or not */
-	if (o_direct && !unaligned_aio) {
-		if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
-			if (ext4_should_dioread_nolock(inode))
-				overwrite = 1;
-		} else if (iocb->ki_flags & IOCB_NOWAIT) {
-			ret = -EAGAIN;
-			goto out;
-		}
-	}
-
-	ret = __generic_file_write_iter(iocb, from);
-	/*
-	 * Unaligned direct AIO must be the only IO in flight. Otherwise
-	 * overlapping aligned IO after unaligned might result in data
-	 * corruption.
-	 */
-	if (ret == -EIOCBQUEUED && unaligned_aio)
-		ext4_unwritten_wait(inode);
-	inode_unlock(inode);
-
-	if (ret > 0)
-		ret = generic_write_sync(iocb, ret);
-
-	return ret;
-
-out:
-	inode_unlock(inode);
-	return ret;
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return ext4_dio_write_iter(iocb, from);
+	else
+		return ext4_buffered_write_iter(iocb, from);
 }
 
 #ifdef CONFIG_FS_DAX
..
 	.fault		= ext4_filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite	= ext4_page_mkwrite,
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	.allow_speculation = filemap_allow_speculation,
+#endif
 };
 
 static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file->f_mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct dax_device *dax_dev = sbi->s_daxdev;
 
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+	if (unlikely(ext4_forced_shutdown(sbi)))
 		return -EIO;
 
 	/*
-	 * We don't support synchronous mappings for non-DAX files. At least
-	 * until someone comes with a sensible use case.
+	 * We don't support synchronous mappings for non-DAX files, nor for
+	 * DAX files whose underlying dax_device is not itself synchronous.
 	 */
-	if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
+	if (!daxdev_mapping_supported(vma, dax_dev))
 		return -EOPNOTSUPP;
 
 	file_accessed(file);
..
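daxdev_mapping_supported() is what now decides whether a MAP_SYNC request can be honored: the inode must be DAX and the underlying dax_device synchronous. From userspace the request looks like the sketch below; MAP_SHARED_VALIDATE is mandatory so the kernel rejects the flag instead of silently ignoring it (older toolchains may need <linux/mman.h> for MAP_SYNC, and the path is a placeholder):

```c
/* Sketch: request a synchronous mapping; fails with EOPNOTSUPP unless
 * the file is DAX on a synchronous dax_device.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 4096;
	int fd = open("/mnt/ext4/testfile", O_RDWR);
	void *p;

	if (fd < 0)
		return 1;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");		/* EOPNOTSUPP on non-DAX ext4 */
		close(fd);
		return 1;
	}

	/* With MAP_SYNC, stores are durable once CPU caches are flushed;
	 * no msync() is needed for the mapped metadata. */
	munmap(p, len);
	close(fd);
	return 0;
}
```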
 	handle_t *handle;
 	int err;
 
-	if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
+	if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
 		return 0;
 
 	if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
 		return 0;
 
-	sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+	ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
 	/*
 	 * Sample where the filesystem has been mounted and
 	 * store it in the superblock for sysadmin convenience
..
 	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 	if (err)
 		goto out_journal;
-	strlcpy(sbi->s_es->s_last_mounted, cp,
+	strncpy(sbi->s_es->s_last_mounted, cp,
 		sizeof(sbi->s_es->s_last_mounted));
 	ext4_handle_dirty_super(handle, sb);
 out_journal:
..
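On the strlcpy() to strncpy() switch: s_last_mounted is a fixed-width on-disk field, so the intended semantics are zero padding with no guaranteed NUL terminator, and any reader must bound its accesses to sizeof(field). A small demo of exactly what strncpy() does here:

```c
/* Demo: strncpy() zero-pads short strings but does NOT terminate when
 * the source fills the destination, unlike strlcpy()/strscpy().
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char field[8];

	strncpy(field, "/mnt", sizeof(field));
	/* field = "/mnt\0\0\0\0": padded, terminated as a side effect. */
	printf("short: '%.*s'\n", (int)sizeof(field), field);

	strncpy(field, "/mnt/data", sizeof(field));
	/* field = "/mnt/dat": truncated, NO terminator inside the field,
	 * so reads must be bounded, as with %.*s here. */
	printf("long:  '%.*s'\n", (int)sizeof(field), field);
	return 0;
}
```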
 	return err;
 }
 
-static int ext4_file_open(struct inode * inode, struct file * filp)
+static int ext4_file_open(struct inode *inode, struct file *filp)
 {
 	int ret;
 
..
 		return ret;
 	}
 
-	filp->f_mode |= FMODE_NOWAIT;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 	return dquot_file_open(inode, filp);
 }
 
..
 						maxbytes, i_size_read(inode));
 	case SEEK_HOLE:
 		inode_lock_shared(inode);
-		offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
+		offset = iomap_seek_hole(inode, offset,
+					 &ext4_iomap_report_ops);
 		inode_unlock_shared(inode);
 		break;
 	case SEEK_DATA:
 		inode_lock_shared(inode);
-		offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
+		offset = iomap_seek_data(inode, offset,
+					 &ext4_iomap_report_ops);
 		inode_unlock_shared(inode);
 		break;
 	}
..
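ext4_iomap_report_ops is a read-only iomap variant used purely to report extents, which is all the seek path needs; switching away from ext4_iomap_ops keeps allocation out of this path. The userspace interface served here is lseek() with SEEK_DATA/SEEK_HOLE; a sketch with a placeholder path:

```c
/* Sketch: enumerate a sparse file's data extents via SEEK_DATA/SEEK_HOLE,
 * the interface backed by iomap_seek_data()/iomap_seek_hole() above.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/ext4/sparsefile", O_RDONLY);
	off_t data = 0, hole;

	if (fd < 0)
		return 1;

	/* lseek() fails with ENXIO once no data remains past the offset. */
	while ((data = lseek(fd, data, SEEK_DATA)) >= 0) {
		hole = lseek(fd, data, SEEK_HOLE);	/* end of this extent */
		if (hole < 0)
			break;
		printf("data: [%lld, %lld)\n", (long long)data, (long long)hole);
		data = hole;
	}

	close(fd);
	return 0;
}
```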
 	.llseek		= ext4_llseek,
 	.read_iter	= ext4_file_read_iter,
 	.write_iter	= ext4_file_write_iter,
+	.iopoll		= iomap_dio_iopoll,
 	.unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,