Forked from ~ljy/RK356X_SDK_RELEASE

Author: hc
Date:   2023-12-09
Commit: 958e46acc8e900e8569dd467c1af9b8d2d019394
File:   kernel/fs/ext4/file.c
@@ -29,10 +29,60 @@
 #include <linux/pagevec.h>
 #include <linux/uio.h>
 #include <linux/mman.h>
+#include <linux/backing-dev.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "truncate.h"
+
+static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (!fscrypt_dio_supported(iocb, iter))
+		return false;
+	if (fsverity_active(inode))
+		return false;
+	if (ext4_should_journal_data(inode))
+		return false;
+	if (ext4_has_inline_data(inode))
+		return false;
+	return true;
+}
+
+static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	ssize_t ret;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!inode_trylock_shared(inode))
+			return -EAGAIN;
+	} else {
+		inode_lock_shared(inode);
+	}
+
+	if (!ext4_dio_supported(iocb, to)) {
+		inode_unlock_shared(inode);
+		/*
+		 * Fallback to buffered I/O if the operation being performed on
+		 * the inode is not supported by direct I/O. The IOCB_DIRECT
+		 * flag needs to be cleared here in order to ensure that the
+		 * direct I/O path within generic_file_read_iter() is not
+		 * taken.
+		 */
+		iocb->ki_flags &= ~IOCB_DIRECT;
+		return generic_file_read_iter(iocb, to);
+	}
+
+	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL,
+			   is_sync_kiocb(iocb));
+	inode_unlock_shared(inode);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
 
 #ifdef CONFIG_FS_DAX
 static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
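This hunk adds the iomap-based direct-read path; it matches the upstream series that moved ext4 direct I/O onto iomap_dio_rw() (merged around v5.5), as carried in this 5.10-based vendor kernel. From userspace the contract is the usual O_DIRECT one: offset, length, and buffer address should be block-aligned, or the request is rejected or forced down a slow path. A minimal, hedged sketch of a well-formed O_DIRECT read follows; the file name and the 4096-byte alignment are illustrative assumptions, not part of the patch.

/* Hedged sketch: O_DIRECT read with a properly aligned buffer.
 * "test.bin" and the 4096-byte alignment are illustrative assumptions. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	const size_t align = 4096;	/* assume 4 KiB fs block/sector size */
	const size_t len = 2 * align;	/* block-multiple length */
	void *buf;
	ssize_t n;
	int fd;

	fd = open("test.bin", O_RDONLY | O_DIRECT);
	if (fd < 0) { perror("open(O_DIRECT)"); return 1; }

	/* O_DIRECT wants an aligned user buffer; posix_memalign provides one. */
	if (posix_memalign(&buf, align, len)) {
		fprintf(stderr, "posix_memalign failed\n");
		close(fd);
		return 1;
	}

	/* Offset 0 is block-aligned, so this read can use the DIO fast path. */
	n = pread(fd, buf, len, 0);
	if (n < 0)
		perror("pread");
	else
		printf("read %zd bytes via O_DIRECT\n", n);

	free(buf);
	close(fd);
	return 0;
}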
@@ -65,16 +115,21 @@
 
 static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb))))
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
 
 	if (!iov_iter_count(to))
 		return 0; /* skip atime */
 
 #ifdef CONFIG_FS_DAX
-	if (IS_DAX(file_inode(iocb->ki_filp)))
+	if (IS_DAX(inode))
 		return ext4_dax_read_iter(iocb, to);
 #endif
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return ext4_dio_read_iter(iocb, to);
+
 	return generic_file_read_iter(iocb, to);
 }
 
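ext4_file_read_iter() now routes IOCB_DIRECT requests to the new helper. The IOCB_NOWAIT branch in ext4_dio_read_iter() corresponds to the RWF_NOWAIT flag of preadv2(2): if the shared inode lock cannot be taken immediately, userspace sees -EAGAIN instead of blocking. A hedged sketch (the file name is illustrative; RWF_NOWAIT needs glibc 2.27+ headers):

/* Hedged sketch: a read that refuses to block. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	int fd = open("test.bin", O_RDONLY);
	ssize_t n;

	if (fd < 0) { perror("open"); return 1; }

	n = preadv2(fd, &iov, 1, 0, RWF_NOWAIT);
	if (n < 0 && errno == EAGAIN)
		puts("would block: retry later or fall back to a blocking read");
	else if (n < 0)
		perror("preadv2");
	else
		printf("read %zd bytes without blocking\n", n);

	close(fd);
	return 0;
}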
@@ -92,23 +147,15 @@
 	/* if we are the last writer on the inode, drop the block reservation */
 	if ((filp->f_mode & FMODE_WRITE) &&
 	    (atomic_read(&inode->i_writecount) == 1) &&
-	    !EXT4_I(inode)->i_reserved_data_blocks)
-	{
+	    !EXT4_I(inode)->i_reserved_data_blocks) {
 		down_write(&EXT4_I(inode)->i_data_sem);
-		ext4_discard_preallocations(inode);
+		ext4_discard_preallocations(inode, 0);
 		up_write(&EXT4_I(inode)->i_data_sem);
 	}
 	if (is_dx(inode) && filp->private_data)
 		ext4_htree_free_dir_info(filp->private_data);
 
 	return 0;
-}
-
-static void ext4_unwritten_wait(struct inode *inode)
-{
-	wait_queue_head_t *wq = ext4_ioend_wq(inode);
-
-	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
 }
 
 /*
@@ -120,19 +167,25 @@
  * threads are at work on the same unwritten block, they must be synchronized
  * or one thread will zero the other's data, causing corruption.
  */
-static int
-ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
+static bool
+ext4_unaligned_io(struct inode *inode, struct iov_iter *from, loff_t pos)
 {
 	struct super_block *sb = inode->i_sb;
-	int blockmask = sb->s_blocksize - 1;
-
-	if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize))
-		return 0;
+	unsigned long blockmask = sb->s_blocksize - 1;
 
 	if ((pos | iov_iter_alignment(from)) & blockmask)
-		return 1;
+		return true;
 
-	return 0;
+	return false;
+}
+
+static bool
+ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
+{
+	if (offset + len > i_size_read(inode) ||
+	    offset + len > EXT4_I(inode)->i_disksize)
+		return true;
+	return false;
 }
 
 /* Is IO overwriting allocated and initialized blocks? */
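ext4_unaligned_io() reduces the alignment test to one bitmask: OR together the file position and iov_iter_alignment() (itself an OR of buffer addresses and lengths) and test the low blocksize bits. A standalone rendition of the same test, with assumed inputs (a 4096-byte block size stands in for sb->s_blocksize):

/* Hedged sketch mirroring ext4_unaligned_io(): an I/O is "unaligned" if the
 * file offset, buffer address, or length lands inside a filesystem block. */
#include <stdint.h>
#include <stdio.h>

static int is_unaligned(uint64_t pos, uintptr_t buf, uint64_t len,
			uint64_t blocksize)
{
	uint64_t blockmask = blocksize - 1;	/* blocksize is a power of two */

	/* Same trick as the kernel: OR everything, then test the low bits. */
	return ((pos | (uint64_t)buf | len) & blockmask) != 0;
}

int main(void)
{
	uint64_t bs = 4096;	/* illustrative block size */

	printf("pos=8192 len=4096 -> %s\n",
	       is_unaligned(8192, 0, 4096, bs) ? "unaligned" : "aligned");
	printf("pos=8700 len=4096 -> %s\n",
	       is_unaligned(8700, 0, 4096, bs) ? "unaligned" : "aligned");
	printf("pos=4096 len=512  -> %s\n",
	       is_unaligned(4096, 0, 512, bs) ? "unaligned" : "aligned");
	return 0;
}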
@@ -158,17 +211,18 @@
 	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
 }
 
-static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
+					 struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
+	if (unlikely(IS_IMMUTABLE(inode)))
+		return -EPERM;
+
 	ret = generic_write_checks(iocb, from);
 	if (ret <= 0)
 		return ret;
-
-	if (unlikely(IS_IMMUTABLE(inode)))
-		return -EPERM;
 
 	/*
 	 * If we have encountered a bitmap-format file, the size limit
@@ -181,15 +235,398 @@
 			return -EFBIG;
 		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
 	}
+
 	return iov_iter_count(from);
+}
+
+static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t ret, count;
+
+	count = ext4_generic_write_checks(iocb, from);
+	if (count <= 0)
+		return count;
+
+	ret = file_modified(iocb->ki_filp);
+	if (ret)
+		return ret;
+	return count;
+}
+
+static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
+					struct iov_iter *from)
+{
+	ssize_t ret;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		return -EOPNOTSUPP;
+
+	ext4_fc_start_update(inode);
+	inode_lock(inode);
+	ret = ext4_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
+	current->backing_dev_info = inode_to_bdi(inode);
+	ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
+	current->backing_dev_info = NULL;
+
+out:
+	inode_unlock(inode);
+	ext4_fc_stop_update(inode);
+	if (likely(ret > 0)) {
+		iocb->ki_pos += ret;
+		ret = generic_write_sync(iocb, ret);
+	}
+
+	return ret;
+}
+
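A note on the helper just added: the buffered path finishes with generic_write_sync(), so an O_SYNC/O_DSYNC write is flushed before the syscall returns. From userspace that is roughly equivalent to a plain write followed by fdatasync(); a hedged sketch with illustrative file names:

/* Hedged sketch: what generic_write_sync() means from userspace. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "hello, ext4\n";
	int fd;

	/* Variant 1: the kernel syncs for us on each write (O_DSYNC). */
	fd = open("sync-a.txt", O_WRONLY | O_CREAT | O_DSYNC, 0644);
	if (fd < 0) { perror("open O_DSYNC"); return 1; }
	if (write(fd, msg, strlen(msg)) < 0) perror("write");
	close(fd);

	/* Variant 2: plain buffered write, then an explicit fdatasync(). */
	fd = open("sync-b.txt", O_WRONLY | O_CREAT, 0644);
	if (fd < 0) { perror("open"); return 1; }
	if (write(fd, msg, strlen(msg)) < 0) perror("write");
	if (fdatasync(fd) < 0) perror("fdatasync");
	close(fd);

	return 0;
}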
+static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
+					   ssize_t written, size_t count)
+{
+	handle_t *handle;
+	bool truncate = false;
+	u8 blkbits = inode->i_blkbits;
+	ext4_lblk_t written_blk, end_blk;
+	int ret;
+
+	/*
+	 * Note that EXT4_I(inode)->i_disksize can get extended up to
+	 * inode->i_size while the I/O was running due to writeback of delalloc
+	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
+	 * zeroed/unwritten extents if this is possible; thus we won't leave
+	 * uninitialized blocks in a file even if we didn't succeed in writing
+	 * as much as we intended.
+	 */
+	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
+	if (offset + count <= EXT4_I(inode)->i_disksize) {
+		/*
+		 * We need to ensure that the inode is removed from the orphan
+		 * list if it has been added prematurely, due to writeback of
+		 * delalloc blocks.
+		 */
+		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+
+			if (IS_ERR(handle)) {
+				ext4_orphan_del(NULL, inode);
+				return PTR_ERR(handle);
+			}
+
+			ext4_orphan_del(handle, inode);
+			ext4_journal_stop(handle);
+		}
+
+		return written;
+	}
+
+	if (written < 0)
+		goto truncate;
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+	if (IS_ERR(handle)) {
+		written = PTR_ERR(handle);
+		goto truncate;
+	}
+
+	if (ext4_update_inode_size(inode, offset + written)) {
+		ret = ext4_mark_inode_dirty(handle, inode);
+		if (unlikely(ret)) {
+			written = ret;
+			ext4_journal_stop(handle);
+			goto truncate;
+		}
+	}
+
+	/*
+	 * We may need to truncate allocated but not written blocks beyond EOF.
+	 */
+	written_blk = ALIGN(offset + written, 1 << blkbits);
+	end_blk = ALIGN(offset + count, 1 << blkbits);
+	if (written_blk < end_blk && ext4_can_truncate(inode))
+		truncate = true;
+
+	/*
+	 * Remove the inode from the orphan list if it has been extended and
+	 * everything went OK.
+	 */
+	if (!truncate && inode->i_nlink)
+		ext4_orphan_del(handle, inode);
+	ext4_journal_stop(handle);
+
+	if (truncate) {
+truncate:
+		ext4_truncate_failed_write(inode);
+		/*
+		 * If the truncate operation failed early, then the inode may
+		 * still be on the orphan list. In that case, we need to try
+		 * remove the inode from the in-memory linked list.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
+	return written;
+}
+
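The truncation decision above is pure block-rounding arithmetic: round both `offset + written` and `offset + count` up to the next block boundary, and if allocation went further than the data did, trim the tail. The same computation with sample numbers (the values are illustrative):

/* Hedged sketch of the written_blk/end_blk rounding, using the kernel's
 * ALIGN() definition. */
#include <stdint.h>
#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t blocksize = 1 << 12;	/* blkbits = 12 -> 4 KiB blocks */
	uint64_t offset = 10000;	/* where the write started */
	uint64_t count = 20000;		/* how much was requested */
	uint64_t written = 8192;	/* how much actually completed */

	uint64_t written_blk = ALIGN(offset + written, blocksize);
	uint64_t end_blk = ALIGN(offset + count, blocksize);

	printf("data reaches block boundary %llu, allocation reaches %llu\n",
	       (unsigned long long)written_blk, (unsigned long long)end_blk);
	if (written_blk < end_blk)
		puts("-> blocks were allocated beyond what was written; "
		     "the kernel truncates the excess");
	return 0;
}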
+static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
+				 int error, unsigned int flags)
+{
+	loff_t pos = iocb->ki_pos;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (error)
+		return error;
+
+	if (size && flags & IOMAP_DIO_UNWRITTEN) {
+		error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
+		if (error < 0)
+			return error;
+	}
+	/*
+	 * If we are extending the file, we have to update i_size here before
+	 * page cache gets invalidated in iomap_dio_rw(). Otherwise racing
+	 * buffered reads could zero out too much from page cache pages. Update
+	 * of on-disk size will happen later in ext4_dio_write_iter() where
+	 * we have enough information to also perform orphan list handling etc.
+	 * Note that we perform all extending writes synchronously under
+	 * i_rwsem held exclusively so i_size update is safe here in that case.
+	 * If the write was not extending, we cannot see pos > i_size here
+	 * because operations reducing i_size like truncate wait for all
+	 * outstanding DIO before updating i_size.
+	 */
+	pos += size;
+	if (pos > i_size_read(inode))
+		i_size_write(inode, pos);
+
+	return 0;
+}
+
+static const struct iomap_dio_ops ext4_dio_write_ops = {
+	.end_io = ext4_dio_write_end_io,
+};
+
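ext4_dio_write_end_io() converts extents that the DIO wrote through as unwritten. That state is observable from userspace with FIEMAP: fallocate(2) creates extents flagged FIEMAP_EXTENT_UNWRITTEN, and once data actually lands (through this completion path for direct writes, or through ordinary writeback for buffered ones) the flag clears. A hedged sketch; the path, sizes, and 16-extent cap are illustrative:

/* Hedged sketch: watching the UNWRITTEN flag flip via FS_IOC_FIEMAP. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/fiemap.h>
#include <linux/fs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

static void dump_extents(int fd, const char *when)
{
	struct fiemap *fm;
	unsigned int i;

	fm = calloc(1, sizeof(*fm) + 16 * sizeof(struct fiemap_extent));
	if (!fm)
		return;
	fm->fm_length = ~0ULL;		/* map the whole file */
	fm->fm_extent_count = 16;
	fm->fm_flags = FIEMAP_FLAG_SYNC;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0)
		for (i = 0; i < fm->fm_mapped_extents; i++)
			printf("%s: extent @%llu len %llu %s\n", when,
			       (unsigned long long)fm->fm_extents[i].fe_logical,
			       (unsigned long long)fm->fm_extents[i].fe_length,
			       (fm->fm_extents[i].fe_flags &
				FIEMAP_EXTENT_UNWRITTEN) ? "UNWRITTEN" : "written");
	free(fm);
}

int main(void)
{
	char buf[4096];
	int fd = open("fiemap-demo.bin", O_RDWR | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) { perror("open"); return 1; }
	if (fallocate(fd, 0, 0, 1 << 20) < 0) perror("fallocate");
	dump_extents(fd, "after fallocate");

	memset(buf, 'x', sizeof(buf));
	if (pwrite(fd, buf, sizeof(buf), 0) < 0) perror("pwrite");
	if (fdatasync(fd) < 0) perror("fdatasync");
	dump_extents(fd, "after write+sync");

	close(fd);
	return 0;
}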
+/*
+ * The intention here is to start with shared lock acquired then see if any
+ * condition requires an exclusive inode lock. If yes, then we restart the
+ * whole operation by releasing the shared lock and acquiring exclusive lock.
+ *
+ * - For unaligned_io we never take shared lock as it may cause data corruption
+ *   when two unaligned IO tries to modify the same block e.g. while zeroing.
+ *
+ * - For extending writes case we don't take the shared lock, since it requires
+ *   updating inode i_disksize and/or orphan handling with exclusive lock.
+ *
+ * - shared locking will only be true mostly with overwrites. Otherwise we will
+ *   switch to exclusive i_rwsem lock.
+ */
+static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
+				     bool *ilock_shared, bool *extend)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
+	loff_t offset;
+	size_t count;
+	ssize_t ret;
+
+restart:
+	ret = ext4_generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
+	offset = iocb->ki_pos;
+	count = ret;
+	if (ext4_extending_io(inode, offset, count))
+		*extend = true;
+	/*
+	 * Determine whether the IO operation will overwrite allocated
+	 * and initialized blocks.
+	 * We need exclusive i_rwsem for changing security info
+	 * in file_modified().
+	 */
+	if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
+	     !ext4_overwrite_io(inode, offset, count))) {
+		if (iocb->ki_flags & IOCB_NOWAIT) {
+			ret = -EAGAIN;
+			goto out;
+		}
+		inode_unlock_shared(inode);
+		*ilock_shared = false;
+		inode_lock(inode);
+		goto restart;
+	}
+
+	ret = file_modified(file);
+	if (ret < 0)
+		goto out;
+
+	return count;
+out:
+	if (*ilock_shared)
+		inode_unlock_shared(inode);
+	else
+		inode_unlock(inode);
+	return ret;
+}
+
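Both ext4_dio_write_checks() above and ext4_dio_write_iter() below refuse to block under IOCB_NOWAIT: upgrading the lock, serializing unaligned I/O, or extending the file all return -EAGAIN instead. From userspace this surfaces through pwritev2(2) with RWF_NOWAIT. A hedged sketch; the path, sizes, and alignment are illustrative:

/* Hedged sketch: an extending direct write under RWF_NOWAIT is expected to
 * fail with EAGAIN, because extension needs orphan/journal work that cannot
 * be done without blocking; the caller then retries on a blocking path. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	void *buf;
	struct iovec iov;
	ssize_t n;
	int fd = open("nowait-demo.bin", O_WRONLY | O_CREAT | O_DIRECT, 0644);

	if (fd < 0) { perror("open"); return 1; }
	if (posix_memalign(&buf, 4096, 4096)) {
		fprintf(stderr, "posix_memalign failed\n");
		close(fd);
		return 1;
	}
	memset(buf, 0, 4096);
	iov = (struct iovec){ .iov_base = buf, .iov_len = 4096 };

	/* Writing at offset 0 of an empty file extends i_size -> EAGAIN. */
	n = pwritev2(fd, &iov, 1, 0, RWF_NOWAIT);
	if (n < 0 && errno == EAGAIN)
		puts("extending DIO write would block; falling back");
	else if (n < 0)
		perror("pwritev2");

	/* A plain blocking pwrite() completes the same (aligned) I/O. */
	if (n < 0 && pwrite(fd, buf, 4096, 0) < 0)
		perror("pwrite");

	free(buf);
	close(fd);
	return 0;
}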
+static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t ret;
+	handle_t *handle;
+	struct inode *inode = file_inode(iocb->ki_filp);
+	loff_t offset = iocb->ki_pos;
+	size_t count = iov_iter_count(from);
+	const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
+	bool extend = false, unaligned_io = false;
+	bool ilock_shared = true;
+
+	/*
+	 * We initially start with shared inode lock unless it is
+	 * unaligned IO which needs exclusive lock anyways.
+	 */
+	if (ext4_unaligned_io(inode, from, offset)) {
+		unaligned_io = true;
+		ilock_shared = false;
+	}
+	/*
+	 * Quick check here without any i_rwsem lock to see if it is extending
+	 * IO. A more reliable check is done in ext4_dio_write_checks() with
+	 * proper locking in place.
+	 */
+	if (offset + count > i_size_read(inode))
+		ilock_shared = false;
+
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (ilock_shared) {
+			if (!inode_trylock_shared(inode))
+				return -EAGAIN;
+		} else {
+			if (!inode_trylock(inode))
+				return -EAGAIN;
+		}
+	} else {
+		if (ilock_shared)
+			inode_lock_shared(inode);
+		else
+			inode_lock(inode);
+	}
+
+	/* Fallback to buffered I/O if the inode does not support direct I/O. */
+	if (!ext4_dio_supported(iocb, from)) {
+		if (ilock_shared)
+			inode_unlock_shared(inode);
+		else
+			inode_unlock(inode);
+		return ext4_buffered_write_iter(iocb, from);
+	}
+
+	ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
+	if (ret <= 0)
+		return ret;
+
+	/* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
+	if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
+		ret = -EAGAIN;
+		goto out;
+	}
+	/*
+	 * Make sure inline data cannot be created anymore since we are going
+	 * to allocate blocks for DIO. We know the inode does not have any
+	 * inline data now because ext4_dio_supported() checked for that.
+	 */
+	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+
+	offset = iocb->ki_pos;
+	count = ret;
+
+	/*
+	 * Unaligned direct IO must be serialized among each other as zeroing
+	 * of partial blocks of two competing unaligned IOs can result in data
+	 * corruption.
+	 *
+	 * So we make sure we don't allow any unaligned IO in flight.
+	 * For IOs where we need not wait (like unaligned non-AIO DIO),
+	 * below inode_dio_wait() may anyway become a no-op, since we start
+	 * with exclusive lock.
+	 */
+	if (unaligned_io)
+		inode_dio_wait(inode);
+
+	if (extend) {
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+
+		ext4_fc_start_update(inode);
+		ret = ext4_orphan_add(handle, inode);
+		ext4_fc_stop_update(inode);
+		if (ret) {
+			ext4_journal_stop(handle);
+			goto out;
+		}
+
+		ext4_journal_stop(handle);
+	}
+
+	if (ilock_shared)
+		iomap_ops = &ext4_iomap_overwrite_ops;
+	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
+			   is_sync_kiocb(iocb) || unaligned_io || extend);
+	if (ret == -ENOTBLK)
+		ret = 0;
+
+	if (extend)
+		ret = ext4_handle_inode_extension(inode, offset, ret, count);
+
+out:
+	if (ilock_shared)
+		inode_unlock_shared(inode);
+	else
+		inode_unlock(inode);
+
+	if (ret >= 0 && iov_iter_count(from)) {
+		ssize_t err;
+		loff_t endbyte;
+
+		offset = iocb->ki_pos;
+		err = ext4_buffered_write_iter(iocb, from);
+		if (err < 0)
+			return err;
+
+		/*
+		 * We need to ensure that the pages within the page cache for
+		 * the range covered by this I/O are written to disk and
+		 * invalidated. This is in attempt to preserve the expected
+		 * direct I/O semantics in the case we fallback to buffered I/O
+		 * to complete off the I/O request.
+		 */
+		ret += err;
+		endbyte = offset + err - 1;
+		err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
+						   offset, endbyte);
+		if (!err)
+			invalidate_mapping_pages(iocb->ki_filp->f_mapping,
+						 offset >> PAGE_SHIFT,
+						 endbyte >> PAGE_SHIFT);
+	}
+
+	return ret;
 }
 
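When a direct write completes through the buffered fallback, the tail of ext4_dio_write_iter() flushes and invalidates the affected page-cache range to keep O_DIRECT semantics. Userspace code that mixes buffered writes with O_DIRECT readers sometimes performs the same dance itself; a hedged analogue using sync_file_range(2) and posix_fadvise(2) (the path is illustrative):

/* Hedged sketch: userspace analogue of flush-then-invalidate. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char data[] = "buffered fallback payload";
	off_t off = 0;
	int err;
	int fd = open("fallback-demo.bin", O_WRONLY | O_CREAT, 0644);

	if (fd < 0) { perror("open"); return 1; }
	if (pwrite(fd, data, sizeof(data), off) < 0) { perror("pwrite"); return 1; }

	/* filemap_write_and_wait_range() analogue for the written range. */
	if (sync_file_range(fd, off, sizeof(data),
			    SYNC_FILE_RANGE_WAIT_BEFORE |
			    SYNC_FILE_RANGE_WRITE |
			    SYNC_FILE_RANGE_WAIT_AFTER) < 0)
		perror("sync_file_range");

	/* invalidate_mapping_pages() analogue: ask to drop the cached range. */
	err = posix_fadvise(fd, off, sizeof(data), POSIX_FADV_DONTNEED);
	if (err)
		fprintf(stderr, "posix_fadvise: %s\n", strerror(err));

	close(fd);
	return 0;
}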
 #ifdef CONFIG_FS_DAX
 static ssize_t
 ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
+	size_t count;
+	loff_t offset;
+	handle_t *handle;
+	bool extend = false;
+	struct inode *inode = file_inode(iocb->ki_filp);
 
 	if (iocb->ki_flags & IOCB_NOWAIT) {
 		if (!inode_trylock(inode))
@@ -197,17 +634,35 @@
 	} else {
 		inode_lock(inode);
 	}
+
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
-	ret = file_remove_privs(iocb->ki_filp);
-	if (ret)
-		goto out;
-	ret = file_update_time(iocb->ki_filp);
-	if (ret)
-		goto out;
+
+	offset = iocb->ki_pos;
+	count = iov_iter_count(from);
+
+	if (offset + count > EXT4_I(inode)->i_disksize) {
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+
+		ret = ext4_orphan_add(handle, inode);
+		if (ret) {
+			ext4_journal_stop(handle);
+			goto out;
+		}
+
+		extend = true;
+		ext4_journal_stop(handle);
+	}
 
 	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+
+	if (extend)
+		ret = ext4_handle_inode_extension(inode, offset, ret, count);
 out:
 	inode_unlock(inode);
 	if (ret > 0)
@@ -220,10 +675,6 @@
 ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
-	int o_direct = iocb->ki_flags & IOCB_DIRECT;
-	int unaligned_aio = 0;
-	int overwrite = 0;
-	ssize_t ret;
 
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
@@ -232,61 +683,10 @@
 	if (IS_DAX(inode))
 		return ext4_dax_write_iter(iocb, from);
 #endif
-	if (!o_direct && (iocb->ki_flags & IOCB_NOWAIT))
-		return -EOPNOTSUPP;
-
-	if (!inode_trylock(inode)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
-			return -EAGAIN;
-		inode_lock(inode);
-	}
-
-	ret = ext4_write_checks(iocb, from);
-	if (ret <= 0)
-		goto out;
-
-	/*
-	 * Unaligned direct AIO must be serialized among each other as zeroing
-	 * of partial blocks of two competing unaligned AIOs can result in data
-	 * corruption.
-	 */
-	if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
-	    !is_sync_kiocb(iocb) &&
-	    ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
-		unaligned_aio = 1;
-		ext4_unwritten_wait(inode);
-	}
-
-	iocb->private = &overwrite;
-	/* Check whether we do a DIO overwrite or not */
-	if (o_direct && !unaligned_aio) {
-		if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
-			if (ext4_should_dioread_nolock(inode))
-				overwrite = 1;
-		} else if (iocb->ki_flags & IOCB_NOWAIT) {
-			ret = -EAGAIN;
-			goto out;
-		}
-	}
-
-	ret = __generic_file_write_iter(iocb, from);
-	/*
-	 * Unaligned direct AIO must be the only IO in flight. Otherwise
-	 * overlapping aligned IO after unaligned might result in data
-	 * corruption.
-	 */
-	if (ret == -EIOCBQUEUED && unaligned_aio)
-		ext4_unwritten_wait(inode);
-	inode_unlock(inode);
-
-	if (ret > 0)
-		ret = generic_write_sync(iocb, ret);
-
-	return ret;
-
-out:
-	inode_unlock(inode);
-	return ret;
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return ext4_dio_write_iter(iocb, from);
+	else
+		return ext4_buffered_write_iter(iocb, from);
 }
 
 #ifdef CONFIG_FS_DAX
@@ -368,20 +768,25 @@
 	.fault		= ext4_filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite	= ext4_page_mkwrite,
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	.allow_speculation = filemap_allow_speculation,
+#endif
 };
 
 static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file->f_mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct dax_device *dax_dev = sbi->s_daxdev;
 
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+	if (unlikely(ext4_forced_shutdown(sbi)))
 		return -EIO;
 
 	/*
-	 * We don't support synchronous mappings for non-DAX files. At least
-	 * until someone comes with a sensible use case.
+	 * We don't support synchronous mappings for non-DAX files and
+	 * for DAX files if underneath dax_device is not synchronous.
 	 */
-	if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
+	if (!daxdev_mapping_supported(vma, dax_dev))
 		return -EOPNOTSUPP;
 
 	file_accessed(file);
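daxdev_mapping_supported() now rejects MAP_SYNC mappings unless the backing dax_device is synchronous. The failure is visible to userspace as EOPNOTSUPP from mmap(2). A hedged probe (the path is illustrative; the MAP_* fallback defines cover older headers and use the upstream values):

/* Hedged sketch: probing for MAP_SYNC support on a file. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MAP_SHARED_VALIDATE
#define MAP_SHARED_VALIDATE 0x03
#endif
#ifndef MAP_SYNC
#define MAP_SYNC 0x80000
#endif

int main(void)
{
	int fd = open("mmap-demo.bin", O_RDWR | O_CREAT, 0644);
	void *p;

	if (fd < 0) { perror("open"); return 1; }
	if (ftruncate(fd, 4096) < 0) { perror("ftruncate"); return 1; }

	/* MAP_SYNC is only valid together with MAP_SHARED_VALIDATE. */
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		 MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p == MAP_FAILED && errno == EOPNOTSUPP)
		puts("MAP_SYNC unsupported here (expected on non-DAX ext4)");
	else if (p == MAP_FAILED)
		perror("mmap");
	else
		puts("MAP_SYNC mapping established (DAX-capable)");

	close(fd);
	return 0;
}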
@@ -403,13 +808,13 @@
 	handle_t *handle;
 	int err;
 
-	if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
+	if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
 		return 0;
 
 	if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
 		return 0;
 
-	sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+	ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
 	/*
 	 * Sample where the filesystem has been mounted and
 	 * store it in the superblock for sysadmin convenience
@@ -432,7 +837,7 @@
 	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 	if (err)
 		goto out_journal;
-	strlcpy(sbi->s_es->s_last_mounted, cp,
+	strncpy(sbi->s_es->s_last_mounted, cp,
 		sizeof(sbi->s_es->s_last_mounted));
 	ext4_handle_dirty_super(handle, sb);
 out_journal:
@@ -442,7 +847,7 @@
 	return err;
 }
 
-static int ext4_file_open(struct inode * inode, struct file * filp)
+static int ext4_file_open(struct inode *inode, struct file *filp)
 {
 	int ret;
 
@@ -471,7 +876,7 @@
 		return ret;
 	}
 
-	filp->f_mode |= FMODE_NOWAIT;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
 	return dquot_file_open(inode, filp);
 }
 
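FMODE_BUF_RASYNC advertises that buffered reads can be completed asynchronously, which io_uring uses to serve page-cache reads without punting to a worker thread. A hedged sketch using liburing (link with -luring; the path is illustrative):

/* Hedged sketch: an async buffered read through io_uring. */
#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char buf[4096];
	int ret;
	int fd = open("test.bin", O_RDONLY);

	if (fd < 0) { perror("open"); return 1; }
	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
	io_uring_submit(&ring);

	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		printf("async buffered read returned %d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}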
@@ -496,12 +901,14 @@
 						maxbytes, i_size_read(inode));
 	case SEEK_HOLE:
 		inode_lock_shared(inode);
-		offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
+		offset = iomap_seek_hole(inode, offset,
+					 &ext4_iomap_report_ops);
 		inode_unlock_shared(inode);
 		break;
 	case SEEK_DATA:
 		inode_lock_shared(inode);
-		offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
+		offset = iomap_seek_data(inode, offset,
+					 &ext4_iomap_report_ops);
 		inode_unlock_shared(inode);
 		break;
 	}
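SEEK_HOLE/SEEK_DATA are now served by ext4_iomap_report_ops; the behaviour visible to userspace is unchanged. A hedged sketch that creates a sparse file and walks it (path and offsets are illustrative; ext4 reports holes at block granularity):

/* Hedged sketch: locating holes and data with lseek(2). */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char data[] = "data block";
	off_t hole, next_data;
	int fd = open("sparse-demo.bin", O_RDWR | O_CREAT | O_TRUNC, 0644);

	if (fd < 0) { perror("open"); return 1; }

	/* Data at offset 0, then a hole, then data at 1 MiB. */
	if (pwrite(fd, data, sizeof(data), 0) < 0) perror("pwrite");
	if (pwrite(fd, data, sizeof(data), 1 << 20) < 0) perror("pwrite");

	hole = lseek(fd, 0, SEEK_HOLE);		/* first hole after 0 */
	next_data = lseek(fd, hole, SEEK_DATA);	/* data after the hole */
	printf("hole at %lld, next data at %lld\n",
	       (long long)hole, (long long)next_data);

	close(fd);
	return 0;
}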
@@ -515,6 +922,7 @@
 	.llseek		= ext4_llseek,
 	.read_iter	= ext4_file_read_iter,
 	.write_iter	= ext4_file_write_iter,
+	.iopoll		= iomap_dio_iopoll,
 	.unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,