2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/direct-io.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * fs/direct-io.c
  *
@@ -38,6 +39,8 @@
 #include <linux/uio.h>
 #include <linux/atomic.h>
 #include <linux/prefetch.h>
+
+#include "internal.h"
 
 /*
  * How many user pages to map in one call to get_user_pages(). This determines
@@ -221,29 +224,7 @@
 }
 
 /*
- * Warn about a page cache invalidation failure during a direct io write.
- */
-void dio_warn_stale_pagecache(struct file *filp)
-{
-	static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
-	char pathname[128];
-	struct inode *inode = file_inode(filp);
-	char *path;
-
-	errseq_set(&inode->i_mapping->wb_err, -EIO);
-	if (__ratelimit(&_rs)) {
-		path = file_path(filp, pathname, sizeof(pathname));
-		if (IS_ERR(path))
-			path = "(unknown)";
-		pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n");
-		pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,
-			current->comm);
-	}
-}
-
-/**
  * dio_complete() - called when all DIO BIO I/O has been completed
- * @offset: the byte offset in the file of the completed operation
  *
  * This drops i_dio_count, lets interested parties know that a DIO operation
  * has completed, and calculates the resulting return code for the operation.
@@ -406,25 +387,6 @@
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 }
 
-/**
- * dio_end_io - handle the end io action for the given bio
- * @bio: The direct io bio thats being completed
- *
- * This is meant to be called by any filesystem that uses their own dio_submit_t
- * so that the DIO specific endio actions are dealt with after the filesystem
- * has done it's completion work.
- */
-void dio_end_io(struct bio *bio)
-{
-	struct dio *dio = bio->bi_private;
-
-	if (dio->is_async)
-		dio_bio_end_aio(bio);
-	else
-		dio_bio_end_io(bio);
-}
-EXPORT_SYMBOL_GPL(dio_end_io);
-
 static inline void
 dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
 	      struct block_device *bdev,
@@ -523,8 +485,8 @@
 		dio->waiter = current;
 		spin_unlock_irqrestore(&dio->bio_lock, flags);
 		if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(dio->bio_disk->queue, dio->bio_cookie))
-			io_schedule();
+		    !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true))
+			blk_io_schedule();
 		/* wake up sets us TASK_RUNNING */
 		spin_lock_irqsave(&dio->bio_lock, flags);
 		dio->waiter = NULL;
@@ -542,9 +504,8 @@
  */
 static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
 {
-	struct bio_vec *bvec;
-	unsigned i;
 	blk_status_t err = bio->bi_status;
+	bool should_dirty = dio->op == REQ_OP_READ && dio->should_dirty;
 
 	if (err) {
 		if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
@@ -553,17 +514,10 @@
 			dio->io_error = -EIO;
 	}
 
-	if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
+	if (dio->is_async && should_dirty) {
 		bio_check_pages_dirty(bio);	/* transfers ownership */
 	} else {
-		bio_for_each_segment_all(bvec, bio, i) {
-			struct page *page = bvec->bv_page;
-
-			if (dio->op == REQ_OP_READ && !PageCompound(page) &&
-					dio->should_dirty)
-				set_page_dirty_lock(page);
-			put_page(page);
-		}
+		bio_release_pages(bio, should_dirty);
 		bio_put(bio);
 	}
 	return err;
@@ -1206,22 +1160,13 @@
 	 * the early prefetch in the caller enough time.
 	 */
 
-	if (align & blocksize_mask) {
-		if (bdev)
-			blkbits = blksize_bits(bdev_logical_block_size(bdev));
-		blocksize_mask = (1 << blkbits) - 1;
-		if (align & blocksize_mask)
-			goto out;
-	}
-
 	/* watch out for a 0 len io from a tricksy fs */
 	if (iov_iter_rw(iter) == READ && !count)
 		return 0;
 
 	dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
-	retval = -ENOMEM;
 	if (!dio)
-		goto out;
+		return -ENOMEM;
 	/*
 	 * Believe it or not, zeroing out the page array caused a .5%
 	 * performance regression in a database benchmark. So, we take
@@ -1230,32 +1175,32 @@
 	memset(dio, 0, offsetof(struct dio, pages));
 
 	dio->flags = flags;
-	if (dio->flags & DIO_LOCKING) {
-		if (iov_iter_rw(iter) == READ) {
-			struct address_space *mapping =
-					iocb->ki_filp->f_mapping;
-
-			/* will be released by direct_io_worker */
-			inode_lock(inode);
-
-			retval = filemap_write_and_wait_range(mapping, offset,
-							      end - 1);
-			if (retval) {
-				inode_unlock(inode);
-				kmem_cache_free(dio_cache, dio);
-				goto out;
-			}
-		}
+	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) {
+		/* will be released by direct_io_worker */
+		inode_lock(inode);
 	}
 
 	/* Once we sampled i_size check for reads beyond EOF */
 	dio->i_size = i_size_read(inode);
 	if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
-		if (dio->flags & DIO_LOCKING)
-			inode_unlock(inode);
-		kmem_cache_free(dio_cache, dio);
 		retval = 0;
-		goto out;
+		goto fail_dio;
+	}
+
+	if (align & blocksize_mask) {
+		if (bdev)
+			blkbits = blksize_bits(bdev_logical_block_size(bdev));
+		blocksize_mask = (1 << blkbits) - 1;
+		if (align & blocksize_mask)
+			goto fail_dio;
+	}
+
+	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) {
+		struct address_space *mapping = iocb->ki_filp->f_mapping;
+
+		retval = filemap_write_and_wait_range(mapping, offset, end - 1);
+		if (retval)
+			goto fail_dio;
 	}
 
 	/*
@@ -1280,6 +1225,8 @@
 	} else {
 		dio->op = REQ_OP_READ;
 	}
+	if (iocb->ki_flags & IOCB_HIPRI)
+		dio->op_flags |= REQ_HIPRI;
 
 	/*
 	 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
@@ -1297,14 +1244,8 @@
 			 */
 			retval = sb_init_dio_done_wq(dio->inode->i_sb);
 		}
-		if (retval) {
-			/*
-			 * We grab i_mutex only for reads so we don't have
-			 * to release it here
-			 */
-			kmem_cache_free(dio_cache, dio);
-			goto out;
-		}
+		if (retval)
+			goto fail_dio;
 	}
 
 	/*
@@ -1328,7 +1269,7 @@
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
 
-	dio->should_dirty = (iter->type == ITER_IOVEC);
+	dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
 	sdio.iter = iter;
 	sdio.final_block_in_request = end >> blkbits;
 
@@ -1407,7 +1348,13 @@
 	} else
 		BUG_ON(retval != -EIOCBQUEUED);
 
-out:
+	return retval;
+
+fail_dio:
+	if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ)
+		inode_unlock(inode);
+
+	kmem_cache_free(dio_cache, dio);
 	return retval;
 }
 
@@ -1426,14 +1373,14 @@
 	 * Attempt to prefetch the pieces we likely need later.
 	 */
 	prefetch(&bdev->bd_disk->part_tbl);
-	prefetch(bdev->bd_queue);
-	prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
+	prefetch(bdev->bd_disk->queue);
+	prefetch((char *)bdev->bd_disk->queue + SMP_CACHE_BYTES);
 
 	return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block,
 				     end_io, submit_io, flags);
 }
 
-EXPORT_SYMBOL(__blockdev_direct_IO);
+EXPORT_SYMBOL_NS(__blockdev_direct_IO, ANDROID_GKI_VFS_EXPORT_ONLY);
 
 static __init int dio_init(void)
 {
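
For reference, a minimal sketch of how a filesystem could wire the exported helper into its ->direct_IO method now that the symbol is namespaced. The "examplefs" names and its get_block callback below are hypothetical illustrations, not part of this change.

/* Hypothetical example: "examplefs" and examplefs_get_block are assumed
 * names for illustration only. */
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/module.h>

/* Because __blockdev_direct_IO is now exported with EXPORT_SYMBOL_NS(), a
 * module outside the kernel tree must import the namespace before using it. */
MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY);

static int examplefs_get_block(struct inode *inode, sector_t iblock,
			       struct buffer_head *bh_result, int create);

static ssize_t examplefs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	/* blockdev_direct_IO() is the inline wrapper that calls
	 * __blockdev_direct_IO() with DIO_LOCKING | DIO_SKIP_HOLES and the
	 * inode's backing block device. */
	return blockdev_direct_IO(iocb, inode, iter, examplefs_get_block);
}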