From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Tue, 20 Feb 2024 01:20:52 +0000 Subject: [PATCH] add new system file --- kernel/fs/direct-io.c | 141 ++++++++++++++-------------------------------- 1 files changed, 44 insertions(+), 97 deletions(-) diff --git a/kernel/fs/direct-io.c b/kernel/fs/direct-io.c index bc3a2bf..b4bf6bb 100644 --- a/kernel/fs/direct-io.c +++ b/kernel/fs/direct-io.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/direct-io.c * @@ -38,6 +39,8 @@ #include <linux/uio.h> #include <linux/atomic.h> #include <linux/prefetch.h> + +#include "internal.h" /* * How many user pages to map in one call to get_user_pages(). This determines @@ -221,29 +224,7 @@ } /* - * Warn about a page cache invalidation failure during a direct io write. - */ -void dio_warn_stale_pagecache(struct file *filp) -{ - static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); - char pathname[128]; - struct inode *inode = file_inode(filp); - char *path; - - errseq_set(&inode->i_mapping->wb_err, -EIO); - if (__ratelimit(&_rs)) { - path = file_path(filp, pathname, sizeof(pathname)); - if (IS_ERR(path)) - path = "(unknown)"; - pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n"); - pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid, - current->comm); - } -} - -/** * dio_complete() - called when all DIO BIO I/O has been completed - * @offset: the byte offset in the file of the completed operation * * This drops i_dio_count, lets interested parties know that a DIO operation * has completed, and calculates the resulting return code for the operation. @@ -406,25 +387,6 @@ spin_unlock_irqrestore(&dio->bio_lock, flags); } -/** - * dio_end_io - handle the end io action for the given bio - * @bio: The direct io bio thats being completed - * - * This is meant to be called by any filesystem that uses their own dio_submit_t - * so that the DIO specific endio actions are dealt with after the filesystem - * has done it's completion work. - */ -void dio_end_io(struct bio *bio) -{ - struct dio *dio = bio->bi_private; - - if (dio->is_async) - dio_bio_end_aio(bio); - else - dio_bio_end_io(bio); -} -EXPORT_SYMBOL_GPL(dio_end_io); - static inline void dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, struct block_device *bdev, @@ -523,8 +485,8 @@ dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); if (!(dio->iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(dio->bio_disk->queue, dio->bio_cookie)) - io_schedule(); + !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true)) + blk_io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); dio->waiter = NULL; @@ -542,9 +504,8 @@ */ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) { - struct bio_vec *bvec; - unsigned i; blk_status_t err = bio->bi_status; + bool should_dirty = dio->op == REQ_OP_READ && dio->should_dirty; if (err) { if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT)) @@ -553,17 +514,10 @@ dio->io_error = -EIO; } - if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) { + if (dio->is_async && should_dirty) { bio_check_pages_dirty(bio); /* transfers ownership */ } else { - bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - - if (dio->op == REQ_OP_READ && !PageCompound(page) && - dio->should_dirty) - set_page_dirty_lock(page); - put_page(page); - } + bio_release_pages(bio, should_dirty); bio_put(bio); } return err; @@ -1206,22 +1160,13 @@ * the early prefetch in the caller enough time. */ - if (align & blocksize_mask) { - if (bdev) - blkbits = blksize_bits(bdev_logical_block_size(bdev)); - blocksize_mask = (1 << blkbits) - 1; - if (align & blocksize_mask) - goto out; - } - /* watch out for a 0 len io from a tricksy fs */ if (iov_iter_rw(iter) == READ && !count) return 0; dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); - retval = -ENOMEM; if (!dio) - goto out; + return -ENOMEM; /* * Believe it or not, zeroing out the page array caused a .5% * performance regression in a database benchmark. So, we take @@ -1230,32 +1175,32 @@ memset(dio, 0, offsetof(struct dio, pages)); dio->flags = flags; - if (dio->flags & DIO_LOCKING) { - if (iov_iter_rw(iter) == READ) { - struct address_space *mapping = - iocb->ki_filp->f_mapping; - - /* will be released by direct_io_worker */ - inode_lock(inode); - - retval = filemap_write_and_wait_range(mapping, offset, - end - 1); - if (retval) { - inode_unlock(inode); - kmem_cache_free(dio_cache, dio); - goto out; - } - } + if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) { + /* will be released by direct_io_worker */ + inode_lock(inode); } /* Once we sampled i_size check for reads beyond EOF */ dio->i_size = i_size_read(inode); if (iov_iter_rw(iter) == READ && offset >= dio->i_size) { - if (dio->flags & DIO_LOCKING) - inode_unlock(inode); - kmem_cache_free(dio_cache, dio); retval = 0; - goto out; + goto fail_dio; + } + + if (align & blocksize_mask) { + if (bdev) + blkbits = blksize_bits(bdev_logical_block_size(bdev)); + blocksize_mask = (1 << blkbits) - 1; + if (align & blocksize_mask) + goto fail_dio; + } + + if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) { + struct address_space *mapping = iocb->ki_filp->f_mapping; + + retval = filemap_write_and_wait_range(mapping, offset, end - 1); + if (retval) + goto fail_dio; } /* @@ -1280,6 +1225,8 @@ } else { dio->op = REQ_OP_READ; } + if (iocb->ki_flags & IOCB_HIPRI) + dio->op_flags |= REQ_HIPRI; /* * For AIO O_(D)SYNC writes we need to defer completions to a workqueue @@ -1297,14 +1244,8 @@ */ retval = sb_init_dio_done_wq(dio->inode->i_sb); } - if (retval) { - /* - * We grab i_mutex only for reads so we don't have - * to release it here - */ - kmem_cache_free(dio_cache, dio); - goto out; - } + if (retval) + goto fail_dio; } /* @@ -1328,7 +1269,7 @@ spin_lock_init(&dio->bio_lock); dio->refcount = 1; - dio->should_dirty = (iter->type == ITER_IOVEC); + dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ; sdio.iter = iter; sdio.final_block_in_request = end >> blkbits; @@ -1407,7 +1348,13 @@ } else BUG_ON(retval != -EIOCBQUEUED); -out: + return retval; + +fail_dio: + if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) + inode_unlock(inode); + + kmem_cache_free(dio_cache, dio); return retval; } @@ -1426,14 +1373,14 @@ * Attempt to prefetch the pieces we likely need later. */ prefetch(&bdev->bd_disk->part_tbl); - prefetch(bdev->bd_queue); - prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); + prefetch(bdev->bd_disk->queue); + prefetch((char *)bdev->bd_disk->queue + SMP_CACHE_BYTES); return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block, end_io, submit_io, flags); } -EXPORT_SYMBOL(__blockdev_direct_IO); +EXPORT_SYMBOL_NS(__blockdev_direct_IO, ANDROID_GKI_VFS_EXPORT_ONLY); static __init int dio_init(void) { -- Gitblit v1.6.2