.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * fs/direct-io.c |
---|
3 | 4 | * |
---|
.. | .. |
---|
38 | 39 | #include <linux/uio.h> |
---|
39 | 40 | #include <linux/atomic.h> |
---|
40 | 41 | #include <linux/prefetch.h> |
---|
| 42 | + |
---|
| 43 | +#include "internal.h" |
---|
41 | 44 | |
---|
42 | 45 | /* |
---|
43 | 46 | * How many user pages to map in one call to get_user_pages(). This determines |
---|
.. | .. |
---|
221 | 224 | } |
---|
222 | 225 | |
---|
223 | 226 | /* |
---|
224 | | - * Warn about a page cache invalidation failure during a direct io write. |
---|
225 | | - */ |
---|
226 | | -void dio_warn_stale_pagecache(struct file *filp) |
---|
227 | | -{ |
---|
228 | | - static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); |
---|
229 | | - char pathname[128]; |
---|
230 | | - struct inode *inode = file_inode(filp); |
---|
231 | | - char *path; |
---|
232 | | - |
---|
233 | | - errseq_set(&inode->i_mapping->wb_err, -EIO); |
---|
234 | | - if (__ratelimit(&_rs)) { |
---|
235 | | - path = file_path(filp, pathname, sizeof(pathname)); |
---|
236 | | - if (IS_ERR(path)) |
---|
237 | | - path = "(unknown)"; |
---|
238 | | - pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n"); |
---|
239 | | - pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid, |
---|
240 | | - current->comm); |
---|
241 | | - } |
---|
242 | | -} |
---|
243 | | - |
---|
244 | | -/** |
---|
245 | 227 | * dio_complete() - called when all DIO BIO I/O has been completed |
---|
246 | | - * @offset: the byte offset in the file of the completed operation |
---|
247 | 228 | * |
---|
248 | 229 | * This drops i_dio_count, lets interested parties know that a DIO operation |
---|
249 | 230 | * has completed, and calculates the resulting return code for the operation. |
---|
.. | .. |
---|
406 | 387 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
---|
407 | 388 | } |
---|
408 | 389 | |
---|
409 | | -/** |
---|
410 | | - * dio_end_io - handle the end io action for the given bio |
---|
411 | | - * @bio: The direct io bio thats being completed |
---|
412 | | - * |
---|
413 | | - * This is meant to be called by any filesystem that uses their own dio_submit_t |
---|
414 | | - * so that the DIO specific endio actions are dealt with after the filesystem |
---|
415 | | - * has done it's completion work. |
---|
416 | | - */ |
---|
417 | | -void dio_end_io(struct bio *bio) |
---|
418 | | -{ |
---|
419 | | - struct dio *dio = bio->bi_private; |
---|
420 | | - |
---|
421 | | - if (dio->is_async) |
---|
422 | | - dio_bio_end_aio(bio); |
---|
423 | | - else |
---|
424 | | - dio_bio_end_io(bio); |
---|
425 | | -} |
---|
426 | | -EXPORT_SYMBOL_GPL(dio_end_io); |
---|
427 | | - |
---|
428 | 390 | static inline void |
---|
429 | 391 | dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, |
---|
430 | 392 | struct block_device *bdev, |
---|
.. | .. |
---|
523 | 485 | dio->waiter = current; |
---|
524 | 486 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
---|
525 | 487 | if (!(dio->iocb->ki_flags & IOCB_HIPRI) || |
---|
526 | | - !blk_poll(dio->bio_disk->queue, dio->bio_cookie)) |
---|
527 | | - io_schedule(); |
---|
| 488 | + !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true)) |
---|
| 489 | + blk_io_schedule(); |
---|
528 | 490 | /* wake up sets us TASK_RUNNING */ |
---|
529 | 491 | spin_lock_irqsave(&dio->bio_lock, flags); |
---|
530 | 492 | dio->waiter = NULL; |
---|
.. | .. |
---|
542 | 504 | */ |
---|
543 | 505 | static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) |
---|
544 | 506 | { |
---|
545 | | - struct bio_vec *bvec; |
---|
546 | | - unsigned i; |
---|
547 | 507 | blk_status_t err = bio->bi_status; |
---|
| 508 | + bool should_dirty = dio->op == REQ_OP_READ && dio->should_dirty; |
---|
548 | 509 | |
---|
549 | 510 | if (err) { |
---|
550 | 511 | if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT)) |
---|
.. | .. |
---|
553 | 514 | dio->io_error = -EIO; |
---|
554 | 515 | } |
---|
555 | 516 | |
---|
556 | | - if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) { |
---|
| 517 | + if (dio->is_async && should_dirty) { |
---|
557 | 518 | bio_check_pages_dirty(bio); /* transfers ownership */ |
---|
558 | 519 | } else { |
---|
559 | | - bio_for_each_segment_all(bvec, bio, i) { |
---|
560 | | - struct page *page = bvec->bv_page; |
---|
561 | | - |
---|
562 | | - if (dio->op == REQ_OP_READ && !PageCompound(page) && |
---|
563 | | - dio->should_dirty) |
---|
564 | | - set_page_dirty_lock(page); |
---|
565 | | - put_page(page); |
---|
566 | | - } |
---|
| 520 | + bio_release_pages(bio, should_dirty); |
---|
567 | 521 | bio_put(bio); |
---|
568 | 522 | } |
---|
569 | 523 | return err; |
---|
.. | .. |
---|
1206 | 1160 | * the early prefetch in the caller enough time. |
---|
1207 | 1161 | */ |
---|
1208 | 1162 | |
---|
1209 | | - if (align & blocksize_mask) { |
---|
1210 | | - if (bdev) |
---|
1211 | | - blkbits = blksize_bits(bdev_logical_block_size(bdev)); |
---|
1212 | | - blocksize_mask = (1 << blkbits) - 1; |
---|
1213 | | - if (align & blocksize_mask) |
---|
1214 | | - goto out; |
---|
1215 | | - } |
---|
1216 | | - |
---|
1217 | 1163 | /* watch out for a 0 len io from a tricksy fs */ |
---|
1218 | 1164 | if (iov_iter_rw(iter) == READ && !count) |
---|
1219 | 1165 | return 0; |
---|
1220 | 1166 | |
---|
1221 | 1167 | dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); |
---|
1222 | | - retval = -ENOMEM; |
---|
1223 | 1168 | if (!dio) |
---|
1224 | | - goto out; |
---|
| 1169 | + return -ENOMEM; |
---|
1225 | 1170 | /* |
---|
1226 | 1171 | * Believe it or not, zeroing out the page array caused a .5% |
---|
1227 | 1172 | * performance regression in a database benchmark. So, we take |
---|
.. | .. |
---|
1230 | 1175 | memset(dio, 0, offsetof(struct dio, pages)); |
---|
1231 | 1176 | |
---|
1232 | 1177 | dio->flags = flags; |
---|
1233 | | - if (dio->flags & DIO_LOCKING) { |
---|
1234 | | - if (iov_iter_rw(iter) == READ) { |
---|
1235 | | - struct address_space *mapping = |
---|
1236 | | - iocb->ki_filp->f_mapping; |
---|
1237 | | - |
---|
1238 | | - /* will be released by direct_io_worker */ |
---|
1239 | | - inode_lock(inode); |
---|
1240 | | - |
---|
1241 | | - retval = filemap_write_and_wait_range(mapping, offset, |
---|
1242 | | - end - 1); |
---|
1243 | | - if (retval) { |
---|
1244 | | - inode_unlock(inode); |
---|
1245 | | - kmem_cache_free(dio_cache, dio); |
---|
1246 | | - goto out; |
---|
1247 | | - } |
---|
1248 | | - } |
---|
| 1178 | + if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) { |
---|
| 1179 | + /* will be released by direct_io_worker */ |
---|
| 1180 | + inode_lock(inode); |
---|
1249 | 1181 | } |
---|
1250 | 1182 | |
---|
1251 | 1183 | /* Once we sampled i_size check for reads beyond EOF */ |
---|
1252 | 1184 | dio->i_size = i_size_read(inode); |
---|
1253 | 1185 | if (iov_iter_rw(iter) == READ && offset >= dio->i_size) { |
---|
1254 | | - if (dio->flags & DIO_LOCKING) |
---|
1255 | | - inode_unlock(inode); |
---|
1256 | | - kmem_cache_free(dio_cache, dio); |
---|
1257 | 1186 | retval = 0; |
---|
1258 | | - goto out; |
---|
| 1187 | + goto fail_dio; |
---|
| 1188 | + } |
---|
| 1189 | + |
---|
| 1190 | + if (align & blocksize_mask) { |
---|
| 1191 | + if (bdev) |
---|
| 1192 | + blkbits = blksize_bits(bdev_logical_block_size(bdev)); |
---|
| 1193 | + blocksize_mask = (1 << blkbits) - 1; |
---|
| 1194 | + if (align & blocksize_mask) |
---|
| 1195 | + goto fail_dio; |
---|
| 1196 | + } |
---|
| 1197 | + |
---|
| 1198 | + if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) { |
---|
| 1199 | + struct address_space *mapping = iocb->ki_filp->f_mapping; |
---|
| 1200 | + |
---|
| 1201 | + retval = filemap_write_and_wait_range(mapping, offset, end - 1); |
---|
| 1202 | + if (retval) |
---|
| 1203 | + goto fail_dio; |
---|
1259 | 1204 | } |
---|
1260 | 1205 | |
---|
1261 | 1206 | /* |
---|
.. | .. |
---|
1280 | 1225 | } else { |
---|
1281 | 1226 | dio->op = REQ_OP_READ; |
---|
1282 | 1227 | } |
---|
| 1228 | + if (iocb->ki_flags & IOCB_HIPRI) |
---|
| 1229 | + dio->op_flags |= REQ_HIPRI; |
---|
1283 | 1230 | |
---|
1284 | 1231 | /* |
---|
1285 | 1232 | * For AIO O_(D)SYNC writes we need to defer completions to a workqueue |
---|
.. | .. |
---|
1297 | 1244 | */ |
---|
1298 | 1245 | retval = sb_init_dio_done_wq(dio->inode->i_sb); |
---|
1299 | 1246 | } |
---|
1300 | | - if (retval) { |
---|
1301 | | - /* |
---|
1302 | | - * We grab i_mutex only for reads so we don't have |
---|
1303 | | - * to release it here |
---|
1304 | | - */ |
---|
1305 | | - kmem_cache_free(dio_cache, dio); |
---|
1306 | | - goto out; |
---|
1307 | | - } |
---|
| 1247 | + if (retval) |
---|
| 1248 | + goto fail_dio; |
---|
1308 | 1249 | } |
---|
1309 | 1250 | |
---|
1310 | 1251 | /* |
---|
.. | .. |
---|
1328 | 1269 | spin_lock_init(&dio->bio_lock); |
---|
1329 | 1270 | dio->refcount = 1; |
---|
1330 | 1271 | |
---|
1331 | | - dio->should_dirty = (iter->type == ITER_IOVEC); |
---|
| 1272 | + dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ; |
---|
1332 | 1273 | sdio.iter = iter; |
---|
1333 | 1274 | sdio.final_block_in_request = end >> blkbits; |
---|
1334 | 1275 | |
---|
.. | .. |
---|
1407 | 1348 | } else |
---|
1408 | 1349 | BUG_ON(retval != -EIOCBQUEUED); |
---|
1409 | 1350 | |
---|
1410 | | -out: |
---|
| 1351 | + return retval; |
---|
| 1352 | + |
---|
| 1353 | +fail_dio: |
---|
| 1354 | + if (dio->flags & DIO_LOCKING && iov_iter_rw(iter) == READ) |
---|
| 1355 | + inode_unlock(inode); |
---|
| 1356 | + |
---|
| 1357 | + kmem_cache_free(dio_cache, dio); |
---|
1411 | 1358 | return retval; |
---|
1412 | 1359 | } |
---|
1413 | 1360 | |
---|
.. | .. |
---|
1426 | 1373 | * Attempt to prefetch the pieces we likely need later. |
---|
1427 | 1374 | */ |
---|
1428 | 1375 | prefetch(&bdev->bd_disk->part_tbl); |
---|
1429 | | - prefetch(bdev->bd_queue); |
---|
1430 | | - prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); |
---|
| 1376 | + prefetch(bdev->bd_disk->queue); |
---|
| 1377 | + prefetch((char *)bdev->bd_disk->queue + SMP_CACHE_BYTES); |
---|
1431 | 1378 | |
---|
1432 | 1379 | return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block, |
---|
1433 | 1380 | end_io, submit_io, flags); |
---|
1434 | 1381 | } |
---|
1435 | 1382 | |
---|
1436 | | -EXPORT_SYMBOL(__blockdev_direct_IO); |
---|
| 1383 | +EXPORT_SYMBOL_NS(__blockdev_direct_IO, ANDROID_GKI_VFS_EXPORT_ONLY); |
---|
1437 | 1384 | |
---|
1438 | 1385 | static __init int dio_init(void) |
---|
1439 | 1386 | { |
---|