| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * linux/fs/block_dev.c |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 18 | 19 | #include <linux/module.h> |
|---|
| 19 | 20 | #include <linux/blkpg.h> |
|---|
| 20 | 21 | #include <linux/magic.h> |
|---|
| 21 | | -#include <linux/dax.h> |
|---|
| 22 | 22 | #include <linux/buffer_head.h> |
|---|
| 23 | 23 | #include <linux/swap.h> |
|---|
| 24 | 24 | #include <linux/pagevec.h> |
|---|
| 25 | 25 | #include <linux/writeback.h> |
|---|
| 26 | 26 | #include <linux/mpage.h> |
|---|
| 27 | 27 | #include <linux/mount.h> |
|---|
| 28 | +#include <linux/pseudo_fs.h> |
|---|
| 28 | 29 | #include <linux/uio.h> |
|---|
| 29 | 30 | #include <linux/namei.h> |
|---|
| 30 | 31 | #include <linux/log2.h> |
|---|
| 31 | 32 | #include <linux/cleancache.h> |
|---|
| 32 | | -#include <linux/dax.h> |
|---|
| 33 | | -#include <linux/badblocks.h> |
|---|
| 34 | 33 | #include <linux/task_io_accounting_ops.h> |
|---|
| 35 | 34 | #include <linux/falloc.h> |
|---|
| 36 | 35 | #include <linux/uaccess.h> |
|---|
| 36 | +#include <linux/suspend.h> |
|---|
| 37 | 37 | #include "internal.h" |
|---|
| 38 | 38 | |
|---|
| 39 | 39 | struct bdev_inode { |
|---|
| .. | .. |
|---|
| 75 | 75 | } |
|---|
| 76 | 76 | |
|---|
| 77 | 77 | /* Kill _all_ buffers and pagecache , dirty or not.. */ |
|---|
| 78 | | -void kill_bdev(struct block_device *bdev) |
|---|
| 78 | +static void kill_bdev(struct block_device *bdev) |
|---|
| 79 | 79 | { |
|---|
| 80 | 80 | struct address_space *mapping = bdev->bd_inode->i_mapping; |
|---|
| 81 | 81 | |
|---|
| .. | .. |
|---|
| 84 | 84 | |
|---|
| 85 | 85 | invalidate_bh_lrus(); |
|---|
| 86 | 86 | truncate_inode_pages(mapping, 0); |
|---|
| 87 | | -} |
|---|
| 88 | | -EXPORT_SYMBOL(kill_bdev); |
|---|
| 87 | +} |
|---|
| 89 | 88 | |
|---|
| 90 | 89 | /* Invalidate clean unused buffers and pagecache. */ |
|---|
| 91 | 90 | void invalidate_bdev(struct block_device *bdev) |
|---|
| .. | .. |
|---|
| 104 | 103 | } |
|---|
| 105 | 104 | EXPORT_SYMBOL(invalidate_bdev); |
|---|
| 106 | 105 | |
|---|
| 106 | +/* |
|---|
| 107 | + * Drop all buffers & page cache for given bdev range. This function bails |
|---|
| 108 | + * with error if bdev has other exclusive owner (such as filesystem). |
|---|
| 109 | + */ |
|---|
| 110 | +int truncate_bdev_range(struct block_device *bdev, fmode_t mode, |
|---|
| 111 | + loff_t lstart, loff_t lend) |
|---|
| 112 | +{ |
|---|
| 113 | + struct block_device *claimed_bdev = NULL; |
|---|
| 114 | + int err; |
|---|
| 115 | + |
|---|
| 116 | + /* |
|---|
| 117 | + * If we don't hold exclusive handle for the device, upgrade to it |
|---|
| 118 | + * while we discard the buffer cache to avoid discarding buffers |
|---|
| 119 | + * under live filesystem. |
|---|
| 120 | + */ |
|---|
| 121 | + if (!(mode & FMODE_EXCL)) { |
|---|
| 122 | + claimed_bdev = bdev->bd_contains; |
|---|
| 123 | + err = bd_prepare_to_claim(bdev, claimed_bdev, |
|---|
| 124 | + truncate_bdev_range); |
|---|
| 125 | + if (err) |
|---|
| 126 | + goto invalidate; |
|---|
| 127 | + } |
|---|
| 128 | + truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend); |
|---|
| 129 | + if (claimed_bdev) |
|---|
| 130 | + bd_abort_claiming(bdev, claimed_bdev, truncate_bdev_range); |
|---|
| 131 | + return 0; |
|---|
| 132 | + |
|---|
| 133 | +invalidate: |
|---|
| 134 | + /* |
|---|
| 135 | + * Someone else has handle exclusively open. Try invalidating instead. |
|---|
| 136 | + * The 'end' argument is inclusive so the rounding is safe. |
|---|
| 137 | + */ |
|---|
| 138 | + return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping, |
|---|
| 139 | + lstart >> PAGE_SHIFT, |
|---|
| 140 | + lend >> PAGE_SHIFT); |
|---|
| 141 | +} |
|---|
| 142 | +EXPORT_SYMBOL(truncate_bdev_range); |
|---|
| 143 | + |
|---|
| 107 | 144 | static void set_init_blocksize(struct block_device *bdev) |
|---|
| 108 | 145 | { |
|---|
| 109 | | - unsigned bsize = bdev_logical_block_size(bdev); |
|---|
| 146 | + unsigned int bsize = bdev_logical_block_size(bdev); |
|---|
| 110 | 147 | loff_t size = i_size_read(bdev->bd_inode); |
|---|
| 111 | 148 | |
|---|
| 112 | 149 | while (bsize < PAGE_SIZE) { |
|---|
| .. | .. |
|---|
| 114 | 151 | break; |
|---|
| 115 | 152 | bsize <<= 1; |
|---|
| 116 | 153 | } |
|---|
| 117 | | - bdev->bd_block_size = bsize; |
|---|
| 118 | 154 | bdev->bd_inode->i_blkbits = blksize_bits(bsize); |
|---|
| 119 | 155 | } |
|---|
| 120 | 156 | |
|---|
| .. | .. |
|---|
| 129 | 165 | return -EINVAL; |
|---|
| 130 | 166 | |
|---|
| 131 | 167 | /* Don't change the size if it is same as current */ |
|---|
| 132 | | - if (bdev->bd_block_size != size) { |
|---|
| 168 | + if (bdev->bd_inode->i_blkbits != blksize_bits(size)) { |
|---|
| 133 | 169 | sync_blockdev(bdev); |
|---|
| 134 | | - bdev->bd_block_size = size; |
|---|
| 135 | 170 | bdev->bd_inode->i_blkbits = blksize_bits(size); |
|---|
| 136 | 171 | kill_bdev(bdev); |
|---|
| 137 | 172 | } |
|---|
| .. | .. |
|---|
| 151 | 186 | return sb->s_blocksize; |
|---|
| 152 | 187 | } |
|---|
| 153 | 188 | |
|---|
| 154 | | -EXPORT_SYMBOL(sb_set_blocksize); |
|---|
| 189 | +EXPORT_SYMBOL_NS(sb_set_blocksize, ANDROID_GKI_VFS_EXPORT_ONLY); |
|---|
| 155 | 190 | |
|---|
| 156 | 191 | int sb_min_blocksize(struct super_block *sb, int size) |
|---|
| 157 | 192 | { |
|---|
| .. | .. |
|---|
| 161 | 196 | return sb_set_blocksize(sb, size); |
|---|
| 162 | 197 | } |
|---|
| 163 | 198 | |
|---|
| 164 | | -EXPORT_SYMBOL(sb_min_blocksize); |
|---|
| 199 | +EXPORT_SYMBOL_NS(sb_min_blocksize, ANDROID_GKI_VFS_EXPORT_ONLY); |
|---|
| 165 | 200 | |
|---|
| 166 | 201 | static int |
|---|
| 167 | 202 | blkdev_get_block(struct inode *inode, sector_t iblock, |
|---|
| .. | .. |
|---|
| 195 | 230 | struct task_struct *waiter = bio->bi_private; |
|---|
| 196 | 231 | |
|---|
| 197 | 232 | WRITE_ONCE(bio->bi_private, NULL); |
|---|
| 198 | | - wake_up_process(waiter); |
|---|
| 233 | + blk_wake_io_task(waiter); |
|---|
| 199 | 234 | } |
|---|
| 200 | 235 | |
|---|
| 201 | 236 | static ssize_t |
|---|
| .. | .. |
|---|
| 204 | 239 | { |
|---|
| 205 | 240 | struct file *file = iocb->ki_filp; |
|---|
| 206 | 241 | struct block_device *bdev = I_BDEV(bdev_file_inode(file)); |
|---|
| 207 | | - struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec; |
|---|
| 242 | + struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs; |
|---|
| 208 | 243 | loff_t pos = iocb->ki_pos; |
|---|
| 209 | 244 | bool should_dirty = false; |
|---|
| 210 | 245 | struct bio bio; |
|---|
| 211 | 246 | ssize_t ret; |
|---|
| 212 | 247 | blk_qc_t qc; |
|---|
| 213 | | - int i; |
|---|
| 214 | 248 | |
|---|
| 215 | 249 | if ((pos | iov_iter_alignment(iter)) & |
|---|
| 216 | 250 | (bdev_logical_block_size(bdev) - 1)) |
|---|
| .. | .. |
|---|
| 246 | 280 | bio.bi_opf = dio_bio_write_op(iocb); |
|---|
| 247 | 281 | task_io_account_write(ret); |
|---|
| 248 | 282 | } |
|---|
| 283 | + if (iocb->ki_flags & IOCB_NOWAIT) |
|---|
| 284 | + bio.bi_opf |= REQ_NOWAIT; |
|---|
| 285 | + if (iocb->ki_flags & IOCB_HIPRI) |
|---|
| 286 | + bio_set_polled(&bio, iocb); |
|---|
| 249 | 287 | |
|---|
| 250 | 288 | qc = submit_bio(&bio); |
|---|
| 251 | 289 | for (;;) { |
|---|
| .. | .. |
|---|
| 253 | 291 | if (!READ_ONCE(bio.bi_private)) |
|---|
| 254 | 292 | break; |
|---|
| 255 | 293 | if (!(iocb->ki_flags & IOCB_HIPRI) || |
|---|
| 256 | | - !blk_poll(bdev_get_queue(bdev), qc)) |
|---|
| 257 | | - io_schedule(); |
|---|
| 294 | + !blk_poll(bdev_get_queue(bdev), qc, true)) |
|---|
| 295 | + blk_io_schedule(); |
|---|
| 258 | 296 | } |
|---|
| 259 | 297 | __set_current_state(TASK_RUNNING); |
|---|
| 260 | 298 | |
|---|
| 261 | | - bio_for_each_segment_all(bvec, &bio, i) { |
|---|
| 262 | | - if (should_dirty && !PageCompound(bvec->bv_page)) |
|---|
| 263 | | - set_page_dirty_lock(bvec->bv_page); |
|---|
| 264 | | - put_page(bvec->bv_page); |
|---|
| 265 | | - } |
|---|
| 266 | | - |
|---|
| 299 | + bio_release_pages(&bio, should_dirty); |
|---|
| 267 | 300 | if (unlikely(bio.bi_status)) |
|---|
| 268 | 301 | ret = blk_status_to_errno(bio.bi_status); |
|---|
| 269 | 302 | |
|---|
| .. | .. |
|---|
| 291 | 324 | |
|---|
| 292 | 325 | static struct bio_set blkdev_dio_pool; |
|---|
| 293 | 326 | |
|---|
| 327 | +static int blkdev_iopoll(struct kiocb *kiocb, bool wait) |
|---|
| 328 | +{ |
|---|
| 329 | + struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host); |
|---|
| 330 | + struct request_queue *q = bdev_get_queue(bdev); |
|---|
| 331 | + |
|---|
| 332 | + return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait); |
|---|
| 333 | +} |
|---|
| 334 | + |
|---|
| 294 | 335 | static void blkdev_bio_end_io(struct bio *bio) |
|---|
| 295 | 336 | { |
|---|
| 296 | 337 | struct blkdev_dio *dio = bio->bi_private; |
|---|
| .. | .. |
|---|
| 312 | 353 | } |
|---|
| 313 | 354 | |
|---|
| 314 | 355 | dio->iocb->ki_complete(iocb, ret, 0); |
|---|
| 315 | | - bio_put(&dio->bio); |
|---|
| 356 | + if (dio->multi_bio) |
|---|
| 357 | + bio_put(&dio->bio); |
|---|
| 316 | 358 | } else { |
|---|
| 317 | 359 | struct task_struct *waiter = dio->waiter; |
|---|
| 318 | 360 | |
|---|
| 319 | 361 | WRITE_ONCE(dio->waiter, NULL); |
|---|
| 320 | | - wake_up_process(waiter); |
|---|
| 362 | + blk_wake_io_task(waiter); |
|---|
| 321 | 363 | } |
|---|
| 322 | 364 | } |
|---|
| 323 | 365 | |
|---|
| 324 | 366 | if (should_dirty) { |
|---|
| 325 | 367 | bio_check_pages_dirty(bio); |
|---|
| 326 | 368 | } else { |
|---|
| 327 | | - struct bio_vec *bvec; |
|---|
| 328 | | - int i; |
|---|
| 329 | | - |
|---|
| 330 | | - bio_for_each_segment_all(bvec, bio, i) |
|---|
| 331 | | - put_page(bvec->bv_page); |
|---|
| 369 | + bio_release_pages(bio, false); |
|---|
| 332 | 370 | bio_put(bio); |
|---|
| 333 | 371 | } |
|---|
| 334 | 372 | } |
|---|
| .. | .. |
|---|
| 342 | 380 | struct blk_plug plug; |
|---|
| 343 | 381 | struct blkdev_dio *dio; |
|---|
| 344 | 382 | struct bio *bio; |
|---|
| 383 | + bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; |
|---|
| 345 | 384 | bool is_read = (iov_iter_rw(iter) == READ), is_sync; |
|---|
| 346 | 385 | loff_t pos = iocb->ki_pos; |
|---|
| 347 | 386 | blk_qc_t qc = BLK_QC_T_NONE; |
|---|
| .. | .. |
|---|
| 352 | 391 | return -EINVAL; |
|---|
| 353 | 392 | |
|---|
| 354 | 393 | bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); |
|---|
| 355 | | - bio_get(bio); /* extra ref for the completion handler */ |
|---|
| 356 | 394 | |
|---|
| 357 | 395 | dio = container_of(bio, struct blkdev_dio, bio); |
|---|
| 358 | 396 | dio->is_sync = is_sync = is_sync_kiocb(iocb); |
|---|
| 359 | | - if (dio->is_sync) |
|---|
| 397 | + if (dio->is_sync) { |
|---|
| 360 | 398 | dio->waiter = current; |
|---|
| 361 | | - else |
|---|
| 399 | + bio_get(bio); |
|---|
| 400 | + } else { |
|---|
| 362 | 401 | dio->iocb = iocb; |
|---|
| 402 | + } |
|---|
| 363 | 403 | |
|---|
| 364 | 404 | dio->size = 0; |
|---|
| 365 | 405 | dio->multi_bio = false; |
|---|
| 366 | | - dio->should_dirty = is_read && (iter->type == ITER_IOVEC); |
|---|
| 406 | + dio->should_dirty = is_read && iter_is_iovec(iter); |
|---|
| 367 | 407 | |
|---|
| 368 | | - blk_start_plug(&plug); |
|---|
| 408 | + /* |
|---|
| 409 | + * Don't plug for HIPRI/polled IO, as those should go straight |
|---|
| 410 | + * to issue |
|---|
| 411 | + */ |
|---|
| 412 | + if (!is_poll) |
|---|
| 413 | + blk_start_plug(&plug); |
|---|
| 414 | + |
|---|
| 369 | 415 | for (;;) { |
|---|
| 370 | 416 | bio_set_dev(bio, bdev); |
|---|
| 371 | 417 | bio->bi_iter.bi_sector = pos >> 9; |
|---|
| .. | .. |
|---|
| 389 | 435 | bio->bi_opf = dio_bio_write_op(iocb); |
|---|
| 390 | 436 | task_io_account_write(bio->bi_iter.bi_size); |
|---|
| 391 | 437 | } |
|---|
| 438 | + if (iocb->ki_flags & IOCB_NOWAIT) |
|---|
| 439 | + bio->bi_opf |= REQ_NOWAIT; |
|---|
| 392 | 440 | |
|---|
| 393 | 441 | dio->size += bio->bi_iter.bi_size; |
|---|
| 394 | 442 | pos += bio->bi_iter.bi_size; |
|---|
| 395 | 443 | |
|---|
| 396 | 444 | nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES); |
|---|
| 397 | 445 | if (!nr_pages) { |
|---|
| 446 | + bool polled = false; |
|---|
| 447 | + |
|---|
| 448 | + if (iocb->ki_flags & IOCB_HIPRI) { |
|---|
| 449 | + bio_set_polled(bio, iocb); |
|---|
| 450 | + polled = true; |
|---|
| 451 | + } |
|---|
| 452 | + |
|---|
| 398 | 453 | qc = submit_bio(bio); |
|---|
| 454 | + |
|---|
| 455 | + if (polled) |
|---|
| 456 | + WRITE_ONCE(iocb->ki_cookie, qc); |
|---|
| 399 | 457 | break; |
|---|
| 400 | 458 | } |
|---|
| 401 | 459 | |
|---|
| 402 | 460 | if (!dio->multi_bio) { |
|---|
| 461 | + /* |
|---|
| 462 | + * AIO needs an extra reference to ensure the dio |
|---|
| 463 | + * structure which is embedded into the first bio |
|---|
| 464 | + * stays around. |
|---|
| 465 | + */ |
|---|
| 466 | + if (!is_sync) |
|---|
| 467 | + bio_get(bio); |
|---|
| 403 | 468 | dio->multi_bio = true; |
|---|
| 404 | 469 | atomic_set(&dio->ref, 2); |
|---|
| 405 | 470 | } else { |
|---|
| .. | .. |
|---|
| 409 | 474 | submit_bio(bio); |
|---|
| 410 | 475 | bio = bio_alloc(GFP_KERNEL, nr_pages); |
|---|
| 411 | 476 | } |
|---|
| 412 | | - blk_finish_plug(&plug); |
|---|
| 477 | + |
|---|
| 478 | + if (!is_poll) |
|---|
| 479 | + blk_finish_plug(&plug); |
|---|
| 413 | 480 | |
|---|
| 414 | 481 | if (!is_sync) |
|---|
| 415 | 482 | return -EIOCBQUEUED; |
|---|
| .. | .. |
|---|
| 420 | 487 | break; |
|---|
| 421 | 488 | |
|---|
| 422 | 489 | if (!(iocb->ki_flags & IOCB_HIPRI) || |
|---|
| 423 | | - !blk_poll(bdev_get_queue(bdev), qc)) |
|---|
| 424 | | - io_schedule(); |
|---|
| 490 | + !blk_poll(bdev_get_queue(bdev), qc, true)) |
|---|
| 491 | + blk_io_schedule(); |
|---|
| 425 | 492 | } |
|---|
| 426 | 493 | __set_current_state(TASK_RUNNING); |
|---|
| 427 | 494 | |
|---|
| .. | .. |
|---|
| 502 | 569 | * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze |
|---|
| 503 | 570 | * actually. |
|---|
| 504 | 571 | */ |
|---|
| 505 | | -struct super_block *freeze_bdev(struct block_device *bdev) |
|---|
| 572 | +int freeze_bdev(struct block_device *bdev) |
|---|
| 506 | 573 | { |
|---|
| 507 | 574 | struct super_block *sb; |
|---|
| 508 | 575 | int error = 0; |
|---|
| 509 | 576 | |
|---|
| 510 | 577 | mutex_lock(&bdev->bd_fsfreeze_mutex); |
|---|
| 511 | | - if (++bdev->bd_fsfreeze_count > 1) { |
|---|
| 512 | | - /* |
|---|
| 513 | | - * We don't even need to grab a reference - the first call |
|---|
| 514 | | - * to freeze_bdev grab an active reference and only the last |
|---|
| 515 | | - * thaw_bdev drops it. |
|---|
| 516 | | - */ |
|---|
| 517 | | - sb = get_super(bdev); |
|---|
| 518 | | - if (sb) |
|---|
| 519 | | - drop_super(sb); |
|---|
| 520 | | - mutex_unlock(&bdev->bd_fsfreeze_mutex); |
|---|
| 521 | | - return sb; |
|---|
| 522 | | - } |
|---|
| 578 | + if (++bdev->bd_fsfreeze_count > 1) |
|---|
| 579 | + goto done; |
|---|
| 523 | 580 | |
|---|
| 524 | 581 | sb = get_active_super(bdev); |
|---|
| 525 | 582 | if (!sb) |
|---|
| 526 | | - goto out; |
|---|
| 583 | + goto sync; |
|---|
| 527 | 584 | if (sb->s_op->freeze_super) |
|---|
| 528 | 585 | error = sb->s_op->freeze_super(sb); |
|---|
| 529 | 586 | else |
|---|
| 530 | 587 | error = freeze_super(sb); |
|---|
| 531 | | - if (error) { |
|---|
| 532 | | - deactivate_super(sb); |
|---|
| 533 | | - bdev->bd_fsfreeze_count--; |
|---|
| 534 | | - mutex_unlock(&bdev->bd_fsfreeze_mutex); |
|---|
| 535 | | - return ERR_PTR(error); |
|---|
| 536 | | - } |
|---|
| 537 | 588 | deactivate_super(sb); |
|---|
| 538 | | - out: |
|---|
| 589 | + |
|---|
| 590 | + if (error) { |
|---|
| 591 | + bdev->bd_fsfreeze_count--; |
|---|
| 592 | + goto done; |
|---|
| 593 | + } |
|---|
| 594 | + bdev->bd_fsfreeze_sb = sb; |
|---|
| 595 | + |
|---|
| 596 | +sync: |
|---|
| 539 | 597 | sync_blockdev(bdev); |
|---|
| 598 | +done: |
|---|
| 540 | 599 | mutex_unlock(&bdev->bd_fsfreeze_mutex); |
|---|
| 541 | | - return sb; /* thaw_bdev releases s->s_umount */ |
|---|
| 600 | + return error; |
|---|
| 542 | 601 | } |
|---|
| 543 | 602 | EXPORT_SYMBOL(freeze_bdev); |
|---|
| 544 | 603 | |
|---|
| 545 | 604 | /** |
|---|
| 546 | 605 | * thaw_bdev -- unlock filesystem |
|---|
| 547 | 606 | * @bdev: blockdevice to unlock |
|---|
| 548 | | - * @sb: associated superblock |
|---|
| 549 | 607 | * |
|---|
| 550 | 608 | * Unlocks the filesystem and marks it writeable again after freeze_bdev(). |
|---|
| 551 | 609 | */ |
|---|
| 552 | | -int thaw_bdev(struct block_device *bdev, struct super_block *sb) |
|---|
| 610 | +int thaw_bdev(struct block_device *bdev) |
|---|
| 553 | 611 | { |
|---|
| 612 | + struct super_block *sb; |
|---|
| 554 | 613 | int error = -EINVAL; |
|---|
| 555 | 614 | |
|---|
| 556 | 615 | mutex_lock(&bdev->bd_fsfreeze_mutex); |
|---|
| .. | .. |
|---|
| 561 | 620 | if (--bdev->bd_fsfreeze_count > 0) |
|---|
| 562 | 621 | goto out; |
|---|
| 563 | 622 | |
|---|
| 623 | + sb = bdev->bd_fsfreeze_sb; |
|---|
| 564 | 624 | if (!sb) |
|---|
| 565 | 625 | goto out; |
|---|
| 566 | 626 | |
|---|
| .. | .. |
|---|
| 586 | 646 | return block_read_full_page(page, blkdev_get_block); |
|---|
| 587 | 647 | } |
|---|
| 588 | 648 | |
|---|
| 589 | | -static int blkdev_readpages(struct file *file, struct address_space *mapping, |
|---|
| 590 | | - struct list_head *pages, unsigned nr_pages) |
|---|
| 649 | +static void blkdev_readahead(struct readahead_control *rac) |
|---|
| 591 | 650 | { |
|---|
| 592 | | - return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block); |
|---|
| 651 | + mpage_readahead(rac, blkdev_get_block); |
|---|
| 593 | 652 | } |
|---|
| 594 | 653 | |
|---|
| 595 | 654 | static int blkdev_write_begin(struct file *file, struct address_space *mapping, |
|---|
| .. | .. |
|---|
| 644 | 703 | * i_mutex and doing so causes performance issues with concurrent |
|---|
| 645 | 704 | * O_SYNC writers to a block device. |
|---|
| 646 | 705 | */ |
|---|
| 647 | | - error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); |
|---|
| 706 | + error = blkdev_issue_flush(bdev, GFP_KERNEL); |
|---|
| 648 | 707 | if (error == -EOPNOTSUPP) |
|---|
| 649 | 708 | error = 0; |
|---|
| 650 | 709 | |
|---|
| .. | .. |
|---|
| 677 | 736 | if (!ops->rw_page || bdev_get_integrity(bdev)) |
|---|
| 678 | 737 | return result; |
|---|
| 679 | 738 | |
|---|
| 680 | | - result = blk_queue_enter(bdev->bd_queue, 0); |
|---|
| 739 | + result = blk_queue_enter(bdev->bd_disk->queue, 0); |
|---|
| 681 | 740 | if (result) |
|---|
| 682 | 741 | return result; |
|---|
| 683 | 742 | result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, |
|---|
| 684 | 743 | REQ_OP_READ); |
|---|
| 685 | | - blk_queue_exit(bdev->bd_queue); |
|---|
| 744 | + blk_queue_exit(bdev->bd_disk->queue); |
|---|
| 686 | 745 | return result; |
|---|
| 687 | 746 | } |
|---|
| 688 | | -EXPORT_SYMBOL_GPL(bdev_read_page); |
|---|
| 689 | 747 | |
|---|
| 690 | 748 | /** |
|---|
| 691 | 749 | * bdev_write_page() - Start writing a page to a block device |
|---|
| .. | .. |
|---|
| 714 | 772 | |
|---|
| 715 | 773 | if (!ops->rw_page || bdev_get_integrity(bdev)) |
|---|
| 716 | 774 | return -EOPNOTSUPP; |
|---|
| 717 | | - result = blk_queue_enter(bdev->bd_queue, 0); |
|---|
| 775 | + result = blk_queue_enter(bdev->bd_disk->queue, 0); |
|---|
| 718 | 776 | if (result) |
|---|
| 719 | 777 | return result; |
|---|
| 720 | 778 | |
|---|
| .. | .. |
|---|
| 727 | 785 | clean_page_buffers(page); |
|---|
| 728 | 786 | unlock_page(page); |
|---|
| 729 | 787 | } |
|---|
| 730 | | - blk_queue_exit(bdev->bd_queue); |
|---|
| 788 | + blk_queue_exit(bdev->bd_disk->queue); |
|---|
| 731 | 789 | return result; |
|---|
| 732 | 790 | } |
|---|
| 733 | | -EXPORT_SYMBOL_GPL(bdev_write_page); |
|---|
| 734 | 791 | |
|---|
| 735 | 792 | /* |
|---|
| 736 | 793 | * pseudo-fs |
|---|
| .. | .. |
|---|
| 747 | 804 | return &ei->vfs_inode; |
|---|
| 748 | 805 | } |
|---|
| 749 | 806 | |
|---|
| 750 | | -static void bdev_i_callback(struct rcu_head *head) |
|---|
| 807 | +static void bdev_free_inode(struct inode *inode) |
|---|
| 751 | 808 | { |
|---|
| 752 | | - struct inode *inode = container_of(head, struct inode, i_rcu); |
|---|
| 753 | | - struct bdev_inode *bdi = BDEV_I(inode); |
|---|
| 754 | | - |
|---|
| 755 | | - kmem_cache_free(bdev_cachep, bdi); |
|---|
| 756 | | -} |
|---|
| 757 | | - |
|---|
| 758 | | -static void bdev_destroy_inode(struct inode *inode) |
|---|
| 759 | | -{ |
|---|
| 760 | | - call_rcu(&inode->i_rcu, bdev_i_callback); |
|---|
| 809 | + kmem_cache_free(bdev_cachep, BDEV_I(inode)); |
|---|
| 761 | 810 | } |
|---|
| 762 | 811 | |
|---|
| 763 | 812 | static void init_once(void *foo) |
|---|
| .. | .. |
|---|
| 767 | 816 | |
|---|
| 768 | 817 | memset(bdev, 0, sizeof(*bdev)); |
|---|
| 769 | 818 | mutex_init(&bdev->bd_mutex); |
|---|
| 770 | | - INIT_LIST_HEAD(&bdev->bd_list); |
|---|
| 771 | 819 | #ifdef CONFIG_SYSFS |
|---|
| 772 | 820 | INIT_LIST_HEAD(&bdev->bd_holder_disks); |
|---|
| 773 | 821 | #endif |
|---|
| .. | .. |
|---|
| 783 | 831 | truncate_inode_pages_final(&inode->i_data); |
|---|
| 784 | 832 | invalidate_inode_buffers(inode); /* is it needed here? */ |
|---|
| 785 | 833 | clear_inode(inode); |
|---|
| 786 | | - spin_lock(&bdev_lock); |
|---|
| 787 | | - list_del_init(&bdev->bd_list); |
|---|
| 788 | | - spin_unlock(&bdev_lock); |
|---|
| 789 | 834 | /* Detach inode from wb early as bdi_put() may free bdi->wb */ |
|---|
| 790 | 835 | inode_detach_wb(inode); |
|---|
| 791 | 836 | if (bdev->bd_bdi != &noop_backing_dev_info) { |
|---|
| .. | .. |
|---|
| 797 | 842 | static const struct super_operations bdev_sops = { |
|---|
| 798 | 843 | .statfs = simple_statfs, |
|---|
| 799 | 844 | .alloc_inode = bdev_alloc_inode, |
|---|
| 800 | | - .destroy_inode = bdev_destroy_inode, |
|---|
| 845 | + .free_inode = bdev_free_inode, |
|---|
| 801 | 846 | .drop_inode = generic_delete_inode, |
|---|
| 802 | 847 | .evict_inode = bdev_evict_inode, |
|---|
| 803 | 848 | }; |
|---|
| 804 | 849 | |
|---|
| 805 | | -static struct dentry *bd_mount(struct file_system_type *fs_type, |
|---|
| 806 | | - int flags, const char *dev_name, void *data) |
|---|
| 850 | +static int bd_init_fs_context(struct fs_context *fc) |
|---|
| 807 | 851 | { |
|---|
| 808 | | - struct dentry *dent; |
|---|
| 809 | | - dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC); |
|---|
| 810 | | - if (!IS_ERR(dent)) |
|---|
| 811 | | - dent->d_sb->s_iflags |= SB_I_CGROUPWB; |
|---|
| 812 | | - return dent; |
|---|
| 852 | + struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC); |
|---|
| 853 | + if (!ctx) |
|---|
| 854 | + return -ENOMEM; |
|---|
| 855 | + fc->s_iflags |= SB_I_CGROUPWB; |
|---|
| 856 | + ctx->ops = &bdev_sops; |
|---|
| 857 | + return 0; |
|---|
| 813 | 858 | } |
|---|
| 814 | 859 | |
|---|
| 815 | 860 | static struct file_system_type bd_type = { |
|---|
| 816 | 861 | .name = "bdev", |
|---|
| 817 | | - .mount = bd_mount, |
|---|
| 862 | + .init_fs_context = bd_init_fs_context, |
|---|
| 818 | 863 | .kill_sb = kill_anon_super, |
|---|
| 819 | 864 | }; |
|---|
| 820 | 865 | |
|---|
| .. | .. |
|---|
| 860 | 905 | return 0; |
|---|
| 861 | 906 | } |
|---|
| 862 | 907 | |
|---|
| 863 | | -static LIST_HEAD(all_bdevs); |
|---|
| 864 | | - |
|---|
| 865 | | -/* |
|---|
| 866 | | - * If there is a bdev inode for this device, unhash it so that it gets evicted |
|---|
| 867 | | - * as soon as last inode reference is dropped. |
|---|
| 868 | | - */ |
|---|
| 869 | | -void bdev_unhash_inode(dev_t dev) |
|---|
| 870 | | -{ |
|---|
| 871 | | - struct inode *inode; |
|---|
| 872 | | - |
|---|
| 873 | | - inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev); |
|---|
| 874 | | - if (inode) { |
|---|
| 875 | | - remove_inode_hash(inode); |
|---|
| 876 | | - iput(inode); |
|---|
| 877 | | - } |
|---|
| 878 | | -} |
|---|
| 879 | | - |
|---|
| 880 | | -struct block_device *bdget(dev_t dev) |
|---|
| 908 | +static struct block_device *bdget(dev_t dev) |
|---|
| 881 | 909 | { |
|---|
| 882 | 910 | struct block_device *bdev; |
|---|
| 883 | 911 | struct inode *inode; |
|---|
| .. | .. |
|---|
| 891 | 919 | bdev = &BDEV_I(inode)->bdev; |
|---|
| 892 | 920 | |
|---|
| 893 | 921 | if (inode->i_state & I_NEW) { |
|---|
| 922 | + spin_lock_init(&bdev->bd_size_lock); |
|---|
| 894 | 923 | bdev->bd_contains = NULL; |
|---|
| 895 | 924 | bdev->bd_super = NULL; |
|---|
| 896 | 925 | bdev->bd_inode = inode; |
|---|
| 897 | | - bdev->bd_block_size = i_blocksize(inode); |
|---|
| 898 | 926 | bdev->bd_part_count = 0; |
|---|
| 899 | | - bdev->bd_invalidated = 0; |
|---|
| 900 | 927 | inode->i_mode = S_IFBLK; |
|---|
| 901 | 928 | inode->i_rdev = dev; |
|---|
| 902 | 929 | inode->i_bdev = bdev; |
|---|
| 903 | 930 | inode->i_data.a_ops = &def_blk_aops; |
|---|
| 904 | 931 | mapping_set_gfp_mask(&inode->i_data, GFP_USER); |
|---|
| 905 | | - spin_lock(&bdev_lock); |
|---|
| 906 | | - list_add(&bdev->bd_list, &all_bdevs); |
|---|
| 907 | | - spin_unlock(&bdev_lock); |
|---|
| 908 | 932 | unlock_new_inode(inode); |
|---|
| 909 | 933 | } |
|---|
| 910 | 934 | return bdev; |
|---|
| 911 | 935 | } |
|---|
| 912 | | - |
|---|
| 913 | | -EXPORT_SYMBOL(bdget); |
|---|
| 914 | 936 | |
|---|
| 915 | 937 | /** |
|---|
| 916 | 938 | * bdgrab -- Grab a reference to an already referenced block device |
|---|
| .. | .. |
|---|
| 923 | 945 | } |
|---|
| 924 | 946 | EXPORT_SYMBOL(bdgrab); |
|---|
| 925 | 947 | |
|---|
| 948 | +struct block_device *bdget_part(struct hd_struct *part) |
|---|
| 949 | +{ |
|---|
| 950 | + return bdget(part_devt(part)); |
|---|
| 951 | +} |
|---|
| 952 | + |
|---|
| 926 | 953 | long nr_blockdev_pages(void) |
|---|
| 927 | 954 | { |
|---|
| 928 | | - struct block_device *bdev; |
|---|
| 955 | + struct inode *inode; |
|---|
| 929 | 956 | long ret = 0; |
|---|
| 930 | | - spin_lock(&bdev_lock); |
|---|
| 931 | | - list_for_each_entry(bdev, &all_bdevs, bd_list) { |
|---|
| 932 | | - ret += bdev->bd_inode->i_mapping->nrpages; |
|---|
| 933 | | - } |
|---|
| 934 | | - spin_unlock(&bdev_lock); |
|---|
| 957 | + |
|---|
| 958 | + spin_lock(&blockdev_superblock->s_inode_list_lock); |
|---|
| 959 | + list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) |
|---|
| 960 | + ret += inode->i_mapping->nrpages; |
|---|
| 961 | + spin_unlock(&blockdev_superblock->s_inode_list_lock); |
|---|
| 962 | + |
|---|
| 935 | 963 | return ret; |
|---|
| 936 | 964 | } |
|---|
| 937 | 965 | |
|---|
| .. | .. |
|---|
| 1033 | 1061 | } |
|---|
| 1034 | 1062 | |
|---|
| 1035 | 1063 | /** |
|---|
| 1036 | | - * bd_prepare_to_claim - prepare to claim a block device |
|---|
| 1064 | + * bd_prepare_to_claim - claim a block device |
|---|
| 1037 | 1065 | * @bdev: block device of interest |
|---|
| 1038 | 1066 | * @whole: the whole device containing @bdev, may equal @bdev |
|---|
| 1039 | 1067 | * @holder: holder trying to claim @bdev |
|---|
| 1040 | 1068 | * |
|---|
| 1041 | | - * Prepare to claim @bdev. This function fails if @bdev is already |
|---|
| 1042 | | - * claimed by another holder and waits if another claiming is in |
|---|
| 1043 | | - * progress. This function doesn't actually claim. On successful |
|---|
| 1044 | | - * return, the caller has ownership of bd_claiming and bd_holder[s]. |
|---|
| 1045 | | - * |
|---|
| 1046 | | - * CONTEXT: |
|---|
| 1047 | | - * spin_lock(&bdev_lock). Might release bdev_lock, sleep and regrab |
|---|
| 1048 | | - * it multiple times. |
|---|
| 1069 | + * Claim @bdev. This function fails if @bdev is already claimed by another |
|---|
| 1070 | + * holder and waits if another claiming is in progress. On successful return, the caller |
|---|
| 1071 | + * has ownership of bd_claiming and bd_holder[s]. |
|---|
| 1049 | 1072 | * |
|---|
| 1050 | 1073 | * RETURNS: |
|---|
| 1051 | 1074 | * 0 if @bdev can be claimed, -EBUSY otherwise. |
|---|
| 1052 | 1075 | */ |
|---|
| 1053 | | -static int bd_prepare_to_claim(struct block_device *bdev, |
|---|
| 1054 | | - struct block_device *whole, void *holder) |
|---|
| 1076 | +int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, |
|---|
| 1077 | + void *holder) |
|---|
| 1055 | 1078 | { |
|---|
| 1056 | 1079 | retry: |
|---|
| 1080 | + spin_lock(&bdev_lock); |
|---|
| 1057 | 1081 | /* if someone else claimed, fail */ |
|---|
| 1058 | | - if (!bd_may_claim(bdev, whole, holder)) |
|---|
| 1082 | + if (!bd_may_claim(bdev, whole, holder)) { |
|---|
| 1083 | + spin_unlock(&bdev_lock); |
|---|
| 1059 | 1084 | return -EBUSY; |
|---|
| 1085 | + } |
|---|
| 1060 | 1086 | |
|---|
| 1061 | 1087 | /* if claiming is already in progress, wait for it to finish */ |
|---|
| 1062 | 1088 | if (whole->bd_claiming) { |
|---|
| .. | .. |
|---|
| 1067 | 1093 | spin_unlock(&bdev_lock); |
|---|
| 1068 | 1094 | schedule(); |
|---|
| 1069 | 1095 | finish_wait(wq, &wait); |
|---|
| 1070 | | - spin_lock(&bdev_lock); |
|---|
| 1071 | 1096 | goto retry; |
|---|
| 1072 | 1097 | } |
|---|
| 1073 | 1098 | |
|---|
| 1074 | 1099 | /* yay, all mine */ |
|---|
| 1100 | + whole->bd_claiming = holder; |
|---|
| 1101 | + spin_unlock(&bdev_lock); |
|---|
| 1075 | 1102 | return 0; |
|---|
| 1076 | 1103 | } |
|---|
| 1104 | +EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */ |
|---|
| 1077 | 1105 | |
|---|
| 1078 | 1106 | static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) |
|---|
| 1079 | 1107 | { |
|---|
| .. | .. |
|---|
| 1096 | 1124 | return disk; |
|---|
| 1097 | 1125 | } |
|---|
| 1098 | 1126 | |
|---|
| 1099 | | -/** |
|---|
| 1100 | | - * bd_start_claiming - start claiming a block device |
|---|
| 1101 | | - * @bdev: block device of interest |
|---|
| 1102 | | - * @holder: holder trying to claim @bdev |
|---|
| 1103 | | - * |
|---|
| 1104 | | - * @bdev is about to be opened exclusively. Check @bdev can be opened |
|---|
| 1105 | | - * exclusively and mark that an exclusive open is in progress. Each |
|---|
| 1106 | | - * successful call to this function must be matched with a call to |
|---|
| 1107 | | - * either bd_finish_claiming() or bd_abort_claiming() (which do not |
|---|
| 1108 | | - * fail). |
|---|
| 1109 | | - * |
|---|
| 1110 | | - * This function is used to gain exclusive access to the block device |
|---|
| 1111 | | - * without actually causing other exclusive open attempts to fail. It |
|---|
| 1112 | | - * should be used when the open sequence itself requires exclusive |
|---|
| 1113 | | - * access but may subsequently fail. |
|---|
| 1114 | | - * |
|---|
| 1115 | | - * CONTEXT: |
|---|
| 1116 | | - * Might sleep. |
|---|
| 1117 | | - * |
|---|
| 1118 | | - * RETURNS: |
|---|
| 1119 | | - * Pointer to the block device containing @bdev on success, ERR_PTR() |
|---|
| 1120 | | - * value on failure. |
|---|
| 1121 | | - */ |
|---|
| 1122 | | -static struct block_device *bd_start_claiming(struct block_device *bdev, |
|---|
| 1123 | | - void *holder) |
|---|
| 1127 | +static void bd_clear_claiming(struct block_device *whole, void *holder) |
|---|
| 1124 | 1128 | { |
|---|
| 1125 | | - struct gendisk *disk; |
|---|
| 1126 | | - struct block_device *whole; |
|---|
| 1127 | | - int partno, err; |
|---|
| 1128 | | - |
|---|
| 1129 | | - might_sleep(); |
|---|
| 1130 | | - |
|---|
| 1131 | | - /* |
|---|
| 1132 | | - * @bdev might not have been initialized properly yet, look up |
|---|
| 1133 | | - * and grab the outer block device the hard way. |
|---|
| 1134 | | - */ |
|---|
| 1135 | | - disk = bdev_get_gendisk(bdev, &partno); |
|---|
| 1136 | | - if (!disk) |
|---|
| 1137 | | - return ERR_PTR(-ENXIO); |
|---|
| 1138 | | - |
|---|
| 1139 | | - /* |
|---|
| 1140 | | - * Normally, @bdev should equal what's returned from bdget_disk() |
|---|
| 1141 | | - * if partno is 0; however, some drivers (floppy) use multiple |
|---|
| 1142 | | - * bdev's for the same physical device and @bdev may be one of the |
|---|
| 1143 | | - * aliases. Keep @bdev if partno is 0. This means claimer |
|---|
| 1144 | | - * tracking is broken for those devices but it has always been that |
|---|
| 1145 | | - * way. |
|---|
| 1146 | | - */ |
|---|
| 1147 | | - if (partno) |
|---|
| 1148 | | - whole = bdget_disk(disk, 0); |
|---|
| 1149 | | - else |
|---|
| 1150 | | - whole = bdgrab(bdev); |
|---|
| 1151 | | - |
|---|
| 1152 | | - put_disk_and_module(disk); |
|---|
| 1153 | | - if (!whole) |
|---|
| 1154 | | - return ERR_PTR(-ENOMEM); |
|---|
| 1155 | | - |
|---|
| 1156 | | - /* prepare to claim, if successful, mark claiming in progress */ |
|---|
| 1157 | | - spin_lock(&bdev_lock); |
|---|
| 1158 | | - |
|---|
| 1159 | | - err = bd_prepare_to_claim(bdev, whole, holder); |
|---|
| 1160 | | - if (err == 0) { |
|---|
| 1161 | | - whole->bd_claiming = holder; |
|---|
| 1162 | | - spin_unlock(&bdev_lock); |
|---|
| 1163 | | - return whole; |
|---|
| 1164 | | - } else { |
|---|
| 1165 | | - spin_unlock(&bdev_lock); |
|---|
| 1166 | | - bdput(whole); |
|---|
| 1167 | | - return ERR_PTR(err); |
|---|
| 1168 | | - } |
|---|
| 1129 | + lockdep_assert_held(&bdev_lock); |
|---|
| 1130 | + /* tell others that we're done */ |
|---|
| 1131 | + BUG_ON(whole->bd_claiming != holder); |
|---|
| 1132 | + whole->bd_claiming = NULL; |
|---|
| 1133 | + wake_up_bit(&whole->bd_claiming, 0); |
|---|
| 1169 | 1134 | } |
|---|
| 1135 | + |
|---|
| 1136 | +/** |
|---|
| 1137 | + * bd_finish_claiming - finish claiming of a block device |
|---|
| 1138 | + * @bdev: block device of interest |
|---|
| 1139 | + * @whole: whole block device |
|---|
| 1140 | + * @holder: holder that has claimed @bdev |
|---|
| 1141 | + * |
|---|
| 1142 | + * Finish exclusive open of a block device. Mark the device as exlusively |
|---|
| 1143 | + * open by the holder and wake up all waiters for exclusive open to finish. |
|---|
| 1144 | + */ |
|---|
| 1145 | +static void bd_finish_claiming(struct block_device *bdev, |
|---|
| 1146 | + struct block_device *whole, void *holder) |
|---|
| 1147 | +{ |
|---|
| 1148 | + spin_lock(&bdev_lock); |
|---|
| 1149 | + BUG_ON(!bd_may_claim(bdev, whole, holder)); |
|---|
| 1150 | + /* |
|---|
| 1151 | + * Note that for a whole device bd_holders will be incremented twice, |
|---|
| 1152 | + * and bd_holder will be set to bd_may_claim before being set to holder |
|---|
| 1153 | + */ |
|---|
| 1154 | + whole->bd_holders++; |
|---|
| 1155 | + whole->bd_holder = bd_may_claim; |
|---|
| 1156 | + bdev->bd_holders++; |
|---|
| 1157 | + bdev->bd_holder = holder; |
|---|
| 1158 | + bd_clear_claiming(whole, holder); |
|---|
| 1159 | + spin_unlock(&bdev_lock); |
|---|
| 1160 | +} |
|---|
| 1161 | + |
|---|
| 1162 | +/** |
|---|
| 1163 | + * bd_abort_claiming - abort claiming of a block device |
|---|
| 1164 | + * @bdev: block device of interest |
|---|
| 1165 | + * @whole: whole block device |
|---|
| 1166 | + * @holder: holder that has claimed @bdev |
|---|
| 1167 | + * |
|---|
| 1168 | + * Abort claiming of a block device when the exclusive open failed. This can be |
|---|
| 1169 | + * also used when exclusive open is not actually desired and we just needed |
|---|
| 1170 | + * to block other exclusive openers for a while. |
|---|
| 1171 | + */ |
|---|
| 1172 | +void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, |
|---|
| 1173 | + void *holder) |
|---|
| 1174 | +{ |
|---|
| 1175 | + spin_lock(&bdev_lock); |
|---|
| 1176 | + bd_clear_claiming(whole, holder); |
|---|
| 1177 | + spin_unlock(&bdev_lock); |
|---|
| 1178 | +} |
|---|
| 1179 | +EXPORT_SYMBOL(bd_abort_claiming); |
|---|
| 1170 | 1180 | |
|---|
| 1171 | 1181 | #ifdef CONFIG_SYSFS |
|---|
| 1172 | 1182 | struct bd_holder_disk { |
|---|
| .. | .. |
|---|
| 1312 | 1322 | #endif |
|---|
| 1313 | 1323 | |
|---|
| 1314 | 1324 | /** |
|---|
| 1315 | | - * flush_disk - invalidates all buffer-cache entries on a disk |
|---|
| 1316 | | - * |
|---|
| 1317 | | - * @bdev: struct block device to be flushed |
|---|
| 1318 | | - * @kill_dirty: flag to guide handling of dirty inodes |
|---|
| 1319 | | - * |
|---|
| 1320 | | - * Invalidates all buffer-cache entries on a disk. It should be called |
|---|
| 1321 | | - * when a disk has been changed -- either by a media change or online |
|---|
| 1322 | | - * resize. |
|---|
| 1323 | | - */ |
|---|
| 1324 | | -static void flush_disk(struct block_device *bdev, bool kill_dirty) |
|---|
| 1325 | | -{ |
|---|
| 1326 | | - if (__invalidate_device(bdev, kill_dirty)) { |
|---|
| 1327 | | - printk(KERN_WARNING "VFS: busy inodes on changed media or " |
|---|
| 1328 | | - "resized disk %s\n", |
|---|
| 1329 | | - bdev->bd_disk ? bdev->bd_disk->disk_name : ""); |
|---|
| 1330 | | - } |
|---|
| 1331 | | - bdev->bd_invalidated = 1; |
|---|
| 1332 | | -} |
|---|
| 1333 | | - |
|---|
| 1334 | | -/** |
|---|
| 1335 | 1325 | * check_disk_size_change - checks for disk size change and adjusts bdev size. |
|---|
| 1336 | 1326 | * @disk: struct gendisk to check |
|---|
| 1337 | 1327 | * @bdev: struct bdev to adjust. |
|---|
| .. | .. |
|---|
| 1341 | 1331 | * and adjusts it if it differs. When shrinking the bdev size, its all caches |
|---|
| 1342 | 1332 | * are freed. |
|---|
| 1343 | 1333 | */ |
|---|
| 1344 | | -void check_disk_size_change(struct gendisk *disk, struct block_device *bdev, |
|---|
| 1345 | | - bool verbose) |
|---|
| 1334 | +static void check_disk_size_change(struct gendisk *disk, |
|---|
| 1335 | + struct block_device *bdev, bool verbose) |
|---|
| 1346 | 1336 | { |
|---|
| 1347 | 1337 | loff_t disk_size, bdev_size; |
|---|
| 1348 | 1338 | |
|---|
| 1339 | + spin_lock(&bdev->bd_size_lock); |
|---|
| 1349 | 1340 | disk_size = (loff_t)get_capacity(disk) << 9; |
|---|
| 1350 | 1341 | bdev_size = i_size_read(bdev->bd_inode); |
|---|
| 1351 | 1342 | if (disk_size != bdev_size) { |
|---|
| .. | .. |
|---|
| 1355 | 1346 | disk->disk_name, bdev_size, disk_size); |
|---|
| 1356 | 1347 | } |
|---|
| 1357 | 1348 | i_size_write(bdev->bd_inode, disk_size); |
|---|
| 1358 | | - if (bdev_size > disk_size) |
|---|
| 1359 | | - flush_disk(bdev, false); |
|---|
| 1349 | + } |
|---|
| 1350 | + spin_unlock(&bdev->bd_size_lock); |
|---|
| 1351 | + |
|---|
| 1352 | + if (bdev_size > disk_size) { |
|---|
| 1353 | + if (__invalidate_device(bdev, false)) |
|---|
| 1354 | + pr_warn("VFS: busy inodes on resized disk %s\n", |
|---|
| 1355 | + disk->disk_name); |
|---|
| 1360 | 1356 | } |
|---|
| 1361 | 1357 | } |
|---|
| 1362 | 1358 | |
|---|
| 1363 | 1359 | /** |
|---|
| 1364 | | - * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back |
|---|
| 1365 | | - * @disk: struct gendisk to be revalidated |
|---|
| 1360 | + * revalidate_disk_size - checks for disk size change and adjusts bdev size. |
|---|
| 1361 | + * @disk: struct gendisk to check |
|---|
| 1362 | + * @verbose: if %true log a message about a size change if there is any |
|---|
| 1366 | 1363 | * |
|---|
| 1367 | | - * This routine is a wrapper for lower-level driver's revalidate_disk |
|---|
| 1368 | | - * call-backs. It is used to do common pre and post operations needed |
|---|
| 1369 | | - * for all revalidate_disk operations. |
|---|
| 1364 | + * This routine checks to see if the bdev size does not match the disk size |
|---|
| 1365 | + * and adjusts it if it differs. When shrinking the bdev size, its all caches |
|---|
| 1366 | + * are freed. |
|---|
| 1370 | 1367 | */ |
|---|
| 1371 | | -int revalidate_disk(struct gendisk *disk) |
|---|
| 1368 | +void revalidate_disk_size(struct gendisk *disk, bool verbose) |
|---|
| 1372 | 1369 | { |
|---|
| 1373 | 1370 | struct block_device *bdev; |
|---|
| 1374 | | - int ret = 0; |
|---|
| 1375 | 1371 | |
|---|
| 1376 | | - if (disk->fops->revalidate_disk) |
|---|
| 1377 | | - ret = disk->fops->revalidate_disk(disk); |
|---|
| 1372 | + /* |
|---|
| 1373 | + * Hidden disks don't have associated bdev so there's no point in |
|---|
| 1374 | + * revalidating them. |
|---|
| 1375 | + */ |
|---|
| 1376 | + if (disk->flags & GENHD_FL_HIDDEN) |
|---|
| 1377 | + return; |
|---|
| 1378 | + |
|---|
| 1378 | 1379 | bdev = bdget_disk(disk, 0); |
|---|
| 1379 | | - if (!bdev) |
|---|
| 1380 | | - return ret; |
|---|
| 1381 | | - |
|---|
| 1382 | | - mutex_lock(&bdev->bd_mutex); |
|---|
| 1383 | | - check_disk_size_change(disk, bdev, ret == 0); |
|---|
| 1384 | | - bdev->bd_invalidated = 0; |
|---|
| 1385 | | - mutex_unlock(&bdev->bd_mutex); |
|---|
| 1386 | | - bdput(bdev); |
|---|
| 1387 | | - return ret; |
|---|
| 1380 | + if (bdev) { |
|---|
| 1381 | + check_disk_size_change(disk, bdev, verbose); |
|---|
| 1382 | + bdput(bdev); |
|---|
| 1383 | + } |
|---|
| 1388 | 1384 | } |
|---|
| 1389 | | -EXPORT_SYMBOL(revalidate_disk); |
|---|
| 1385 | +EXPORT_SYMBOL(revalidate_disk_size); |
|---|
| 1390 | 1386 | |
|---|
| 1391 | | -/* |
|---|
| 1392 | | - * This routine checks whether a removable media has been changed, |
|---|
| 1393 | | - * and invalidates all buffer-cache-entries in that case. This |
|---|
| 1394 | | - * is a relatively slow routine, so we have to try to minimize using |
|---|
| 1395 | | - * it. Thus it is called only upon a 'mount' or 'open'. This |
|---|
| 1396 | | - * is the best way of combining speed and utility, I think. |
|---|
| 1397 | | - * People changing diskettes in the middle of an operation deserve |
|---|
| 1398 | | - * to lose :-) |
|---|
| 1399 | | - */ |
|---|
| 1400 | | -int check_disk_change(struct block_device *bdev) |
|---|
| 1387 | +void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) |
|---|
| 1401 | 1388 | { |
|---|
| 1402 | | - struct gendisk *disk = bdev->bd_disk; |
|---|
| 1403 | | - const struct block_device_operations *bdops = disk->fops; |
|---|
| 1404 | | - unsigned int events; |
|---|
| 1405 | | - |
|---|
| 1406 | | - events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE | |
|---|
| 1407 | | - DISK_EVENT_EJECT_REQUEST); |
|---|
| 1408 | | - if (!(events & DISK_EVENT_MEDIA_CHANGE)) |
|---|
| 1409 | | - return 0; |
|---|
| 1410 | | - |
|---|
| 1411 | | - flush_disk(bdev, true); |
|---|
| 1412 | | - if (bdops->revalidate_disk) |
|---|
| 1413 | | - bdops->revalidate_disk(bdev->bd_disk); |
|---|
| 1414 | | - return 1; |
|---|
| 1389 | + spin_lock(&bdev->bd_size_lock); |
|---|
| 1390 | + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); |
|---|
| 1391 | + spin_unlock(&bdev->bd_size_lock); |
|---|
| 1415 | 1392 | } |
|---|
| 1416 | | - |
|---|
| 1417 | | -EXPORT_SYMBOL(check_disk_change); |
|---|
| 1418 | | - |
|---|
| 1419 | | -void bd_set_size(struct block_device *bdev, loff_t size) |
|---|
| 1420 | | -{ |
|---|
| 1421 | | - inode_lock(bdev->bd_inode); |
|---|
| 1422 | | - i_size_write(bdev->bd_inode, size); |
|---|
| 1423 | | - inode_unlock(bdev->bd_inode); |
|---|
| 1424 | | -} |
|---|
| 1425 | | -EXPORT_SYMBOL(bd_set_size); |
|---|
| 1393 | +EXPORT_SYMBOL(bd_set_nr_sectors); |
|---|
| 1426 | 1394 | |
|---|
| 1427 | 1395 | static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); |
|---|
| 1428 | 1396 | |
|---|
| 1429 | | -static void bdev_disk_changed(struct block_device *bdev, bool invalidate) |
|---|
| 1397 | +int bdev_disk_changed(struct block_device *bdev, bool invalidate) |
|---|
| 1430 | 1398 | { |
|---|
| 1431 | | - if (disk_part_scan_enabled(bdev->bd_disk)) { |
|---|
| 1432 | | - if (invalidate) |
|---|
| 1433 | | - invalidate_partitions(bdev->bd_disk, bdev); |
|---|
| 1434 | | - else |
|---|
| 1435 | | - rescan_partitions(bdev->bd_disk, bdev); |
|---|
| 1399 | + struct gendisk *disk = bdev->bd_disk; |
|---|
| 1400 | + int ret; |
|---|
| 1401 | + |
|---|
| 1402 | + lockdep_assert_held(&bdev->bd_mutex); |
|---|
| 1403 | + |
|---|
| 1404 | + if (!(disk->flags & GENHD_FL_UP)) |
|---|
| 1405 | + return -ENXIO; |
|---|
| 1406 | + |
|---|
| 1407 | +rescan: |
|---|
| 1408 | + ret = blk_drop_partitions(bdev); |
|---|
| 1409 | + if (ret) |
|---|
| 1410 | + return ret; |
|---|
| 1411 | + |
|---|
| 1412 | + clear_bit(GD_NEED_PART_SCAN, &disk->state); |
|---|
| 1413 | + |
|---|
| 1414 | + /* |
|---|
| 1415 | + * Historically we only set the capacity to zero for devices that |
|---|
| 1416 | + * support partitions (independ of actually having partitions created). |
|---|
| 1417 | + * Doing that is rather inconsistent, but changing it broke legacy |
|---|
| 1418 | + * udisks polling for legacy ide-cdrom devices. Use the crude check |
|---|
| 1419 | + * below to get the sane behavior for most device while not breaking |
|---|
| 1420 | + * userspace for this particular setup. |
|---|
| 1421 | + */ |
|---|
| 1422 | + if (invalidate) { |
|---|
| 1423 | + if (disk_part_scan_enabled(disk) || |
|---|
| 1424 | + !(disk->flags & GENHD_FL_REMOVABLE)) |
|---|
| 1425 | + set_capacity(disk, 0); |
|---|
| 1436 | 1426 | } else { |
|---|
| 1437 | | - check_disk_size_change(bdev->bd_disk, bdev, !invalidate); |
|---|
| 1438 | | - bdev->bd_invalidated = 0; |
|---|
| 1427 | + if (disk->fops->revalidate_disk) |
|---|
| 1428 | + disk->fops->revalidate_disk(disk); |
|---|
| 1439 | 1429 | } |
|---|
| 1430 | + |
|---|
| 1431 | + check_disk_size_change(disk, bdev, !invalidate); |
|---|
| 1432 | + |
|---|
| 1433 | + if (get_capacity(disk)) { |
|---|
| 1434 | + ret = blk_add_partitions(disk, bdev); |
|---|
| 1435 | + if (ret == -EAGAIN) |
|---|
| 1436 | + goto rescan; |
|---|
| 1437 | + } else if (invalidate) { |
|---|
| 1438 | + /* |
|---|
| 1439 | + * Tell userspace that the media / partition table may have |
|---|
| 1440 | + * changed. |
|---|
| 1441 | + */ |
|---|
| 1442 | + kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); |
|---|
| 1443 | + } |
|---|
| 1444 | + |
|---|
| 1445 | + return ret; |
|---|
| 1440 | 1446 | } |
|---|
| 1447 | +/* |
|---|
| 1448 | + * Only exported for for loop and dasd for historic reasons. Don't use in new |
|---|
| 1449 | + * code! |
|---|
| 1450 | + */ |
|---|
| 1451 | +EXPORT_SYMBOL_GPL(bdev_disk_changed); |
|---|
| 1441 | 1452 | |
|---|
| 1442 | 1453 | /* |
|---|
| 1443 | 1454 | * bd_mutex locking: |
|---|
| .. | .. |
|---|
| 1446 | 1457 | * mutex_lock_nested(whole->bd_mutex, 1) |
|---|
| 1447 | 1458 | */ |
|---|
| 1448 | 1459 | |
|---|
| 1449 | | -static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) |
|---|
| 1460 | +static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, |
|---|
| 1461 | + int for_part) |
|---|
| 1450 | 1462 | { |
|---|
| 1463 | + struct block_device *whole = NULL, *claiming = NULL; |
|---|
| 1451 | 1464 | struct gendisk *disk; |
|---|
| 1452 | 1465 | int ret; |
|---|
| 1453 | 1466 | int partno; |
|---|
| 1454 | | - int perm = 0; |
|---|
| 1455 | | - bool first_open = false; |
|---|
| 1456 | | - |
|---|
| 1457 | | - if (mode & FMODE_READ) |
|---|
| 1458 | | - perm |= MAY_READ; |
|---|
| 1459 | | - if (mode & FMODE_WRITE) |
|---|
| 1460 | | - perm |= MAY_WRITE; |
|---|
| 1461 | | - /* |
|---|
| 1462 | | - * hooks: /n/, see "layering violations". |
|---|
| 1463 | | - */ |
|---|
| 1464 | | - if (!for_part) { |
|---|
| 1465 | | - ret = devcgroup_inode_permission(bdev->bd_inode, perm); |
|---|
| 1466 | | - if (ret != 0) |
|---|
| 1467 | | - return ret; |
|---|
| 1468 | | - } |
|---|
| 1467 | + bool first_open = false, unblock_events = true, need_restart; |
|---|
| 1469 | 1468 | |
|---|
| 1470 | 1469 | restart: |
|---|
| 1471 | | - |
|---|
| 1470 | + need_restart = false; |
|---|
| 1472 | 1471 | ret = -ENXIO; |
|---|
| 1473 | 1472 | disk = bdev_get_gendisk(bdev, &partno); |
|---|
| 1474 | 1473 | if (!disk) |
|---|
| 1475 | 1474 | goto out; |
|---|
| 1475 | + |
|---|
| 1476 | + if (partno) { |
|---|
| 1477 | + whole = bdget_disk(disk, 0); |
|---|
| 1478 | + if (!whole) { |
|---|
| 1479 | + ret = -ENOMEM; |
|---|
| 1480 | + goto out_put_disk; |
|---|
| 1481 | + } |
|---|
| 1482 | + } |
|---|
| 1483 | + |
|---|
| 1484 | + if (!for_part && (mode & FMODE_EXCL)) { |
|---|
| 1485 | + WARN_ON_ONCE(!holder); |
|---|
| 1486 | + if (whole) |
|---|
| 1487 | + claiming = whole; |
|---|
| 1488 | + else |
|---|
| 1489 | + claiming = bdev; |
|---|
| 1490 | + ret = bd_prepare_to_claim(bdev, claiming, holder); |
|---|
| 1491 | + if (ret) |
|---|
| 1492 | + goto out_put_whole; |
|---|
| 1493 | + } |
|---|
| 1476 | 1494 | |
|---|
| 1477 | 1495 | disk_block_events(disk); |
|---|
| 1478 | 1496 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
|---|
| 1479 | 1497 | if (!bdev->bd_openers) { |
|---|
| 1480 | 1498 | first_open = true; |
|---|
| 1481 | 1499 | bdev->bd_disk = disk; |
|---|
| 1482 | | - bdev->bd_queue = disk->queue; |
|---|
| 1483 | 1500 | bdev->bd_contains = bdev; |
|---|
| 1484 | 1501 | bdev->bd_partno = partno; |
|---|
| 1485 | 1502 | |
|---|
| .. | .. |
|---|
| 1492 | 1509 | ret = 0; |
|---|
| 1493 | 1510 | if (disk->fops->open) { |
|---|
| 1494 | 1511 | ret = disk->fops->open(bdev, mode); |
|---|
| 1495 | | - if (ret == -ERESTARTSYS) { |
|---|
| 1496 | | - /* Lost a race with 'disk' being |
|---|
| 1497 | | - * deleted, try again. |
|---|
| 1498 | | - * See md.c |
|---|
| 1499 | | - */ |
|---|
| 1500 | | - disk_put_part(bdev->bd_part); |
|---|
| 1501 | | - bdev->bd_part = NULL; |
|---|
| 1502 | | - bdev->bd_disk = NULL; |
|---|
| 1503 | | - bdev->bd_queue = NULL; |
|---|
| 1504 | | - mutex_unlock(&bdev->bd_mutex); |
|---|
| 1505 | | - disk_unblock_events(disk); |
|---|
| 1506 | | - put_disk_and_module(disk); |
|---|
| 1507 | | - goto restart; |
|---|
| 1508 | | - } |
|---|
| 1512 | + /* |
|---|
| 1513 | + * If we lost a race with 'disk' being deleted, |
|---|
| 1514 | + * try again. See md.c |
|---|
| 1515 | + */ |
|---|
| 1516 | + if (ret == -ERESTARTSYS) |
|---|
| 1517 | + need_restart = true; |
|---|
| 1509 | 1518 | } |
|---|
| 1510 | 1519 | |
|---|
| 1511 | 1520 | if (!ret) { |
|---|
| 1512 | | - bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
|---|
| 1521 | + bd_set_nr_sectors(bdev, get_capacity(disk)); |
|---|
| 1513 | 1522 | set_init_blocksize(bdev); |
|---|
| 1514 | 1523 | } |
|---|
| 1515 | 1524 | |
|---|
| .. | .. |
|---|
| 1519 | 1528 | * The latter is necessary to prevent ghost |
|---|
| 1520 | 1529 | * partitions on a removed medium. |
|---|
| 1521 | 1530 | */ |
|---|
| 1522 | | - if (bdev->bd_invalidated && |
|---|
| 1531 | + if (test_bit(GD_NEED_PART_SCAN, &disk->state) && |
|---|
| 1523 | 1532 | (!ret || ret == -ENOMEDIUM)) |
|---|
| 1524 | 1533 | bdev_disk_changed(bdev, ret == -ENOMEDIUM); |
|---|
| 1525 | 1534 | |
|---|
| 1526 | 1535 | if (ret) |
|---|
| 1527 | 1536 | goto out_clear; |
|---|
| 1528 | 1537 | } else { |
|---|
| 1529 | | - struct block_device *whole; |
|---|
| 1530 | | - whole = bdget_disk(disk, 0); |
|---|
| 1531 | | - ret = -ENOMEM; |
|---|
| 1532 | | - if (!whole) |
|---|
| 1533 | | - goto out_clear; |
|---|
| 1534 | 1538 | BUG_ON(for_part); |
|---|
| 1535 | | - ret = __blkdev_get(whole, mode, 1); |
|---|
| 1536 | | - if (ret) { |
|---|
| 1537 | | - bdput(whole); |
|---|
| 1539 | + ret = __blkdev_get(whole, mode, NULL, 1); |
|---|
| 1540 | + if (ret) |
|---|
| 1538 | 1541 | goto out_clear; |
|---|
| 1539 | | - } |
|---|
| 1540 | | - bdev->bd_contains = whole; |
|---|
| 1542 | + bdev->bd_contains = bdgrab(whole); |
|---|
| 1541 | 1543 | bdev->bd_part = disk_get_part(disk, partno); |
|---|
| 1542 | 1544 | if (!(disk->flags & GENHD_FL_UP) || |
|---|
| 1543 | 1545 | !bdev->bd_part || !bdev->bd_part->nr_sects) { |
|---|
| 1544 | 1546 | ret = -ENXIO; |
|---|
| 1545 | 1547 | goto out_clear; |
|---|
| 1546 | 1548 | } |
|---|
| 1547 | | - bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); |
|---|
| 1549 | + bd_set_nr_sectors(bdev, bdev->bd_part->nr_sects); |
|---|
| 1548 | 1550 | set_init_blocksize(bdev); |
|---|
| 1549 | 1551 | } |
|---|
| 1550 | 1552 | |
|---|
| .. | .. |
|---|
| 1556 | 1558 | if (bdev->bd_disk->fops->open) |
|---|
| 1557 | 1559 | ret = bdev->bd_disk->fops->open(bdev, mode); |
|---|
| 1558 | 1560 | /* the same as first opener case, read comment there */ |
|---|
| 1559 | | - if (bdev->bd_invalidated && |
|---|
| 1561 | + if (test_bit(GD_NEED_PART_SCAN, &disk->state) && |
|---|
| 1560 | 1562 | (!ret || ret == -ENOMEDIUM)) |
|---|
| 1561 | 1563 | bdev_disk_changed(bdev, ret == -ENOMEDIUM); |
|---|
| 1562 | 1564 | if (ret) |
|---|
| .. | .. |
|---|
| 1566 | 1568 | bdev->bd_openers++; |
|---|
| 1567 | 1569 | if (for_part) |
|---|
| 1568 | 1570 | bdev->bd_part_count++; |
|---|
| 1571 | + if (claiming) |
|---|
| 1572 | + bd_finish_claiming(bdev, claiming, holder); |
|---|
| 1573 | + |
|---|
| 1574 | + /* |
|---|
| 1575 | + * Block event polling for write claims if requested. Any write holder |
|---|
| 1576 | + * makes the write_holder state stick until all are released. This is |
|---|
| 1577 | + * good enough and tracking individual writeable reference is too |
|---|
| 1578 | + * fragile given the way @mode is used in blkdev_get/put(). |
|---|
| 1579 | + */ |
|---|
| 1580 | + if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder && |
|---|
| 1581 | + (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { |
|---|
| 1582 | + bdev->bd_write_holder = true; |
|---|
| 1583 | + unblock_events = false; |
|---|
| 1584 | + } |
|---|
| 1569 | 1585 | mutex_unlock(&bdev->bd_mutex); |
|---|
| 1570 | | - disk_unblock_events(disk); |
|---|
| 1586 | + |
|---|
| 1587 | + if (unblock_events) |
|---|
| 1588 | + disk_unblock_events(disk); |
|---|
| 1589 | + |
|---|
| 1571 | 1590 | /* only one opener holds refs to the module and disk */ |
|---|
| 1572 | 1591 | if (!first_open) |
|---|
| 1573 | 1592 | put_disk_and_module(disk); |
|---|
| 1593 | + if (whole) |
|---|
| 1594 | + bdput(whole); |
|---|
| 1574 | 1595 | return 0; |
|---|
| 1575 | 1596 | |
|---|
| 1576 | 1597 | out_clear: |
|---|
| 1577 | 1598 | disk_put_part(bdev->bd_part); |
|---|
| 1578 | 1599 | bdev->bd_disk = NULL; |
|---|
| 1579 | 1600 | bdev->bd_part = NULL; |
|---|
| 1580 | | - bdev->bd_queue = NULL; |
|---|
| 1581 | 1601 | if (bdev != bdev->bd_contains) |
|---|
| 1582 | 1602 | __blkdev_put(bdev->bd_contains, mode, 1); |
|---|
| 1583 | 1603 | bdev->bd_contains = NULL; |
|---|
| 1584 | 1604 | out_unlock_bdev: |
|---|
| 1605 | + if (claiming) |
|---|
| 1606 | + bd_abort_claiming(bdev, claiming, holder); |
|---|
| 1585 | 1607 | mutex_unlock(&bdev->bd_mutex); |
|---|
| 1586 | 1608 | disk_unblock_events(disk); |
|---|
| 1609 | + out_put_whole: |
|---|
| 1610 | + if (whole) |
|---|
| 1611 | + bdput(whole); |
|---|
| 1612 | + out_put_disk: |
|---|
| 1587 | 1613 | put_disk_and_module(disk); |
|---|
| 1614 | + if (need_restart) |
|---|
| 1615 | + goto restart; |
|---|
| 1588 | 1616 | out: |
|---|
| 1589 | | - |
|---|
| 1590 | 1617 | return ret; |
|---|
| 1591 | 1618 | } |
|---|
| 1592 | 1619 | |
|---|
| .. | .. |
|---|
| 1609 | 1636 | * RETURNS: |
|---|
| 1610 | 1637 | * 0 on success, -errno on failure. |
|---|
| 1611 | 1638 | */ |
|---|
| 1612 | | -int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) |
|---|
| 1639 | +static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) |
|---|
| 1613 | 1640 | { |
|---|
| 1614 | | - struct block_device *whole = NULL; |
|---|
| 1615 | | - int res; |
|---|
| 1641 | + int ret, perm = 0; |
|---|
| 1616 | 1642 | |
|---|
| 1617 | | - WARN_ON_ONCE((mode & FMODE_EXCL) && !holder); |
|---|
| 1643 | + if (mode & FMODE_READ) |
|---|
| 1644 | + perm |= MAY_READ; |
|---|
| 1645 | + if (mode & FMODE_WRITE) |
|---|
| 1646 | + perm |= MAY_WRITE; |
|---|
| 1647 | + ret = devcgroup_inode_permission(bdev->bd_inode, perm); |
|---|
| 1648 | + if (ret) |
|---|
| 1649 | + goto bdput; |
|---|
| 1618 | 1650 | |
|---|
| 1619 | | - if ((mode & FMODE_EXCL) && holder) { |
|---|
| 1620 | | - whole = bd_start_claiming(bdev, holder); |
|---|
| 1621 | | - if (IS_ERR(whole)) { |
|---|
| 1622 | | - bdput(bdev); |
|---|
| 1623 | | - return PTR_ERR(whole); |
|---|
| 1624 | | - } |
|---|
| 1625 | | - } |
|---|
| 1651 | + ret =__blkdev_get(bdev, mode, holder, 0); |
|---|
| 1652 | + if (ret) |
|---|
| 1653 | + goto bdput; |
|---|
| 1654 | + return 0; |
|---|
| 1626 | 1655 | |
|---|
| 1627 | | - res = __blkdev_get(bdev, mode, 0); |
|---|
| 1628 | | - |
|---|
| 1629 | | - if (whole) { |
|---|
| 1630 | | - struct gendisk *disk = whole->bd_disk; |
|---|
| 1631 | | - |
|---|
| 1632 | | - /* finish claiming */ |
|---|
| 1633 | | - mutex_lock(&bdev->bd_mutex); |
|---|
| 1634 | | - spin_lock(&bdev_lock); |
|---|
| 1635 | | - |
|---|
| 1636 | | - if (!res) { |
|---|
| 1637 | | - BUG_ON(!bd_may_claim(bdev, whole, holder)); |
|---|
| 1638 | | - /* |
|---|
| 1639 | | - * Note that for a whole device bd_holders |
|---|
| 1640 | | - * will be incremented twice, and bd_holder |
|---|
| 1641 | | - * will be set to bd_may_claim before being |
|---|
| 1642 | | - * set to holder |
|---|
| 1643 | | - */ |
|---|
| 1644 | | - whole->bd_holders++; |
|---|
| 1645 | | - whole->bd_holder = bd_may_claim; |
|---|
| 1646 | | - bdev->bd_holders++; |
|---|
| 1647 | | - bdev->bd_holder = holder; |
|---|
| 1648 | | - } |
|---|
| 1649 | | - |
|---|
| 1650 | | - /* tell others that we're done */ |
|---|
| 1651 | | - BUG_ON(whole->bd_claiming != holder); |
|---|
| 1652 | | - whole->bd_claiming = NULL; |
|---|
| 1653 | | - wake_up_bit(&whole->bd_claiming, 0); |
|---|
| 1654 | | - |
|---|
| 1655 | | - spin_unlock(&bdev_lock); |
|---|
| 1656 | | - |
|---|
| 1657 | | - /* |
|---|
| 1658 | | - * Block event polling for write claims if requested. Any |
|---|
| 1659 | | - * write holder makes the write_holder state stick until |
|---|
| 1660 | | - * all are released. This is good enough and tracking |
|---|
| 1661 | | - * individual writeable reference is too fragile given the |
|---|
| 1662 | | - * way @mode is used in blkdev_get/put(). |
|---|
| 1663 | | - */ |
|---|
| 1664 | | - if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder && |
|---|
| 1665 | | - (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { |
|---|
| 1666 | | - bdev->bd_write_holder = true; |
|---|
| 1667 | | - disk_block_events(disk); |
|---|
| 1668 | | - } |
|---|
| 1669 | | - |
|---|
| 1670 | | - mutex_unlock(&bdev->bd_mutex); |
|---|
| 1671 | | - bdput(whole); |
|---|
| 1672 | | - } |
|---|
| 1673 | | - |
|---|
| 1674 | | - if (res) |
|---|
| 1675 | | - bdput(bdev); |
|---|
| 1676 | | - |
|---|
| 1677 | | - return res; |
|---|
| 1656 | +bdput: |
|---|
| 1657 | + bdput(bdev); |
|---|
| 1658 | + return ret; |
|---|
| 1678 | 1659 | } |
|---|
| 1679 | | -EXPORT_SYMBOL(blkdev_get); |
|---|
| 1680 | 1660 | |
|---|
| 1681 | 1661 | /** |
|---|
| 1682 | 1662 | * blkdev_get_by_path - open a block device by name |
|---|
| .. | .. |
|---|
| 1769 | 1749 | */ |
|---|
| 1770 | 1750 | filp->f_flags |= O_LARGEFILE; |
|---|
| 1771 | 1751 | |
|---|
| 1772 | | - filp->f_mode |= FMODE_NOWAIT; |
|---|
| 1752 | + filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; |
|---|
| 1773 | 1753 | |
|---|
| 1774 | 1754 | if (filp->f_flags & O_NDELAY) |
|---|
| 1775 | 1755 | filp->f_mode |= FMODE_NDELAY; |
|---|
| .. | .. |
|---|
| 1925 | 1905 | if (bdev_read_only(I_BDEV(bd_inode))) |
|---|
| 1926 | 1906 | return -EPERM; |
|---|
| 1927 | 1907 | |
|---|
| 1928 | | - if (IS_SWAPFILE(bd_inode)) |
|---|
| 1908 | + if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev)) |
|---|
| 1929 | 1909 | return -ETXTBSY; |
|---|
| 1930 | 1910 | |
|---|
| 1931 | 1911 | if (!iov_iter_count(from)) |
|---|
| .. | .. |
|---|
| 1999 | 1979 | |
|---|
| 2000 | 1980 | static const struct address_space_operations def_blk_aops = { |
|---|
| 2001 | 1981 | .readpage = blkdev_readpage, |
|---|
| 2002 | | - .readpages = blkdev_readpages, |
|---|
| 1982 | + .readahead = blkdev_readahead, |
|---|
| 2003 | 1983 | .writepage = blkdev_writepage, |
|---|
| 2004 | 1984 | .write_begin = blkdev_write_begin, |
|---|
| 2005 | 1985 | .write_end = blkdev_write_end, |
|---|
| 2006 | 1986 | .writepages = blkdev_writepages, |
|---|
| 2007 | 1987 | .releasepage = blkdev_releasepage, |
|---|
| 2008 | 1988 | .direct_IO = blkdev_direct_IO, |
|---|
| 1989 | + .migratepage = buffer_migrate_page_norefs, |
|---|
| 2009 | 1990 | .is_dirty_writeback = buffer_check_dirty_writeback, |
|---|
| 2010 | 1991 | }; |
|---|
| 2011 | 1992 | |
|---|
| .. | .. |
|---|
| 2017 | 1998 | loff_t len) |
|---|
| 2018 | 1999 | { |
|---|
| 2019 | 2000 | struct block_device *bdev = I_BDEV(bdev_file_inode(file)); |
|---|
| 2020 | | - struct address_space *mapping; |
|---|
| 2021 | 2001 | loff_t end = start + len - 1; |
|---|
| 2022 | 2002 | loff_t isize; |
|---|
| 2023 | 2003 | int error; |
|---|
| .. | .. |
|---|
| 2045 | 2025 | return -EINVAL; |
|---|
| 2046 | 2026 | |
|---|
| 2047 | 2027 | /* Invalidate the page cache, including dirty pages. */ |
|---|
| 2048 | | - mapping = bdev->bd_inode->i_mapping; |
|---|
| 2049 | | - truncate_inode_pages_range(mapping, start, end); |
|---|
| 2028 | + error = truncate_bdev_range(bdev, file->f_mode, start, end); |
|---|
| 2029 | + if (error) |
|---|
| 2030 | + return error; |
|---|
| 2050 | 2031 | |
|---|
| 2051 | 2032 | switch (mode) { |
|---|
| 2052 | 2033 | case FALLOC_FL_ZERO_RANGE: |
|---|
| .. | .. |
|---|
| 2073 | 2054 | * the caller will be given -EBUSY. The third argument is |
|---|
| 2074 | 2055 | * inclusive, so the rounding here is safe. |
|---|
| 2075 | 2056 | */ |
|---|
| 2076 | | - return invalidate_inode_pages2_range(mapping, |
|---|
| 2057 | + return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping, |
|---|
| 2077 | 2058 | start >> PAGE_SHIFT, |
|---|
| 2078 | 2059 | end >> PAGE_SHIFT); |
|---|
| 2079 | 2060 | } |
|---|
| .. | .. |
|---|
| 2084 | 2065 | .llseek = block_llseek, |
|---|
| 2085 | 2066 | .read_iter = blkdev_read_iter, |
|---|
| 2086 | 2067 | .write_iter = blkdev_write_iter, |
|---|
| 2068 | + .iopoll = blkdev_iopoll, |
|---|
| 2087 | 2069 | .mmap = generic_file_mmap, |
|---|
| 2088 | 2070 | .fsync = blkdev_fsync, |
|---|
| 2089 | 2071 | .unlocked_ioctl = block_ioctl, |
|---|
| .. | .. |
|---|
| 2094 | 2076 | .splice_write = iter_file_splice_write, |
|---|
| 2095 | 2077 | .fallocate = blkdev_fallocate, |
|---|
| 2096 | 2078 | }; |
|---|
| 2097 | | - |
|---|
| 2098 | | -int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) |
|---|
| 2099 | | -{ |
|---|
| 2100 | | - int res; |
|---|
| 2101 | | - mm_segment_t old_fs = get_fs(); |
|---|
| 2102 | | - set_fs(KERNEL_DS); |
|---|
| 2103 | | - res = blkdev_ioctl(bdev, 0, cmd, arg); |
|---|
| 2104 | | - set_fs(old_fs); |
|---|
| 2105 | | - return res; |
|---|
| 2106 | | -} |
|---|
| 2107 | | - |
|---|
| 2108 | | -EXPORT_SYMBOL(ioctl_by_bdev); |
|---|
| 2109 | 2079 | |
|---|
| 2110 | 2080 | /** |
|---|
| 2111 | 2081 | * lookup_bdev - lookup a struct block_device by name |
|---|