2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
--- a/kernel/drivers/block/loop.c
+++ b/kernel/drivers/block/loop.c
@@ -77,6 +77,7 @@
 #include <linux/falloc.h>
 #include <linux/uio.h>
 #include <linux/ioprio.h>
+#include <linux/blk-cgroup.h>
 
 #include "loop.h"
 
@@ -228,19 +229,6 @@
 }
 
 /**
- * loop_validate_block_size() - validates the passed in block size
- * @bsize: size to validate
- */
-static int
-loop_validate_block_size(unsigned short bsize)
-{
-	if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
-		return -EINVAL;
-
-	return 0;
-}
-
-/**
  * loop_set_size() - sets device size and notifies userspace
  * @lo: struct loop_device to set the size for
  * @size: new size of the loop device
@@ -252,10 +240,10 @@
 {
 	struct block_device *bdev = lo->lo_device;
 
-	set_capacity(lo->lo_disk, size);
-	bd_set_size(bdev, size << SECTOR_SHIFT);
-	/* let user-space know about the new size */
-	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+	bd_set_nr_sectors(bdev, size);
+
+	if (!set_capacity_revalidate_and_notify(lo->lo_disk, size, false))
+		kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
 }
 
 static inline int
@@ -281,7 +269,7 @@
 	struct iov_iter i;
 	ssize_t bw;
 
-	iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
+	iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
 
 	file_start_write(file);
 	bw = vfs_iter_write(file, &i, ppos, 0);
@@ -359,7 +347,7 @@
 	ssize_t len;
 
 	rq_for_each_segment(bvec, rq, iter) {
-		iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
+		iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
 		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0)
 			return len;
@@ -400,7 +388,7 @@
 		b.bv_offset = 0;
 		b.bv_len = bvec.bv_len;
 
-		iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
+		iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
 		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0) {
 			ret = len;
@@ -474,7 +462,7 @@
 	if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
 	    req_op(rq) != REQ_OP_READ) {
 		if (cmd->ret < 0)
-			ret = BLK_STS_IOERR;
+			ret = errno_to_blk_status(cmd->ret);
 		goto end_io;
 	}
 
@@ -509,7 +497,8 @@
 		return;
 	kfree(cmd->bvec);
 	cmd->bvec = NULL;
-	blk_mq_complete_request(rq);
+	if (likely(!blk_should_fake_timeout(rq->q)))
+		blk_mq_complete_request(rq);
 }
 
 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -526,21 +515,22 @@
 		     loff_t pos, bool rw)
 {
 	struct iov_iter iter;
+	struct req_iterator rq_iter;
 	struct bio_vec *bvec;
 	struct request *rq = blk_mq_rq_from_pdu(cmd);
 	struct bio *bio = rq->bio;
 	struct file *file = lo->lo_backing_file;
+	struct bio_vec tmp;
 	unsigned int offset;
-	int segments = 0;
+	int nr_bvec = 0;
 	int ret;
 
-	if (rq->bio != rq->biotail) {
-		struct req_iterator iter;
-		struct bio_vec tmp;
+	rq_for_each_bvec(tmp, rq, rq_iter)
+		nr_bvec++;
 
-		__rq_for_each_bio(bio, rq)
-			segments += bio_segments(bio);
-		bvec = kmalloc_array(segments, sizeof(struct bio_vec),
+	if (rq->bio != rq->biotail) {
+
+		bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
 				     GFP_NOIO);
 		if (!bvec)
 			return -EIO;
@@ -549,10 +539,10 @@
 		/*
 		 * The bios of the request may be started from the middle of
 		 * the 'bvec' because of bio splitting, so we can't directly
-		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment
+		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
 		 * API will take care of all details for us.
 		 */
-		rq_for_each_segment(tmp, rq, iter) {
+		rq_for_each_bvec(tmp, rq, rq_iter) {
 			*bvec = tmp;
 			bvec++;
 		}
@@ -566,12 +556,10 @@
 		 */
 		offset = bio->bi_iter.bi_bvec_done;
 		bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
-		segments = bio_segments(bio);
 	}
 	atomic_set(&cmd->ref, 2);
 
-	iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
-		      segments, blk_rq_bytes(rq));
+	iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
 	iter.iov_offset = offset;
 
 	cmd->iocb.ki_pos = pos;
@@ -640,14 +628,13 @@
 	default:
 		WARN_ON_ONCE(1);
 		return -EIO;
-		break;
 	}
 }
 
 static inline void loop_update_dio(struct loop_device *lo)
 {
-	__loop_update_dio(lo, io_is_direct(lo->lo_backing_file) |
-			lo->use_dio);
+	__loop_update_dio(lo, (lo->lo_backing_file->f_flags & O_DIRECT) |
+				lo->use_dio);
 }
 
 static void loop_reread_partitions(struct loop_device *lo,
@@ -655,7 +642,9 @@
 {
 	int rc;
 
-	rc = blkdev_reread_part(bdev);
+	mutex_lock(&bdev->bd_mutex);
+	rc = bdev_disk_changed(bdev, false);
+	mutex_unlock(&bdev->bd_mutex);
 	if (rc)
 		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
 			__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -808,33 +797,33 @@
 
 static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
 {
-	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset);
 }
 
 static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
 {
-	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
 }
 
 static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
 {
 	int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
 
-	return sprintf(buf, "%s\n", autoclear ? "1" : "0");
+	return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0");
 }
 
 static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
 {
 	int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
 
-	return sprintf(buf, "%s\n", partscan ? "1" : "0");
+	return sysfs_emit(buf, "%s\n", partscan ? "1" : "0");
 }
 
 static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf)
 {
 	int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO);
 
-	return sprintf(buf, "%s\n", dio ? "1" : "0");
+	return sysfs_emit(buf, "%s\n", dio ? "1" : "0");
 }
 
 LOOP_ATTR_RO(backing_file);
@@ -932,7 +921,7 @@
 
 static int loop_kthread_worker_fn(void *worker_ptr)
 {
-	current->flags |= PF_LESS_THROTTLE | PF_MEMALLOC_NOIO;
+	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
 	return kthread_worker_fn(worker_ptr);
 }
 
@@ -945,6 +934,24 @@
 		return -ENOMEM;
 	set_user_nice(lo->worker_task, MIN_NICE);
 	return 0;
+}
+
+static void loop_update_rotational(struct loop_device *lo)
+{
+	struct file *file = lo->lo_backing_file;
+	struct inode *file_inode = file->f_mapping->host;
+	struct block_device *file_bdev = file_inode->i_sb->s_bdev;
+	struct request_queue *q = lo->lo_queue;
+	bool nonrot = true;
+
+	/* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
+	if (file_bdev)
+		nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
+
+	if (nonrot)
+		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
 }
 
 static int
@@ -1022,8 +1029,13 @@
 	if (err)
 		return err;
 
+	/* Avoid assigning overflow values */
+	if (info->lo_offset > LLONG_MAX || info->lo_sizelimit > LLONG_MAX)
+		return -EOVERFLOW;
+
 	lo->lo_offset = info->lo_offset;
 	lo->lo_sizelimit = info->lo_sizelimit;
+
 	memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
 	memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
 	lo->lo_file_name[LO_NAME_SIZE-1] = 0;
@@ -1055,6 +1067,7 @@
 	struct file *file;
 	struct inode *inode;
 	struct address_space *mapping;
+	struct block_device *claimed_bdev = NULL;
 	int error;
 	loff_t size;
 	bool partscan;
@@ -1068,9 +1081,20 @@
 	if (!file)
 		goto out;
 
+	/*
+	 * If we don't hold exclusive handle for the device, upgrade to it
+	 * here to avoid changing device under exclusive owner.
+	 */
+	if (!(mode & FMODE_EXCL)) {
+		claimed_bdev = bdev->bd_contains;
+		error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure);
+		if (error)
+			goto out_putf;
+	}
+
 	error = mutex_lock_killable(&loop_ctl_mutex);
 	if (error)
-		goto out_putf;
+		goto out_bdev;
 
 	error = -EBUSY;
 	if (lo->lo_state != Lo_unbound)
@@ -1089,7 +1113,7 @@
 	}
 
 	if (config->block_size) {
-		error = loop_validate_block_size(config->block_size);
+		error = blk_validate_block_size(config->block_size);
 		if (error)
 			goto out_unlock;
 	}
@@ -1106,8 +1130,6 @@
 	if (error)
 		goto out_unlock;
 
-	error = 0;
-
 	set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
 	lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
@@ -1121,7 +1143,7 @@
 
 	if (config->block_size)
 		bsize = config->block_size;
-	else if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev)
+	else if ((lo->lo_backing_file->f_flags & O_DIRECT) && inode->i_sb->s_bdev)
 		/* In case of direct I/O, match underlying block size */
 		bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
 	else
@@ -1131,6 +1153,8 @@
 	blk_queue_physical_block_size(lo->lo_queue, bsize);
 	blk_queue_io_min(lo->lo_queue, bsize);
 
+	loop_config_discard(lo);
+	loop_update_rotational(lo);
 	loop_update_dio(lo);
 	loop_sysfs_init(lo);
 
@@ -1144,6 +1168,8 @@
 	if (part_shift)
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
+	if (partscan)
+		lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
 
 	/* Grab the block_device to prevent its destruction after we
 	 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
@@ -1152,10 +1178,15 @@
 	mutex_unlock(&loop_ctl_mutex);
 	if (partscan)
 		loop_reread_partitions(lo, bdev);
+	if (claimed_bdev)
+		bd_abort_claiming(bdev, claimed_bdev, loop_configure);
 	return 0;
 
 out_unlock:
 	mutex_unlock(&loop_ctl_mutex);
+out_bdev:
+	if (claimed_bdev)
+		bd_abort_claiming(bdev, claimed_bdev, loop_configure);
 out_putf:
 	fput(file);
 out:
@@ -1184,6 +1215,9 @@
 		err = -EINVAL;
 		goto out_unlock;
 	}
+
+	if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
+		blk_queue_write_cache(lo->lo_queue, false, false);
 
 	/* freeze request queue during the transition */
 	blk_mq_freeze_queue(lo->lo_queue);
@@ -1214,7 +1248,7 @@
 	set_capacity(lo->lo_disk, 0);
 	loop_sysfs_exit(lo);
 	if (bdev) {
-		bd_set_size(bdev, 0);
+		bd_set_nr_sectors(bdev, 0);
 		/* let user-space know about this change */
 		kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
 	}
@@ -1237,10 +1271,11 @@
 	 * must be at least one and it can only become zero when the
 	 * current holder is released.
 	 */
-	if (release)
-		err = __blkdev_reread_part(bdev);
-	else
-		err = blkdev_reread_part(bdev);
+	if (!release)
+		mutex_lock(&bdev->bd_mutex);
+	err = bdev_disk_changed(bdev, false);
+	if (!release)
+		mutex_unlock(&bdev->bd_mutex);
	if (err)
 		pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
 			__func__, lo_number, err);
@@ -1342,7 +1377,7 @@
 	blk_mq_freeze_queue(lo->lo_queue);
 
 	if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
-		/* If any pages were dirtied after kill_bdev(), try again */
+		/* If any pages were dirtied after invalidate_bdev(), try again */
 		err = -EAGAIN;
 		pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
 			__func__, lo->lo_number, lo->lo_file_name,
@@ -1574,7 +1609,7 @@
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
 
-	err = loop_validate_block_size(arg);
+	err = blk_validate_block_size(arg);
 	if (err)
 		return err;
 
@@ -1684,7 +1719,7 @@
 	case LOOP_SET_BLOCK_SIZE:
 		if (!(mode & FMODE_WRITE) && !capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		/* Fall through */
+		fallthrough;
 	default:
 		err = lo_simple_ioctl(lo, cmd, arg);
 		break;
@@ -1832,7 +1867,7 @@
 	case LOOP_SET_STATUS64:
 	case LOOP_CONFIGURE:
 		arg = (unsigned long) compat_ptr(arg);
-		/* fall through */
+		fallthrough;
 	case LOOP_SET_FD:
 	case LOOP_CHANGE_FD:
 	case LOOP_SET_BLOCK_SIZE:
@@ -1984,8 +2019,8 @@
 
 	/* always use the first bio's css */
 #ifdef CONFIG_BLK_CGROUP
-	if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
-		cmd->css = rq->bio->bi_css;
+	if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
+		cmd->css = &bio_blkcg(rq->bio)->css;
 		css_get(cmd->css);
 	} else
 #endif
@@ -2011,8 +2046,12 @@
  failed:
 	/* complete non-aio request */
 	if (!cmd->use_aio || ret) {
-		cmd->ret = ret ? -EIO : 0;
-		blk_mq_complete_request(rq);
+		if (ret == -EOPNOTSUPP)
+			cmd->ret = ret;
+		else
+			cmd->ret = ret ? -EIO : 0;
+		if (likely(!blk_should_fake_timeout(rq->q)))
+			blk_mq_complete_request(rq);
 	}
 }
 
@@ -2070,7 +2109,8 @@
 	lo->tag_set.queue_depth = 128;
 	lo->tag_set.numa_node = NUMA_NO_NODE;
 	lo->tag_set.cmd_size = sizeof(struct loop_cmd);
-	lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING |
+		BLK_MQ_F_NO_SCHED_BY_DEFAULT;
 	lo->tag_set.driver_data = lo;
 
 	err = blk_mq_alloc_tag_set(&lo->tag_set);
@@ -2078,7 +2118,7 @@
 		goto out_free_idr;
 
 	lo->lo_queue = blk_mq_init_queue(&lo->tag_set);
-	if (IS_ERR_OR_NULL(lo->lo_queue)) {
+	if (IS_ERR(lo->lo_queue)) {
 		err = PTR_ERR(lo->lo_queue);
 		goto out_cleanup_tags;
 	}