From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:20:52 +0000
Subject: [PATCH] add new system file

---
 kernel/drivers/block/loop.c | 168 ++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 104 insertions(+), 64 deletions(-)

diff --git a/kernel/drivers/block/loop.c b/kernel/drivers/block/loop.c
index dd8c481..38a2461 100644
--- a/kernel/drivers/block/loop.c
+++ b/kernel/drivers/block/loop.c
@@ -77,6 +77,7 @@
 #include <linux/falloc.h>
 #include <linux/uio.h>
 #include <linux/ioprio.h>
+#include <linux/blk-cgroup.h>

 #include "loop.h"

@@ -228,19 +229,6 @@
 }

 /**
- * loop_validate_block_size() - validates the passed in block size
- * @bsize: size to validate
- */
-static int
-loop_validate_block_size(unsigned short bsize)
-{
-	if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
-		return -EINVAL;
-
-	return 0;
-}
-
-/**
  * loop_set_size() - sets device size and notifies userspace
  * @lo: struct loop_device to set the size for
  * @size: new size of the loop device
@@ -252,10 +240,10 @@
 {
 	struct block_device *bdev = lo->lo_device;

-	set_capacity(lo->lo_disk, size);
-	bd_set_size(bdev, size << SECTOR_SHIFT);
-	/* let user-space know about the new size */
-	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+	bd_set_nr_sectors(bdev, size);
+
+	if (!set_capacity_revalidate_and_notify(lo->lo_disk, size, false))
+		kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
 }

 static inline int
@@ -281,7 +269,7 @@
 	struct iov_iter i;
 	ssize_t bw;

-	iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
+	iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);

 	file_start_write(file);
 	bw = vfs_iter_write(file, &i, ppos, 0);
@@ -359,7 +347,7 @@
 	ssize_t len;

 	rq_for_each_segment(bvec, rq, iter) {
-		iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
+		iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
 		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0)
 			return len;
@@ -400,7 +388,7 @@
 		b.bv_offset = 0;
 		b.bv_len = bvec.bv_len;

-		iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
+		iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
 		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0) {
 			ret = len;
@@ -474,7 +462,7 @@
 	if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
 	    req_op(rq) != REQ_OP_READ) {
 		if (cmd->ret < 0)
-			ret = BLK_STS_IOERR;
+			ret = errno_to_blk_status(cmd->ret);
 		goto end_io;
 	}

@@ -509,7 +497,8 @@
 		return;
 	kfree(cmd->bvec);
 	cmd->bvec = NULL;
-	blk_mq_complete_request(rq);
+	if (likely(!blk_should_fake_timeout(rq->q)))
+		blk_mq_complete_request(rq);
 }

 static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -526,21 +515,22 @@
 		     loff_t pos, bool rw)
 {
 	struct iov_iter iter;
+	struct req_iterator rq_iter;
 	struct bio_vec *bvec;
 	struct request *rq = blk_mq_rq_from_pdu(cmd);
 	struct bio *bio = rq->bio;
 	struct file *file = lo->lo_backing_file;
+	struct bio_vec tmp;
 	unsigned int offset;
-	int segments = 0;
+	int nr_bvec = 0;
 	int ret;

-	if (rq->bio != rq->biotail) {
-		struct req_iterator iter;
-		struct bio_vec tmp;
+	rq_for_each_bvec(tmp, rq, rq_iter)
+		nr_bvec++;

-		__rq_for_each_bio(bio, rq)
-			segments += bio_segments(bio);
-		bvec = kmalloc_array(segments, sizeof(struct bio_vec),
+	if (rq->bio != rq->biotail) {
+
+		bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
 				     GFP_NOIO);
 		if (!bvec)
 			return -EIO;
@@ -549,10 +539,10 @@
 		/*
 		 * The bios of the request may be started from the middle of
 		 * the 'bvec' because of bio splitting, so we can't directly
-		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment
+		 * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
 		 * API will take care of all details for us.
 		 */
-		rq_for_each_segment(tmp, rq, iter) {
+		rq_for_each_bvec(tmp, rq, rq_iter) {
 			*bvec = tmp;
 			bvec++;
 		}
@@ -566,12 +556,10 @@
 		 */
 		offset = bio->bi_iter.bi_bvec_done;
 		bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
-		segments = bio_segments(bio);
 	}
 	atomic_set(&cmd->ref, 2);

-	iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
-		      segments, blk_rq_bytes(rq));
+	iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
 	iter.iov_offset = offset;

 	cmd->iocb.ki_pos = pos;
@@ -640,14 +628,13 @@
 	default:
 		WARN_ON_ONCE(1);
 		return -EIO;
-		break;
 	}
 }

 static inline void loop_update_dio(struct loop_device *lo)
 {
-	__loop_update_dio(lo, io_is_direct(lo->lo_backing_file) |
-			lo->use_dio);
+	__loop_update_dio(lo, (lo->lo_backing_file->f_flags & O_DIRECT) |
+				lo->use_dio);
 }

 static void loop_reread_partitions(struct loop_device *lo,
@@ -655,7 +642,9 @@
 {
 	int rc;

-	rc = blkdev_reread_part(bdev);
+	mutex_lock(&bdev->bd_mutex);
+	rc = bdev_disk_changed(bdev, false);
+	mutex_unlock(&bdev->bd_mutex);
 	if (rc)
 		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
 			__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -808,33 +797,33 @@

 static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
 {
-	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset);
 }

 static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
 {
-	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
 }

 static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
 {
 	int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);

-	return sprintf(buf, "%s\n", autoclear ? "1" : "0");
+	return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0");
 }

 static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
 {
 	int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);

-	return sprintf(buf, "%s\n", partscan ? "1" : "0");
+	return sysfs_emit(buf, "%s\n", partscan ? "1" : "0");
 }

 static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf)
 {
 	int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO);

-	return sprintf(buf, "%s\n", dio ? "1" : "0");
+	return sysfs_emit(buf, "%s\n", dio ? "1" : "0");
 }

 LOOP_ATTR_RO(backing_file);
@@ -932,7 +921,7 @@

 static int loop_kthread_worker_fn(void *worker_ptr)
 {
-	current->flags |= PF_LESS_THROTTLE | PF_MEMALLOC_NOIO;
+	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
 	return kthread_worker_fn(worker_ptr);
 }

@@ -945,6 +934,24 @@
 		return -ENOMEM;
 	set_user_nice(lo->worker_task, MIN_NICE);
 	return 0;
+}
+
+static void loop_update_rotational(struct loop_device *lo)
+{
+	struct file *file = lo->lo_backing_file;
+	struct inode *file_inode = file->f_mapping->host;
+	struct block_device *file_bdev = file_inode->i_sb->s_bdev;
+	struct request_queue *q = lo->lo_queue;
+	bool nonrot = true;
+
+	/* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
+	if (file_bdev)
+		nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
+
+	if (nonrot)
+		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
 }

 static int
@@ -1022,8 +1029,13 @@
 	if (err)
 		return err;

+	/* Avoid assigning overflow values */
+	if (info->lo_offset > LLONG_MAX || info->lo_sizelimit > LLONG_MAX)
+		return -EOVERFLOW;
+
 	lo->lo_offset = info->lo_offset;
 	lo->lo_sizelimit = info->lo_sizelimit;
+
 	memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
 	memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
 	lo->lo_file_name[LO_NAME_SIZE-1] = 0;
@@ -1055,6 +1067,7 @@
 	struct file *file;
 	struct inode *inode;
 	struct address_space *mapping;
+	struct block_device *claimed_bdev = NULL;
 	int error;
 	loff_t size;
 	bool partscan;
@@ -1068,9 +1081,20 @@
 	if (!file)
 		goto out;

+	/*
+	 * If we don't hold exclusive handle for the device, upgrade to it
+	 * here to avoid changing device under exclusive owner.
+	 */
+	if (!(mode & FMODE_EXCL)) {
+		claimed_bdev = bdev->bd_contains;
+		error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure);
+		if (error)
+			goto out_putf;
+	}
+
 	error = mutex_lock_killable(&loop_ctl_mutex);
 	if (error)
-		goto out_putf;
+		goto out_bdev;

 	error = -EBUSY;
 	if (lo->lo_state != Lo_unbound)
@@ -1089,7 +1113,7 @@
 	}

 	if (config->block_size) {
-		error = loop_validate_block_size(config->block_size);
+		error = blk_validate_block_size(config->block_size);
 		if (error)
 			goto out_unlock;
 	}
@@ -1106,8 +1130,6 @@
 	if (error)
 		goto out_unlock;

-	error = 0;
-
 	set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);

 	lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
@@ -1121,7 +1143,7 @@

 	if (config->block_size)
 		bsize = config->block_size;
-	else if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev)
+	else if ((lo->lo_backing_file->f_flags & O_DIRECT) && inode->i_sb->s_bdev)
 		/* In case of direct I/O, match underlying block size */
 		bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
 	else
@@ -1131,6 +1153,8 @@
 	blk_queue_physical_block_size(lo->lo_queue, bsize);
 	blk_queue_io_min(lo->lo_queue, bsize);

+	loop_config_discard(lo);
+	loop_update_rotational(lo);
 	loop_update_dio(lo);
 	loop_sysfs_init(lo);

@@ -1144,6 +1168,8 @@
 	if (part_shift)
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
+	if (partscan)
+		lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;

 	/* Grab the block_device to prevent its destruction after we
 	 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
@@ -1152,10 +1178,15 @@
 	mutex_unlock(&loop_ctl_mutex);
 	if (partscan)
 		loop_reread_partitions(lo, bdev);
+	if (claimed_bdev)
+		bd_abort_claiming(bdev, claimed_bdev, loop_configure);
 	return 0;

 out_unlock:
 	mutex_unlock(&loop_ctl_mutex);
+out_bdev:
+	if (claimed_bdev)
+		bd_abort_claiming(bdev, claimed_bdev, loop_configure);
 out_putf:
 	fput(file);
 out:
@@ -1184,6 +1215,9 @@
 		err = -EINVAL;
 		goto out_unlock;
 	}
+
+	if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
+		blk_queue_write_cache(lo->lo_queue, false, false);

 	/* freeze request queue during the transition */
 	blk_mq_freeze_queue(lo->lo_queue);
@@ -1214,7 +1248,7 @@
 	set_capacity(lo->lo_disk, 0);
 	loop_sysfs_exit(lo);
 	if (bdev) {
-		bd_set_size(bdev, 0);
+		bd_set_nr_sectors(bdev, 0);
 		/* let user-space know about this change */
 		kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
 	}
@@ -1237,10 +1271,11 @@
 	 * must be at least one and it can only become zero when the
 	 * current holder is released.
 	 */
-	if (release)
-		err = __blkdev_reread_part(bdev);
-	else
-		err = blkdev_reread_part(bdev);
+	if (!release)
+		mutex_lock(&bdev->bd_mutex);
+	err = bdev_disk_changed(bdev, false);
+	if (!release)
+		mutex_unlock(&bdev->bd_mutex);
 	if (err)
 		pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
 			__func__, lo_number, err);
@@ -1342,7 +1377,7 @@
 	blk_mq_freeze_queue(lo->lo_queue);

 	if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
-		/* If any pages were dirtied after kill_bdev(), try again */
+		/* If any pages were dirtied after invalidate_bdev(), try again */
 		err = -EAGAIN;
 		pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
 			__func__, lo->lo_number, lo->lo_file_name,
@@ -1574,7 +1609,7 @@
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;

-	err = loop_validate_block_size(arg);
+	err = blk_validate_block_size(arg);
 	if (err)
 		return err;

@@ -1684,7 +1719,7 @@
 	case LOOP_SET_BLOCK_SIZE:
 		if (!(mode & FMODE_WRITE) && !capable(CAP_SYS_ADMIN))
 			return -EPERM;
-		/* Fall through */
+		fallthrough;
 	default:
 		err = lo_simple_ioctl(lo, cmd, arg);
 		break;
@@ -1832,7 +1867,7 @@
 	case LOOP_SET_STATUS64:
 	case LOOP_CONFIGURE:
 		arg = (unsigned long) compat_ptr(arg);
-		/* fall through */
+		fallthrough;
 	case LOOP_SET_FD:
 	case LOOP_CHANGE_FD:
 	case LOOP_SET_BLOCK_SIZE:
@@ -1984,8 +2019,8 @@

 	/* always use the first bio's css */
 #ifdef CONFIG_BLK_CGROUP
-	if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
-		cmd->css = rq->bio->bi_css;
+	if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
+		cmd->css = &bio_blkcg(rq->bio)->css;
 		css_get(cmd->css);
 	} else
 #endif
@@ -2011,8 +2046,12 @@
  failed:
 	/* complete non-aio request */
 	if (!cmd->use_aio || ret) {
-		cmd->ret = ret ? -EIO : 0;
-		blk_mq_complete_request(rq);
+		if (ret == -EOPNOTSUPP)
+			cmd->ret = ret;
+		else
+			cmd->ret = ret ? -EIO : 0;
+		if (likely(!blk_should_fake_timeout(rq->q)))
+			blk_mq_complete_request(rq);
 	}
 }

@@ -2070,7 +2109,8 @@
 	lo->tag_set.queue_depth = 128;
 	lo->tag_set.numa_node = NUMA_NO_NODE;
 	lo->tag_set.cmd_size = sizeof(struct loop_cmd);
-	lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING |
+		BLK_MQ_F_NO_SCHED_BY_DEFAULT;
 	lo->tag_set.driver_data = lo;

 	err = blk_mq_alloc_tag_set(&lo->tag_set);
@@ -2078,7 +2118,7 @@
 		goto out_free_idr;

 	lo->lo_queue = blk_mq_init_queue(&lo->tag_set);
-	if (IS_ERR_OR_NULL(lo->lo_queue)) {
+	if (IS_ERR(lo->lo_queue)) {
 		err = PTR_ERR(lo->lo_queue);
 		goto out_cleanup_tags;
 	}
-- 
Gitblit v1.6.2