From ea08eeccae9297f7aabd2ef7f0c2517ac4549acc Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:18:26 +0000
Subject: [PATCH] write in 30M
---
kernel/drivers/block/loop.c | 168 ++++++++++++++++++++++++++++++++++---------------------
1 files changed, 104 insertions(+), 64 deletions(-)
diff --git a/kernel/drivers/block/loop.c b/kernel/drivers/block/loop.c
index dd8c481..38a2461 100644
--- a/kernel/drivers/block/loop.c
+++ b/kernel/drivers/block/loop.c
@@ -77,6 +77,7 @@
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/ioprio.h>
+#include <linux/blk-cgroup.h>
#include "loop.h"
@@ -228,19 +229,6 @@
}
/**
- * loop_validate_block_size() - validates the passed in block size
- * @bsize: size to validate
- */
-static int
-loop_validate_block_size(unsigned short bsize)
-{
- if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
- return -EINVAL;
-
- return 0;
-}
-
-/**
* loop_set_size() - sets device size and notifies userspace
* @lo: struct loop_device to set the size for
* @size: new size of the loop device
@@ -252,10 +240,10 @@
{
struct block_device *bdev = lo->lo_device;
- set_capacity(lo->lo_disk, size);
- bd_set_size(bdev, size << SECTOR_SHIFT);
- /* let user-space know about the new size */
- kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+ bd_set_nr_sectors(bdev, size);
+
+ if (!set_capacity_revalidate_and_notify(lo->lo_disk, size, false))
+ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
}
static inline int
@@ -281,7 +269,7 @@
struct iov_iter i;
ssize_t bw;
- iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
+ iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
file_start_write(file);
bw = vfs_iter_write(file, &i, ppos, 0);
@@ -359,7 +347,7 @@
ssize_t len;
rq_for_each_segment(bvec, rq, iter) {
- iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
+ iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
if (len < 0)
return len;
@@ -400,7 +388,7 @@
b.bv_offset = 0;
b.bv_len = bvec.bv_len;
- iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
+ iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
if (len < 0) {
ret = len;
@@ -474,7 +462,7 @@
if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
req_op(rq) != REQ_OP_READ) {
if (cmd->ret < 0)
- ret = BLK_STS_IOERR;
+ ret = errno_to_blk_status(cmd->ret);
goto end_io;
}
@@ -509,7 +497,8 @@
return;
kfree(cmd->bvec);
cmd->bvec = NULL;
- blk_mq_complete_request(rq);
+ if (likely(!blk_should_fake_timeout(rq->q)))
+ blk_mq_complete_request(rq);
}
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -526,21 +515,22 @@
loff_t pos, bool rw)
{
struct iov_iter iter;
+ struct req_iterator rq_iter;
struct bio_vec *bvec;
struct request *rq = blk_mq_rq_from_pdu(cmd);
struct bio *bio = rq->bio;
struct file *file = lo->lo_backing_file;
+ struct bio_vec tmp;
unsigned int offset;
- int segments = 0;
+ int nr_bvec = 0;
int ret;
- if (rq->bio != rq->biotail) {
- struct req_iterator iter;
- struct bio_vec tmp;
+ rq_for_each_bvec(tmp, rq, rq_iter)
+ nr_bvec++;
- __rq_for_each_bio(bio, rq)
- segments += bio_segments(bio);
- bvec = kmalloc_array(segments, sizeof(struct bio_vec),
+ if (rq->bio != rq->biotail) {
+
+ bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
GFP_NOIO);
if (!bvec)
return -EIO;
@@ -549,10 +539,10 @@
/*
* The bios of the request may be started from the middle of
* the 'bvec' because of bio splitting, so we can't directly
- * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment
+ * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec
* API will take care of all details for us.
*/
- rq_for_each_segment(tmp, rq, iter) {
+ rq_for_each_bvec(tmp, rq, rq_iter) {
*bvec = tmp;
bvec++;
}
@@ -566,12 +556,10 @@
*/
offset = bio->bi_iter.bi_bvec_done;
bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
- segments = bio_segments(bio);
}
atomic_set(&cmd->ref, 2);
- iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
- segments, blk_rq_bytes(rq));
+ iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
iter.iov_offset = offset;
cmd->iocb.ki_pos = pos;
@@ -640,14 +628,13 @@
default:
WARN_ON_ONCE(1);
return -EIO;
- break;
}
}
static inline void loop_update_dio(struct loop_device *lo)
{
- __loop_update_dio(lo, io_is_direct(lo->lo_backing_file) |
- lo->use_dio);
+ __loop_update_dio(lo, (lo->lo_backing_file->f_flags & O_DIRECT) |
+ lo->use_dio);
}
static void loop_reread_partitions(struct loop_device *lo,
@@ -655,7 +642,9 @@
{
int rc;
- rc = blkdev_reread_part(bdev);
+ mutex_lock(&bdev->bd_mutex);
+ rc = bdev_disk_changed(bdev, false);
+ mutex_unlock(&bdev->bd_mutex);
if (rc)
pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -808,33 +797,33 @@
static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
{
- return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset);
}
static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
{
- return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
}
static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
{
int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
- return sprintf(buf, "%s\n", autoclear ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0");
}
static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
{
int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN);
- return sprintf(buf, "%s\n", partscan ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", partscan ? "1" : "0");
}
static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf)
{
int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO);
- return sprintf(buf, "%s\n", dio ? "1" : "0");
+ return sysfs_emit(buf, "%s\n", dio ? "1" : "0");
}
LOOP_ATTR_RO(backing_file);
@@ -932,7 +921,7 @@
static int loop_kthread_worker_fn(void *worker_ptr)
{
- current->flags |= PF_LESS_THROTTLE | PF_MEMALLOC_NOIO;
+ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
return kthread_worker_fn(worker_ptr);
}
@@ -945,6 +934,24 @@
return -ENOMEM;
set_user_nice(lo->worker_task, MIN_NICE);
return 0;
+}
+
+static void loop_update_rotational(struct loop_device *lo)
+{
+ struct file *file = lo->lo_backing_file;
+ struct inode *file_inode = file->f_mapping->host;
+ struct block_device *file_bdev = file_inode->i_sb->s_bdev;
+ struct request_queue *q = lo->lo_queue;
+ bool nonrot = true;
+
+ /* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
+ if (file_bdev)
+ nonrot = blk_queue_nonrot(bdev_get_queue(file_bdev));
+
+ if (nonrot)
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+ else
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
}
static int
@@ -1022,8 +1029,13 @@
if (err)
return err;
+ /* Avoid assigning overflow values */
+ if (info->lo_offset > LLONG_MAX || info->lo_sizelimit > LLONG_MAX)
+ return -EOVERFLOW;
+
lo->lo_offset = info->lo_offset;
lo->lo_sizelimit = info->lo_sizelimit;
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
@@ -1055,6 +1067,7 @@
struct file *file;
struct inode *inode;
struct address_space *mapping;
+ struct block_device *claimed_bdev = NULL;
int error;
loff_t size;
bool partscan;
@@ -1068,9 +1081,20 @@
if (!file)
goto out;
+ /*
+ * If we don't hold exclusive handle for the device, upgrade to it
+ * here to avoid changing device under exclusive owner.
+ */
+ if (!(mode & FMODE_EXCL)) {
+ claimed_bdev = bdev->bd_contains;
+ error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure);
+ if (error)
+ goto out_putf;
+ }
+
error = mutex_lock_killable(&loop_ctl_mutex);
if (error)
- goto out_putf;
+ goto out_bdev;
error = -EBUSY;
if (lo->lo_state != Lo_unbound)
@@ -1089,7 +1113,7 @@
}
if (config->block_size) {
- error = loop_validate_block_size(config->block_size);
+ error = blk_validate_block_size(config->block_size);
if (error)
goto out_unlock;
}
@@ -1106,8 +1130,6 @@
if (error)
goto out_unlock;
- error = 0;
-
set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
@@ -1121,7 +1143,7 @@
if (config->block_size)
bsize = config->block_size;
- else if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev)
+ else if ((lo->lo_backing_file->f_flags & O_DIRECT) && inode->i_sb->s_bdev)
/* In case of direct I/O, match underlying block size */
bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
else
@@ -1131,6 +1153,8 @@
blk_queue_physical_block_size(lo->lo_queue, bsize);
blk_queue_io_min(lo->lo_queue, bsize);
+ loop_config_discard(lo);
+ loop_update_rotational(lo);
loop_update_dio(lo);
loop_sysfs_init(lo);
@@ -1144,6 +1168,8 @@
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
+ if (partscan)
+ lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
/* Grab the block_device to prevent its destruction after we
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
@@ -1152,10 +1178,15 @@
mutex_unlock(&loop_ctl_mutex);
if (partscan)
loop_reread_partitions(lo, bdev);
+ if (claimed_bdev)
+ bd_abort_claiming(bdev, claimed_bdev, loop_configure);
return 0;
out_unlock:
mutex_unlock(&loop_ctl_mutex);
+out_bdev:
+ if (claimed_bdev)
+ bd_abort_claiming(bdev, claimed_bdev, loop_configure);
out_putf:
fput(file);
out:
@@ -1184,6 +1215,9 @@
err = -EINVAL;
goto out_unlock;
}
+
+ if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
+ blk_queue_write_cache(lo->lo_queue, false, false);
/* freeze request queue during the transition */
blk_mq_freeze_queue(lo->lo_queue);
@@ -1214,7 +1248,7 @@
set_capacity(lo->lo_disk, 0);
loop_sysfs_exit(lo);
if (bdev) {
- bd_set_size(bdev, 0);
+ bd_set_nr_sectors(bdev, 0);
/* let user-space know about this change */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
}
@@ -1237,10 +1271,11 @@
* must be at least one and it can only become zero when the
* current holder is released.
*/
- if (release)
- err = __blkdev_reread_part(bdev);
- else
- err = blkdev_reread_part(bdev);
+ if (!release)
+ mutex_lock(&bdev->bd_mutex);
+ err = bdev_disk_changed(bdev, false);
+ if (!release)
+ mutex_unlock(&bdev->bd_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo_number, err);
@@ -1342,7 +1377,7 @@
blk_mq_freeze_queue(lo->lo_queue);
if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
- /* If any pages were dirtied after kill_bdev(), try again */
+ /* If any pages were dirtied after invalidate_bdev(), try again */
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
@@ -1574,7 +1609,7 @@
if (lo->lo_state != Lo_bound)
return -ENXIO;
- err = loop_validate_block_size(arg);
+ err = blk_validate_block_size(arg);
if (err)
return err;
@@ -1684,7 +1719,7 @@
case LOOP_SET_BLOCK_SIZE:
if (!(mode & FMODE_WRITE) && !capable(CAP_SYS_ADMIN))
return -EPERM;
- /* Fall through */
+ fallthrough;
default:
err = lo_simple_ioctl(lo, cmd, arg);
break;
@@ -1832,7 +1867,7 @@
case LOOP_SET_STATUS64:
case LOOP_CONFIGURE:
arg = (unsigned long) compat_ptr(arg);
- /* fall through */
+ fallthrough;
case LOOP_SET_FD:
case LOOP_CHANGE_FD:
case LOOP_SET_BLOCK_SIZE:
@@ -1984,8 +2019,8 @@
/* always use the first bio's css */
#ifdef CONFIG_BLK_CGROUP
- if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
- cmd->css = rq->bio->bi_css;
+ if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
+ cmd->css = &bio_blkcg(rq->bio)->css;
css_get(cmd->css);
} else
#endif
@@ -2011,8 +2046,12 @@
failed:
/* complete non-aio request */
if (!cmd->use_aio || ret) {
- cmd->ret = ret ? -EIO : 0;
- blk_mq_complete_request(rq);
+ if (ret == -EOPNOTSUPP)
+ cmd->ret = ret;
+ else
+ cmd->ret = ret ? -EIO : 0;
+ if (likely(!blk_should_fake_timeout(rq->q)))
+ blk_mq_complete_request(rq);
}
}
@@ -2070,7 +2109,8 @@
lo->tag_set.queue_depth = 128;
lo->tag_set.numa_node = NUMA_NO_NODE;
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
- lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+ lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING |
+ BLK_MQ_F_NO_SCHED_BY_DEFAULT;
lo->tag_set.driver_data = lo;
err = blk_mq_alloc_tag_set(&lo->tag_set);
@@ -2078,7 +2118,7 @@
goto out_free_idr;
lo->lo_queue = blk_mq_init_queue(&lo->tag_set);
- if (IS_ERR_OR_NULL(lo->lo_queue)) {
+ if (IS_ERR(lo->lo_queue)) {
err = PTR_ERR(lo->lo_queue);
goto out_cleanup_tags;
}
--
Gitblit v1.6.2