hc
2024-09-20 a36159eec6ca17402b0e146b86efaf76568dc353
kernel/drivers/md/raid5-ppl.c
....@@ -1,26 +1,18 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Partial Parity Log for closing the RAID5 write hole
34 * Copyright (c) 2017, Intel Corporation.
4
- *
5
- * This program is free software; you can redistribute it and/or modify it
6
- * under the terms and conditions of the GNU General Public License,
7
- * version 2, as published by the Free Software Foundation.
8
- *
9
- * This program is distributed in the hope it will be useful, but WITHOUT
10
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12
- * more details.
135 */
146
157 #include <linux/kernel.h>
168 #include <linux/blkdev.h>
179 #include <linux/slab.h>
1810 #include <linux/crc32c.h>
19
-#include <linux/flex_array.h>
2011 #include <linux/async_tx.h>
2112 #include <linux/raid/md_p.h>
2213 #include "md.h"
2314 #include "raid5.h"
15
+#include "raid5-log.h"
2416
2517 /*
2618 * PPL consists of a 4KB header (struct ppl_header) and at least 128KB for
....@@ -116,6 +108,8 @@
116108 /* stripes to retry if failed to allocate io_unit */
117109 struct list_head no_mem_stripes;
118110 spinlock_t no_mem_stripes_lock;
111
+
112
+ unsigned short write_hint;
119113 };
120114
121115 struct ppl_log {
....@@ -165,7 +159,7 @@
165159 struct dma_async_tx_descriptor *tx)
166160 {
167161 int disks = sh->disks;
168
- struct page **srcs = flex_array_get(percpu->scribble, 0);
162
+ struct page **srcs = percpu->scribble;
169163 int count = 0, pd_idx = sh->pd_idx, i;
170164 struct async_submit_ctl submit;
171165
....@@ -196,8 +190,7 @@
196190 }
197191
198192 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, tx,
199
- NULL, sh, flex_array_get(percpu->scribble, 0)
200
- + sizeof(struct page *) * (sh->disks + 2));
193
+ NULL, sh, (void *) (srcs + sh->disks + 2));
201194
202195 if (count == 1)
203196 tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE,
....@@ -331,7 +324,7 @@
331324 * be just after the last logged stripe and write to the same
332325 * disks. Use bit shift and logarithm to avoid 64-bit division.
333326 */
334
- if ((sh->sector == sh_last->sector + STRIPE_SECTORS) &&
327
+ if ((sh->sector == sh_last->sector + RAID5_STRIPE_SECTORS(conf)) &&
335328 (data_sector >> ilog2(conf->chunk_sectors) ==
336329 data_sector_last >> ilog2(conf->chunk_sectors)) &&
337330 ((data_sector - data_sector_last) * data_disks ==
....@@ -476,6 +469,7 @@
476469 bio_set_dev(bio, log->rdev->bdev);
477470 bio->bi_iter.bi_sector = log->next_io_sector;
478471 bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
472
+ bio->bi_write_hint = ppl_conf->write_hint;
479473
480474 pr_debug("%s: log->current_io_sector: %llu\n", __func__,
481475 (unsigned long long)log->next_io_sector);
....@@ -505,6 +499,7 @@
505499 bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES,
506500 &ppl_conf->bs);
507501 bio->bi_opf = prev->bi_opf;
502
+ bio->bi_write_hint = prev->bi_write_hint;
508503 bio_copy_dev(bio, prev);
509504 bio->bi_iter.bi_sector = bio_end_sector(prev);
510505 bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0);
....@@ -849,9 +844,9 @@
849844
850845 /* if start and end is 4k aligned, use a 4k block */
851846 if (block_size == 512 &&
852
- (r_sector_first & (STRIPE_SECTORS - 1)) == 0 &&
853
- (r_sector_last & (STRIPE_SECTORS - 1)) == 0)
854
- block_size = STRIPE_SIZE;
847
+ (r_sector_first & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0 &&
848
+ (r_sector_last & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0)
849
+ block_size = RAID5_STRIPE_SIZE(conf);
855850
856851 /* iterate through blocks in strip */
857852 for (i = 0; i < strip_sectors; i += (block_size >> 9)) {
....@@ -1042,7 +1037,7 @@
10421037 }
10431038
10441039 /* flush the disk cache after recovery if necessary */
1045
- ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL, NULL);
1040
+ ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL);
10461041 out:
10471042 __free_page(page);
10481043 return ret;
....@@ -1279,7 +1274,8 @@
12791274 ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9);
12801275
12811276 if (ppl_data_sectors > 0)
1282
- ppl_data_sectors = rounddown(ppl_data_sectors, STRIPE_SECTORS);
1277
+ ppl_data_sectors = rounddown(ppl_data_sectors,
1278
+ RAID5_STRIPE_SECTORS((struct r5conf *)rdev->mddev->private));
12831279
12841280 if (ppl_data_sectors <= 0) {
12851281 pr_warn("md/raid:%s: PPL space too small on %s\n",
....@@ -1365,7 +1361,7 @@
13651361 return -EINVAL;
13661362 }
13671363
1368
- max_disks = FIELD_SIZEOF(struct ppl_log, disk_flush_bitmap) *
1364
+ max_disks = sizeof_field(struct ppl_log, disk_flush_bitmap) *
13691365 BITS_PER_BYTE;
13701366 if (conf->raid_disks > max_disks) {
13711367 pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n",
....@@ -1409,6 +1405,7 @@
14091405 atomic64_set(&ppl_conf->seq, 0);
14101406 INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
14111407 spin_lock_init(&ppl_conf->no_mem_stripes_lock);
1408
+ ppl_conf->write_hint = RWH_WRITE_LIFE_NOT_SET;
14121409
14131410 if (!mddev->external) {
14141411 ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
....@@ -1503,3 +1500,60 @@
15031500
15041501 return ret;
15051502 }
1503
+
1504
/*
 * ppl_write_hint_show - sysfs "show" handler for the PPL write hint.
 * @mddev: array whose hint is being read
 * @buf:   output buffer supplied by the caller
 *
 * Formats the current ppl_conf->write_hint as a decimal number followed by
 * a newline.  If the array has no private conf, raid5_has_ppl() reports
 * false, or conf->log_private is NULL, the value printed is 0.
 *
 * Returns the value returned by sprintf() (number of characters written).
 *
 * NOTE(review): 'ret' is declared size_t while the function returns
 * ssize_t; sprintf()'s int result passes through both conversions.
 */
+static ssize_t
1505
+ppl_write_hint_show(struct mddev *mddev, char *buf)
1506
+{
1507
+ size_t ret = 0;
1508
+ struct r5conf *conf;
1509
+ struct ppl_conf *ppl_conf = NULL;
1510
+
1511
+ spin_lock(&mddev->lock); /* conf and log_private are read under mddev->lock */
1512
+ conf = mddev->private;
1513
+ if (conf && raid5_has_ppl(conf))
1514
+ ppl_conf = conf->log_private;
1515
+ ret = sprintf(buf, "%d\n", ppl_conf ? ppl_conf->write_hint : 0); /* fall back to 0 when no ppl_conf was found */
1516
+ spin_unlock(&mddev->lock);
1517
+
1518
+ return ret;
1519
+}
1520
+
1521
/*
 * ppl_write_hint_store - sysfs "store" handler for the PPL write hint.
 * @mddev: array whose hint is being set
 * @page:  text written by the user; parsed as a base-10 unsigned 16-bit value
 * @len:   number of bytes written
 *
 * On success the parsed value is stored in ppl_conf->write_hint and @len is
 * returned.
 *
 * Errors:
 *   -EINVAL  @len >= PAGE_SIZE, kstrtou16() rejected the input,
 *            raid5_has_ppl() is false, or conf->log_private is NULL
 *   -ENODEV  mddev->private is NULL
 *   other    whatever mddev_lock() returned when it failed
 */
+static ssize_t
1522
+ppl_write_hint_store(struct mddev *mddev, const char *page, size_t len)
1523
+{
1524
+ struct r5conf *conf;
1525
+ struct ppl_conf *ppl_conf;
1526
+ int err = 0;
1527
+ unsigned short new;
1528
+
1529
+ if (len >= PAGE_SIZE)
1530
+ return -EINVAL;
1531
+ if (kstrtou16(page, 10, &new)) /* any parse failure is reported as -EINVAL */
1532
+ return -EINVAL;
1533
+
1534
+ err = mddev_lock(mddev); /* input is validated before the lock is taken */
1535
+ if (err)
1536
+ return err;
1537
+
1538
+ conf = mddev->private;
1539
+ if (!conf) {
1540
+ err = -ENODEV;
1541
+ } else if (raid5_has_ppl(conf)) {
1542
+ ppl_conf = conf->log_private;
1543
+ if (!ppl_conf)
1544
+ err = -EINVAL;
1545
+ else
1546
+ ppl_conf->write_hint = new;
1547
+ } else {
1548
+ err = -EINVAL;
1549
+ }
1550
+
1551
+ mddev_unlock(mddev);
1552
+
1553
+ return err ?: len; /* GNU "?:" shorthand: err if non-zero, otherwise len */
1554
+}
1555
+
1556
/*
 * sysfs attribute "ppl_write_hint": mode S_IRUGO | S_IWUSR, reads go to
 * ppl_write_hint_show() and writes to ppl_write_hint_store().
 * Defined without 'static'; presumably declared in a header and registered
 * by the raid5 core - TODO confirm, the registration is not visible here.
 */
+struct md_sysfs_entry
1557
+ppl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR,
1558
+ ppl_write_hint_show,
1559
+ ppl_write_hint_store);