.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * Partial Parity Log for closing the RAID5 write hole |
---|
3 | 4 | * Copyright (c) 2017, Intel Corporation. |
---|
4 | | - * |
---|
5 | | - * This program is free software; you can redistribute it and/or modify it |
---|
6 | | - * under the terms and conditions of the GNU General Public License, |
---|
7 | | - * version 2, as published by the Free Software Foundation. |
---|
8 | | - * |
---|
9 | | - * This program is distributed in the hope it will be useful, but WITHOUT |
---|
10 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
---|
11 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
---|
12 | | - * more details. |
---|
13 | 5 | */ |
---|
14 | 6 | |
---|
15 | 7 | #include <linux/kernel.h> |
---|
16 | 8 | #include <linux/blkdev.h> |
---|
17 | 9 | #include <linux/slab.h> |
---|
18 | 10 | #include <linux/crc32c.h> |
---|
19 | | -#include <linux/flex_array.h> |
---|
20 | 11 | #include <linux/async_tx.h> |
---|
21 | 12 | #include <linux/raid/md_p.h> |
---|
22 | 13 | #include "md.h" |
---|
23 | 14 | #include "raid5.h" |
---|
| 15 | +#include "raid5-log.h" |
---|
24 | 16 | |
---|
25 | 17 | /* |
---|
26 | 18 | * PPL consists of a 4KB header (struct ppl_header) and at least 128KB for |
---|
.. | .. |
---|
116 | 108 | /* stripes to retry if failed to allocate io_unit */ |
---|
117 | 109 | struct list_head no_mem_stripes; |
---|
118 | 110 | spinlock_t no_mem_stripes_lock; |
---|
| 111 | + |
---|
| 112 | + unsigned short write_hint; |
---|
119 | 113 | }; |
---|
120 | 114 | |
---|
121 | 115 | struct ppl_log { |
---|
.. | .. |
---|
165 | 159 | struct dma_async_tx_descriptor *tx) |
---|
166 | 160 | { |
---|
167 | 161 | int disks = sh->disks; |
---|
168 | | - struct page **srcs = flex_array_get(percpu->scribble, 0); |
---|
| 162 | + struct page **srcs = percpu->scribble; |
---|
169 | 163 | int count = 0, pd_idx = sh->pd_idx, i; |
---|
170 | 164 | struct async_submit_ctl submit; |
---|
171 | 165 | |
---|
.. | .. |
---|
196 | 190 | } |
---|
197 | 191 | |
---|
198 | 192 | init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, tx, |
---|
199 | | - NULL, sh, flex_array_get(percpu->scribble, 0) |
---|
200 | | - + sizeof(struct page *) * (sh->disks + 2)); |
---|
| 193 | + NULL, sh, (void *) (srcs + sh->disks + 2)); |
---|
201 | 194 | |
---|
202 | 195 | if (count == 1) |
---|
203 | 196 | tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE, |
---|
.. | .. |
---|
331 | 324 | * be just after the last logged stripe and write to the same |
---|
332 | 325 | * disks. Use bit shift and logarithm to avoid 64-bit division. |
---|
333 | 326 | */ |
---|
334 | | - if ((sh->sector == sh_last->sector + STRIPE_SECTORS) && |
---|
| 327 | + if ((sh->sector == sh_last->sector + RAID5_STRIPE_SECTORS(conf)) && |
---|
335 | 328 | (data_sector >> ilog2(conf->chunk_sectors) == |
---|
336 | 329 | data_sector_last >> ilog2(conf->chunk_sectors)) && |
---|
337 | 330 | ((data_sector - data_sector_last) * data_disks == |
---|
.. | .. |
---|
476 | 469 | bio_set_dev(bio, log->rdev->bdev); |
---|
477 | 470 | bio->bi_iter.bi_sector = log->next_io_sector; |
---|
478 | 471 | bio_add_page(bio, io->header_page, PAGE_SIZE, 0); |
---|
| 472 | + bio->bi_write_hint = ppl_conf->write_hint; |
---|
479 | 473 | |
---|
480 | 474 | pr_debug("%s: log->current_io_sector: %llu\n", __func__, |
---|
481 | 475 | (unsigned long long)log->next_io_sector); |
---|
.. | .. |
---|
505 | 499 | bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, |
---|
506 | 500 | &ppl_conf->bs); |
---|
507 | 501 | bio->bi_opf = prev->bi_opf; |
---|
| 502 | + bio->bi_write_hint = prev->bi_write_hint; |
---|
508 | 503 | bio_copy_dev(bio, prev); |
---|
509 | 504 | bio->bi_iter.bi_sector = bio_end_sector(prev); |
---|
510 | 505 | bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); |
---|
.. | .. |
---|
849 | 844 | |
---|
850 | 845 | /* if start and end is 4k aligned, use a 4k block */ |
---|
851 | 846 | if (block_size == 512 && |
---|
852 | | - (r_sector_first & (STRIPE_SECTORS - 1)) == 0 && |
---|
853 | | - (r_sector_last & (STRIPE_SECTORS - 1)) == 0) |
---|
854 | | - block_size = STRIPE_SIZE; |
---|
| 847 | + (r_sector_first & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0 && |
---|
| 848 | + (r_sector_last & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0) |
---|
| 849 | + block_size = RAID5_STRIPE_SIZE(conf); |
---|
855 | 850 | |
---|
856 | 851 | /* iterate through blocks in strip */ |
---|
857 | 852 | for (i = 0; i < strip_sectors; i += (block_size >> 9)) { |
---|
.. | .. |
---|
1042 | 1037 | } |
---|
1043 | 1038 | |
---|
1044 | 1039 | /* flush the disk cache after recovery if necessary */ |
---|
1045 | | - ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL, NULL); |
---|
| 1040 | + ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL); |
---|
1046 | 1041 | out: |
---|
1047 | 1042 | __free_page(page); |
---|
1048 | 1043 | return ret; |
---|
.. | .. |
---|
1279 | 1274 | ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9); |
---|
1280 | 1275 | |
---|
1281 | 1276 | if (ppl_data_sectors > 0) |
---|
1282 | | - ppl_data_sectors = rounddown(ppl_data_sectors, STRIPE_SECTORS); |
---|
| 1277 | + ppl_data_sectors = rounddown(ppl_data_sectors, |
---|
| 1278 | + RAID5_STRIPE_SECTORS((struct r5conf *)rdev->mddev->private)); |
---|
1283 | 1279 | |
---|
1284 | 1280 | if (ppl_data_sectors <= 0) { |
---|
1285 | 1281 | pr_warn("md/raid:%s: PPL space too small on %s\n", |
---|
.. | .. |
---|
1365 | 1361 | return -EINVAL; |
---|
1366 | 1362 | } |
---|
1367 | 1363 | |
---|
1368 | | - max_disks = FIELD_SIZEOF(struct ppl_log, disk_flush_bitmap) * |
---|
| 1364 | + max_disks = sizeof_field(struct ppl_log, disk_flush_bitmap) * |
---|
1369 | 1365 | BITS_PER_BYTE; |
---|
1370 | 1366 | if (conf->raid_disks > max_disks) { |
---|
1371 | 1367 | pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n", |
---|
.. | .. |
---|
1409 | 1405 | atomic64_set(&ppl_conf->seq, 0); |
---|
1410 | 1406 | INIT_LIST_HEAD(&ppl_conf->no_mem_stripes); |
---|
1411 | 1407 | spin_lock_init(&ppl_conf->no_mem_stripes_lock); |
---|
| 1408 | + ppl_conf->write_hint = RWH_WRITE_LIFE_NOT_SET; |
---|
1412 | 1409 | |
---|
1413 | 1410 | if (!mddev->external) { |
---|
1414 | 1411 | ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); |
---|
.. | .. |
---|
1503 | 1500 | |
---|
1504 | 1501 | return ret; |
---|
1505 | 1502 | } |
---|
| 1503 | + |
---|
| 1504 | +static ssize_t |
---|
| 1505 | +ppl_write_hint_show(struct mddev *mddev, char *buf) |
---|
| 1506 | +{ |
---|
| 1507 | + size_t ret = 0; |
---|
| 1508 | + struct r5conf *conf; |
---|
| 1509 | + struct ppl_conf *ppl_conf = NULL; |
---|
| 1510 | + |
---|
| 1511 | + spin_lock(&mddev->lock); |
---|
| 1512 | + conf = mddev->private; |
---|
| 1513 | + if (conf && raid5_has_ppl(conf)) |
---|
| 1514 | + ppl_conf = conf->log_private; |
---|
| 1515 | + ret = sprintf(buf, "%d\n", ppl_conf ? ppl_conf->write_hint : 0); |
---|
| 1516 | + spin_unlock(&mddev->lock); |
---|
| 1517 | + |
---|
| 1518 | + return ret; |
---|
| 1519 | +} |
---|
| 1520 | + |
---|
| 1521 | +static ssize_t |
---|
| 1522 | +ppl_write_hint_store(struct mddev *mddev, const char *page, size_t len) |
---|
| 1523 | +{ |
---|
| 1524 | + struct r5conf *conf; |
---|
| 1525 | + struct ppl_conf *ppl_conf; |
---|
| 1526 | + int err = 0; |
---|
| 1527 | + unsigned short new; |
---|
| 1528 | + |
---|
| 1529 | + if (len >= PAGE_SIZE) |
---|
| 1530 | + return -EINVAL; |
---|
| 1531 | + if (kstrtou16(page, 10, &new)) |
---|
| 1532 | + return -EINVAL; |
---|
| 1533 | + |
---|
| 1534 | + err = mddev_lock(mddev); |
---|
| 1535 | + if (err) |
---|
| 1536 | + return err; |
---|
| 1537 | + |
---|
| 1538 | + conf = mddev->private; |
---|
| 1539 | + if (!conf) { |
---|
| 1540 | + err = -ENODEV; |
---|
| 1541 | + } else if (raid5_has_ppl(conf)) { |
---|
| 1542 | + ppl_conf = conf->log_private; |
---|
| 1543 | + if (!ppl_conf) |
---|
| 1544 | + err = -EINVAL; |
---|
| 1545 | + else |
---|
| 1546 | + ppl_conf->write_hint = new; |
---|
| 1547 | + } else { |
---|
| 1548 | + err = -EINVAL; |
---|
| 1549 | + } |
---|
| 1550 | + |
---|
| 1551 | + mddev_unlock(mddev); |
---|
| 1552 | + |
---|
| 1553 | + return err ?: len; |
---|
| 1554 | +} |
---|
| 1555 | + |
---|
| 1556 | +struct md_sysfs_entry |
---|
| 1557 | +ppl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR, |
---|
| 1558 | + ppl_write_hint_show, |
---|
| 1559 | + ppl_write_hint_store); |
---|