| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Partial Parity Log for closing the RAID5 write hole |
|---|
| 3 | 4 | * Copyright (c) 2017, Intel Corporation. |
|---|
| 4 | | - * |
|---|
| 5 | | - * This program is free software; you can redistribute it and/or modify it |
|---|
| 6 | | - * under the terms and conditions of the GNU General Public License, |
|---|
| 7 | | - * version 2, as published by the Free Software Foundation. |
|---|
| 8 | | - * |
|---|
| 9 | | - * This program is distributed in the hope it will be useful, but WITHOUT |
|---|
| 10 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|---|
| 11 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|---|
| 12 | | - * more details. |
|---|
| 13 | 5 | */ |
|---|
| 14 | 6 | |
|---|
| 15 | 7 | #include <linux/kernel.h> |
|---|
| 16 | 8 | #include <linux/blkdev.h> |
|---|
| 17 | 9 | #include <linux/slab.h> |
|---|
| 18 | 10 | #include <linux/crc32c.h> |
|---|
| 19 | | -#include <linux/flex_array.h> |
|---|
| 20 | 11 | #include <linux/async_tx.h> |
|---|
| 21 | 12 | #include <linux/raid/md_p.h> |
|---|
| 22 | 13 | #include "md.h" |
|---|
| 23 | 14 | #include "raid5.h" |
|---|
| 15 | +#include "raid5-log.h" |
|---|
| 24 | 16 | |
|---|
| 25 | 17 | /* |
|---|
| 26 | 18 | * PPL consists of a 4KB header (struct ppl_header) and at least 128KB for |
|---|
| .. | .. |
|---|
| 116 | 108 | /* stripes to retry if failed to allocate io_unit */ |
|---|
| 117 | 109 | struct list_head no_mem_stripes; |
|---|
| 118 | 110 | spinlock_t no_mem_stripes_lock; |
|---|
| 111 | + |
|---|
| 112 | + unsigned short write_hint; |
|---|
| 119 | 113 | }; |
|---|
| 120 | 114 | |
|---|
| 121 | 115 | struct ppl_log { |
|---|
| .. | .. |
|---|
| 165 | 159 | struct dma_async_tx_descriptor *tx) |
|---|
| 166 | 160 | { |
|---|
| 167 | 161 | int disks = sh->disks; |
|---|
| 168 | | - struct page **srcs = flex_array_get(percpu->scribble, 0); |
|---|
| 162 | + struct page **srcs = percpu->scribble; |
|---|
| 169 | 163 | int count = 0, pd_idx = sh->pd_idx, i; |
|---|
| 170 | 164 | struct async_submit_ctl submit; |
|---|
| 171 | 165 | |
|---|
| .. | .. |
|---|
| 196 | 190 | } |
|---|
| 197 | 191 | |
|---|
| 198 | 192 | init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, tx, |
|---|
| 199 | | - NULL, sh, flex_array_get(percpu->scribble, 0) |
|---|
| 200 | | - + sizeof(struct page *) * (sh->disks + 2)); |
|---|
| 193 | + NULL, sh, (void *) (srcs + sh->disks + 2)); |
|---|
| 201 | 194 | |
|---|
| 202 | 195 | if (count == 1) |
|---|
| 203 | 196 | tx = async_memcpy(sh->ppl_page, srcs[0], 0, 0, PAGE_SIZE, |
|---|
| .. | .. |
|---|
| 331 | 324 | * be just after the last logged stripe and write to the same |
|---|
| 332 | 325 | * disks. Use bit shift and logarithm to avoid 64-bit division. |
|---|
| 333 | 326 | */ |
|---|
| 334 | | - if ((sh->sector == sh_last->sector + STRIPE_SECTORS) && |
|---|
| 327 | + if ((sh->sector == sh_last->sector + RAID5_STRIPE_SECTORS(conf)) && |
|---|
| 335 | 328 | (data_sector >> ilog2(conf->chunk_sectors) == |
|---|
| 336 | 329 | data_sector_last >> ilog2(conf->chunk_sectors)) && |
|---|
| 337 | 330 | ((data_sector - data_sector_last) * data_disks == |
|---|
| .. | .. |
|---|
| 476 | 469 | bio_set_dev(bio, log->rdev->bdev); |
|---|
| 477 | 470 | bio->bi_iter.bi_sector = log->next_io_sector; |
|---|
| 478 | 471 | bio_add_page(bio, io->header_page, PAGE_SIZE, 0); |
|---|
| 472 | + bio->bi_write_hint = ppl_conf->write_hint; |
|---|
| 479 | 473 | |
|---|
| 480 | 474 | pr_debug("%s: log->current_io_sector: %llu\n", __func__, |
|---|
| 481 | 475 | (unsigned long long)log->next_io_sector); |
|---|
| .. | .. |
|---|
| 505 | 499 | bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, |
|---|
| 506 | 500 | &ppl_conf->bs); |
|---|
| 507 | 501 | bio->bi_opf = prev->bi_opf; |
|---|
| 502 | + bio->bi_write_hint = prev->bi_write_hint; |
|---|
| 508 | 503 | bio_copy_dev(bio, prev); |
|---|
| 509 | 504 | bio->bi_iter.bi_sector = bio_end_sector(prev); |
|---|
| 510 | 505 | bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); |
|---|
| .. | .. |
|---|
| 849 | 844 | |
|---|
| 850 | 845 | /* if start and end is 4k aligned, use a 4k block */ |
|---|
| 851 | 846 | if (block_size == 512 && |
|---|
| 852 | | - (r_sector_first & (STRIPE_SECTORS - 1)) == 0 && |
|---|
| 853 | | - (r_sector_last & (STRIPE_SECTORS - 1)) == 0) |
|---|
| 854 | | - block_size = STRIPE_SIZE; |
|---|
| 847 | + (r_sector_first & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0 && |
|---|
| 848 | + (r_sector_last & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0) |
|---|
| 849 | + block_size = RAID5_STRIPE_SIZE(conf); |
|---|
| 855 | 850 | |
|---|
| 856 | 851 | /* iterate through blocks in strip */ |
|---|
| 857 | 852 | for (i = 0; i < strip_sectors; i += (block_size >> 9)) { |
|---|
| .. | .. |
|---|
| 1042 | 1037 | } |
|---|
| 1043 | 1038 | |
|---|
| 1044 | 1039 | /* flush the disk cache after recovery if necessary */ |
|---|
| 1045 | | - ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL, NULL); |
|---|
| 1040 | + ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL); |
|---|
| 1046 | 1041 | out: |
|---|
| 1047 | 1042 | __free_page(page); |
|---|
| 1048 | 1043 | return ret; |
|---|
| .. | .. |
|---|
| 1279 | 1274 | ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9); |
|---|
| 1280 | 1275 | |
|---|
| 1281 | 1276 | if (ppl_data_sectors > 0) |
|---|
| 1282 | | - ppl_data_sectors = rounddown(ppl_data_sectors, STRIPE_SECTORS); |
|---|
| 1277 | + ppl_data_sectors = rounddown(ppl_data_sectors, |
|---|
| 1278 | + RAID5_STRIPE_SECTORS((struct r5conf *)rdev->mddev->private)); |
|---|
| 1283 | 1279 | |
|---|
| 1284 | 1280 | if (ppl_data_sectors <= 0) { |
|---|
| 1285 | 1281 | pr_warn("md/raid:%s: PPL space too small on %s\n", |
|---|
| .. | .. |
|---|
| 1365 | 1361 | return -EINVAL; |
|---|
| 1366 | 1362 | } |
|---|
| 1367 | 1363 | |
|---|
| 1368 | | - max_disks = FIELD_SIZEOF(struct ppl_log, disk_flush_bitmap) * |
|---|
| 1364 | + max_disks = sizeof_field(struct ppl_log, disk_flush_bitmap) * |
|---|
| 1369 | 1365 | BITS_PER_BYTE; |
|---|
| 1370 | 1366 | if (conf->raid_disks > max_disks) { |
|---|
| 1371 | 1367 | pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n", |
|---|
| .. | .. |
|---|
| 1409 | 1405 | atomic64_set(&ppl_conf->seq, 0); |
|---|
| 1410 | 1406 | INIT_LIST_HEAD(&ppl_conf->no_mem_stripes); |
|---|
| 1411 | 1407 | spin_lock_init(&ppl_conf->no_mem_stripes_lock); |
|---|
| 1408 | + ppl_conf->write_hint = RWH_WRITE_LIFE_NOT_SET; |
|---|
| 1412 | 1409 | |
|---|
| 1413 | 1410 | if (!mddev->external) { |
|---|
| 1414 | 1411 | ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid)); |
|---|
| .. | .. |
|---|
| 1503 | 1500 | |
|---|
| 1504 | 1501 | return ret; |
|---|
| 1505 | 1502 | } |
|---|
| 1503 | + |
|---|
| 1504 | +static ssize_t |
|---|
| 1505 | +ppl_write_hint_show(struct mddev *mddev, char *buf) |
|---|
| 1506 | +{ |
|---|
| 1507 | + size_t ret = 0; |
|---|
| 1508 | + struct r5conf *conf; |
|---|
| 1509 | + struct ppl_conf *ppl_conf = NULL; |
|---|
| 1510 | + |
|---|
| 1511 | + spin_lock(&mddev->lock); |
|---|
| 1512 | + conf = mddev->private; |
|---|
| 1513 | + if (conf && raid5_has_ppl(conf)) |
|---|
| 1514 | + ppl_conf = conf->log_private; |
|---|
| 1515 | + ret = sprintf(buf, "%d\n", ppl_conf ? ppl_conf->write_hint : 0); |
|---|
| 1516 | + spin_unlock(&mddev->lock); |
|---|
| 1517 | + |
|---|
| 1518 | + return ret; |
|---|
| 1519 | +} |
|---|
| 1520 | + |
|---|
| 1521 | +static ssize_t |
|---|
| 1522 | +ppl_write_hint_store(struct mddev *mddev, const char *page, size_t len) |
|---|
| 1523 | +{ |
|---|
| 1524 | + struct r5conf *conf; |
|---|
| 1525 | + struct ppl_conf *ppl_conf; |
|---|
| 1526 | + int err = 0; |
|---|
| 1527 | + unsigned short new; |
|---|
| 1528 | + |
|---|
| 1529 | + if (len >= PAGE_SIZE) |
|---|
| 1530 | + return -EINVAL; |
|---|
| 1531 | + if (kstrtou16(page, 10, &new)) |
|---|
| 1532 | + return -EINVAL; |
|---|
| 1533 | + |
|---|
| 1534 | + err = mddev_lock(mddev); |
|---|
| 1535 | + if (err) |
|---|
| 1536 | + return err; |
|---|
| 1537 | + |
|---|
| 1538 | + conf = mddev->private; |
|---|
| 1539 | + if (!conf) { |
|---|
| 1540 | + err = -ENODEV; |
|---|
| 1541 | + } else if (raid5_has_ppl(conf)) { |
|---|
| 1542 | + ppl_conf = conf->log_private; |
|---|
| 1543 | + if (!ppl_conf) |
|---|
| 1544 | + err = -EINVAL; |
|---|
| 1545 | + else |
|---|
| 1546 | + ppl_conf->write_hint = new; |
|---|
| 1547 | + } else { |
|---|
| 1548 | + err = -EINVAL; |
|---|
| 1549 | + } |
|---|
| 1550 | + |
|---|
| 1551 | + mddev_unlock(mddev); |
|---|
| 1552 | + |
|---|
| 1553 | + return err ?: len; |
|---|
| 1554 | +} |
|---|
| 1555 | + |
|---|
| 1556 | +struct md_sysfs_entry |
|---|
| 1557 | +ppl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR, |
|---|
| 1558 | + ppl_write_hint_show, |
|---|
| 1559 | + ppl_write_hint_store); |
|---|