| .. | .. |
|---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Persistent Memory Driver |
|---|
| 3 | 4 | * |
|---|
| 4 | 5 | * Copyright (c) 2014-2015, Intel Corporation. |
|---|
| 5 | 6 | * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>. |
|---|
| 6 | 7 | * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>. |
|---|
| 7 | | - * |
|---|
| 8 | | - * This program is free software; you can redistribute it and/or modify it |
|---|
| 9 | | - * under the terms and conditions of the GNU General Public License, |
|---|
| 10 | | - * version 2, as published by the Free Software Foundation. |
|---|
| 11 | | - * |
|---|
| 12 | | - * This program is distributed in the hope it will be useful, but WITHOUT |
|---|
| 13 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|---|
| 14 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|---|
| 15 | | - * more details. |
|---|
| 16 | 8 | */ |
|---|
| 17 | 9 | |
|---|
| 18 | | -#include <asm/cacheflush.h> |
|---|
| 19 | 10 | #include <linux/blkdev.h> |
|---|
| 20 | 11 | #include <linux/hdreg.h> |
|---|
| 21 | 12 | #include <linux/init.h> |
|---|
| .. | .. |
|---|
| 33 | 24 | #include <linux/dax.h> |
|---|
| 34 | 25 | #include <linux/nd.h> |
|---|
| 35 | 26 | #include <linux/backing-dev.h> |
|---|
| 27 | +#include <linux/mm.h> |
|---|
| 28 | +#include <asm/cacheflush.h> |
|---|
| 36 | 29 | #include "pmem.h" |
|---|
| 37 | 30 | #include "pfn.h" |
|---|
| 38 | 31 | #include "nd.h" |
|---|
| 39 | | -#include "nd-core.h" |
|---|
| 40 | 32 | |
|---|
| 41 | 33 | static struct device *to_dev(struct pmem_device *pmem) |
|---|
| 42 | 34 | { |
|---|
| .. | .. |
|---|
| 133 | 125 | while (len) { |
|---|
| 134 | 126 | mem = kmap_atomic(page); |
|---|
| 135 | 127 | chunk = min_t(unsigned int, len, PAGE_SIZE - off); |
|---|
| 136 | | - rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); |
|---|
| 128 | + rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk); |
|---|
| 137 | 129 | kunmap_atomic(mem); |
|---|
| 138 | 130 | if (rem) |
|---|
| 139 | 131 | return BLK_STS_IOERR; |
|---|
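This hunk swaps memcpy_mcsafe() for copy_mc_to_kernel(), the renamed machine-check-safe copy. It returns the number of bytes left uncopied when consumed poison interrupts the copy, which is why a nonzero residue feeds straight into BLK_STS_IOERR above. A minimal sketch of that contract, with a hypothetical helper name:

```c
/*
 * Illustrative sketch, not driver code: copy_mc_to_kernel(), declared
 * in <linux/uaccess.h>, returns the count of bytes left uncopied when
 * a machine check (consumed poison) interrupts the copy.
 */
static int demo_copy_chunk(void *dst, const void *src, size_t len)
{
	unsigned long rem = copy_mc_to_kernel(dst, src, len);

	return rem ? -EIO : 0;	/* any residue means the data is suspect */
}
```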
| .. | .. |
|---|
| 145 | 137 | return BLK_STS_OK; |
|---|
| 146 | 138 | } |
|---|
| 147 | 139 | |
|---|
| 148 | | -static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, |
|---|
| 149 | | - unsigned int len, unsigned int off, unsigned int op, |
|---|
| 150 | | - sector_t sector) |
|---|
| 140 | +static blk_status_t pmem_do_read(struct pmem_device *pmem, |
|---|
| 141 | + struct page *page, unsigned int page_off, |
|---|
| 142 | + sector_t sector, unsigned int len) |
|---|
| 143 | +{ |
|---|
| 144 | + blk_status_t rc; |
|---|
| 145 | + phys_addr_t pmem_off = sector * 512 + pmem->data_offset; |
|---|
| 146 | + void *pmem_addr = pmem->virt_addr + pmem_off; |
|---|
| 147 | + |
|---|
| 148 | + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) |
|---|
| 149 | + return BLK_STS_IOERR; |
|---|
| 150 | + |
|---|
| 151 | + rc = read_pmem(page, page_off, pmem_addr, len); |
|---|
| 152 | + flush_dcache_page(page); |
|---|
| 153 | + return rc; |
|---|
| 154 | +} |
|---|
| 155 | + |
|---|
| 156 | +static blk_status_t pmem_do_write(struct pmem_device *pmem, |
|---|
| 157 | + struct page *page, unsigned int page_off, |
|---|
| 158 | + sector_t sector, unsigned int len) |
|---|
| 151 | 159 | { |
|---|
| 152 | 160 | blk_status_t rc = BLK_STS_OK; |
|---|
| 153 | 161 | bool bad_pmem = false; |
|---|
| .. | .. |
|---|
| 157 | 165 | if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) |
|---|
| 158 | 166 | bad_pmem = true; |
|---|
| 159 | 167 | |
|---|
| 160 | | - if (!op_is_write(op)) { |
|---|
| 161 | | - if (unlikely(bad_pmem)) |
|---|
| 162 | | - rc = BLK_STS_IOERR; |
|---|
| 163 | | - else { |
|---|
| 164 | | - rc = read_pmem(page, off, pmem_addr, len); |
|---|
| 165 | | - flush_dcache_page(page); |
|---|
| 166 | | - } |
|---|
| 167 | | - } else { |
|---|
| 168 | | - /* |
|---|
| 169 | | - * Note that we write the data both before and after |
|---|
| 170 | | - * clearing poison. The write before clear poison |
|---|
| 171 | | - * handles situations where the latest written data is |
|---|
| 172 | | - * preserved and the clear poison operation simply marks |
|---|
| 173 | | - * the address range as valid without changing the data. |
|---|
| 174 | | - * In this case application software can assume that an |
|---|
| 175 | | - * interrupted write will either return the new good |
|---|
| 176 | | - * data or an error. |
|---|
| 177 | | - * |
|---|
| 178 | | - * However, if pmem_clear_poison() leaves the data in an |
|---|
| 179 | | - * indeterminate state we need to perform the write |
|---|
| 180 | | - * after clear poison. |
|---|
| 181 | | - */ |
|---|
| 182 | | - flush_dcache_page(page); |
|---|
| 183 | | - write_pmem(pmem_addr, page, off, len); |
|---|
| 184 | | - if (unlikely(bad_pmem)) { |
|---|
| 185 | | - rc = pmem_clear_poison(pmem, pmem_off, len); |
|---|
| 186 | | - write_pmem(pmem_addr, page, off, len); |
|---|
| 187 | | - } |
|---|
| 168 | + /* |
|---|
| 169 | + * Note that we write the data both before and after |
|---|
| 170 | + * clearing poison. The write before clear poison |
|---|
| 171 | + * handles situations where the latest written data is |
|---|
| 172 | + * preserved and the clear poison operation simply marks |
|---|
| 173 | + * the address range as valid without changing the data. |
|---|
| 174 | + * In this case application software can assume that an |
|---|
| 175 | + * interrupted write will either return the new good |
|---|
| 176 | + * data or an error. |
|---|
| 177 | + * |
|---|
| 178 | + * However, if pmem_clear_poison() leaves the data in an |
|---|
| 179 | + * indeterminate state we need to perform the write |
|---|
| 180 | + * after clear poison. |
|---|
| 181 | + */ |
|---|
| 182 | + flush_dcache_page(page); |
|---|
| 183 | + write_pmem(pmem_addr, page, page_off, len); |
|---|
| 184 | + if (unlikely(bad_pmem)) { |
|---|
| 185 | + rc = pmem_clear_poison(pmem, pmem_off, len); |
|---|
| 186 | + write_pmem(pmem_addr, page, page_off, len); |
|---|
| 188 | 187 | } |
|---|
| 189 | 188 | |
|---|
| 190 | 189 | return rc; |
|---|
| 191 | 190 | } |
|---|
| 192 | 191 | |
|---|
| 193 | | -static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) |
|---|
| 192 | +static blk_qc_t pmem_submit_bio(struct bio *bio) |
|---|
| 194 | 193 | { |
|---|
| 195 | 194 | int ret = 0; |
|---|
| 196 | 195 | blk_status_t rc = 0; |
|---|
| .. | .. |
|---|
| 198 | 197 | unsigned long start; |
|---|
| 199 | 198 | struct bio_vec bvec; |
|---|
| 200 | 199 | struct bvec_iter iter; |
|---|
| 201 | | - struct pmem_device *pmem = q->queuedata; |
|---|
| 200 | + struct pmem_device *pmem = bio->bi_disk->private_data; |
|---|
| 202 | 201 | struct nd_region *nd_region = to_region(pmem); |
|---|
| 203 | 202 | |
|---|
| 204 | 203 | if (bio->bi_opf & REQ_PREFLUSH) |
|---|
| 205 | 204 | ret = nvdimm_flush(nd_region, bio); |
|---|
| 206 | 205 | |
|---|
| 207 | | - do_acct = nd_iostat_start(bio, &start); |
|---|
| 206 | + do_acct = blk_queue_io_stat(bio->bi_disk->queue); |
|---|
| 207 | + if (do_acct) |
|---|
| 208 | + start = bio_start_io_acct(bio); |
|---|
| 208 | 209 | bio_for_each_segment(bvec, bio, iter) { |
|---|
| 209 | | - rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, |
|---|
| 210 | | - bvec.bv_offset, bio_op(bio), iter.bi_sector); |
|---|
| 210 | + if (op_is_write(bio_op(bio))) |
|---|
| 211 | + rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset, |
|---|
| 212 | + iter.bi_sector, bvec.bv_len); |
|---|
| 213 | + else |
|---|
| 214 | + rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset, |
|---|
| 215 | + iter.bi_sector, bvec.bv_len); |
|---|
| 211 | 216 | if (rc) { |
|---|
| 212 | 217 | bio->bi_status = rc; |
|---|
| 213 | 218 | break; |
|---|
| 214 | 219 | } |
|---|
| 215 | 220 | } |
|---|
| 216 | 221 | if (do_acct) |
|---|
| 217 | | - nd_iostat_end(bio, start); |
|---|
| 222 | + bio_end_io_acct(bio, start); |
|---|
| 218 | 223 | |
|---|
| 219 | 224 | if (bio->bi_opf & REQ_FUA) |
|---|
| 220 | 225 | ret = nvdimm_flush(nd_region, bio); |
|---|
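The nd_iostat_start()/nd_iostat_end() wrappers give way to the generic block-layer accounting helpers. A sketch of the bare pattern, assuming a bio-based driver of the same vintage (bio->bi_disk still exists at this point; the function name is illustrative):

```c
/* Sketch of generic disk I/O accounting, as adopted above. */
static blk_qc_t demo_submit_bio(struct bio *bio)
{
	unsigned long start = 0;
	bool do_acct = blk_queue_io_stat(bio->bi_disk->queue);

	if (do_acct)
		start = bio_start_io_acct(bio);	/* timestamp + in-flight */

	/* ... service each segment of the bio ... */

	if (do_acct)
		bio_end_io_acct(bio, start);	/* latency + completion stats */
	bio_endio(bio);
	return BLK_QC_T_NONE;
}
```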
| .. | .. |
|---|
| 229 | 234 | static int pmem_rw_page(struct block_device *bdev, sector_t sector, |
|---|
| 230 | 235 | struct page *page, unsigned int op) |
|---|
| 231 | 236 | { |
|---|
| 232 | | - struct pmem_device *pmem = bdev->bd_queue->queuedata; |
|---|
| 237 | + struct pmem_device *pmem = bdev->bd_disk->private_data; |
|---|
| 233 | 238 | blk_status_t rc; |
|---|
| 234 | 239 | |
|---|
| 235 | | - rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, |
|---|
| 236 | | - 0, op, sector); |
|---|
| 237 | | - |
|---|
| 240 | + if (op_is_write(op)) |
|---|
| 241 | + rc = pmem_do_write(pmem, page, 0, sector, thp_size(page)); |
|---|
| 242 | + else |
|---|
| 243 | + rc = pmem_do_read(pmem, page, 0, sector, thp_size(page)); |
|---|
| 238 | 244 | /* |
|---|
| 239 | 245 | * The ->rw_page interface is subtle and tricky. The core |
|---|
| 240 | 246 | * retries on any error, so we can only invoke page_endio() in |
|---|
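In pmem_rw_page(), thp_size() replaces the open-coded hpage_nr_pages(page) * PAGE_SIZE. A trivial sketch of the equivalence:

```c
/*
 * Sketch: thp_size() (<linux/huge_mm.h>) folds the removed
 * hpage_nr_pages(page) * PAGE_SIZE arithmetic into one helper;
 * PAGE_SIZE for a base page, the full huge-page size for a THP.
 */
static size_t demo_rw_page_bytes(struct page *page)
{
	return thp_size(page);
}
```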
| .. | .. |
|---|
| 273 | 279 | |
|---|
| 274 | 280 | static const struct block_device_operations pmem_fops = { |
|---|
| 275 | 281 | .owner = THIS_MODULE, |
|---|
| 282 | + .submit_bio = pmem_submit_bio, |
|---|
| 276 | 283 | .rw_page = pmem_rw_page, |
|---|
| 277 | | - .revalidate_disk = nvdimm_revalidate_disk, |
|---|
| 278 | 284 | }; |
|---|
| 285 | + |
|---|
| 286 | +static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, |
|---|
| 287 | + size_t nr_pages) |
|---|
| 288 | +{ |
|---|
| 289 | + struct pmem_device *pmem = dax_get_private(dax_dev); |
|---|
| 290 | + |
|---|
| 291 | + return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0, |
|---|
| 292 | + PFN_PHYS(pgoff) >> SECTOR_SHIFT, |
|---|
| 293 | + PAGE_SIZE)); |
|---|
| 294 | +} |
|---|
| 279 | 295 | |
|---|
| 280 | 296 | static long pmem_dax_direct_access(struct dax_device *dax_dev, |
|---|
| 281 | 297 | pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) |
|---|
| .. | .. |
|---|
| 287 | 303 | |
|---|
| 288 | 304 | /* |
|---|
| 289 | 305 | * Use the 'no check' versions of copy_from_iter_flushcache() and |
|---|
| 290 | | - * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds |
|---|
| 306 | + * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds |
|---|
| 291 | 307 | * checking, both file offset and device offset, is handled by |
|---|
| 292 | 308 | * dax_iomap_actor() |
|---|
| 293 | 309 | */ |
|---|
| .. | .. |
|---|
| 300 | 316 | static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, |
|---|
| 301 | 317 | void *addr, size_t bytes, struct iov_iter *i) |
|---|
| 302 | 318 | { |
|---|
| 303 | | - return _copy_to_iter_mcsafe(addr, bytes, i); |
|---|
| 319 | + return _copy_mc_to_iter(addr, bytes, i); |
|---|
| 304 | 320 | } |
|---|
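The DAX iter copies follow the same rename: _copy_to_iter_mcsafe() becomes _copy_mc_to_iter(). Unlike copy_mc_to_kernel(), the iter variant returns the bytes successfully copied, so a short count signals mid-copy poison. A sketch under that assumption, with an illustrative wrapper name:

```c
/*
 * Sketch only: _copy_mc_to_iter() (declared in <linux/uaccess.h>)
 * returns the bytes actually copied; a short count means poison was
 * consumed part-way through.
 */
static size_t demo_copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	size_t copied = _copy_mc_to_iter(addr, bytes, i);

	if (copied != bytes)
		pr_debug("poison consumed after %zu bytes\n", copied);
	return copied;
}
```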
| 305 | 321 | |
|---|
| 306 | 322 | static const struct dax_operations pmem_dax_ops = { |
|---|
| 307 | 323 | .direct_access = pmem_dax_direct_access, |
|---|
| 324 | + .dax_supported = generic_fsdax_supported, |
|---|
| 308 | 325 | .copy_from_iter = pmem_copy_from_iter, |
|---|
| 309 | 326 | .copy_to_iter = pmem_copy_to_iter, |
|---|
| 327 | + .zero_page_range = pmem_dax_zero_page_range, |
|---|
| 310 | 328 | }; |
|---|
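pmem_dax_zero_page_range() is wired up as the new ->zero_page_range() op; because it funnels through pmem_do_write() with ZERO_PAGE(0), zeroing a range also clears any poison in it. Callers reach the op through the generic dispatcher; a minimal sketch with an illustrative helper name:

```c
/*
 * Sketch: dax_zero_page_range() dispatches to the device's
 * ->zero_page_range(); on pmem this also clears poison.
 */
static int demo_zero_one_page(struct dax_device *dax_dev, pgoff_t pgoff)
{
	return dax_zero_page_range(dax_dev, pgoff, 1);
}
```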
| 311 | 329 | |
|---|
| 312 | 330 | static const struct attribute_group *pmem_attribute_groups[] = { |
|---|
| .. | .. |
|---|
| 314 | 332 | NULL, |
|---|
| 315 | 333 | }; |
|---|
| 316 | 334 | |
|---|
| 317 | | -static void pmem_release_queue(void *q) |
|---|
| 335 | +static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap) |
|---|
| 318 | 336 | { |
|---|
| 337 | + struct request_queue *q = |
|---|
| 338 | + container_of(pgmap->ref, struct request_queue, q_usage_counter); |
|---|
| 339 | + |
|---|
| 319 | 340 | blk_cleanup_queue(q); |
|---|
| 320 | 341 | } |
|---|
| 321 | 342 | |
|---|
| 322 | | -static void pmem_freeze_queue(struct percpu_ref *ref) |
|---|
| 343 | +static void pmem_release_queue(void *pgmap) |
|---|
| 323 | 344 | { |
|---|
| 324 | | - struct request_queue *q; |
|---|
| 345 | + pmem_pagemap_cleanup(pgmap); |
|---|
| 346 | +} |
|---|
| 325 | 347 | |
|---|
| 326 | | - q = container_of(ref, typeof(*q), q_usage_counter); |
|---|
| 348 | +static void pmem_pagemap_kill(struct dev_pagemap *pgmap) |
|---|
| 349 | +{ |
|---|
| 350 | + struct request_queue *q = |
|---|
| 351 | + container_of(pgmap->ref, struct request_queue, q_usage_counter); |
|---|
| 352 | + |
|---|
| 327 | 353 | blk_freeze_queue_start(q); |
|---|
| 328 | 354 | } |
|---|
| 329 | 355 | |
|---|
| .. | .. |
|---|
| 337 | 363 | put_disk(pmem->disk); |
|---|
| 338 | 364 | } |
|---|
| 339 | 365 | |
|---|
| 340 | | -static void pmem_release_pgmap_ops(void *__pgmap) |
|---|
| 341 | | -{ |
|---|
| 342 | | - dev_pagemap_put_ops(); |
|---|
| 343 | | -} |
|---|
| 344 | | - |
|---|
| 345 | | -static void fsdax_pagefree(struct page *page, void *data) |
|---|
| 346 | | -{ |
|---|
| 347 | | - wake_up_var(&page->_refcount); |
|---|
| 348 | | -} |
|---|
| 349 | | - |
|---|
| 350 | | -static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap) |
|---|
| 351 | | -{ |
|---|
| 352 | | - dev_pagemap_get_ops(); |
|---|
| 353 | | - if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap)) |
|---|
| 354 | | - return -ENOMEM; |
|---|
| 355 | | - pgmap->type = MEMORY_DEVICE_FS_DAX; |
|---|
| 356 | | - pgmap->page_free = fsdax_pagefree; |
|---|
| 357 | | - |
|---|
| 358 | | - return 0; |
|---|
| 359 | | -} |
|---|
| 366 | +static const struct dev_pagemap_ops fsdax_pagemap_ops = { |
|---|
| 367 | + .kill = pmem_pagemap_kill, |
|---|
| 368 | + .cleanup = pmem_pagemap_cleanup, |
|---|
| 369 | +}; |
|---|
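The open-coded pmem_freeze_queue()/setup_pagemap_fsdax() pair collapses into a const dev_pagemap_ops table: on teardown, devm_memremap_pages() first calls ->kill() to start draining pgmap->ref, then ->cleanup(), which must not return until the ref is idle. In pmem those map onto blk_freeze_queue_start() and blk_cleanup_queue(); a generic sketch for a hypothetical driver that owns a bare percpu_ref:

```c
/* Illustrative driver "demo", not pmem code. */
static DECLARE_COMPLETION(demo_ref_done);

/* Passed to percpu_ref_init() at setup time (not shown). */
static void demo_ref_release(struct percpu_ref *ref)
{
	complete(&demo_ref_done);		/* last reference dropped */
}

static void demo_pagemap_kill(struct dev_pagemap *pgmap)
{
	percpu_ref_kill(pgmap->ref);		/* no new references */
}

static void demo_pagemap_cleanup(struct dev_pagemap *pgmap)
{
	wait_for_completion(&demo_ref_done);	/* drain finished */
	percpu_ref_exit(pgmap->ref);
}

static const struct dev_pagemap_ops demo_pagemap_ops = {
	.kill		= demo_pagemap_kill,
	.cleanup	= demo_pagemap_cleanup,
};
```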
| 360 | 370 | |
|---|
| 361 | 371 | static int pmem_attach_disk(struct device *dev, |
|---|
| 362 | 372 | struct nd_namespace_common *ndns) |
|---|
| .. | .. |
|---|
| 365 | 375 | struct nd_region *nd_region = to_nd_region(dev->parent); |
|---|
| 366 | 376 | int nid = dev_to_node(dev), fua; |
|---|
| 367 | 377 | struct resource *res = &nsio->res; |
|---|
| 368 | | - struct resource bb_res; |
|---|
| 378 | + struct range bb_range; |
|---|
| 369 | 379 | struct nd_pfn *nd_pfn = NULL; |
|---|
| 370 | 380 | struct dax_device *dax_dev; |
|---|
| 371 | 381 | struct nd_pfn_sb *pfn_sb; |
|---|
| .. | .. |
|---|
| 375 | 385 | struct gendisk *disk; |
|---|
| 376 | 386 | void *addr; |
|---|
| 377 | 387 | int rc; |
|---|
| 388 | + unsigned long flags = 0UL; |
|---|
| 378 | 389 | |
|---|
| 379 | 390 | pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); |
|---|
| 380 | 391 | if (!pmem) |
|---|
| 381 | 392 | return -ENOMEM; |
|---|
| 393 | + |
|---|
| 394 | + rc = devm_namespace_enable(dev, ndns, nd_info_block_reserve()); |
|---|
| 395 | + if (rc) |
|---|
| 396 | + return rc; |
|---|
| 382 | 397 | |
|---|
| 383 | 398 | /* while nsio_rw_bytes is active, parse a pfn info block if present */ |
|---|
| 384 | 399 | if (is_nd_pfn(dev)) { |
|---|
| .. | .. |
|---|
| 389 | 404 | } |
|---|
| 390 | 405 | |
|---|
| 391 | 406 | /* we're attaching a block device, disable raw namespace access */ |
|---|
| 392 | | - devm_nsio_disable(dev, nsio); |
|---|
| 407 | + devm_namespace_disable(dev, ndns); |
|---|
| 393 | 408 | |
|---|
| 394 | 409 | dev_set_drvdata(dev, pmem); |
|---|
| 395 | 410 | pmem->phys_addr = res->start; |
|---|
| .. | .. |
|---|
| 406 | 421 | return -EBUSY; |
|---|
| 407 | 422 | } |
|---|
| 408 | 423 | |
|---|
| 409 | | - q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL); |
|---|
| 424 | + q = blk_alloc_queue(dev_to_node(dev)); |
|---|
| 410 | 425 | if (!q) |
|---|
| 411 | | - return -ENOMEM; |
|---|
| 412 | | - |
|---|
| 413 | | - if (devm_add_action_or_reset(dev, pmem_release_queue, q)) |
|---|
| 414 | 426 | return -ENOMEM; |
|---|
| 415 | 427 | |
|---|
| 416 | 428 | pmem->pfn_flags = PFN_DEV; |
|---|
| 417 | 429 | pmem->pgmap.ref = &q->q_usage_counter; |
|---|
| 418 | | - pmem->pgmap.kill = pmem_freeze_queue; |
|---|
| 419 | 430 | if (is_nd_pfn(dev)) { |
|---|
| 420 | | - if (setup_pagemap_fsdax(dev, &pmem->pgmap)) |
|---|
| 421 | | - return -ENOMEM; |
|---|
| 431 | + pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; |
|---|
| 432 | + pmem->pgmap.ops = &fsdax_pagemap_ops; |
|---|
| 422 | 433 | addr = devm_memremap_pages(dev, &pmem->pgmap); |
|---|
| 423 | 434 | pfn_sb = nd_pfn->pfn_sb; |
|---|
| 424 | 435 | pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); |
|---|
| 425 | 436 | pmem->pfn_pad = resource_size(res) - |
|---|
| 426 | | - resource_size(&pmem->pgmap.res); |
|---|
| 437 | + range_len(&pmem->pgmap.range); |
|---|
| 427 | 438 | pmem->pfn_flags |= PFN_MAP; |
|---|
| 428 | | - memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); |
|---|
| 429 | | - bb_res.start += pmem->data_offset; |
|---|
| 439 | + bb_range = pmem->pgmap.range; |
|---|
| 440 | + bb_range.start += pmem->data_offset; |
|---|
| 430 | 441 | } else if (pmem_should_map_pages(dev)) { |
|---|
| 431 | | - memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res)); |
|---|
| 432 | | - pmem->pgmap.altmap_valid = false; |
|---|
| 433 | | - if (setup_pagemap_fsdax(dev, &pmem->pgmap)) |
|---|
| 434 | | - return -ENOMEM; |
|---|
| 442 | + pmem->pgmap.range.start = res->start; |
|---|
| 443 | + pmem->pgmap.range.end = res->end; |
|---|
| 444 | + pmem->pgmap.nr_range = 1; |
|---|
| 445 | + pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; |
|---|
| 446 | + pmem->pgmap.ops = &fsdax_pagemap_ops; |
|---|
| 435 | 447 | addr = devm_memremap_pages(dev, &pmem->pgmap); |
|---|
| 436 | 448 | pmem->pfn_flags |= PFN_MAP; |
|---|
| 437 | | - memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); |
|---|
| 449 | + bb_range = pmem->pgmap.range; |
|---|
| 438 | 450 | } else { |
|---|
| 439 | 451 | addr = devm_memremap(dev, pmem->phys_addr, |
|---|
| 440 | 452 | pmem->size, ARCH_MEMREMAP_PMEM); |
|---|
| 441 | | - memcpy(&bb_res, &nsio->res, sizeof(bb_res)); |
|---|
| 453 | + if (devm_add_action_or_reset(dev, pmem_release_queue, |
|---|
| 454 | + &pmem->pgmap)) |
|---|
| 455 | + return -ENOMEM; |
|---|
| 456 | + bb_range.start = res->start; |
|---|
| 457 | + bb_range.end = res->end; |
|---|
| 442 | 458 | } |
|---|
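dev_pagemap now describes its span with pgmap->range plus nr_range instead of an embedded struct resource, hence the bb_range bookkeeping above. A sketch of the reworked fields (the helper and its arguments are ours, not driver code):

```c
/* Sketch of mapping one contiguous span with the new fields. */
static void *demo_map_span(struct device *dev, struct dev_pagemap *pgmap,
			   phys_addr_t start, phys_addr_t end)
{
	pgmap->type = MEMORY_DEVICE_FS_DAX;
	pgmap->ops = &fsdax_pagemap_ops;
	pgmap->range.start = start;
	pgmap->range.end = end;		/* inclusive, like struct resource */
	pgmap->nr_range = 1;		/* one contiguous span */

	return devm_memremap_pages(dev, pgmap);
}
```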
| 443 | 459 | |
|---|
| 444 | 460 | if (IS_ERR(addr)) |
|---|
| .. | .. |
|---|
| 446 | 462 | pmem->virt_addr = addr; |
|---|
| 447 | 463 | |
|---|
| 448 | 464 | blk_queue_write_cache(q, true, fua); |
|---|
| 449 | | - blk_queue_make_request(q, pmem_make_request); |
|---|
| 450 | 465 | blk_queue_physical_block_size(q, PAGE_SIZE); |
|---|
| 451 | 466 | blk_queue_logical_block_size(q, pmem_sector_size(ndns)); |
|---|
| 452 | 467 | blk_queue_max_hw_sectors(q, UINT_MAX); |
|---|
| 453 | 468 | blk_queue_flag_set(QUEUE_FLAG_NONROT, q); |
|---|
| 454 | 469 | if (pmem->pfn_flags & PFN_MAP) |
|---|
| 455 | 470 | blk_queue_flag_set(QUEUE_FLAG_DAX, q); |
|---|
| 456 | | - q->queuedata = pmem; |
|---|
| 457 | 471 | |
|---|
| 458 | 472 | disk = alloc_disk_node(0, nid); |
|---|
| 459 | 473 | if (!disk) |
|---|
| .. | .. |
|---|
| 463 | 477 | disk->fops = &pmem_fops; |
|---|
| 464 | 478 | disk->queue = q; |
|---|
| 465 | 479 | disk->flags = GENHD_FL_EXT_DEVT; |
|---|
| 466 | | - disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; |
|---|
| 480 | + disk->private_data = pmem; |
|---|
| 467 | 481 | nvdimm_namespace_disk_name(ndns, disk->disk_name); |
|---|
| 468 | 482 | set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) |
|---|
| 469 | 483 | / 512); |
|---|
| 470 | 484 | if (devm_init_badblocks(dev, &pmem->bb)) |
|---|
| 471 | 485 | return -ENOMEM; |
|---|
| 472 | | - nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res); |
|---|
| 486 | + nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range); |
|---|
| 473 | 487 | disk->bb = &pmem->bb; |
|---|
| 474 | 488 | |
|---|
| 475 | | - dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops); |
|---|
| 476 | | - if (!dax_dev) { |
|---|
| 489 | + if (is_nvdimm_sync(nd_region)) |
|---|
| 490 | + flags = DAXDEV_F_SYNC; |
|---|
| 491 | + dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags); |
|---|
| 492 | + if (IS_ERR(dax_dev)) { |
|---|
| 477 | 493 | put_disk(disk); |
|---|
| 478 | | - return -ENOMEM; |
|---|
| 494 | + return PTR_ERR(dax_dev); |
|---|
| 479 | 495 | } |
|---|
| 480 | 496 | dax_write_cache(dax_dev, nvdimm_has_cache(nd_region)); |
|---|
| 481 | 497 | pmem->dax_dev = dax_dev; |
|---|
| 482 | 498 | gendev = disk_to_dev(disk); |
|---|
| 483 | 499 | gendev->groups = pmem_attribute_groups; |
|---|
| 484 | 500 | |
|---|
| 485 | | - device_add_disk(dev, disk); |
|---|
| 501 | + device_add_disk(dev, disk, NULL); |
|---|
| 486 | 502 | if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) |
|---|
| 487 | 503 | return -ENOMEM; |
|---|
| 488 | 504 | |
|---|
| 489 | | - revalidate_disk(disk); |
|---|
| 505 | + nvdimm_check_and_set_ro(disk); |
|---|
| 490 | 506 | |
|---|
| 491 | 507 | pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd, |
|---|
| 492 | 508 | "badblocks"); |
|---|
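Earlier in this hunk, alloc_dax() gains a flags argument, with DAXDEV_F_SYNC advertising synchronous-fault (MAP_SYNC) support for regions that is_nvdimm_sync() reports as such, and it now fails with an ERR_PTR() rather than NULL. A sketch of the updated contract, with an illustrative wrapper name:

```c
/* Sketch of the new alloc_dax() calling convention. */
static int demo_attach_dax(struct pmem_device *pmem, struct gendisk *disk,
			   struct nd_region *nd_region)
{
	unsigned long flags = is_nvdimm_sync(nd_region) ? DAXDEV_F_SYNC : 0;
	struct dax_device *dax_dev;

	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
	if (IS_ERR(dax_dev))		/* ERR_PTR(), not NULL, on failure */
		return PTR_ERR(dax_dev);

	pmem->dax_dev = dax_dev;
	return 0;
}
```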
| .. | .. |
|---|
| 498 | 514 | |
|---|
| 499 | 515 | static int nd_pmem_probe(struct device *dev) |
|---|
| 500 | 516 | { |
|---|
| 517 | + int ret; |
|---|
| 501 | 518 | struct nd_namespace_common *ndns; |
|---|
| 502 | 519 | |
|---|
| 503 | 520 | ndns = nvdimm_namespace_common_probe(dev); |
|---|
| 504 | 521 | if (IS_ERR(ndns)) |
|---|
| 505 | 522 | return PTR_ERR(ndns); |
|---|
| 506 | | - |
|---|
| 507 | | - if (devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev))) |
|---|
| 508 | | - return -ENXIO; |
|---|
| 509 | 523 | |
|---|
| 510 | 524 | if (is_nd_btt(dev)) |
|---|
| 511 | 525 | return nvdimm_namespace_attach_btt(ndns); |
|---|
| .. | .. |
|---|
| 513 | 527 | if (is_nd_pfn(dev)) |
|---|
| 514 | 528 | return pmem_attach_disk(dev, ndns); |
|---|
| 515 | 529 | |
|---|
| 516 | | - /* if we find a valid info-block we'll come back as that personality */ |
|---|
| 517 | | - if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0 |
|---|
| 518 | | - || nd_dax_probe(dev, ndns) == 0) |
|---|
| 530 | + ret = devm_namespace_enable(dev, ndns, nd_info_block_reserve()); |
|---|
| 531 | + if (ret) |
|---|
| 532 | + return ret; |
|---|
| 533 | + |
|---|
| 534 | + ret = nd_btt_probe(dev, ndns); |
|---|
| 535 | + if (ret == 0) |
|---|
| 519 | 536 | return -ENXIO; |
|---|
| 520 | 537 | |
|---|
| 521 | | - /* ...otherwise we're just a raw pmem device */ |
|---|
| 538 | + /* |
|---|
| 539 | + * We have two failure conditions here: there is no |
|---|
| 540 | + * info reserve block, or we found a valid info reserve block |
|---|
| 541 | + * but failed to initialize the pfn superblock. |
|---|
| 542 | + * |
|---|
| 543 | + * For the first case, treat the namespace as raw pmem |
|---|
| 544 | + * and attach a disk. |
|---|
| 545 | + * |
|---|
| 546 | + * For the latter, consider this a success and advance the namespace |
|---|
| 547 | + * seed. |
|---|
| 548 | + */ |
|---|
| 549 | + ret = nd_pfn_probe(dev, ndns); |
|---|
| 550 | + if (ret == 0) |
|---|
| 551 | + return -ENXIO; |
|---|
| 552 | + else if (ret == -EOPNOTSUPP) |
|---|
| 553 | + return ret; |
|---|
| 554 | + |
|---|
| 555 | + ret = nd_dax_probe(dev, ndns); |
|---|
| 556 | + if (ret == 0) |
|---|
| 557 | + return -ENXIO; |
|---|
| 558 | + else if (ret == -EOPNOTSUPP) |
|---|
| 559 | + return ret; |
|---|
| 560 | + |
|---|
| 561 | + /* probe complete, attach handles namespace enabling */ |
|---|
| 562 | + devm_namespace_disable(dev, ndns); |
|---|
| 563 | + |
|---|
| 522 | 564 | return pmem_attach_disk(dev, ndns); |
|---|
| 523 | 565 | } |
|---|
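The rewritten probe distinguishes three outcomes from each personality probe (btt, pfn, dax): 0 means the info block claimed the namespace, so this probe returns -ENXIO and the new personality device attaches; -EOPNOTSUPP means a valid but unusable info block and is propagated; any other error means no info block, and the flow falls through, ultimately to a raw pmem disk. A hypothetical wrapper, not in the driver, that captures the convention:

```c
/*
 * Hypothetical helper illustrating the return-value convention used
 * in nd_pmem_probe() above.
 */
static int demo_try_personality(struct device *dev,
		struct nd_namespace_common *ndns,
		int (*probe)(struct device *, struct nd_namespace_common *))
{
	int ret = probe(dev, ndns);

	if (ret == 0)
		return -ENXIO;	/* claimed; a new device takes over */
	if (ret == -EOPNOTSUPP)
		return ret;	/* valid info block, cannot use it */
	return 0;		/* miss: caller keeps falling through */
}
```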
| 524 | 566 | |
|---|
| .. | .. |
|---|
| 530 | 572 | nvdimm_namespace_detach_btt(to_nd_btt(dev)); |
|---|
| 531 | 573 | else { |
|---|
| 532 | 574 | /* |
|---|
| 533 | | - * Note, this assumes device_lock() context to not race |
|---|
| 534 | | - * nd_pmem_notify() |
|---|
| 575 | + * Note, this assumes nd_device_lock() context to not |
|---|
| 576 | + * race nd_pmem_notify() |
|---|
| 535 | 577 | */ |
|---|
| 536 | 578 | sysfs_put(pmem->bb_state); |
|---|
| 537 | 579 | pmem->bb_state = NULL; |
|---|
| .. | .. |
|---|
| 552 | 594 | resource_size_t offset = 0, end_trunc = 0; |
|---|
| 553 | 595 | struct nd_namespace_common *ndns; |
|---|
| 554 | 596 | struct nd_namespace_io *nsio; |
|---|
| 555 | | - struct resource res; |
|---|
| 556 | 597 | struct badblocks *bb; |
|---|
| 598 | + struct range range; |
|---|
| 557 | 599 | struct kernfs_node *bb_state; |
|---|
| 558 | 600 | |
|---|
| 559 | 601 | if (event != NVDIMM_REVALIDATE_POISON) |
|---|
| .. | .. |
|---|
| 589 | 631 | nsio = to_nd_namespace_io(&ndns->dev); |
|---|
| 590 | 632 | } |
|---|
| 591 | 633 | |
|---|
| 592 | | - res.start = nsio->res.start + offset; |
|---|
| 593 | | - res.end = nsio->res.end - end_trunc; |
|---|
| 594 | | - nvdimm_badblocks_populate(nd_region, bb, &res); |
|---|
| 634 | + range.start = nsio->res.start + offset; |
|---|
| 635 | + range.end = nsio->res.end - end_trunc; |
|---|
| 636 | + nvdimm_badblocks_populate(nd_region, bb, &range); |
|---|
| 595 | 637 | if (bb_state) |
|---|
| 596 | 638 | sysfs_notify_dirent(bb_state); |
|---|
| 597 | 639 | } |
|---|
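struct range, used above for both pgmap->range and badblocks population, is a bare inclusive [start, end] span without the tree, name, and flags baggage of struct resource; range_len() computes the inclusive byte count. A trivial sketch:

```c
/* Sketch: struct range from <linux/range.h> and its length helper. */
static u64 demo_range_len(void)
{
	struct range r = {
		.start	= 0x1000,
		.end	= 0x1fff,	/* inclusive last byte */
	};

	return range_len(&r);		/* 0x1000 bytes */
}
```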