.. | ..
| 1 | +// SPDX-License-Identifier: GPL-2.0-only
1 | 2 | /*
2 | 3 | * Persistent Memory Driver
3 | 4 | *
4 | 5 | * Copyright (c) 2014-2015, Intel Corporation.
5 | 6 | * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
6 | 7 | * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
7 | | - *
8 | | - * This program is free software; you can redistribute it and/or modify it
9 | | - * under the terms and conditions of the GNU General Public License,
10 | | - * version 2, as published by the Free Software Foundation.
11 | | - *
12 | | - * This program is distributed in the hope it will be useful, but WITHOUT
13 | | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 | | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 | | - * more details.
16 | 8 | */
17 | 9 |
18 | | -#include <asm/cacheflush.h>
19 | 10 | #include <linux/blkdev.h>
20 | 11 | #include <linux/hdreg.h>
21 | 12 | #include <linux/init.h>
.. | ..
33 | 24 | #include <linux/dax.h>
34 | 25 | #include <linux/nd.h>
35 | 26 | #include <linux/backing-dev.h>
| 27 | +#include <linux/mm.h>
| 28 | +#include <asm/cacheflush.h>
36 | 29 | #include "pmem.h"
37 | 30 | #include "pfn.h"
38 | 31 | #include "nd.h"
39 | | -#include "nd-core.h"
40 | 32 |
41 | 33 | static struct device *to_dev(struct pmem_device *pmem)
42 | 34 | {
.. | ..
133 | 125 | while (len) {
134 | 126 | mem = kmap_atomic(page);
135 | 127 | chunk = min_t(unsigned int, len, PAGE_SIZE - off);
136 | | - rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
| 128 | + rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
137 | 129 | kunmap_atomic(mem);
138 | 130 | if (rem)
139 | 131 | return BLK_STS_IOERR;
.. | ..
145 | 137 | return BLK_STS_OK;
146 | 138 | }
147 | 139 |
148 | | -static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
149 | | - unsigned int len, unsigned int off, unsigned int op,
150 | | - sector_t sector)
| 140 | +static blk_status_t pmem_do_read(struct pmem_device *pmem,
| 141 | + struct page *page, unsigned int page_off,
| 142 | + sector_t sector, unsigned int len)
| 143 | +{
| 144 | + blk_status_t rc;
| 145 | + phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
| 146 | + void *pmem_addr = pmem->virt_addr + pmem_off;
| 147 | +
| 148 | + if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
| 149 | + return BLK_STS_IOERR;
| 150 | +
| 151 | + rc = read_pmem(page, page_off, pmem_addr, len);
| 152 | + flush_dcache_page(page);
| 153 | + return rc;
| 154 | +}
| 155 | +
| 156 | +static blk_status_t pmem_do_write(struct pmem_device *pmem,
| 157 | + struct page *page, unsigned int page_off,
| 158 | + sector_t sector, unsigned int len)
151 | 159 | {
152 | 160 | blk_status_t rc = BLK_STS_OK;
153 | 161 | bool bad_pmem = false;
.. | ..
157 | 165 | if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
158 | 166 | bad_pmem = true;
159 | 167 |
160 | | - if (!op_is_write(op)) {
161 | | - if (unlikely(bad_pmem))
162 | | - rc = BLK_STS_IOERR;
163 | | - else {
164 | | - rc = read_pmem(page, off, pmem_addr, len);
165 | | - flush_dcache_page(page);
166 | | - }
167 | | - } else {
168 | | - /*
169 | | - * Note that we write the data both before and after
170 | | - * clearing poison. The write before clear poison
171 | | - * handles situations where the latest written data is
172 | | - * preserved and the clear poison operation simply marks
173 | | - * the address range as valid without changing the data.
174 | | - * In this case application software can assume that an
175 | | - * interrupted write will either return the new good
176 | | - * data or an error.
177 | | - *
178 | | - * However, if pmem_clear_poison() leaves the data in an
179 | | - * indeterminate state we need to perform the write
180 | | - * after clear poison.
181 | | - */
182 | | - flush_dcache_page(page);
183 | | - write_pmem(pmem_addr, page, off, len);
184 | | - if (unlikely(bad_pmem)) {
185 | | - rc = pmem_clear_poison(pmem, pmem_off, len);
186 | | - write_pmem(pmem_addr, page, off, len);
187 | | - }
| 168 | + /*
| 169 | + * Note that we write the data both before and after
| 170 | + * clearing poison. The write before clear poison
| 171 | + * handles situations where the latest written data is
| 172 | + * preserved and the clear poison operation simply marks
| 173 | + * the address range as valid without changing the data.
| 174 | + * In this case application software can assume that an
| 175 | + * interrupted write will either return the new good
| 176 | + * data or an error.
| 177 | + *
| 178 | + * However, if pmem_clear_poison() leaves the data in an
| 179 | + * indeterminate state we need to perform the write
| 180 | + * after clear poison.
| 181 | + */
| 182 | + flush_dcache_page(page);
| 183 | + write_pmem(pmem_addr, page, page_off, len);
| 184 | + if (unlikely(bad_pmem)) {
| 185 | + rc = pmem_clear_poison(pmem, pmem_off, len);
| 186 | + write_pmem(pmem_addr, page, page_off, len);
188 | 187 | }
189 | 188 |
190 | 189 | return rc;
191 | 190 | }
192 | 191 |
193 | | -static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
| 192 | +static blk_qc_t pmem_submit_bio(struct bio *bio)
194 | 193 | {
195 | 194 | int ret = 0;
196 | 195 | blk_status_t rc = 0;
.. | ..
198 | 197 | unsigned long start;
199 | 198 | struct bio_vec bvec;
200 | 199 | struct bvec_iter iter;
201 | | - struct pmem_device *pmem = q->queuedata;
| 200 | + struct pmem_device *pmem = bio->bi_disk->private_data;
202 | 201 | struct nd_region *nd_region = to_region(pmem);
203 | 202 |
204 | 203 | if (bio->bi_opf & REQ_PREFLUSH)
205 | 204 | ret = nvdimm_flush(nd_region, bio);
206 | 205 |
207 | | - do_acct = nd_iostat_start(bio, &start);
| 206 | + do_acct = blk_queue_io_stat(bio->bi_disk->queue);
| 207 | + if (do_acct)
| 208 | + start = bio_start_io_acct(bio);
208 | 209 | bio_for_each_segment(bvec, bio, iter) {
209 | | - rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
210 | | - bvec.bv_offset, bio_op(bio), iter.bi_sector);
| 210 | + if (op_is_write(bio_op(bio)))
| 211 | + rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
| 212 | + iter.bi_sector, bvec.bv_len);
| 213 | + else
| 214 | + rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
| 215 | + iter.bi_sector, bvec.bv_len);
211 | 216 | if (rc) {
212 | 217 | bio->bi_status = rc;
213 | 218 | break;
214 | 219 | }
215 | 220 | }
216 | 221 | if (do_acct)
217 | | - nd_iostat_end(bio, start);
| 222 | + bio_end_io_acct(bio, start);
218 | 223 |
219 | 224 | if (bio->bi_opf & REQ_FUA)
220 | 225 | ret = nvdimm_flush(nd_region, bio);
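
Aside, not part of the patch: the read_pmem() hunk above replaces memcpy_mcsafe() with copy_mc_to_kernel(). The new helper returns the number of bytes it could not copy when it trips over poisoned media, which is why any non-zero rem fails the whole request with BLK_STS_IOERR. Below is a minimal userspace sketch of that "bytes not copied" convention; copy_sim() and the POISON marker are hypothetical stand-ins, not kernel APIs.

/*
 * Illustrative only: mimics the return convention of copy_mc_to_kernel()
 * as used by read_pmem() above.  copy_sim() stops at a byte we pretend
 * is poisoned and reports how much was left uncopied.
 */
#include <stddef.h>
#include <stdio.h>

#define POISON 0xFF	/* pretend this byte value is poisoned media */

static size_t copy_sim(void *dst, const void *src, size_t len)
{
	const unsigned char *s = src;
	unsigned char *d = dst;
	size_t i;

	for (i = 0; i < len; i++) {
		if (s[i] == POISON)
			return len - i;	/* bytes NOT copied */
		d[i] = s[i];
	}
	return 0;			/* whole chunk copied */
}

int main(void)
{
	unsigned char src[8] = { 1, 2, 3, POISON, 5, 6, 7, 8 };
	unsigned char dst[8] = { 0 };
	size_t rem = copy_sim(dst, src, sizeof(src));

	/* read_pmem() turns any non-zero remainder into BLK_STS_IOERR. */
	printf("bytes not copied: %zu\n", rem);	/* prints 5 */
	return 0;
}
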
.. | ..
229 | 234 | static int pmem_rw_page(struct block_device *bdev, sector_t sector,
230 | 235 | struct page *page, unsigned int op)
231 | 236 | {
232 | | - struct pmem_device *pmem = bdev->bd_queue->queuedata;
| 237 | + struct pmem_device *pmem = bdev->bd_disk->private_data;
233 | 238 | blk_status_t rc;
234 | 239 |
235 | | - rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
236 | | - 0, op, sector);
237 | | -
| 240 | + if (op_is_write(op))
| 241 | + rc = pmem_do_write(pmem, page, 0, sector, thp_size(page));
| 242 | + else
| 243 | + rc = pmem_do_read(pmem, page, 0, sector, thp_size(page));
238 | 244 | /*
239 | 245 | * The ->rw_page interface is subtle and tricky. The core
240 | 246 | * retries on any error, so we can only invoke page_endio() in
.. | ..
273 | 279 |
274 | 280 | static const struct block_device_operations pmem_fops = {
275 | 281 | .owner = THIS_MODULE,
| 282 | + .submit_bio = pmem_submit_bio,
276 | 283 | .rw_page = pmem_rw_page,
277 | | - .revalidate_disk = nvdimm_revalidate_disk,
278 | 284 | };
| 285 | +
| 286 | +static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
| 287 | + size_t nr_pages)
| 288 | +{
| 289 | + struct pmem_device *pmem = dax_get_private(dax_dev);
| 290 | +
| 291 | + return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0,
| 292 | + PFN_PHYS(pgoff) >> SECTOR_SHIFT,
| 293 | + PAGE_SIZE));
| 294 | +}
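
Aside, not part of the patch: the new pmem_dax_zero_page_range() above converts a DAX page offset into the 512-byte sector that pmem_do_write() expects via PFN_PHYS(pgoff) >> SECTOR_SHIFT. A small sketch of that arithmetic follows; it assumes 4 KiB pages, and the PAGE_SHIFT/SECTOR_SHIFT constants are local stand-ins for the kernel macros.

/*
 * Illustrative only: the page-offset to sector conversion used in
 * pmem_dax_zero_page_range() above, assuming 4 KiB pages.
 */
#include <stdio.h>

#define PAGE_SHIFT	12	/* 4 KiB pages (assumption) */
#define SECTOR_SHIFT	9	/* 512-byte block-layer sectors */

int main(void)
{
	unsigned long pgoff = 3;	/* page offset into the DAX device */
	unsigned long long bytes = (unsigned long long)pgoff << PAGE_SHIFT;	/* PFN_PHYS(pgoff) */
	unsigned long long sector = bytes >> SECTOR_SHIFT;

	/* 3 pages -> 12288 bytes -> sector 24 (each 4 KiB page spans 8 sectors) */
	printf("pgoff %lu -> %llu bytes -> sector %llu\n", pgoff, bytes, sector);
	return 0;
}
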
279 | 295 |
280 | 296 | static long pmem_dax_direct_access(struct dax_device *dax_dev,
281 | 297 | pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
.. | ..
287 | 303 |
288 | 304 | /*
289 | 305 | * Use the 'no check' versions of copy_from_iter_flushcache() and
290 | | - * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
| 306 | + * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
291 | 307 | * checking, both file offset and device offset, is handled by
292 | 308 | * dax_iomap_actor()
293 | 309 | */
.. | ..
300 | 316 | static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
301 | 317 | void *addr, size_t bytes, struct iov_iter *i)
302 | 318 | {
303 | | - return _copy_to_iter_mcsafe(addr, bytes, i);
| 319 | + return _copy_mc_to_iter(addr, bytes, i);
304 | 320 | }
305 | 321 |
306 | 322 | static const struct dax_operations pmem_dax_ops = {
307 | 323 | .direct_access = pmem_dax_direct_access,
| 324 | + .dax_supported = generic_fsdax_supported,
308 | 325 | .copy_from_iter = pmem_copy_from_iter,
309 | 326 | .copy_to_iter = pmem_copy_to_iter,
| 327 | + .zero_page_range = pmem_dax_zero_page_range,
310 | 328 | };
311 | 329 |
312 | 330 | static const struct attribute_group *pmem_attribute_groups[] = {
.. | ..
314 | 332 | NULL,
315 | 333 | };
316 | 334 |
317 | | -static void pmem_release_queue(void *q)
| 335 | +static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap)
318 | 336 | {
| 337 | + struct request_queue *q =
| 338 | + container_of(pgmap->ref, struct request_queue, q_usage_counter);
| 339 | +
319 | 340 | blk_cleanup_queue(q);
320 | 341 | }
321 | 342 |
322 | | -static void pmem_freeze_queue(struct percpu_ref *ref)
| 343 | +static void pmem_release_queue(void *pgmap)
323 | 344 | {
324 | | - struct request_queue *q;
| 345 | + pmem_pagemap_cleanup(pgmap);
| 346 | +}
325 | 347 |
326 | | - q = container_of(ref, typeof(*q), q_usage_counter);
| 348 | +static void pmem_pagemap_kill(struct dev_pagemap *pgmap)
| 349 | +{
| 350 | + struct request_queue *q =
| 351 | + container_of(pgmap->ref, struct request_queue, q_usage_counter);
| 352 | +
327 | 353 | blk_freeze_queue_start(q);
328 | 354 | }
329 | 355 |
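
Aside, not part of the patch: pmem_pagemap_cleanup() and pmem_pagemap_kill() above recover the request_queue from the embedded q_usage_counter reference with container_of(). A minimal userspace sketch of that pointer arithmetic follows; struct queue and its fields are hypothetical stand-ins for the kernel structures.

/*
 * Illustrative only: a minimal container_of(), the idiom used above to
 * go from a pointer to an embedded member back to its containing struct.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct queue {			/* hypothetical stand-in for request_queue */
	int id;
	int usage_counter;	/* stand-in for q_usage_counter */
};

int main(void)
{
	struct queue q = { .id = 42, .usage_counter = 0 };
	int *ref = &q.usage_counter;	/* what pgmap->ref points at */
	struct queue *back = container_of(ref, struct queue, usage_counter);

	printf("recovered queue id: %d\n", back->id);	/* prints 42 */
	return 0;
}
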
.. | ..
337 | 363 | put_disk(pmem->disk);
338 | 364 | }
339 | 365 |
340 | | -static void pmem_release_pgmap_ops(void *__pgmap)
341 | | -{
342 | | - dev_pagemap_put_ops();
343 | | -}
344 | | -
345 | | -static void fsdax_pagefree(struct page *page, void *data)
346 | | -{
347 | | - wake_up_var(&page->_refcount);
348 | | -}
349 | | -
350 | | -static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
351 | | -{
352 | | - dev_pagemap_get_ops();
353 | | - if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
354 | | - return -ENOMEM;
355 | | - pgmap->type = MEMORY_DEVICE_FS_DAX;
356 | | - pgmap->page_free = fsdax_pagefree;
357 | | -
358 | | - return 0;
359 | | -}
| 366 | +static const struct dev_pagemap_ops fsdax_pagemap_ops = {
| 367 | + .kill = pmem_pagemap_kill,
| 368 | + .cleanup = pmem_pagemap_cleanup,
| 369 | +};
360 | 370 |
361 | 371 | static int pmem_attach_disk(struct device *dev,
362 | 372 | struct nd_namespace_common *ndns)
.. | ..
365 | 375 | struct nd_region *nd_region = to_nd_region(dev->parent);
366 | 376 | int nid = dev_to_node(dev), fua;
367 | 377 | struct resource *res = &nsio->res;
368 | | - struct resource bb_res;
| 378 | + struct range bb_range;
369 | 379 | struct nd_pfn *nd_pfn = NULL;
370 | 380 | struct dax_device *dax_dev;
371 | 381 | struct nd_pfn_sb *pfn_sb;
.. | ..
375 | 385 | struct gendisk *disk;
376 | 386 | void *addr;
377 | 387 | int rc;
| 388 | + unsigned long flags = 0UL;
378 | 389 |
379 | 390 | pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
380 | 391 | if (!pmem)
381 | 392 | return -ENOMEM;
| 393 | +
| 394 | + rc = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
| 395 | + if (rc)
| 396 | + return rc;
382 | 397 |
383 | 398 | /* while nsio_rw_bytes is active, parse a pfn info block if present */
384 | 399 | if (is_nd_pfn(dev)) {
.. | ..
389 | 404 | }
390 | 405 |
391 | 406 | /* we're attaching a block device, disable raw namespace access */
392 | | - devm_nsio_disable(dev, nsio);
| 407 | + devm_namespace_disable(dev, ndns);
393 | 408 |
394 | 409 | dev_set_drvdata(dev, pmem);
395 | 410 | pmem->phys_addr = res->start;
.. | ..
406 | 421 | return -EBUSY;
407 | 422 | }
408 | 423 |
409 | | - q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL);
| 424 | + q = blk_alloc_queue(dev_to_node(dev));
410 | 425 | if (!q)
411 | | - return -ENOMEM;
412 | | -
413 | | - if (devm_add_action_or_reset(dev, pmem_release_queue, q))
414 | 426 | return -ENOMEM;
415 | 427 |
416 | 428 | pmem->pfn_flags = PFN_DEV;
417 | 429 | pmem->pgmap.ref = &q->q_usage_counter;
418 | | - pmem->pgmap.kill = pmem_freeze_queue;
419 | 430 | if (is_nd_pfn(dev)) {
420 | | - if (setup_pagemap_fsdax(dev, &pmem->pgmap))
421 | | - return -ENOMEM;
| 431 | + pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
| 432 | + pmem->pgmap.ops = &fsdax_pagemap_ops;
422 | 433 | addr = devm_memremap_pages(dev, &pmem->pgmap);
423 | 434 | pfn_sb = nd_pfn->pfn_sb;
424 | 435 | pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
425 | 436 | pmem->pfn_pad = resource_size(res) -
426 | | - resource_size(&pmem->pgmap.res);
| 437 | + range_len(&pmem->pgmap.range);
427 | 438 | pmem->pfn_flags |= PFN_MAP;
428 | | - memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
429 | | - bb_res.start += pmem->data_offset;
| 439 | + bb_range = pmem->pgmap.range;
| 440 | + bb_range.start += pmem->data_offset;
430 | 441 | } else if (pmem_should_map_pages(dev)) {
431 | | - memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
432 | | - pmem->pgmap.altmap_valid = false;
433 | | - if (setup_pagemap_fsdax(dev, &pmem->pgmap))
434 | | - return -ENOMEM;
| 442 | + pmem->pgmap.range.start = res->start;
| 443 | + pmem->pgmap.range.end = res->end;
| 444 | + pmem->pgmap.nr_range = 1;
| 445 | + pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
| 446 | + pmem->pgmap.ops = &fsdax_pagemap_ops;
435 | 447 | addr = devm_memremap_pages(dev, &pmem->pgmap);
436 | 448 | pmem->pfn_flags |= PFN_MAP;
437 | | - memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
| 449 | + bb_range = pmem->pgmap.range;
438 | 450 | } else {
439 | 451 | addr = devm_memremap(dev, pmem->phys_addr,
440 | 452 | pmem->size, ARCH_MEMREMAP_PMEM);
441 | | - memcpy(&bb_res, &nsio->res, sizeof(bb_res));
| 453 | + if (devm_add_action_or_reset(dev, pmem_release_queue,
| 454 | + &pmem->pgmap))
| 455 | + return -ENOMEM;
| 456 | + bb_range.start = res->start;
| 457 | + bb_range.end = res->end;
442 | 458 | }
443 | 459 |
444 | 460 | if (IS_ERR(addr))
.. | ..
446 | 462 | pmem->virt_addr = addr;
447 | 463 |
448 | 464 | blk_queue_write_cache(q, true, fua);
449 | | - blk_queue_make_request(q, pmem_make_request);
450 | 465 | blk_queue_physical_block_size(q, PAGE_SIZE);
451 | 466 | blk_queue_logical_block_size(q, pmem_sector_size(ndns));
452 | 467 | blk_queue_max_hw_sectors(q, UINT_MAX);
453 | 468 | blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
454 | 469 | if (pmem->pfn_flags & PFN_MAP)
455 | 470 | blk_queue_flag_set(QUEUE_FLAG_DAX, q);
456 | | - q->queuedata = pmem;
457 | 471 |
458 | 472 | disk = alloc_disk_node(0, nid);
459 | 473 | if (!disk)
.. | ..
463 | 477 | disk->fops = &pmem_fops;
464 | 478 | disk->queue = q;
465 | 479 | disk->flags = GENHD_FL_EXT_DEVT;
466 | | - disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
| 480 | + disk->private_data = pmem;
467 | 481 | nvdimm_namespace_disk_name(ndns, disk->disk_name);
468 | 482 | set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
469 | 483 | / 512);
470 | 484 | if (devm_init_badblocks(dev, &pmem->bb))
471 | 485 | return -ENOMEM;
472 | | - nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
| 486 | + nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
473 | 487 | disk->bb = &pmem->bb;
474 | 488 |
475 | | - dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
476 | | - if (!dax_dev) {
| 489 | + if (is_nvdimm_sync(nd_region))
| 490 | + flags = DAXDEV_F_SYNC;
| 491 | + dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
| 492 | + if (IS_ERR(dax_dev)) {
477 | 493 | put_disk(disk);
478 | | - return -ENOMEM;
| 494 | + return PTR_ERR(dax_dev);
479 | 495 | }
480 | 496 | dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
481 | 497 | pmem->dax_dev = dax_dev;
482 | 498 | gendev = disk_to_dev(disk);
483 | 499 | gendev->groups = pmem_attribute_groups;
484 | 500 |
485 | | - device_add_disk(dev, disk);
| 501 | + device_add_disk(dev, disk, NULL);
486 | 502 | if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
487 | 503 | return -ENOMEM;
488 | 504 |
489 | | - revalidate_disk(disk);
| 505 | + nvdimm_check_and_set_ro(disk);
490 | 506 |
491 | 507 | pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
492 | 508 | "badblocks");
.. | ..
498 | 514 |
499 | 515 | static int nd_pmem_probe(struct device *dev)
500 | 516 | {
| 517 | + int ret;
501 | 518 | struct nd_namespace_common *ndns;
502 | 519 |
503 | 520 | ndns = nvdimm_namespace_common_probe(dev);
504 | 521 | if (IS_ERR(ndns))
505 | 522 | return PTR_ERR(ndns);
506 | | -
507 | | - if (devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev)))
508 | | - return -ENXIO;
509 | 523 |
510 | 524 | if (is_nd_btt(dev))
511 | 525 | return nvdimm_namespace_attach_btt(ndns);
.. | ..
513 | 527 | if (is_nd_pfn(dev))
514 | 528 | return pmem_attach_disk(dev, ndns);
515 | 529 |
516 | | - /* if we find a valid info-block we'll come back as that personality */
517 | | - if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0
518 | | - || nd_dax_probe(dev, ndns) == 0)
| 530 | + ret = devm_namespace_enable(dev, ndns, nd_info_block_reserve());
| 531 | + if (ret)
| 532 | + return ret;
| 533 | +
| 534 | + ret = nd_btt_probe(dev, ndns);
| 535 | + if (ret == 0)
519 | 536 | return -ENXIO;
520 | 537 |
521 | | - /* ...otherwise we're just a raw pmem device */
| 538 | + /*
| 539 | + * We have two failure conditions here: there is no
| 540 | + * info reserve block, or we found a valid info reserve block
| 541 | + * but failed to initialize the pfn superblock.
| 542 | + *
| 543 | + * For the first case, consider the namespace a raw pmem namespace
| 544 | + * and attach a disk.
| 545 | + *
| 546 | + * For the latter, consider this a success and advance the namespace
| 547 | + * seed.
| 548 | + */
| 549 | + ret = nd_pfn_probe(dev, ndns);
| 550 | + if (ret == 0)
| 551 | + return -ENXIO;
| 552 | + else if (ret == -EOPNOTSUPP)
| 553 | + return ret;
| 554 | +
| 555 | + ret = nd_dax_probe(dev, ndns);
| 556 | + if (ret == 0)
| 557 | + return -ENXIO;
| 558 | + else if (ret == -EOPNOTSUPP)
| 559 | + return ret;
| 560 | +
| 561 | + /* probe complete, attach handles namespace enabling */
| 562 | + devm_namespace_disable(dev, ndns);
| 563 | +
522 | 564 | return pmem_attach_disk(dev, ndns);
523 | 565 | }
524 | 566 |
.. | ..
530 | 572 | nvdimm_namespace_detach_btt(to_nd_btt(dev));
531 | 573 | else {
532 | 574 | /*
533 | | - * Note, this assumes device_lock() context to not race
534 | | - * nd_pmem_notify()
| 575 | + * Note, this assumes nd_device_lock() context to not
| 576 | + * race nd_pmem_notify()
535 | 577 | */
536 | 578 | sysfs_put(pmem->bb_state);
537 | 579 | pmem->bb_state = NULL;
.. | ..
552 | 594 | resource_size_t offset = 0, end_trunc = 0;
553 | 595 | struct nd_namespace_common *ndns;
554 | 596 | struct nd_namespace_io *nsio;
555 | | - struct resource res;
556 | 597 | struct badblocks *bb;
| 598 | + struct range range;
557 | 599 | struct kernfs_node *bb_state;
558 | 600 |
559 | 601 | if (event != NVDIMM_REVALIDATE_POISON)
.. | ..
589 | 631 | nsio = to_nd_namespace_io(&ndns->dev);
590 | 632 | }
591 | 633 |
592 | | - res.start = nsio->res.start + offset;
593 | | - res.end = nsio->res.end - end_trunc;
594 | | - nvdimm_badblocks_populate(nd_region, bb, &res);
| 634 | + range.start = nsio->res.start + offset;
| 635 | + range.end = nsio->res.end - end_trunc;
| 636 | + nvdimm_badblocks_populate(nd_region, bb, &range);
595 | 637 | if (bb_state)
596 | 638 | sysfs_notify_dirent(bb_state);
597 | 639 | }