.. | .. |
---|
12 | 12 | #include "xfs_bit.h" |
---|
13 | 13 | #include "xfs_sb.h" |
---|
14 | 14 | #include "xfs_mount.h" |
---|
15 | | -#include "xfs_defer.h" |
---|
16 | | -#include "xfs_da_format.h" |
---|
17 | | -#include "xfs_da_btree.h" |
---|
18 | 15 | #include "xfs_inode.h" |
---|
19 | 16 | #include "xfs_dir2.h" |
---|
20 | 17 | #include "xfs_ialloc.h" |
---|
.. | .. |
---|
27 | 24 | #include "xfs_error.h" |
---|
28 | 25 | #include "xfs_quota.h" |
---|
29 | 26 | #include "xfs_fsops.h" |
---|
30 | | -#include "xfs_trace.h" |
---|
31 | 27 | #include "xfs_icache.h" |
---|
32 | 28 | #include "xfs_sysfs.h" |
---|
33 | 29 | #include "xfs_rmap_btree.h" |
---|
34 | 30 | #include "xfs_refcount_btree.h" |
---|
35 | 31 | #include "xfs_reflink.h" |
---|
36 | 32 | #include "xfs_extent_busy.h" |
---|
37 | | - |
---|
| 33 | +#include "xfs_health.h" |
---|
| 34 | +#include "xfs_trace.h" |
---|
38 | 35 | |
---|
39 | 36 | static DEFINE_MUTEX(xfs_uuid_table_mutex); |
---|
40 | 37 | static int xfs_uuid_table_size; |
---|
.. | .. |
---|
83 | 80 | } |
---|
84 | 81 | |
---|
85 | 82 | if (hole < 0) { |
---|
86 | | - xfs_uuid_table = kmem_realloc(xfs_uuid_table, |
---|
| 83 | + xfs_uuid_table = krealloc(xfs_uuid_table, |
---|
87 | 84 | (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table), |
---|
88 | | - KM_SLEEP); |
---|
| 85 | + GFP_KERNEL | __GFP_NOFAIL); |
---|
89 | 86 | hole = xfs_uuid_table_size++; |
---|
90 | 87 | } |
---|
91 | 88 | xfs_uuid_table[hole] = *uuid; |
---|
.. | .. |
---|
149 | 146 | spin_unlock(&mp->m_perag_lock); |
---|
150 | 147 | ASSERT(pag); |
---|
151 | 148 | ASSERT(atomic_read(&pag->pag_ref) == 0); |
---|
| 149 | + xfs_iunlink_destroy(pag); |
---|
152 | 150 | xfs_buf_hash_destroy(pag); |
---|
153 | | - mutex_destroy(&pag->pag_ici_reclaim_lock); |
---|
154 | 151 | call_rcu(&pag->rcu_head, __xfs_free_perag); |
---|
155 | 152 | } |
---|
156 | 153 | } |
---|
.. | .. |
---|
197 | 194 | } |
---|
198 | 195 | |
---|
199 | 196 | pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); |
---|
200 | | - if (!pag) |
---|
| 197 | + if (!pag) { |
---|
| 198 | + error = -ENOMEM; |
---|
201 | 199 | goto out_unwind_new_pags; |
---|
| 200 | + } |
---|
202 | 201 | pag->pag_agno = index; |
---|
203 | 202 | pag->pag_mount = mp; |
---|
204 | 203 | spin_lock_init(&pag->pag_ici_lock); |
---|
205 | | - mutex_init(&pag->pag_ici_reclaim_lock); |
---|
206 | 204 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
---|
207 | | - if (xfs_buf_hash_init(pag)) |
---|
| 205 | + |
---|
| 206 | + error = xfs_buf_hash_init(pag); |
---|
| 207 | + if (error) |
---|
208 | 208 | goto out_free_pag; |
---|
209 | 209 | init_waitqueue_head(&pag->pagb_wait); |
---|
210 | 210 | spin_lock_init(&pag->pagb_lock); |
---|
211 | 211 | pag->pagb_count = 0; |
---|
212 | 212 | pag->pagb_tree = RB_ROOT; |
---|
213 | 213 | |
---|
214 | | - if (radix_tree_preload(GFP_NOFS)) |
---|
| 214 | + error = radix_tree_preload(GFP_NOFS); |
---|
| 215 | + if (error) |
---|
215 | 216 | goto out_hash_destroy; |
---|
216 | 217 | |
---|
217 | 218 | spin_lock(&mp->m_perag_lock); |
---|
218 | 219 | if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { |
---|
219 | | - BUG(); |
---|
| 220 | + WARN_ON_ONCE(1); |
---|
220 | 221 | spin_unlock(&mp->m_perag_lock); |
---|
221 | 222 | radix_tree_preload_end(); |
---|
222 | 223 | error = -EEXIST; |
---|
.. | .. |
---|
227 | 228 | /* first new pag is fully initialized */ |
---|
228 | 229 | if (first_initialised == NULLAGNUMBER) |
---|
229 | 230 | first_initialised = index; |
---|
| 231 | + error = xfs_iunlink_init(pag); |
---|
| 232 | + if (error) |
---|
| 233 | + goto out_hash_destroy; |
---|
| 234 | + spin_lock_init(&pag->pag_state_lock); |
---|
230 | 235 | } |
---|
231 | 236 | |
---|
232 | 237 | index = xfs_set_inode_alloc(mp, agcount); |
---|
.. | .. |
---|
240 | 245 | out_hash_destroy: |
---|
241 | 246 | xfs_buf_hash_destroy(pag); |
---|
242 | 247 | out_free_pag: |
---|
243 | | - mutex_destroy(&pag->pag_ici_reclaim_lock); |
---|
244 | 248 | kmem_free(pag); |
---|
245 | 249 | out_unwind_new_pags: |
---|
246 | 250 | /* unwind any prior newly initialized pags */ |
---|
.. | .. |
---|
249 | 253 | if (!pag) |
---|
250 | 254 | break; |
---|
251 | 255 | xfs_buf_hash_destroy(pag); |
---|
252 | | - mutex_destroy(&pag->pag_ici_reclaim_lock); |
---|
| 256 | + xfs_iunlink_destroy(pag); |
---|
253 | 257 | kmem_free(pag); |
---|
254 | 258 | } |
---|
255 | 259 | return error; |
---|
.. | .. |
---|
307 | 311 | /* |
---|
308 | 312 | * Initialize the mount structure from the superblock. |
---|
309 | 313 | */ |
---|
310 | | - xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); |
---|
| 314 | + xfs_sb_from_disk(sbp, bp->b_addr); |
---|
311 | 315 | |
---|
312 | 316 | /* |
---|
313 | 317 | * If we haven't validated the superblock, do so now before we try |
---|
.. | .. |
---|
357 | 361 | } |
---|
358 | 362 | |
---|
359 | 363 | /* |
---|
360 | | - * Update alignment values based on mount options and sb values |
---|
| 364 | + * If the sunit/swidth change would move the precomputed root inode value, we |
---|
| 365 | + * must reject the ondisk change because repair will stumble over that. |
---|
| 366 | + * However, we allow the mount to proceed because we never rejected this |
---|
| 367 | + * combination before.  Sets *update_sb to true if the superblock should |
---|
| 368 | + */ |
---|
| 369 | +static inline int |
---|
| 370 | +xfs_check_new_dalign( |
---|
| 371 | + struct xfs_mount *mp, |
---|
| 372 | + int new_dalign, |
---|
| 373 | + bool *update_sb) |
---|
| 374 | +{ |
---|
| 375 | + struct xfs_sb *sbp = &mp->m_sb; |
---|
| 376 | + xfs_ino_t calc_ino; |
---|
| 377 | + |
---|
| 378 | + calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign); |
---|
| 379 | + trace_xfs_check_new_dalign(mp, new_dalign, calc_ino); |
---|
| 380 | + |
---|
| 381 | + if (sbp->sb_rootino == calc_ino) { |
---|
| 382 | + *update_sb = true; |
---|
| 383 | + return 0; |
---|
| 384 | + } |
---|
| 385 | + |
---|
| 386 | + xfs_warn(mp, |
---|
| 387 | +"Cannot change stripe alignment; would require moving root inode."); |
---|
| 388 | + |
---|
| 389 | + /* |
---|
| 390 | + * XXX: Next time we add a new incompat feature, this should start |
---|
| 391 | + * returning -EINVAL to fail the mount. Until then, spit out a warning |
---|
| 392 | + * that we're ignoring the administrator's instructions. |
---|
| 393 | + */ |
---|
| 394 | + xfs_warn(mp, "Skipping superblock stripe alignment update."); |
---|
| 395 | + *update_sb = false; |
---|
| 396 | + return 0; |
---|
| 397 | +} |
---|
| 398 | + |
---|
| 399 | +/* |
---|
| 400 | + * If we were provided with new sunit/swidth values as mount options, make sure |
---|
| 401 | + * that they pass basic alignment and superblock feature checks, and convert |
---|
| 402 | + * them into the same units (FSB) that everything else expects. This step |
---|
| 403 | + * /must/ be done before computing the inode geometry. |
---|
361 | 404 | */ |
---|
362 | 405 | STATIC int |
---|
363 | | -xfs_update_alignment(xfs_mount_t *mp) |
---|
| 406 | +xfs_validate_new_dalign( |
---|
| 407 | + struct xfs_mount *mp) |
---|
364 | 408 | { |
---|
365 | | - xfs_sb_t *sbp = &(mp->m_sb); |
---|
| 409 | + if (mp->m_dalign == 0) |
---|
| 410 | + return 0; |
---|
366 | 411 | |
---|
367 | | - if (mp->m_dalign) { |
---|
| 412 | + /* |
---|
| 413 | + * If stripe unit and stripe width are not multiples |
---|
| 414 | + * of the fs blocksize turn off alignment. |
---|
| 415 | + */ |
---|
| 416 | + if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || |
---|
| 417 | + (BBTOB(mp->m_swidth) & mp->m_blockmask)) { |
---|
| 418 | + xfs_warn(mp, |
---|
| 419 | + "alignment check failed: sunit/swidth vs. blocksize(%d)", |
---|
| 420 | + mp->m_sb.sb_blocksize); |
---|
| 421 | + return -EINVAL; |
---|
| 422 | + } else { |
---|
368 | 423 | /* |
---|
369 | | - * If stripe unit and stripe width are not multiples |
---|
370 | | - * of the fs blocksize turn off alignment. |
---|
| 424 | + * Convert the stripe unit and width to FSBs. |
---|
371 | 425 | */ |
---|
372 | | - if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || |
---|
373 | | - (BBTOB(mp->m_swidth) & mp->m_blockmask)) { |
---|
| 426 | + mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); |
---|
| 427 | + if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) { |
---|
374 | 428 | xfs_warn(mp, |
---|
375 | | - "alignment check failed: sunit/swidth vs. blocksize(%d)", |
---|
376 | | - sbp->sb_blocksize); |
---|
| 429 | + "alignment check failed: sunit/swidth vs. agsize(%d)", |
---|
| 430 | + mp->m_sb.sb_agblocks); |
---|
377 | 431 | return -EINVAL; |
---|
378 | | - } else { |
---|
379 | | - /* |
---|
380 | | - * Convert the stripe unit and width to FSBs. |
---|
381 | | - */ |
---|
382 | | - mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); |
---|
383 | | - if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { |
---|
384 | | - xfs_warn(mp, |
---|
385 | | - "alignment check failed: sunit/swidth vs. agsize(%d)", |
---|
386 | | - sbp->sb_agblocks); |
---|
387 | | - return -EINVAL; |
---|
388 | | - } else if (mp->m_dalign) { |
---|
389 | | - mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); |
---|
390 | | - } else { |
---|
391 | | - xfs_warn(mp, |
---|
392 | | - "alignment check failed: sunit(%d) less than bsize(%d)", |
---|
393 | | - mp->m_dalign, sbp->sb_blocksize); |
---|
394 | | - return -EINVAL; |
---|
395 | | - } |
---|
396 | | - } |
---|
397 | | - |
---|
398 | | - /* |
---|
399 | | - * Update superblock with new values |
---|
400 | | - * and log changes |
---|
401 | | - */ |
---|
402 | | - if (xfs_sb_version_hasdalign(sbp)) { |
---|
403 | | - if (sbp->sb_unit != mp->m_dalign) { |
---|
404 | | - sbp->sb_unit = mp->m_dalign; |
---|
405 | | - mp->m_update_sb = true; |
---|
406 | | - } |
---|
407 | | - if (sbp->sb_width != mp->m_swidth) { |
---|
408 | | - sbp->sb_width = mp->m_swidth; |
---|
409 | | - mp->m_update_sb = true; |
---|
410 | | - } |
---|
| 432 | + } else if (mp->m_dalign) { |
---|
| 433 | + mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); |
---|
411 | 434 | } else { |
---|
412 | 435 | xfs_warn(mp, |
---|
413 | | - "cannot change alignment: superblock does not support data alignment"); |
---|
| 436 | + "alignment check failed: sunit(%d) less than bsize(%d)", |
---|
| 437 | + mp->m_dalign, mp->m_sb.sb_blocksize); |
---|
414 | 438 | return -EINVAL; |
---|
415 | 439 | } |
---|
416 | | - } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && |
---|
417 | | - xfs_sb_version_hasdalign(&mp->m_sb)) { |
---|
418 | | - mp->m_dalign = sbp->sb_unit; |
---|
419 | | - mp->m_swidth = sbp->sb_width; |
---|
| 440 | + } |
---|
| 441 | + |
---|
| 442 | + if (!xfs_sb_version_hasdalign(&mp->m_sb)) { |
---|
| 443 | + xfs_warn(mp, |
---|
| 444 | +"cannot change alignment: superblock does not support data alignment"); |
---|
| 445 | + return -EINVAL; |
---|
420 | 446 | } |
---|
421 | 447 | |
---|
422 | 448 | return 0; |
---|
423 | 449 | } |
---|
424 | 450 | |
---|
425 | | -/* |
---|
426 | | - * Set the maximum inode count for this filesystem |
---|
427 | | - */ |
---|
428 | | -STATIC void |
---|
429 | | -xfs_set_maxicount(xfs_mount_t *mp) |
---|
| 451 | +/* Update alignment values based on mount options and sb values. */ |
---|
| 452 | +STATIC int |
---|
| 453 | +xfs_update_alignment( |
---|
| 454 | + struct xfs_mount *mp) |
---|
430 | 455 | { |
---|
431 | | - xfs_sb_t *sbp = &(mp->m_sb); |
---|
432 | | - uint64_t icount; |
---|
| 456 | + struct xfs_sb *sbp = &mp->m_sb; |
---|
433 | 457 | |
---|
434 | | - if (sbp->sb_imax_pct) { |
---|
435 | | - /* |
---|
436 | | - * Make sure the maximum inode count is a multiple |
---|
437 | | - * of the units we allocate inodes in. |
---|
438 | | - */ |
---|
439 | | - icount = sbp->sb_dblocks * sbp->sb_imax_pct; |
---|
440 | | - do_div(icount, 100); |
---|
441 | | - do_div(icount, mp->m_ialloc_blks); |
---|
442 | | - mp->m_maxicount = (icount * mp->m_ialloc_blks) << |
---|
443 | | - sbp->sb_inopblog; |
---|
444 | | - } else { |
---|
445 | | - mp->m_maxicount = 0; |
---|
446 | | - } |
---|
447 | | -} |
---|
| 458 | + if (mp->m_dalign) { |
---|
| 459 | + bool update_sb; |
---|
| 460 | + int error; |
---|
448 | 461 | |
---|
449 | | -/* |
---|
450 | | - * Set the default minimum read and write sizes unless |
---|
451 | | - * already specified in a mount option. |
---|
452 | | - * We use smaller I/O sizes when the file system |
---|
453 | | - * is being used for NFS service (wsync mount option). |
---|
454 | | - */ |
---|
455 | | -STATIC void |
---|
456 | | -xfs_set_rw_sizes(xfs_mount_t *mp) |
---|
457 | | -{ |
---|
458 | | - xfs_sb_t *sbp = &(mp->m_sb); |
---|
459 | | - int readio_log, writeio_log; |
---|
| 462 | + if (sbp->sb_unit == mp->m_dalign && |
---|
| 463 | + sbp->sb_width == mp->m_swidth) |
---|
| 464 | + return 0; |
---|
460 | 465 | |
---|
461 | | - if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { |
---|
462 | | - if (mp->m_flags & XFS_MOUNT_WSYNC) { |
---|
463 | | - readio_log = XFS_WSYNC_READIO_LOG; |
---|
464 | | - writeio_log = XFS_WSYNC_WRITEIO_LOG; |
---|
465 | | - } else { |
---|
466 | | - readio_log = XFS_READIO_LOG_LARGE; |
---|
467 | | - writeio_log = XFS_WRITEIO_LOG_LARGE; |
---|
468 | | - } |
---|
469 | | - } else { |
---|
470 | | - readio_log = mp->m_readio_log; |
---|
471 | | - writeio_log = mp->m_writeio_log; |
---|
| 466 | + error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb); |
---|
| 467 | + if (error || !update_sb) |
---|
| 468 | + return error; |
---|
| 469 | + |
---|
| 470 | + sbp->sb_unit = mp->m_dalign; |
---|
| 471 | + sbp->sb_width = mp->m_swidth; |
---|
| 472 | + mp->m_update_sb = true; |
---|
| 473 | + } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && |
---|
| 474 | + xfs_sb_version_hasdalign(&mp->m_sb)) { |
---|
| 475 | + mp->m_dalign = sbp->sb_unit; |
---|
| 476 | + mp->m_swidth = sbp->sb_width; |
---|
472 | 477 | } |
---|
473 | 478 | |
---|
474 | | - if (sbp->sb_blocklog > readio_log) { |
---|
475 | | - mp->m_readio_log = sbp->sb_blocklog; |
---|
476 | | - } else { |
---|
477 | | - mp->m_readio_log = readio_log; |
---|
478 | | - } |
---|
479 | | - mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog); |
---|
480 | | - if (sbp->sb_blocklog > writeio_log) { |
---|
481 | | - mp->m_writeio_log = sbp->sb_blocklog; |
---|
482 | | - } else { |
---|
483 | | - mp->m_writeio_log = writeio_log; |
---|
484 | | - } |
---|
485 | | - mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog); |
---|
| 479 | + return 0; |
---|
486 | 480 | } |
---|
487 | 481 | |
---|
488 | 482 | /* |
---|
.. | .. |
---|
500 | 494 | do_div(space, 100); |
---|
501 | 495 | mp->m_low_space[i] = space * (i + 1); |
---|
502 | 496 | } |
---|
503 | | -} |
---|
504 | | - |
---|
505 | | - |
---|
506 | | -/* |
---|
507 | | - * Set whether we're using inode alignment. |
---|
508 | | - */ |
---|
509 | | -STATIC void |
---|
510 | | -xfs_set_inoalignment(xfs_mount_t *mp) |
---|
511 | | -{ |
---|
512 | | - if (xfs_sb_version_hasalign(&mp->m_sb) && |
---|
513 | | - mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) |
---|
514 | | - mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; |
---|
515 | | - else |
---|
516 | | - mp->m_inoalign_mask = 0; |
---|
517 | | - /* |
---|
518 | | - * If we are using stripe alignment, check whether |
---|
519 | | - * the stripe unit is a multiple of the inode alignment |
---|
520 | | - */ |
---|
521 | | - if (mp->m_dalign && mp->m_inoalign_mask && |
---|
522 | | - !(mp->m_dalign & mp->m_inoalign_mask)) |
---|
523 | | - mp->m_sinoalign = mp->m_dalign; |
---|
524 | | - else |
---|
525 | | - mp->m_sinoalign = 0; |
---|
526 | 497 | } |
---|
527 | 498 | |
---|
528 | 499 | /* |
---|
.. | .. |
---|
639 | 610 | (mp->m_sb.sb_fdblocks > mp->m_sb.sb_dblocks || |
---|
640 | 611 | !xfs_verify_icount(mp, mp->m_sb.sb_icount) || |
---|
641 | 612 | mp->m_sb.sb_ifree > mp->m_sb.sb_icount)) |
---|
642 | | - mp->m_flags |= XFS_MOUNT_BAD_SUMMARY; |
---|
| 613 | + xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); |
---|
643 | 614 | |
---|
644 | 615 | /* |
---|
645 | 616 | * We can safely re-initialise incore superblock counters from the |
---|
.. | .. |
---|
654 | 625 | */ |
---|
655 | 626 | if ((!xfs_sb_version_haslazysbcount(&mp->m_sb) || |
---|
656 | 627 | XFS_LAST_UNMOUNT_WAS_CLEAN(mp)) && |
---|
657 | | - !(mp->m_flags & XFS_MOUNT_BAD_SUMMARY)) |
---|
| 628 | + !xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS)) |
---|
658 | 629 | return 0; |
---|
659 | 630 | |
---|
660 | 631 | return xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount); |
---|
| 632 | +} |
---|
| 633 | + |
---|
| 634 | +/* |
---|
| 635 | + * Flush and reclaim dirty inodes in preparation for unmount. Inodes and |
---|
| 636 | + * internal inode structures can be sitting in the CIL and AIL at this point, |
---|
| 637 | + * so we need to unpin them, write them back and/or reclaim them before unmount |
---|
| 638 | + * can proceed. |
---|
| 639 | + * |
---|
| 640 | + * An inode cluster that has been freed can have its buffer still pinned in |
---|
| 641 | + * memory because the transaction is still sitting in an iclog. The stale inodes |
---|
| 642 | + * on that buffer will be pinned to the buffer until the transaction hits the |
---|
| 643 | + * disk and the callbacks run. Pushing the AIL will skip the stale inodes and |
---|
| 644 | + * may never see the pinned buffer, so nothing will push out the iclog and |
---|
| 645 | + * unpin the buffer. |
---|
| 646 | + * |
---|
| 647 | + * Hence we need to force the log to unpin everything first. However, log |
---|
| 648 | + * forces don't wait for the discards they issue to complete, so we have to |
---|
| 649 | + * explicitly wait for them to complete here as well. |
---|
| 650 | + * |
---|
| 651 | + * Then we can tell the world we are unmounting so that error handling knows |
---|
| 652 | + * that the filesystem is going away and we should error out anything that we |
---|
| 653 | + * have been retrying in the background. This will prevent never-ending |
---|
| 654 | + * retries in AIL pushing from hanging the unmount. |
---|
| 655 | + * |
---|
| 656 | + * Finally, we can push the AIL to clean all the remaining dirty objects, then |
---|
| 657 | + * reclaim the remaining inodes that are still in memory at this point in time. |
---|
| 658 | + */ |
---|
| 659 | +static void |
---|
| 660 | +xfs_unmount_flush_inodes( |
---|
| 661 | + struct xfs_mount *mp) |
---|
| 662 | +{ |
---|
| 663 | + xfs_log_force(mp, XFS_LOG_SYNC); |
---|
| 664 | + xfs_extent_busy_wait_all(mp); |
---|
| 665 | + flush_workqueue(xfs_discard_wq); |
---|
| 666 | + |
---|
| 667 | + mp->m_flags |= XFS_MOUNT_UNMOUNTING; |
---|
| 668 | + |
---|
| 669 | + xfs_ail_push_all_sync(mp->m_ail); |
---|
| 670 | + cancel_delayed_work_sync(&mp->m_reclaim_work); |
---|
| 671 | + xfs_reclaim_inodes(mp); |
---|
| 672 | + xfs_health_unmount(mp); |
---|
661 | 673 | } |
---|
662 | 674 | |
---|
663 | 675 | /* |
---|
.. | .. |
---|
676 | 688 | { |
---|
677 | 689 | struct xfs_sb *sbp = &(mp->m_sb); |
---|
678 | 690 | struct xfs_inode *rip; |
---|
| 691 | + struct xfs_ino_geometry *igeo = M_IGEO(mp); |
---|
679 | 692 | uint64_t resblks; |
---|
680 | 693 | uint quotamount = 0; |
---|
681 | 694 | uint quotaflags = 0; |
---|
.. | .. |
---|
730 | 743 | } |
---|
731 | 744 | |
---|
732 | 745 | /* |
---|
733 | | - * Check if sb_agblocks is aligned at stripe boundary |
---|
734 | | - * If sb_agblocks is NOT aligned turn off m_dalign since |
---|
735 | | - * allocator alignment is within an ag, therefore ag has |
---|
736 | | - * to be aligned at stripe boundary. |
---|
| 746 | + * If we were given new sunit/swidth options, do some basic validation |
---|
| 747 | + * checks and convert the incore dalign and swidth values to the |
---|
| 748 | + * same units (FSB) that everything else uses. This /must/ happen |
---|
| 749 | + * before computing the inode geometry. |
---|
737 | 750 | */ |
---|
738 | | - error = xfs_update_alignment(mp); |
---|
| 751 | + error = xfs_validate_new_dalign(mp); |
---|
739 | 752 | if (error) |
---|
740 | 753 | goto out; |
---|
741 | 754 | |
---|
742 | 755 | xfs_alloc_compute_maxlevels(mp); |
---|
743 | 756 | xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); |
---|
744 | 757 | xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); |
---|
745 | | - xfs_ialloc_compute_maxlevels(mp); |
---|
| 758 | + xfs_ialloc_setup_geometry(mp); |
---|
746 | 759 | xfs_rmapbt_compute_maxlevels(mp); |
---|
747 | 760 | xfs_refcountbt_compute_maxlevels(mp); |
---|
748 | 761 | |
---|
749 | | - xfs_set_maxicount(mp); |
---|
| 762 | + /* |
---|
| 763 | + * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks |
---|
| 764 | + * is NOT aligned turn off m_dalign since allocator alignment is within |
---|
| 765 | + * an ag, therefore ag has to be aligned at stripe boundary. Note that |
---|
| 766 | + * we must compute the free space and rmap btree geometry before doing |
---|
| 767 | + * this. |
---|
| 768 | + */ |
---|
| 769 | + error = xfs_update_alignment(mp); |
---|
| 770 | + if (error) |
---|
| 771 | + goto out; |
---|
750 | 772 | |
---|
751 | 773 | /* enable fail_at_unmount as default */ |
---|
752 | 774 | mp->m_fail_unmount = true; |
---|
753 | 775 | |
---|
754 | | - error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); |
---|
| 776 | + error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, |
---|
| 777 | + NULL, mp->m_super->s_id); |
---|
755 | 778 | if (error) |
---|
756 | 779 | goto out; |
---|
757 | 780 | |
---|
.. | .. |
---|
773 | 796 | goto out_remove_errortag; |
---|
774 | 797 | |
---|
775 | 798 | /* |
---|
776 | | - * Set the minimum read and write sizes |
---|
| 799 | + * Update the preferred write size based on the information from the |
---|
| 800 | + * on-disk superblock. |
---|
777 | 801 | */ |
---|
778 | | - xfs_set_rw_sizes(mp); |
---|
| 802 | + mp->m_allocsize_log = |
---|
| 803 | + max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log); |
---|
| 804 | + mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog); |
---|
779 | 805 | |
---|
780 | 806 | /* set the low space thresholds for dynamic preallocation */ |
---|
781 | 807 | xfs_set_low_space_thresholds(mp); |
---|
782 | | - |
---|
783 | | - /* |
---|
784 | | - * Set the inode cluster size. |
---|
785 | | - * This may still be overridden by the file system |
---|
786 | | - * block size if it is larger than the chosen cluster size. |
---|
787 | | - * |
---|
788 | | - * For v5 filesystems, scale the cluster size with the inode size to |
---|
789 | | - * keep a constant ratio of inode per cluster buffer, but only if mkfs |
---|
790 | | - * has set the inode alignment value appropriately for larger cluster |
---|
791 | | - * sizes. |
---|
792 | | - */ |
---|
793 | | - mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; |
---|
794 | | - if (xfs_sb_version_hascrc(&mp->m_sb)) { |
---|
795 | | - int new_size = mp->m_inode_cluster_size; |
---|
796 | | - |
---|
797 | | - new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; |
---|
798 | | - if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) |
---|
799 | | - mp->m_inode_cluster_size = new_size; |
---|
800 | | - } |
---|
801 | 808 | |
---|
802 | 809 | /* |
---|
803 | 810 | * If enabled, sparse inode chunk alignment is expected to match the |
---|
.. | .. |
---|
806 | 813 | */ |
---|
807 | 814 | if (xfs_sb_version_hassparseinodes(&mp->m_sb) && |
---|
808 | 815 | mp->m_sb.sb_spino_align != |
---|
809 | | - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) { |
---|
| 816 | + XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) { |
---|
810 | 817 | xfs_warn(mp, |
---|
811 | 818 | "Sparse inode block alignment (%u) must match cluster size (%llu).", |
---|
812 | 819 | mp->m_sb.sb_spino_align, |
---|
813 | | - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)); |
---|
| 820 | + XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)); |
---|
814 | 821 | error = -EINVAL; |
---|
815 | 822 | goto out_remove_uuid; |
---|
816 | 823 | } |
---|
817 | | - |
---|
818 | | - /* |
---|
819 | | - * Set inode alignment fields |
---|
820 | | - */ |
---|
821 | | - xfs_set_inoalignment(mp); |
---|
822 | 824 | |
---|
823 | 825 | /* |
---|
824 | 826 | * Check that the data (and log if separate) is an ok size. |
---|
.. | .. |
---|
865 | 867 | goto out_free_dir; |
---|
866 | 868 | } |
---|
867 | 869 | |
---|
868 | | - if (!sbp->sb_logblocks) { |
---|
| 870 | + if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) { |
---|
869 | 871 | xfs_warn(mp, "no log defined"); |
---|
870 | | - XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); |
---|
871 | 872 | error = -EFSCORRUPTED; |
---|
872 | 873 | goto out_free_perag; |
---|
873 | 874 | } |
---|
.. | .. |
---|
905 | 906 | |
---|
906 | 907 | ASSERT(rip != NULL); |
---|
907 | 908 | |
---|
908 | | - if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) { |
---|
| 909 | + if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) { |
---|
909 | 910 | xfs_warn(mp, "corrupted root inode %llu: not a directory", |
---|
910 | 911 | (unsigned long long)rip->i_ino); |
---|
911 | 912 | xfs_iunlock(rip, XFS_ILOCK_EXCL); |
---|
912 | | - XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, |
---|
913 | | - mp); |
---|
914 | 913 | error = -EFSCORRUPTED; |
---|
915 | 914 | goto out_rele_rip; |
---|
916 | 915 | } |
---|
.. | .. |
---|
969 | 968 | /* |
---|
970 | 969 | * Finish recovering the file system. This part needed to be delayed |
---|
971 | 970 | * until after the root and real-time bitmap inodes were consistently |
---|
972 | | - * read in. |
---|
| 971 | + * read in. Temporarily create per-AG space reservations for metadata |
---|
| 972 | + * btree shape changes because space freeing transactions (for inode |
---|
| 973 | + * inactivation) require the per-AG reservation in lieu of reserving |
---|
| 974 | + * blocks. |
---|
973 | 975 | */ |
---|
| 976 | + error = xfs_fs_reserve_ag_blocks(mp); |
---|
| 977 | + if (error && error == -ENOSPC) |
---|
| 978 | + xfs_warn(mp, |
---|
| 979 | + "ENOSPC reserving per-AG metadata pool, log recovery may fail."); |
---|
974 | 980 | error = xfs_log_mount_finish(mp); |
---|
| 981 | + xfs_fs_unreserve_ag_blocks(mp); |
---|
975 | 982 | if (error) { |
---|
976 | 983 | xfs_warn(mp, "log mount finish failed"); |
---|
977 | 984 | goto out_rtunmount; |
---|
.. | .. |
---|
1047 | 1054 | /* Clean out dquots that might be in memory after quotacheck. */ |
---|
1048 | 1055 | xfs_qm_unmount(mp); |
---|
1049 | 1056 | /* |
---|
1050 | | - * Cancel all delayed reclaim work and reclaim the inodes directly. |
---|
| 1057 | + * Flush all inode reclamation work and flush the log. |
---|
1051 | 1058 | * We have to do this /after/ rtunmount and qm_unmount because those |
---|
1052 | 1059 | * two will have scheduled delayed reclaim for the rt/quota inodes. |
---|
1053 | 1060 | * |
---|
.. | .. |
---|
1057 | 1064 | * qm_unmount_quotas and therefore rely on qm_unmount to release the |
---|
1058 | 1065 | * quota inodes. |
---|
1059 | 1066 | */ |
---|
1060 | | - cancel_delayed_work_sync(&mp->m_reclaim_work); |
---|
1061 | | - xfs_reclaim_inodes(mp, SYNC_WAIT); |
---|
| 1067 | + xfs_unmount_flush_inodes(mp); |
---|
1062 | 1068 | out_log_dealloc: |
---|
1063 | | - mp->m_flags |= XFS_MOUNT_UNMOUNTING; |
---|
1064 | 1069 | xfs_log_mount_cancel(mp); |
---|
1065 | 1070 | out_fail_wait: |
---|
1066 | 1071 | if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) |
---|
.. | .. |
---|
1095 | 1100 | uint64_t resblks; |
---|
1096 | 1101 | int error; |
---|
1097 | 1102 | |
---|
1098 | | - xfs_icache_disable_reclaim(mp); |
---|
| 1103 | + xfs_stop_block_reaping(mp); |
---|
1099 | 1104 | xfs_fs_unreserve_ag_blocks(mp); |
---|
1100 | 1105 | xfs_qm_unmount_quotas(mp); |
---|
1101 | 1106 | xfs_rtunmount_inodes(mp); |
---|
1102 | 1107 | xfs_irele(mp->m_rootip); |
---|
1103 | 1108 | |
---|
1104 | | - /* |
---|
1105 | | - * We can potentially deadlock here if we have an inode cluster |
---|
1106 | | - * that has been freed has its buffer still pinned in memory because |
---|
1107 | | - * the transaction is still sitting in a iclog. The stale inodes |
---|
1108 | | - * on that buffer will have their flush locks held until the |
---|
1109 | | - * transaction hits the disk and the callbacks run. the inode |
---|
1110 | | - * flush takes the flush lock unconditionally and with nothing to |
---|
1111 | | - * push out the iclog we will never get that unlocked. hence we |
---|
1112 | | - * need to force the log first. |
---|
1113 | | - */ |
---|
1114 | | - xfs_log_force(mp, XFS_LOG_SYNC); |
---|
1115 | | - |
---|
1116 | | - /* |
---|
1117 | | - * Wait for all busy extents to be freed, including completion of |
---|
1118 | | - * any discard operation. |
---|
1119 | | - */ |
---|
1120 | | - xfs_extent_busy_wait_all(mp); |
---|
1121 | | - flush_workqueue(xfs_discard_wq); |
---|
1122 | | - |
---|
1123 | | - /* |
---|
1124 | | - * We now need to tell the world we are unmounting. This will allow |
---|
1125 | | - * us to detect that the filesystem is going away and we should error |
---|
1126 | | - * out anything that we have been retrying in the background. This will |
---|
1127 | | - * prevent neverending retries in AIL pushing from hanging the unmount. |
---|
1128 | | - */ |
---|
1129 | | - mp->m_flags |= XFS_MOUNT_UNMOUNTING; |
---|
1130 | | - |
---|
1131 | | - /* |
---|
1132 | | - * Flush all pending changes from the AIL. |
---|
1133 | | - */ |
---|
1134 | | - xfs_ail_push_all_sync(mp->m_ail); |
---|
1135 | | - |
---|
1136 | | - /* |
---|
1137 | | - * And reclaim all inodes. At this point there should be no dirty |
---|
1138 | | - * inodes and none should be pinned or locked, but use synchronous |
---|
1139 | | - * reclaim just to be sure. We can stop background inode reclaim |
---|
1140 | | - * here as well if it is still running. |
---|
1141 | | - */ |
---|
1142 | | - cancel_delayed_work_sync(&mp->m_reclaim_work); |
---|
1143 | | - xfs_reclaim_inodes(mp, SYNC_WAIT); |
---|
| 1109 | + xfs_unmount_flush_inodes(mp); |
---|
1144 | 1110 | |
---|
1145 | 1111 | xfs_qm_unmount(mp); |
---|
1146 | 1112 | |
---|
.. | .. |
---|
1216 | 1182 | int |
---|
1217 | 1183 | xfs_log_sbcount(xfs_mount_t *mp) |
---|
1218 | 1184 | { |
---|
1219 | | - /* allow this to proceed during the freeze sequence... */ |
---|
1220 | | - if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) |
---|
| 1185 | + if (!xfs_log_writable(mp)) |
---|
1221 | 1186 | return 0; |
---|
1222 | 1187 | |
---|
1223 | 1188 | /* |
---|
.. | .. |
---|
1228 | 1193 | return 0; |
---|
1229 | 1194 | |
---|
1230 | 1195 | return xfs_sync_sb(mp, true); |
---|
1231 | | -} |
---|
1232 | | - |
---|
1233 | | -/* |
---|
1234 | | - * Deltas for the inode count are +/-64, hence we use a large batch size |
---|
1235 | | - * of 128 so we don't need to take the counter lock on every update. |
---|
1236 | | - */ |
---|
1237 | | -#define XFS_ICOUNT_BATCH 128 |
---|
1238 | | -int |
---|
1239 | | -xfs_mod_icount( |
---|
1240 | | - struct xfs_mount *mp, |
---|
1241 | | - int64_t delta) |
---|
1242 | | -{ |
---|
1243 | | - percpu_counter_add_batch(&mp->m_icount, delta, XFS_ICOUNT_BATCH); |
---|
1244 | | - if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) { |
---|
1245 | | - ASSERT(0); |
---|
1246 | | - percpu_counter_add(&mp->m_icount, -delta); |
---|
1247 | | - return -EINVAL; |
---|
1248 | | - } |
---|
1249 | | - return 0; |
---|
1250 | | -} |
---|
1251 | | - |
---|
1252 | | -int |
---|
1253 | | -xfs_mod_ifree( |
---|
1254 | | - struct xfs_mount *mp, |
---|
1255 | | - int64_t delta) |
---|
1256 | | -{ |
---|
1257 | | - percpu_counter_add(&mp->m_ifree, delta); |
---|
1258 | | - if (percpu_counter_compare(&mp->m_ifree, 0) < 0) { |
---|
1259 | | - ASSERT(0); |
---|
1260 | | - percpu_counter_add(&mp->m_ifree, -delta); |
---|
1261 | | - return -EINVAL; |
---|
1262 | | - } |
---|
1263 | | - return 0; |
---|
1264 | 1196 | } |
---|
1265 | 1197 | |
---|
1266 | 1198 | /* |
---|
.. | .. |
---|
1341 | 1273 | spin_unlock(&mp->m_sb_lock); |
---|
1342 | 1274 | return 0; |
---|
1343 | 1275 | } |
---|
1344 | | - printk_once(KERN_WARNING |
---|
1345 | | - "Filesystem \"%s\": reserve blocks depleted! " |
---|
1346 | | - "Consider increasing reserve pool size.", |
---|
1347 | | - mp->m_fsname); |
---|
| 1276 | + xfs_warn_once(mp, |
---|
| 1277 | +"Reserve blocks depleted! Consider increasing reserve pool size."); |
---|
| 1278 | + |
---|
1348 | 1279 | fdblocks_enospc: |
---|
1349 | 1280 | spin_unlock(&mp->m_sb_lock); |
---|
1350 | 1281 | return -ENOSPC; |
---|
.. | .. |
---|
1366 | 1297 | mp->m_sb.sb_frextents = lcounter; |
---|
1367 | 1298 | spin_unlock(&mp->m_sb_lock); |
---|
1368 | 1299 | return ret; |
---|
1369 | | -} |
---|
1370 | | - |
---|
1371 | | -/* |
---|
1372 | | - * xfs_getsb() is called to obtain the buffer for the superblock. |
---|
1373 | | - * The buffer is returned locked and read in from disk. |
---|
1374 | | - * The buffer should be released with a call to xfs_brelse(). |
---|
1375 | | - * |
---|
1376 | | - * If the flags parameter is BUF_TRYLOCK, then we'll only return |
---|
1377 | | - * the superblock buffer if it can be locked without sleeping. |
---|
1378 | | - * If it can't then we'll return NULL. |
---|
1379 | | - */ |
---|
1380 | | -struct xfs_buf * |
---|
1381 | | -xfs_getsb( |
---|
1382 | | - struct xfs_mount *mp, |
---|
1383 | | - int flags) |
---|
1384 | | -{ |
---|
1385 | | - struct xfs_buf *bp = mp->m_sb_bp; |
---|
1386 | | - |
---|
1387 | | - if (!xfs_buf_trylock(bp)) { |
---|
1388 | | - if (flags & XBF_TRYLOCK) |
---|
1389 | | - return NULL; |
---|
1390 | | - xfs_buf_lock(bp); |
---|
1391 | | - } |
---|
1392 | | - |
---|
1393 | | - xfs_buf_hold(bp); |
---|
1394 | | - ASSERT(bp->b_flags & XBF_DONE); |
---|
1395 | | - return bp; |
---|
1396 | 1300 | } |
---|
1397 | 1301 | |
---|
1398 | 1302 | /* |
---|
.. | .. |
---|
1436 | 1340 | if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) |
---|
1437 | 1341 | return; |
---|
1438 | 1342 | |
---|
1439 | | - spin_lock(&mp->m_sb_lock); |
---|
1440 | | - mp->m_flags |= XFS_MOUNT_BAD_SUMMARY; |
---|
1441 | | - spin_unlock(&mp->m_sb_lock); |
---|
| 1343 | + xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); |
---|
| 1344 | +} |
---|
| 1345 | + |
---|
| 1346 | +/* |
---|
| 1347 | + * Update the in-core delayed block counter. |
---|
| 1348 | + * |
---|
| 1349 | + * We prefer to update the counter without having to take a spinlock for every |
---|
| 1350 | + * counter update (i.e. batching). Each change to delayed allocation |
---|
| 1351 | + * reservations can change can easily exceed the default percpu counter |
---|
| 1352 | + * batching, so we use a larger batch factor here. |
---|
| 1353 | + * |
---|
| 1354 | + * Note that we don't currently have any callers requiring fast summation |
---|
| 1355 | + * (e.g. percpu_counter_read) so we can use a big batch value here. |
---|
| 1356 | + */ |
---|
| 1357 | +#define XFS_DELALLOC_BATCH (4096) |
---|
| 1358 | +void |
---|
| 1359 | +xfs_mod_delalloc( |
---|
| 1360 | + struct xfs_mount *mp, |
---|
| 1361 | + int64_t delta) |
---|
| 1362 | +{ |
---|
| 1363 | + percpu_counter_add_batch(&mp->m_delalloc_blks, delta, |
---|
| 1364 | + XFS_DELALLOC_BATCH); |
---|
1442 | 1365 | } |
---|