.. | .. |
---|
5 | 5 | */ |
---|
6 | 6 | #include "xfs.h" |
---|
7 | 7 | #include "xfs_fs.h" |
---|
| 8 | +#include "xfs_shared.h" |
---|
8 | 9 | #include "xfs_format.h" |
---|
9 | 10 | #include "xfs_log_format.h" |
---|
10 | 11 | #include "xfs_trans_resv.h" |
---|
11 | 12 | #include "xfs_bit.h" |
---|
12 | | -#include "xfs_sb.h" |
---|
13 | 13 | #include "xfs_mount.h" |
---|
14 | 14 | #include "xfs_trans.h" |
---|
15 | | -#include "xfs_buf_item.h" |
---|
16 | 15 | #include "xfs_trans_priv.h" |
---|
17 | | -#include "xfs_error.h" |
---|
| 16 | +#include "xfs_buf_item.h" |
---|
| 17 | +#include "xfs_inode.h" |
---|
| 18 | +#include "xfs_inode_item.h" |
---|
| 19 | +#include "xfs_quota.h" |
---|
| 20 | +#include "xfs_dquot_item.h" |
---|
| 21 | +#include "xfs_dquot.h" |
---|
18 | 22 | #include "xfs_trace.h" |
---|
19 | 23 | #include "xfs_log.h" |
---|
20 | | -#include "xfs_inode.h" |
---|
21 | 24 | |
---|
22 | 25 | |
---|
23 | 26 | kmem_zone_t *xfs_buf_item_zone; |
---|
.. | .. |
---|
27 | 30 | return container_of(lip, struct xfs_buf_log_item, bli_item); |
---|
28 | 31 | } |
---|
29 | 32 | |
---|
30 | | -STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); |
---|
| 33 | +/* Is this log iovec plausibly large enough to contain the buffer log format? */ |
---|
| 34 | +bool |
---|
| 35 | +xfs_buf_log_check_iovec( |
---|
| 36 | + struct xfs_log_iovec *iovec) |
---|
| 37 | +{ |
---|
| 38 | + struct xfs_buf_log_format *blfp = iovec->i_addr; |
---|
| 39 | + char *bmp_end; |
---|
| 40 | + char *item_end; |
---|
| 41 | + |
---|
| 42 | + if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->i_len) |
---|
| 43 | + return false; |
---|
| 44 | + |
---|
| 45 | + item_end = (char *)iovec->i_addr + iovec->i_len; |
---|
| 46 | + bmp_end = (char *)&blfp->blf_data_map[blfp->blf_map_size]; |
---|
| 47 | + return bmp_end <= item_end; |
---|
| 48 | +} |
---|
31 | 49 | |
---|
32 | 50 | static inline int |
---|
33 | 51 | xfs_buf_log_format_size( |
---|
.. | .. |
---|
38 | 56 | } |
---|
39 | 57 | |
---|
40 | 58 | /* |
---|
41 | | - * This returns the number of log iovecs needed to log the |
---|
42 | | - * given buf log item. |
---|
| 59 | + * Return the number of log iovecs and space needed to log the given buf log |
---|
| 60 | + * item segment. |
---|
43 | 61 | * |
---|
44 | | - * It calculates this as 1 iovec for the buf log format structure |
---|
45 | | - * and 1 for each stretch of non-contiguous chunks to be logged. |
---|
46 | | - * Contiguous chunks are logged in a single iovec. |
---|
47 | | - * |
---|
48 | | - * If the XFS_BLI_STALE flag has been set, then log nothing. |
---|
| 62 | + * It calculates this as 1 iovec for the buf log format structure and 1 for each |
---|
| 63 | + * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged |
---|
| 64 | + * in a single iovec. |
---|
49 | 65 | */ |
---|
50 | 66 | STATIC void |
---|
51 | 67 | xfs_buf_item_size_segment( |
---|
.. | .. |
---|
101 | 117 | } |
---|
102 | 118 | |
---|
103 | 119 | /* |
---|
104 | | - * This returns the number of log iovecs needed to log the given buf log item. |
---|
| 120 | + * Return the number of log iovecs and space needed to log the given buf log |
---|
| 121 | + * item. |
---|
105 | 122 | * |
---|
106 | | - * It calculates this as 1 iovec for the buf log format structure and 1 for each |
---|
107 | | - * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged |
---|
108 | | - * in a single iovec. |
---|
109 | | - * |
---|
110 | | - * Discontiguous buffers need a format structure per region that that is being |
---|
| 123 | + * Discontiguous buffers need a format structure per region that is being |
---|
111 | 124 | * logged. This makes the changes in the buffer appear to log recovery as though |
---|
112 | 125 | * they came from separate buffers, just like would occur if multiple buffers |
---|
113 | 126 | * were used instead of a single discontiguous buffer. This enables |
---|
.. | .. |
---|
115 | 128 | * what ends up on disk. |
---|
116 | 129 | * |
---|
117 | 130 | * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log |
---|
118 | | - * format structures. |
---|
| 131 | + * format structures. If the item has previously been logged and has dirty |
---|
| 132 | + * regions, we do not relog them in stale buffers. This has the effect of |
---|
| 133 | + * reducing the size of the relogged item by the amount of dirty data tracked |
---|
| 134 | + * by the log item. This can result in the committing transaction reducing the |
---|
| 135 | + * amount of space being consumed by the CIL. |
---|
119 | 136 | */ |
---|
120 | 137 | STATIC void |
---|
121 | 138 | xfs_buf_item_size( |
---|
.. | .. |
---|
129 | 146 | ASSERT(atomic_read(&bip->bli_refcount) > 0); |
---|
130 | 147 | if (bip->bli_flags & XFS_BLI_STALE) { |
---|
131 | 148 | /* |
---|
132 | | - * The buffer is stale, so all we need to log |
---|
133 | | - * is the buf log format structure with the |
---|
134 | | - * cancel flag in it. |
---|
| 149 | + * The buffer is stale, so all we need to log is the buf log |
---|
| 150 | + * format structure with the cancel flag in it as we are never |
---|
| 151 | + * going to replay the changes tracked in the log item. |
---|
135 | 152 | */ |
---|
136 | 153 | trace_xfs_buf_item_size_stale(bip); |
---|
137 | 154 | ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); |
---|
.. | .. |
---|
146 | 163 | |
---|
147 | 164 | if (bip->bli_flags & XFS_BLI_ORDERED) { |
---|
148 | 165 | /* |
---|
149 | | - * The buffer has been logged just to order it. |
---|
150 | | - * It is not being included in the transaction |
---|
151 | | - * commit, so no vectors are used at all. |
---|
| 166 | + * The buffer has been logged just to order it. It is not being |
---|
| 167 | + * included in the transaction commit, so no vectors are used at |
---|
| 168 | + * all. |
---|
152 | 169 | */ |
---|
153 | 170 | trace_xfs_buf_item_size_ordered(bip); |
---|
154 | 171 | *nvecs = XFS_LOG_VEC_ORDERED; |
---|
.. | .. |
---|
330 | 347 | * occurs during recovery. |
---|
331 | 348 | */ |
---|
332 | 349 | if (bip->bli_flags & XFS_BLI_INODE_BUF) { |
---|
333 | | - if (xfs_sb_version_hascrc(&lip->li_mountp->m_sb) || |
---|
| 350 | + if (xfs_sb_version_has_v3inode(&lip->li_mountp->m_sb) || |
---|
334 | 351 | !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && |
---|
335 | 352 | xfs_log_item_in_current_chkpt(lip))) |
---|
336 | 353 | bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; |
---|
.. | .. |
---|
376 | 393 | } |
---|
377 | 394 | |
---|
378 | 395 | /* |
---|
379 | | - * This is called to unpin the buffer associated with the buf log |
---|
380 | | - * item which was previously pinned with a call to xfs_buf_item_pin(). |
---|
381 | | - * |
---|
382 | | - * Also drop the reference to the buf item for the current transaction. |
---|
383 | | - * If the XFS_BLI_STALE flag is set and we are the last reference, |
---|
384 | | - * then free up the buf log item and unlock the buffer. |
---|
385 | | - * |
---|
386 | | - * If the remove flag is set we are called from uncommit in the |
---|
387 | | - * forced-shutdown path. If that is true and the reference count on |
---|
388 | | - * the log item is going to drop to zero we need to free the item's |
---|
389 | | - * descriptor in the transaction. |
---|
| 396 | + * This is called to unpin the buffer associated with the buf log item which |
---|
| 397 | + * was previously pinned with a call to xfs_buf_item_pin(). |
---|
390 | 398 | */ |
---|
391 | 399 | STATIC void |
---|
392 | 400 | xfs_buf_item_unpin( |
---|
.. | .. |
---|
395 | 403 | { |
---|
396 | 404 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
---|
397 | 405 | xfs_buf_t *bp = bip->bli_buf; |
---|
398 | | - struct xfs_ail *ailp = lip->li_ailp; |
---|
399 | 406 | int stale = bip->bli_flags & XFS_BLI_STALE; |
---|
400 | 407 | int freed; |
---|
401 | 408 | |
---|
.. | .. |
---|
404 | 411 | |
---|
405 | 412 | trace_xfs_buf_item_unpin(bip); |
---|
406 | 413 | |
---|
| 414 | + /* |
---|
| 415 | + * Drop the bli ref associated with the pin and grab the hold required |
---|
| 416 | + * for the I/O simulation failure in the abort case. We have to do this |
---|
| 417 | + * before the pin count drops because the AIL doesn't acquire a bli |
---|
| 418 | + * reference. Therefore if the refcount drops to zero, the bli could |
---|
| 419 | + * still be AIL resident and the buffer submitted for I/O (and freed on |
---|
| 420 | + * completion) at any point before we return. This can be removed once |
---|
| 421 | + * the AIL properly holds a reference on the bli. |
---|
| 422 | + */ |
---|
407 | 423 | freed = atomic_dec_and_test(&bip->bli_refcount); |
---|
408 | | - |
---|
| 424 | + if (freed && !stale && remove) |
---|
| 425 | + xfs_buf_hold(bp); |
---|
409 | 426 | if (atomic_dec_and_test(&bp->b_pin_count)) |
---|
410 | 427 | wake_up_all(&bp->b_waiters); |
---|
411 | 428 | |
---|
412 | | - if (freed && stale) { |
---|
| 429 | + /* nothing to do but drop the pin count if the bli is active */ |
---|
| 430 | + if (!freed) |
---|
| 431 | + return; |
---|
| 432 | + |
---|
| 433 | + if (stale) { |
---|
413 | 434 | ASSERT(bip->bli_flags & XFS_BLI_STALE); |
---|
414 | 435 | ASSERT(xfs_buf_islocked(bp)); |
---|
415 | 436 | ASSERT(bp->b_flags & XBF_STALE); |
---|
416 | 437 | ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); |
---|
| 438 | + ASSERT(list_empty(&lip->li_trans)); |
---|
| 439 | + ASSERT(!bp->b_transp); |
---|
417 | 440 | |
---|
418 | 441 | trace_xfs_buf_item_unpin_stale(bip); |
---|
419 | 442 | |
---|
420 | | - if (remove) { |
---|
421 | | - /* |
---|
422 | | - * If we are in a transaction context, we have to |
---|
423 | | - * remove the log item from the transaction as we are |
---|
424 | | - * about to release our reference to the buffer. If we |
---|
425 | | - * don't, the unlock that occurs later in |
---|
426 | | - * xfs_trans_uncommit() will try to reference the |
---|
427 | | - * buffer which we no longer have a hold on. |
---|
428 | | - */ |
---|
429 | | - if (!list_empty(&lip->li_trans)) |
---|
430 | | - xfs_trans_del_item(lip); |
---|
431 | | - |
---|
432 | | - /* |
---|
433 | | - * Since the transaction no longer refers to the buffer, |
---|
434 | | - * the buffer should no longer refer to the transaction. |
---|
435 | | - */ |
---|
436 | | - bp->b_transp = NULL; |
---|
437 | | - } |
---|
438 | | - |
---|
439 | 443 | /* |
---|
440 | | - * If we get called here because of an IO error, we may |
---|
441 | | - * or may not have the item on the AIL. xfs_trans_ail_delete() |
---|
442 | | - * will take care of that situation. |
---|
443 | | - * xfs_trans_ail_delete() drops the AIL lock. |
---|
| 444 | + * If we get called here because of an IO error, we may or may |
---|
| 445 | + * not have the item on the AIL. xfs_trans_ail_delete() will |
---|
| 446 | + * take care of that situation. xfs_trans_ail_delete() drops |
---|
| 447 | + * the AIL lock. |
---|
444 | 448 | */ |
---|
445 | 449 | if (bip->bli_flags & XFS_BLI_STALE_INODE) { |
---|
446 | | - xfs_buf_do_callbacks(bp); |
---|
447 | | - bp->b_log_item = NULL; |
---|
448 | | - list_del_init(&bp->b_li_list); |
---|
449 | | - bp->b_iodone = NULL; |
---|
| 450 | + xfs_buf_item_done(bp); |
---|
| 451 | + xfs_buf_inode_iodone(bp); |
---|
| 452 | + ASSERT(list_empty(&bp->b_li_list)); |
---|
450 | 453 | } else { |
---|
451 | | - spin_lock(&ailp->ail_lock); |
---|
452 | | - xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR); |
---|
| 454 | + xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); |
---|
453 | 455 | xfs_buf_item_relse(bp); |
---|
454 | 456 | ASSERT(bp->b_log_item == NULL); |
---|
455 | 457 | } |
---|
456 | 458 | xfs_buf_relse(bp); |
---|
457 | | - } else if (freed && remove) { |
---|
| 459 | + } else if (remove) { |
---|
458 | 460 | /* |
---|
459 | | - * There are currently two references to the buffer - the active |
---|
460 | | - * LRU reference and the buf log item. What we are about to do |
---|
461 | | - * here - simulate a failed IO completion - requires 3 |
---|
462 | | - * references. |
---|
463 | | - * |
---|
464 | | - * The LRU reference is removed by the xfs_buf_stale() call. The |
---|
465 | | - * buf item reference is removed by the xfs_buf_iodone() |
---|
466 | | - * callback that is run by xfs_buf_do_callbacks() during ioend |
---|
467 | | - * processing (via the bp->b_iodone callback), and then finally |
---|
468 | | - * the ioend processing will drop the IO reference if the buffer |
---|
469 | | - * is marked XBF_ASYNC. |
---|
470 | | - * |
---|
471 | | - * Hence we need to take an additional reference here so that IO |
---|
472 | | - * completion processing doesn't free the buffer prematurely. |
---|
| 461 | + * The buffer must be locked and held by the caller to simulate |
---|
| 462 | + * an async I/O failure. We acquired the hold for this case |
---|
| 463 | + * before the buffer was unpinned. |
---|
473 | 464 | */ |
---|
474 | 465 | xfs_buf_lock(bp); |
---|
475 | | - xfs_buf_hold(bp); |
---|
476 | 466 | bp->b_flags |= XBF_ASYNC; |
---|
477 | | - xfs_buf_ioerror(bp, -EIO); |
---|
478 | | - bp->b_flags &= ~XBF_DONE; |
---|
479 | | - xfs_buf_stale(bp); |
---|
480 | | - xfs_buf_ioend(bp); |
---|
| 467 | + xfs_buf_ioend_fail(bp); |
---|
481 | 468 | } |
---|
482 | 469 | } |
---|
483 | | - |
---|
484 | | -/* |
---|
485 | | - * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30 |
---|
486 | | - * seconds so as to not spam logs too much on repeated detection of the same |
---|
487 | | - * buffer being bad.. |
---|
488 | | - */ |
---|
489 | | - |
---|
490 | | -static DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); |
---|
491 | 470 | |
---|
492 | 471 | STATIC uint |
---|
493 | 472 | xfs_buf_item_push( |
---|
.. | .. |
---|
518 | 497 | trace_xfs_buf_item_push(bip); |
---|
519 | 498 | |
---|
520 | 499 | /* has a previous flush failed due to IO errors? */ |
---|
521 | | - if ((bp->b_flags & XBF_WRITE_FAIL) && |
---|
522 | | - ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) { |
---|
523 | | - xfs_warn(bp->b_target->bt_mount, |
---|
524 | | -"Failing async write on buffer block 0x%llx. Retrying async write.", |
---|
525 | | - (long long)bp->b_bn); |
---|
| 500 | + if (bp->b_flags & XBF_WRITE_FAIL) { |
---|
| 501 | + xfs_buf_alert_ratelimited(bp, "XFS: Failing async write", |
---|
| 502 | + "Failing async write on buffer block 0x%llx. Retrying async write.", |
---|
| 503 | + (long long)bp->b_bn); |
---|
526 | 504 | } |
---|
527 | 505 | |
---|
528 | 506 | if (!xfs_buf_delwri_queue(bp, buffer_list)) |
---|
.. | .. |
---|
569 | 547 | * state. |
---|
570 | 548 | */ |
---|
571 | 549 | if (aborted) |
---|
572 | | - xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); |
---|
| 550 | + xfs_trans_ail_delete(lip, 0); |
---|
573 | 551 | xfs_buf_item_relse(bip->bli_buf); |
---|
574 | 552 | return true; |
---|
575 | 553 | } |
---|
.. | .. |
---|
594 | 572 | * free the item. |
---|
595 | 573 | */ |
---|
596 | 574 | STATIC void |
---|
597 | | -xfs_buf_item_unlock( |
---|
| 575 | +xfs_buf_item_release( |
---|
598 | 576 | struct xfs_log_item *lip) |
---|
599 | 577 | { |
---|
600 | 578 | struct xfs_buf_log_item *bip = BUF_ITEM(lip); |
---|
.. | .. |
---|
605 | 583 | #if defined(DEBUG) || defined(XFS_WARN) |
---|
606 | 584 | bool ordered = bip->bli_flags & XFS_BLI_ORDERED; |
---|
607 | 585 | bool dirty = bip->bli_flags & XFS_BLI_DIRTY; |
---|
| 586 | + bool aborted = test_bit(XFS_LI_ABORTED, |
---|
| 587 | + &lip->li_flags); |
---|
608 | 588 | #endif |
---|
609 | 589 | |
---|
610 | | - trace_xfs_buf_item_unlock(bip); |
---|
| 590 | + trace_xfs_buf_item_release(bip); |
---|
611 | 591 | |
---|
612 | 592 | /* |
---|
613 | 593 | * The bli dirty state should match whether the blf has logged segments |
---|
.. | .. |
---|
633 | 613 | released = xfs_buf_item_put(bip); |
---|
634 | 614 | if (hold || (stale && !released)) |
---|
635 | 615 | return; |
---|
636 | | - ASSERT(!stale || test_bit(XFS_LI_ABORTED, &lip->li_flags)); |
---|
| 616 | + ASSERT(!stale || aborted); |
---|
637 | 617 | xfs_buf_relse(bp); |
---|
| 618 | +} |
---|
| 619 | + |
---|
| 620 | +STATIC void |
---|
| 621 | +xfs_buf_item_committing( |
---|
| 622 | + struct xfs_log_item *lip, |
---|
| 623 | + xfs_csn_t seq) |
---|
| 624 | +{ |
---|
| 625 | + return xfs_buf_item_release(lip); |
---|
638 | 626 | } |
---|
639 | 627 | |
---|
640 | 628 | /* |
---|
.. | .. |
---|
669 | 657 | return lsn; |
---|
670 | 658 | } |
---|
671 | 659 | |
---|
672 | | -STATIC void |
---|
673 | | -xfs_buf_item_committing( |
---|
674 | | - struct xfs_log_item *lip, |
---|
675 | | - xfs_lsn_t commit_lsn) |
---|
676 | | -{ |
---|
677 | | -} |
---|
678 | | - |
---|
679 | | -/* |
---|
680 | | - * This is the ops vector shared by all buf log items. |
---|
681 | | - */ |
---|
682 | 660 | static const struct xfs_item_ops xfs_buf_item_ops = { |
---|
683 | 661 | .iop_size = xfs_buf_item_size, |
---|
684 | 662 | .iop_format = xfs_buf_item_format, |
---|
685 | 663 | .iop_pin = xfs_buf_item_pin, |
---|
686 | 664 | .iop_unpin = xfs_buf_item_unpin, |
---|
687 | | - .iop_unlock = xfs_buf_item_unlock, |
---|
| 665 | + .iop_release = xfs_buf_item_release, |
---|
| 666 | + .iop_committing = xfs_buf_item_committing, |
---|
688 | 667 | .iop_committed = xfs_buf_item_committed, |
---|
689 | 668 | .iop_push = xfs_buf_item_push, |
---|
690 | | - .iop_committing = xfs_buf_item_committing |
---|
691 | 669 | }; |
---|
692 | 670 | |
---|
693 | | -STATIC int |
---|
| 671 | +STATIC void |
---|
694 | 672 | xfs_buf_item_get_format( |
---|
695 | 673 | struct xfs_buf_log_item *bip, |
---|
696 | 674 | int count) |
---|
.. | .. |
---|
700 | 678 | |
---|
701 | 679 | if (count == 1) { |
---|
702 | 680 | bip->bli_formats = &bip->__bli_format; |
---|
703 | | - return 0; |
---|
| 681 | + return; |
---|
704 | 682 | } |
---|
705 | 683 | |
---|
706 | 684 | bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format), |
---|
707 | | - KM_SLEEP); |
---|
708 | | - if (!bip->bli_formats) |
---|
709 | | - return -ENOMEM; |
---|
710 | | - return 0; |
---|
| 685 | + 0); |
---|
711 | 686 | } |
---|
712 | 687 | |
---|
713 | 688 | STATIC void |
---|
.. | .. |
---|
733 | 708 | struct xfs_buf_log_item *bip = bp->b_log_item; |
---|
734 | 709 | int chunks; |
---|
735 | 710 | int map_size; |
---|
736 | | - int error; |
---|
737 | 711 | int i; |
---|
738 | 712 | |
---|
739 | 713 | /* |
---|
.. | .. |
---|
741 | 715 | * this buffer. If we do already have one, there is |
---|
742 | 716 | * nothing to do here so return. |
---|
743 | 717 | */ |
---|
744 | | - ASSERT(bp->b_target->bt_mount == mp); |
---|
| 718 | + ASSERT(bp->b_mount == mp); |
---|
745 | 719 | if (bip) { |
---|
746 | 720 | ASSERT(bip->bli_item.li_type == XFS_LI_BUF); |
---|
747 | 721 | ASSERT(!bp->b_transp); |
---|
.. | .. |
---|
749 | 723 | return 0; |
---|
750 | 724 | } |
---|
751 | 725 | |
---|
752 | | - bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP); |
---|
| 726 | + bip = kmem_cache_zalloc(xfs_buf_item_zone, GFP_KERNEL | __GFP_NOFAIL); |
---|
753 | 727 | xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); |
---|
754 | 728 | bip->bli_buf = bp; |
---|
755 | 729 | |
---|
.. | .. |
---|
762 | 736 | * Discontiguous buffer support follows the layout of the underlying |
---|
763 | 737 | * buffer. This makes the implementation as simple as possible. |
---|
764 | 738 | */ |
---|
765 | | - error = xfs_buf_item_get_format(bip, bp->b_map_count); |
---|
766 | | - ASSERT(error == 0); |
---|
767 | | - if (error) { /* to stop gcc throwing set-but-unused warnings */ |
---|
768 | | - kmem_zone_free(xfs_buf_item_zone, bip); |
---|
769 | | - return error; |
---|
770 | | - } |
---|
771 | | - |
---|
| 739 | + xfs_buf_item_get_format(bip, bp->b_map_count); |
---|
772 | 740 | |
---|
773 | 741 | for (i = 0; i < bip->bli_format_count; i++) { |
---|
774 | 742 | chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len), |
---|
775 | 743 | XFS_BLF_CHUNK); |
---|
776 | 744 | map_size = DIV_ROUND_UP(chunks, NBWORD); |
---|
| 745 | + |
---|
| 746 | + if (map_size > XFS_BLF_DATAMAP_SIZE) { |
---|
| 747 | + kmem_cache_free(xfs_buf_item_zone, bip); |
---|
| 748 | + xfs_err(mp, |
---|
| 749 | + "buffer item dirty bitmap (%u uints) too small to reflect %u bytes!", |
---|
| 750 | + map_size, |
---|
| 751 | + BBTOB(bp->b_maps[i].bm_len)); |
---|
| 752 | + return -EFSCORRUPTED; |
---|
| 753 | + } |
---|
777 | 754 | |
---|
778 | 755 | bip->bli_formats[i].blf_type = XFS_LI_BUF; |
---|
779 | 756 | bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn; |
---|
.. | .. |
---|
806 | 783 | uint bit; |
---|
807 | 784 | uint end_bit; |
---|
808 | 785 | uint mask; |
---|
| 786 | + |
---|
| 787 | + ASSERT(first < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD); |
---|
| 788 | + ASSERT(last < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD); |
---|
809 | 789 | |
---|
810 | 790 | /* |
---|
811 | 791 | * Convert byte offsets to bit numbers. |
---|
.. | .. |
---|
853 | 833 | * first_bit and last_bit. |
---|
854 | 834 | */ |
---|
855 | 835 | while ((bits_to_set - bits_set) >= NBWORD) { |
---|
856 | | - *wordp |= 0xffffffff; |
---|
| 836 | + *wordp = 0xffffffff; |
---|
857 | 837 | bits_set += NBWORD; |
---|
858 | 838 | wordp++; |
---|
859 | 839 | } |
---|
.. | .. |
---|
941 | 921 | { |
---|
942 | 922 | xfs_buf_item_free_format(bip); |
---|
943 | 923 | kmem_free(bip->bli_item.li_lv_shadow); |
---|
944 | | - kmem_zone_free(xfs_buf_item_zone, bip); |
---|
| 924 | + kmem_cache_free(xfs_buf_item_zone, bip); |
---|
945 | 925 | } |
---|
946 | 926 | |
---|
947 | 927 | /* |
---|
948 | | - * This is called when the buf log item is no longer needed. It should |
---|
949 | | - * free the buf log item associated with the given buffer and clear |
---|
950 | | - * the buffer's pointer to the buf log item. If there are no more |
---|
951 | | - * items in the list, clear the b_iodone field of the buffer (see |
---|
952 | | - * xfs_buf_attach_iodone() below). |
---|
| 928 | + * xfs_buf_item_relse() is called when the buf log item is no longer needed. |
---|
953 | 929 | */ |
---|
954 | 930 | void |
---|
955 | 931 | xfs_buf_item_relse( |
---|
.. | .. |
---|
958 | 934 | struct xfs_buf_log_item *bip = bp->b_log_item; |
---|
959 | 935 | |
---|
960 | 936 | trace_xfs_buf_item_relse(bp, _RET_IP_); |
---|
961 | | - ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); |
---|
| 937 | + ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); |
---|
962 | 938 | |
---|
963 | 939 | bp->b_log_item = NULL; |
---|
964 | | - if (list_empty(&bp->b_li_list)) |
---|
965 | | - bp->b_iodone = NULL; |
---|
966 | | - |
---|
967 | 940 | xfs_buf_rele(bp); |
---|
968 | 941 | xfs_buf_item_free(bip); |
---|
969 | 942 | } |
---|
970 | 943 | |
---|
971 | | - |
---|
972 | | -/* |
---|
973 | | - * Add the given log item with its callback to the list of callbacks |
---|
974 | | - * to be called when the buffer's I/O completes. If it is not set |
---|
975 | | - * already, set the buffer's b_iodone() routine to be |
---|
976 | | - * xfs_buf_iodone_callbacks() and link the log item into the list of |
---|
977 | | - * items rooted at b_li_list. |
---|
978 | | - */ |
---|
979 | 944 | void |
---|
980 | | -xfs_buf_attach_iodone( |
---|
981 | | - xfs_buf_t *bp, |
---|
982 | | - void (*cb)(xfs_buf_t *, xfs_log_item_t *), |
---|
983 | | - xfs_log_item_t *lip) |
---|
984 | | -{ |
---|
985 | | - ASSERT(xfs_buf_islocked(bp)); |
---|
986 | | - |
---|
987 | | - lip->li_cb = cb; |
---|
988 | | - list_add_tail(&lip->li_bio_list, &bp->b_li_list); |
---|
989 | | - |
---|
990 | | - ASSERT(bp->b_iodone == NULL || |
---|
991 | | - bp->b_iodone == xfs_buf_iodone_callbacks); |
---|
992 | | - bp->b_iodone = xfs_buf_iodone_callbacks; |
---|
993 | | -} |
---|
994 | | - |
---|
995 | | -/* |
---|
996 | | - * We can have many callbacks on a buffer. Running the callbacks individually |
---|
997 | | - * can cause a lot of contention on the AIL lock, so we allow for a single |
---|
998 | | - * callback to be able to scan the remaining items in bp->b_li_list for other |
---|
999 | | - * items of the same type and callback to be processed in the first call. |
---|
1000 | | - * |
---|
1001 | | - * As a result, the loop walking the callback list below will also modify the |
---|
1002 | | - * list. it removes the first item from the list and then runs the callback. |
---|
1003 | | - * The loop then restarts from the new first item int the list. This allows the |
---|
1004 | | - * callback to scan and modify the list attached to the buffer and we don't |
---|
1005 | | - * have to care about maintaining a next item pointer. |
---|
1006 | | - */ |
---|
1007 | | -STATIC void |
---|
1008 | | -xfs_buf_do_callbacks( |
---|
1009 | | - struct xfs_buf *bp) |
---|
1010 | | -{ |
---|
1011 | | - struct xfs_buf_log_item *blip = bp->b_log_item; |
---|
1012 | | - struct xfs_log_item *lip; |
---|
1013 | | - |
---|
1014 | | - /* If there is a buf_log_item attached, run its callback */ |
---|
1015 | | - if (blip) { |
---|
1016 | | - lip = &blip->bli_item; |
---|
1017 | | - lip->li_cb(bp, lip); |
---|
1018 | | - } |
---|
1019 | | - |
---|
1020 | | - while (!list_empty(&bp->b_li_list)) { |
---|
1021 | | - lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, |
---|
1022 | | - li_bio_list); |
---|
1023 | | - |
---|
1024 | | - /* |
---|
1025 | | - * Remove the item from the list, so we don't have any |
---|
1026 | | - * confusion if the item is added to another buf. |
---|
1027 | | - * Don't touch the log item after calling its |
---|
1028 | | - * callback, because it could have freed itself. |
---|
1029 | | - */ |
---|
1030 | | - list_del_init(&lip->li_bio_list); |
---|
1031 | | - lip->li_cb(bp, lip); |
---|
1032 | | - } |
---|
1033 | | -} |
---|
1034 | | - |
---|
1035 | | -/* |
---|
1036 | | - * Invoke the error state callback for each log item affected by the failed I/O. |
---|
1037 | | - * |
---|
1038 | | - * If a metadata buffer write fails with a non-permanent error, the buffer is |
---|
1039 | | - * eventually resubmitted and so the completion callbacks are not run. The error |
---|
1040 | | - * state may need to be propagated to the log items attached to the buffer, |
---|
1041 | | - * however, so the next AIL push of the item knows hot to handle it correctly. |
---|
1042 | | - */ |
---|
1043 | | -STATIC void |
---|
1044 | | -xfs_buf_do_callbacks_fail( |
---|
1045 | | - struct xfs_buf *bp) |
---|
1046 | | -{ |
---|
1047 | | - struct xfs_log_item *lip; |
---|
1048 | | - struct xfs_ail *ailp; |
---|
1049 | | - |
---|
1050 | | - /* |
---|
1051 | | - * Buffer log item errors are handled directly by xfs_buf_item_push() |
---|
1052 | | - * and xfs_buf_iodone_callback_error, and they have no IO error |
---|
1053 | | - * callbacks. Check only for items in b_li_list. |
---|
1054 | | - */ |
---|
1055 | | - if (list_empty(&bp->b_li_list)) |
---|
1056 | | - return; |
---|
1057 | | - |
---|
1058 | | - lip = list_first_entry(&bp->b_li_list, struct xfs_log_item, |
---|
1059 | | - li_bio_list); |
---|
1060 | | - ailp = lip->li_ailp; |
---|
1061 | | - spin_lock(&ailp->ail_lock); |
---|
1062 | | - list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { |
---|
1063 | | - if (lip->li_ops->iop_error) |
---|
1064 | | - lip->li_ops->iop_error(lip, bp); |
---|
1065 | | - } |
---|
1066 | | - spin_unlock(&ailp->ail_lock); |
---|
1067 | | -} |
---|
1068 | | - |
---|
1069 | | -static bool |
---|
1070 | | -xfs_buf_iodone_callback_error( |
---|
1071 | | - struct xfs_buf *bp) |
---|
1072 | | -{ |
---|
1073 | | - struct xfs_buf_log_item *bip = bp->b_log_item; |
---|
1074 | | - struct xfs_log_item *lip; |
---|
1075 | | - struct xfs_mount *mp; |
---|
1076 | | - static ulong lasttime; |
---|
1077 | | - static xfs_buftarg_t *lasttarg; |
---|
1078 | | - struct xfs_error_cfg *cfg; |
---|
1079 | | - |
---|
1080 | | - /* |
---|
1081 | | - * The failed buffer might not have a buf_log_item attached or the |
---|
1082 | | - * log_item list might be empty. Get the mp from the available |
---|
1083 | | - * xfs_log_item |
---|
1084 | | - */ |
---|
1085 | | - lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item, |
---|
1086 | | - li_bio_list); |
---|
1087 | | - mp = lip ? lip->li_mountp : bip->bli_item.li_mountp; |
---|
1088 | | - |
---|
1089 | | - /* |
---|
1090 | | - * If we've already decided to shutdown the filesystem because of |
---|
1091 | | - * I/O errors, there's no point in giving this a retry. |
---|
1092 | | - */ |
---|
1093 | | - if (XFS_FORCED_SHUTDOWN(mp)) |
---|
1094 | | - goto out_stale; |
---|
1095 | | - |
---|
1096 | | - if (bp->b_target != lasttarg || |
---|
1097 | | - time_after(jiffies, (lasttime + 5*HZ))) { |
---|
1098 | | - lasttime = jiffies; |
---|
1099 | | - xfs_buf_ioerror_alert(bp, __func__); |
---|
1100 | | - } |
---|
1101 | | - lasttarg = bp->b_target; |
---|
1102 | | - |
---|
1103 | | - /* synchronous writes will have callers process the error */ |
---|
1104 | | - if (!(bp->b_flags & XBF_ASYNC)) |
---|
1105 | | - goto out_stale; |
---|
1106 | | - |
---|
1107 | | - trace_xfs_buf_item_iodone_async(bp, _RET_IP_); |
---|
1108 | | - ASSERT(bp->b_iodone != NULL); |
---|
1109 | | - |
---|
1110 | | - cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error); |
---|
1111 | | - |
---|
1112 | | - /* |
---|
1113 | | - * If the write was asynchronous then no one will be looking for the |
---|
1114 | | - * error. If this is the first failure of this type, clear the error |
---|
1115 | | - * state and write the buffer out again. This means we always retry an |
---|
1116 | | - * async write failure at least once, but we also need to set the buffer |
---|
1117 | | - * up to behave correctly now for repeated failures. |
---|
1118 | | - */ |
---|
1119 | | - if (!(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) || |
---|
1120 | | - bp->b_last_error != bp->b_error) { |
---|
1121 | | - bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL); |
---|
1122 | | - bp->b_last_error = bp->b_error; |
---|
1123 | | - if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && |
---|
1124 | | - !bp->b_first_retry_time) |
---|
1125 | | - bp->b_first_retry_time = jiffies; |
---|
1126 | | - |
---|
1127 | | - xfs_buf_ioerror(bp, 0); |
---|
1128 | | - xfs_buf_submit(bp); |
---|
1129 | | - return true; |
---|
1130 | | - } |
---|
1131 | | - |
---|
1132 | | - /* |
---|
1133 | | - * Repeated failure on an async write. Take action according to the |
---|
1134 | | - * error configuration we have been set up to use. |
---|
1135 | | - */ |
---|
1136 | | - |
---|
1137 | | - if (cfg->max_retries != XFS_ERR_RETRY_FOREVER && |
---|
1138 | | - ++bp->b_retries > cfg->max_retries) |
---|
1139 | | - goto permanent_error; |
---|
1140 | | - if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && |
---|
1141 | | - time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time)) |
---|
1142 | | - goto permanent_error; |
---|
1143 | | - |
---|
1144 | | - /* At unmount we may treat errors differently */ |
---|
1145 | | - if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount) |
---|
1146 | | - goto permanent_error; |
---|
1147 | | - |
---|
1148 | | - /* |
---|
1149 | | - * Still a transient error, run IO completion failure callbacks and let |
---|
1150 | | - * the higher layers retry the buffer. |
---|
1151 | | - */ |
---|
1152 | | - xfs_buf_do_callbacks_fail(bp); |
---|
1153 | | - xfs_buf_ioerror(bp, 0); |
---|
1154 | | - xfs_buf_relse(bp); |
---|
1155 | | - return true; |
---|
1156 | | - |
---|
1157 | | - /* |
---|
1158 | | - * Permanent error - we need to trigger a shutdown if we haven't already |
---|
1159 | | - * to indicate that inconsistency will result from this action. |
---|
1160 | | - */ |
---|
1161 | | -permanent_error: |
---|
1162 | | - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); |
---|
1163 | | -out_stale: |
---|
1164 | | - xfs_buf_stale(bp); |
---|
1165 | | - bp->b_flags |= XBF_DONE; |
---|
1166 | | - trace_xfs_buf_error_relse(bp, _RET_IP_); |
---|
1167 | | - return false; |
---|
1168 | | -} |
---|
1169 | | - |
---|
1170 | | -/* |
---|
1171 | | - * This is the iodone() function for buffers which have had callbacks attached |
---|
1172 | | - * to them by xfs_buf_attach_iodone(). We need to iterate the items on the |
---|
1173 | | - * callback list, mark the buffer as having no more callbacks and then push the |
---|
1174 | | - * buffer through IO completion processing. |
---|
1175 | | - */ |
---|
1176 | | -void |
---|
1177 | | -xfs_buf_iodone_callbacks( |
---|
| 945 | +xfs_buf_item_done( |
---|
1178 | 946 | struct xfs_buf *bp) |
---|
1179 | 947 | { |
---|
1180 | 948 | /* |
---|
1181 | | - * If there is an error, process it. Some errors require us |
---|
1182 | | - * to run callbacks after failure processing is done so we |
---|
1183 | | - * detect that and take appropriate action. |
---|
1184 | | - */ |
---|
1185 | | - if (bp->b_error && xfs_buf_iodone_callback_error(bp)) |
---|
1186 | | - return; |
---|
1187 | | - |
---|
1188 | | - /* |
---|
1189 | | - * Successful IO or permanent error. Either way, we can clear the |
---|
1190 | | - * retry state here in preparation for the next error that may occur. |
---|
1191 | | - */ |
---|
1192 | | - bp->b_last_error = 0; |
---|
1193 | | - bp->b_retries = 0; |
---|
1194 | | - bp->b_first_retry_time = 0; |
---|
1195 | | - |
---|
1196 | | - xfs_buf_do_callbacks(bp); |
---|
1197 | | - bp->b_log_item = NULL; |
---|
1198 | | - list_del_init(&bp->b_li_list); |
---|
1199 | | - bp->b_iodone = NULL; |
---|
1200 | | - xfs_buf_ioend(bp); |
---|
1201 | | -} |
---|
1202 | | - |
---|
1203 | | -/* |
---|
1204 | | - * This is the iodone() function for buffers which have been |
---|
1205 | | - * logged. It is called when they are eventually flushed out. |
---|
1206 | | - * It should remove the buf item from the AIL, and free the buf item. |
---|
1207 | | - * It is called by xfs_buf_iodone_callbacks() above which will take |
---|
1208 | | - * care of cleaning up the buffer itself. |
---|
1209 | | - */ |
---|
1210 | | -void |
---|
1211 | | -xfs_buf_iodone( |
---|
1212 | | - struct xfs_buf *bp, |
---|
1213 | | - struct xfs_log_item *lip) |
---|
1214 | | -{ |
---|
1215 | | - struct xfs_ail *ailp = lip->li_ailp; |
---|
1216 | | - |
---|
1217 | | - ASSERT(BUF_ITEM(lip)->bli_buf == bp); |
---|
1218 | | - |
---|
1219 | | - xfs_buf_rele(bp); |
---|
1220 | | - |
---|
1221 | | - /* |
---|
1222 | | - * If we are forcibly shutting down, this may well be |
---|
1223 | | - * off the AIL already. That's because we simulate the |
---|
1224 | | - * log-committed callbacks to unpin these buffers. Or we may never |
---|
1225 | | - * have put this item on AIL because the transaction was |
---|
1226 | | - * aborted forcibly. xfs_trans_ail_delete() takes care of these. |
---|
| 949 | + * If we are forcibly shutting down, this may well be off the AIL |
---|
| 950 | + * already. That's because we simulate the log-committed callbacks to |
---|
| 951 | + * unpin these buffers. Or we may never have put this item on AIL |
---|
| 952 | + * because the transaction was aborted forcibly. |
---|
| 953 | + * xfs_trans_ail_delete() takes care of these. |
---|
1227 | 954 | * |
---|
1228 | 955 | * Either way, AIL is useless if we're forcing a shutdown. |
---|
| 956 | + * |
---|
| 957 | + * Note that log recovery writes might have buffer items that are not on |
---|
| 958 | + * the AIL even when the file system is not shut down. |
---|
1229 | 959 | */ |
---|
1230 | | - spin_lock(&ailp->ail_lock); |
---|
1231 | | - xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); |
---|
1232 | | - xfs_buf_item_free(BUF_ITEM(lip)); |
---|
1233 | | -} |
---|
1234 | | - |
---|
1235 | | -/* |
---|
1236 | | - * Requeue a failed buffer for writeback. |
---|
1237 | | - * |
---|
1238 | | - * We clear the log item failed state here as well, but we have to be careful |
---|
1239 | | - * about reference counts because the only active reference counts on the buffer |
---|
1240 | | - * may be the failed log items. Hence if we clear the log item failed state |
---|
1241 | | - * before queuing the buffer for IO we can release all active references to |
---|
1242 | | - * the buffer and free it, leading to use after free problems in |
---|
1243 | | - * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which |
---|
1244 | | - * order we process them in - the buffer is locked, and we own the buffer list |
---|
1245 | | - * so nothing on them is going to change while we are performing this action. |
---|
1246 | | - * |
---|
1247 | | - * Hence we can safely queue the buffer for IO before we clear the failed log |
---|
1248 | | - * item state, therefore always having an active reference to the buffer and |
---|
1249 | | - * avoiding the transient zero-reference state that leads to use-after-free. |
---|
1250 | | - * |
---|
1251 | | - * Return true if the buffer was added to the buffer list, false if it was |
---|
1252 | | - * already on the buffer list. |
---|
1253 | | - */ |
---|
1254 | | -bool |
---|
1255 | | -xfs_buf_resubmit_failed_buffers( |
---|
1256 | | - struct xfs_buf *bp, |
---|
1257 | | - struct list_head *buffer_list) |
---|
1258 | | -{ |
---|
1259 | | - struct xfs_log_item *lip; |
---|
1260 | | - bool ret; |
---|
1261 | | - |
---|
1262 | | - ret = xfs_buf_delwri_queue(bp, buffer_list); |
---|
1263 | | - |
---|
1264 | | - /* |
---|
1265 | | - * XFS_LI_FAILED set/clear is protected by ail_lock; the caller of this |
---|
1266 | | - * function already holds it. |
---|
1267 | | - */ |
---|
1268 | | - list_for_each_entry(lip, &bp->b_li_list, li_bio_list) |
---|
1269 | | - xfs_clear_li_failed(lip); |
---|
1270 | | - |
---|
1271 | | - return ret; |
---|
| 960 | + xfs_trans_ail_delete(&bp->b_log_item->bli_item, |
---|
| 961 | + (bp->b_flags & _XBF_LOGRECOVERY) ? 0 : |
---|
| 962 | + SHUTDOWN_CORRUPT_INCORE); |
---|
| 963 | + xfs_buf_item_relse(bp); |
---|
1272 | 964 | } |
---|