.. | .. |
---|
16 | 16 | #include "xfs_trans_priv.h" |
---|
17 | 17 | #include "xfs_log.h" |
---|
18 | 18 | #include "xfs_log_priv.h" |
---|
19 | | -#include "xfs_log_recover.h" |
---|
20 | | -#include "xfs_inode.h" |
---|
21 | 19 | #include "xfs_trace.h" |
---|
22 | | -#include "xfs_fsops.h" |
---|
23 | | -#include "xfs_cksum.h" |
---|
24 | 20 | #include "xfs_sysfs.h" |
---|
25 | 21 | #include "xfs_sb.h" |
---|
| 22 | +#include "xfs_health.h" |
---|
26 | 23 | |
---|
27 | 24 | kmem_zone_t *xfs_log_ticket_zone; |
---|
28 | 25 | |
---|
29 | 26 | /* Local miscellaneous function prototypes */ |
---|
30 | | -STATIC int |
---|
31 | | -xlog_commit_record( |
---|
32 | | - struct xlog *log, |
---|
33 | | - struct xlog_ticket *ticket, |
---|
34 | | - struct xlog_in_core **iclog, |
---|
35 | | - xfs_lsn_t *commitlsnp); |
---|
36 | | - |
---|
37 | 27 | STATIC struct xlog * |
---|
38 | 28 | xlog_alloc_log( |
---|
39 | 29 | struct xfs_mount *mp, |
---|
.. | .. |
---|
44 | 34 | xlog_space_left( |
---|
45 | 35 | struct xlog *log, |
---|
46 | 36 | atomic64_t *head); |
---|
47 | | -STATIC int |
---|
48 | | -xlog_sync( |
---|
49 | | - struct xlog *log, |
---|
50 | | - struct xlog_in_core *iclog); |
---|
51 | 37 | STATIC void |
---|
52 | 38 | xlog_dealloc_log( |
---|
53 | 39 | struct xlog *log); |
---|
54 | 40 | |
---|
55 | 41 | /* local state machine functions */ |
---|
56 | | -STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); |
---|
57 | | -STATIC void |
---|
58 | | -xlog_state_do_callback( |
---|
59 | | - struct xlog *log, |
---|
60 | | - int aborted, |
---|
| 42 | +STATIC void xlog_state_done_syncing( |
---|
61 | 43 | struct xlog_in_core *iclog); |
---|
62 | 44 | STATIC int |
---|
63 | 45 | xlog_state_get_iclog_space( |
---|
.. | .. |
---|
67 | 49 | struct xlog_ticket *ticket, |
---|
68 | 50 | int *continued_write, |
---|
69 | 51 | int *logoffsetp); |
---|
70 | | -STATIC int |
---|
71 | | -xlog_state_release_iclog( |
---|
72 | | - struct xlog *log, |
---|
73 | | - struct xlog_in_core *iclog); |
---|
74 | 52 | STATIC void |
---|
75 | 53 | xlog_state_switch_iclogs( |
---|
76 | 54 | struct xlog *log, |
---|
77 | 55 | struct xlog_in_core *iclog, |
---|
78 | 56 | int eventual_size); |
---|
79 | 57 | STATIC void |
---|
80 | | -xlog_state_want_sync( |
---|
81 | | - struct xlog *log, |
---|
82 | | - struct xlog_in_core *iclog); |
---|
83 | | - |
---|
84 | | -STATIC void |
---|
85 | 58 | xlog_grant_push_ail( |
---|
86 | 59 | struct xlog *log, |
---|
87 | 60 | int need_bytes); |
---|
88 | 61 | STATIC void |
---|
89 | | -xlog_regrant_reserve_log_space( |
---|
| 62 | +xlog_sync( |
---|
90 | 63 | struct xlog *log, |
---|
91 | | - struct xlog_ticket *ticket); |
---|
92 | | -STATIC void |
---|
93 | | -xlog_ungrant_log_space( |
---|
94 | | - struct xlog *log, |
---|
95 | | - struct xlog_ticket *ticket); |
---|
96 | | - |
---|
| 64 | + struct xlog_in_core *iclog); |
---|
97 | 65 | #if defined(DEBUG) |
---|
98 | 66 | STATIC void |
---|
99 | 67 | xlog_verify_dest_ptr( |
---|
.. | .. |
---|
106 | 74 | xlog_verify_iclog( |
---|
107 | 75 | struct xlog *log, |
---|
108 | 76 | struct xlog_in_core *iclog, |
---|
109 | | - int count, |
---|
110 | | - bool syncing); |
---|
| 77 | + int count); |
---|
111 | 78 | STATIC void |
---|
112 | 79 | xlog_verify_tail_lsn( |
---|
113 | 80 | struct xlog *log, |
---|
.. | .. |
---|
116 | 83 | #else |
---|
117 | 84 | #define xlog_verify_dest_ptr(a,b) |
---|
118 | 85 | #define xlog_verify_grant_tail(a) |
---|
119 | | -#define xlog_verify_iclog(a,b,c,d) |
---|
| 86 | +#define xlog_verify_iclog(a,b,c) |
---|
120 | 87 | #define xlog_verify_tail_lsn(a,b,c) |
---|
121 | 88 | #endif |
---|
122 | 89 | |
---|
.. | .. |
---|
225 | 192 | { |
---|
226 | 193 | struct xlog_ticket *tic; |
---|
227 | 194 | int need_bytes; |
---|
| 195 | + bool woken_task = false; |
---|
228 | 196 | |
---|
229 | 197 | list_for_each_entry(tic, &head->waiters, t_queue) { |
---|
| 198 | + |
---|
| 199 | + /* |
---|
| 200 | + * There is a chance that the size of the CIL checkpoints in |
---|
| 201 | + * progress at the last AIL push target calculation resulted in |
---|
| 202 | + * limiting the target to the log head (l_last_sync_lsn) at the |
---|
| 203 | + * time. This may not reflect where the log head is now as the |
---|
| 204 | + * CIL checkpoints may have completed. |
---|
| 205 | + * |
---|
| 206 | + * Hence when we are woken here, it may be the head of the |
---|
| 207 | + * log that has moved rather than the tail. As the tail didn't |
---|
| 208 | + * move, there still won't be space available for the |
---|
| 209 | + * reservation we require. However, if the AIL has already |
---|
| 210 | + * pushed to the target defined by the old log head location, we |
---|
| 211 | + * will hang here waiting for something else to update the AIL |
---|
| 212 | + * push target. |
---|
| 213 | + * |
---|
| 214 | + * Therefore, if there isn't space to wake the first waiter on |
---|
| 215 | + * the grant head, we need to push the AIL again to ensure the |
---|
| 216 | + * target reflects both the current log tail and log head |
---|
| 217 | + * position before we wait for the tail to move again. |
---|
| 218 | + */ |
---|
| 219 | + |
---|
230 | 220 | need_bytes = xlog_ticket_reservation(log, head, tic); |
---|
231 | | - if (*free_bytes < need_bytes) |
---|
| 221 | + if (*free_bytes < need_bytes) { |
---|
| 222 | + if (!woken_task) |
---|
| 223 | + xlog_grant_push_ail(log, need_bytes); |
---|
232 | 224 | return false; |
---|
| 225 | + } |
---|
233 | 226 | |
---|
234 | 227 | *free_bytes -= need_bytes; |
---|
235 | 228 | trace_xfs_log_grant_wake_up(log, tic); |
---|
236 | 229 | wake_up_process(tic->t_task); |
---|
| 230 | + woken_task = true; |
---|
237 | 231 | } |
---|
238 | 232 | |
---|
239 | 233 | return true; |
---|
.. | .. |
---|
353 | 347 | tic->t_res_num++; |
---|
354 | 348 | } |
---|
355 | 349 | |
---|
| 350 | +bool |
---|
| 351 | +xfs_log_writable( |
---|
| 352 | + struct xfs_mount *mp) |
---|
| 353 | +{ |
---|
| 354 | + /* |
---|
| 355 | + * Never write to the log on norecovery mounts, if the block device is |
---|
| 356 | + * read-only, or if the filesystem is shutdown. Read-only mounts still |
---|
| 357 | + * allow internal writes for log recovery and unmount purposes, so don't |
---|
| 358 | + * restrict that case here. |
---|
| 359 | + */ |
---|
| 360 | + if (mp->m_flags & XFS_MOUNT_NORECOVERY) |
---|
| 361 | + return false; |
---|
| 362 | + if (xfs_readonly_buftarg(mp->m_log->l_targ)) |
---|
| 363 | + return false; |
---|
| 364 | + if (XFS_FORCED_SHUTDOWN(mp)) |
---|
| 365 | + return false; |
---|
| 366 | + return true; |
---|
| 367 | +} |
---|
| 368 | + |
---|
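The new xfs_log_writable() helper centralises the "may we issue internal log writes?" checks; the xfs_log_unmount_write() hunk further down uses it exactly this way. A minimal usage sketch, where the example_internal_log_write() wrapper is hypothetical and only xfs_log_writable() comes from this patch:

    /*
     * Hypothetical caller: bail out early when internal log writes are not
     * allowed (norecovery mount, read-only log device, or a shut down fs).
     */
    static void example_internal_log_write(struct xfs_mount *mp)
    {
    	if (!xfs_log_writable(mp))
    		return;

    	/* ... safe to write e.g. an unmount or log cover record here ... */
    }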
356 | 369 | /* |
---|
357 | 370 | * Replenish the byte reservation required by moving the grant write head. |
---|
358 | 371 | */ |
---|
.. | .. |
---|
439 | 452 | XFS_STATS_INC(mp, xs_try_logspace); |
---|
440 | 453 | |
---|
441 | 454 | ASSERT(*ticp == NULL); |
---|
442 | | - tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, |
---|
443 | | - KM_SLEEP | KM_MAYFAIL); |
---|
444 | | - if (!tic) |
---|
445 | | - return -ENOMEM; |
---|
446 | | - |
---|
| 455 | + tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent); |
---|
447 | 456 | *ticp = tic; |
---|
448 | 457 | |
---|
449 | 458 | xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt |
---|
.. | .. |
---|
473 | 482 | return error; |
---|
474 | 483 | } |
---|
475 | 484 | |
---|
476 | | - |
---|
477 | | -/* |
---|
478 | | - * NOTES: |
---|
479 | | - * |
---|
480 | | - * 1. currblock field gets updated at startup and after in-core logs |
---|
481 | | - * marked as with WANT_SYNC. |
---|
482 | | - */ |
---|
483 | | - |
---|
484 | | -/* |
---|
485 | | - * This routine is called when a user of a log manager ticket is done with |
---|
486 | | - * the reservation. If the ticket was ever used, then a commit record for |
---|
487 | | - * the associated transaction is written out as a log operation header with |
---|
488 | | - * no data. The flag XLOG_TIC_INITED is set when the first write occurs with |
---|
489 | | - * a given ticket. If the ticket was one with a permanent reservation, then |
---|
490 | | - * a few operations are done differently. Permanent reservation tickets by |
---|
491 | | - * default don't release the reservation. They just commit the current |
---|
492 | | - * transaction with the belief that the reservation is still needed. A flag |
---|
493 | | - * must be passed in before permanent reservations are actually released. |
---|
494 | | - * When these type of tickets are not released, they need to be set into |
---|
495 | | - * the inited state again. By doing this, a start record will be written |
---|
496 | | - * out when the next write occurs. |
---|
497 | | - */ |
---|
498 | | -xfs_lsn_t |
---|
499 | | -xfs_log_done( |
---|
500 | | - struct xfs_mount *mp, |
---|
501 | | - struct xlog_ticket *ticket, |
---|
502 | | - struct xlog_in_core **iclog, |
---|
503 | | - bool regrant) |
---|
504 | | -{ |
---|
505 | | - struct xlog *log = mp->m_log; |
---|
506 | | - xfs_lsn_t lsn = 0; |
---|
507 | | - |
---|
508 | | - if (XLOG_FORCED_SHUTDOWN(log) || |
---|
509 | | - /* |
---|
510 | | - * If nothing was ever written, don't write out commit record. |
---|
511 | | - * If we get an error, just continue and give back the log ticket. |
---|
512 | | - */ |
---|
513 | | - (((ticket->t_flags & XLOG_TIC_INITED) == 0) && |
---|
514 | | - (xlog_commit_record(log, ticket, iclog, &lsn)))) { |
---|
515 | | - lsn = (xfs_lsn_t) -1; |
---|
516 | | - regrant = false; |
---|
517 | | - } |
---|
518 | | - |
---|
519 | | - |
---|
520 | | - if (!regrant) { |
---|
521 | | - trace_xfs_log_done_nonperm(log, ticket); |
---|
522 | | - |
---|
523 | | - /* |
---|
524 | | - * Release ticket if not permanent reservation or a specific |
---|
525 | | - * request has been made to release a permanent reservation. |
---|
526 | | - */ |
---|
527 | | - xlog_ungrant_log_space(log, ticket); |
---|
528 | | - } else { |
---|
529 | | - trace_xfs_log_done_perm(log, ticket); |
---|
530 | | - |
---|
531 | | - xlog_regrant_reserve_log_space(log, ticket); |
---|
532 | | - /* If this ticket was a permanent reservation and we aren't |
---|
533 | | - * trying to release it, reset the inited flags; so next time |
---|
534 | | - * we write, a start record will be written out. |
---|
535 | | - */ |
---|
536 | | - ticket->t_flags |= XLOG_TIC_INITED; |
---|
537 | | - } |
---|
538 | | - |
---|
539 | | - xfs_log_ticket_put(ticket); |
---|
540 | | - return lsn; |
---|
541 | | -} |
---|
542 | | - |
---|
543 | | -/* |
---|
544 | | - * Attaches a new iclog I/O completion callback routine during |
---|
545 | | - * transaction commit. If the log is in error state, a non-zero |
---|
546 | | - * return code is handed back and the caller is responsible for |
---|
547 | | - * executing the callback at an appropriate time. |
---|
548 | | - */ |
---|
549 | | -int |
---|
550 | | -xfs_log_notify( |
---|
551 | | - struct xlog_in_core *iclog, |
---|
552 | | - xfs_log_callback_t *cb) |
---|
553 | | -{ |
---|
554 | | - int abortflg; |
---|
555 | | - |
---|
556 | | - spin_lock(&iclog->ic_callback_lock); |
---|
557 | | - abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); |
---|
558 | | - if (!abortflg) { |
---|
559 | | - ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || |
---|
560 | | - (iclog->ic_state == XLOG_STATE_WANT_SYNC)); |
---|
561 | | - cb->cb_next = NULL; |
---|
562 | | - *(iclog->ic_callback_tail) = cb; |
---|
563 | | - iclog->ic_callback_tail = &(cb->cb_next); |
---|
564 | | - } |
---|
565 | | - spin_unlock(&iclog->ic_callback_lock); |
---|
566 | | - return abortflg; |
---|
567 | | -} |
---|
568 | | - |
---|
569 | | -int |
---|
570 | | -xfs_log_release_iclog( |
---|
571 | | - struct xfs_mount *mp, |
---|
| 485 | +static bool |
---|
| 486 | +__xlog_state_release_iclog( |
---|
| 487 | + struct xlog *log, |
---|
572 | 488 | struct xlog_in_core *iclog) |
---|
573 | 489 | { |
---|
574 | | - if (xlog_state_release_iclog(mp->m_log, iclog)) { |
---|
575 | | - xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
---|
| 490 | + lockdep_assert_held(&log->l_icloglock); |
---|
| 491 | + |
---|
| 492 | + if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { |
---|
| 493 | + /* update tail before writing to iclog */ |
---|
| 494 | + xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); |
---|
| 495 | + |
---|
| 496 | + iclog->ic_state = XLOG_STATE_SYNCING; |
---|
| 497 | + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); |
---|
| 498 | + xlog_verify_tail_lsn(log, iclog, tail_lsn); |
---|
| 499 | + /* cycle incremented when incrementing curr_block */ |
---|
| 500 | + return true; |
---|
| 501 | + } |
---|
| 502 | + |
---|
| 503 | + ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); |
---|
| 504 | + return false; |
---|
| 505 | +} |
---|
| 506 | + |
---|
| 507 | +/* |
---|
| 508 | + * Flush iclog to disk if this is the last reference to the given iclog and |
---|
| 509 | + * it is in the WANT_SYNC state. |
---|
| 510 | + */ |
---|
| 511 | +static int |
---|
| 512 | +xlog_state_release_iclog( |
---|
| 513 | + struct xlog *log, |
---|
| 514 | + struct xlog_in_core *iclog) |
---|
| 515 | +{ |
---|
| 516 | + lockdep_assert_held(&log->l_icloglock); |
---|
| 517 | + |
---|
| 518 | + if (iclog->ic_state == XLOG_STATE_IOERROR) |
---|
576 | 519 | return -EIO; |
---|
| 520 | + |
---|
| 521 | + if (atomic_dec_and_test(&iclog->ic_refcnt) && |
---|
| 522 | + __xlog_state_release_iclog(log, iclog)) { |
---|
| 523 | + spin_unlock(&log->l_icloglock); |
---|
| 524 | + xlog_sync(log, iclog); |
---|
| 525 | + spin_lock(&log->l_icloglock); |
---|
577 | 526 | } |
---|
578 | 527 | |
---|
579 | 528 | return 0; |
---|
| 529 | +} |
---|
| 530 | + |
---|
| 531 | +void |
---|
| 532 | +xfs_log_release_iclog( |
---|
| 533 | + struct xlog_in_core *iclog) |
---|
| 534 | +{ |
---|
| 535 | + struct xlog *log = iclog->ic_log; |
---|
| 536 | + bool sync = false; |
---|
| 537 | + |
---|
| 538 | + if (atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) { |
---|
| 539 | + if (iclog->ic_state != XLOG_STATE_IOERROR) |
---|
| 540 | + sync = __xlog_state_release_iclog(log, iclog); |
---|
| 541 | + spin_unlock(&log->l_icloglock); |
---|
| 542 | + } |
---|
| 543 | + |
---|
| 544 | + if (sync) |
---|
| 545 | + xlog_sync(log, iclog); |
---|
580 | 546 | } |
---|
581 | 547 | |
---|
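Note the locking pattern in the reworked xfs_log_release_iclog(): atomic_dec_and_lock() only takes l_icloglock when the reference count actually reaches zero, so the common "not the last reference" put stays lock-free, and the heavyweight xlog_sync() call is issued after the lock is dropped. A generic sketch of that pattern, with illustrative obj/obj_put/heavy_work names that are not from the patch:

    #include <linux/atomic.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct obj {
    	atomic_t	refcnt;
    	spinlock_t	lock;
    	bool		needs_sync;
    };

    static void heavy_work(struct obj *o);	/* stand-in for e.g. xlog_sync() */

    /* Drop a reference; only the final put pays for the lock. */
    static void obj_put(struct obj *o)
    {
    	bool sync = false;

    	if (atomic_dec_and_lock(&o->refcnt, &o->lock)) {
    		/* last reference: state transitions happen under the lock */
    		sync = o->needs_sync;
    		spin_unlock(&o->lock);
    	}

    	if (sync)
    		heavy_work(o);	/* done without the lock held */
    }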
582 | 548 | /* |
---|
.. | .. |
---|
799 | 765 | if (readonly) |
---|
800 | 766 | mp->m_flags |= XFS_MOUNT_RDONLY; |
---|
801 | 767 | |
---|
| 768 | + /* Make sure the log is dead if we're returning failure. */ |
---|
| 769 | + ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR)); |
---|
| 770 | + |
---|
802 | 771 | return error; |
---|
803 | 772 | } |
---|
804 | 773 | |
---|
.. | .. |
---|
806 | 775 | * The mount has failed. Cancel the recovery if it hasn't completed and destroy |
---|
807 | 776 | * the log. |
---|
808 | 777 | */ |
---|
809 | | -int |
---|
| 778 | +void |
---|
810 | 779 | xfs_log_mount_cancel( |
---|
811 | 780 | struct xfs_mount *mp) |
---|
812 | 781 | { |
---|
813 | | - int error; |
---|
814 | | - |
---|
815 | | - error = xlog_recover_cancel(mp->m_log); |
---|
| 782 | + xlog_recover_cancel(mp->m_log); |
---|
816 | 783 | xfs_log_unmount(mp); |
---|
817 | | - |
---|
818 | | - return error; |
---|
819 | 784 | } |
---|
820 | 785 | |
---|
821 | 786 | /* |
---|
822 | | - * Final log writes as part of unmount. |
---|
823 | | - * |
---|
824 | | - * Mark the filesystem clean as unmount happens. Note that during relocation |
---|
825 | | - * this routine needs to be executed as part of source-bag while the |
---|
826 | | - * deallocation must not be done until source-end. |
---|
| 787 | + * Wait for the iclog to be written to disk, or return an error if the log has been |
---|
| 788 | + * shut down. |
---|
827 | 789 | */ |
---|
828 | | - |
---|
829 | | -/* Actually write the unmount record to disk. */ |
---|
830 | | -static void |
---|
831 | | -xfs_log_write_unmount_record( |
---|
832 | | - struct xfs_mount *mp) |
---|
| 790 | +static int |
---|
| 791 | +xlog_wait_on_iclog( |
---|
| 792 | + struct xlog_in_core *iclog) |
---|
| 793 | + __releases(iclog->ic_log->l_icloglock) |
---|
833 | 794 | { |
---|
834 | | - /* the data section must be 32 bit size aligned */ |
---|
835 | | - struct xfs_unmount_log_format magic = { |
---|
| 795 | + struct xlog *log = iclog->ic_log; |
---|
| 796 | + |
---|
| 797 | + if (!XLOG_FORCED_SHUTDOWN(log) && |
---|
| 798 | + iclog->ic_state != XLOG_STATE_ACTIVE && |
---|
| 799 | + iclog->ic_state != XLOG_STATE_DIRTY) { |
---|
| 800 | + XFS_STATS_INC(log->l_mp, xs_log_force_sleep); |
---|
| 801 | + xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
---|
| 802 | + } else { |
---|
| 803 | + spin_unlock(&log->l_icloglock); |
---|
| 804 | + } |
---|
| 805 | + |
---|
| 806 | + if (XLOG_FORCED_SHUTDOWN(log)) |
---|
| 807 | + return -EIO; |
---|
| 808 | + return 0; |
---|
| 809 | +} |
---|
| 810 | + |
---|
| 811 | +/* |
---|
| 812 | + * Write out an unmount record using the ticket provided. We have to account for |
---|
| 813 | + * the data space used in the unmount ticket as this write is not done from a |
---|
| 814 | + * transaction context that has already done the accounting for us. |
---|
| 815 | + */ |
---|
| 816 | +static int |
---|
| 817 | +xlog_write_unmount_record( |
---|
| 818 | + struct xlog *log, |
---|
| 819 | + struct xlog_ticket *ticket, |
---|
| 820 | + xfs_lsn_t *lsn, |
---|
| 821 | + uint flags) |
---|
| 822 | +{ |
---|
| 823 | + struct xfs_unmount_log_format ulf = { |
---|
836 | 824 | .magic = XLOG_UNMOUNT_TYPE, |
---|
837 | 825 | }; |
---|
838 | 826 | struct xfs_log_iovec reg = { |
---|
839 | | - .i_addr = &magic, |
---|
840 | | - .i_len = sizeof(magic), |
---|
| 827 | + .i_addr = &ulf, |
---|
| 828 | + .i_len = sizeof(ulf), |
---|
841 | 829 | .i_type = XLOG_REG_TYPE_UNMOUNT, |
---|
842 | 830 | }; |
---|
843 | 831 | struct xfs_log_vec vec = { |
---|
844 | 832 | .lv_niovecs = 1, |
---|
845 | 833 | .lv_iovecp = ®, |
---|
846 | 834 | }; |
---|
847 | | - struct xlog *log = mp->m_log; |
---|
| 835 | + |
---|
| 836 | + /* account for space used by record data */ |
---|
| 837 | + ticket->t_curr_res -= sizeof(ulf); |
---|
| 838 | + return xlog_write(log, &vec, ticket, lsn, NULL, flags, false); |
---|
| 839 | +} |
---|
| 840 | + |
---|
| 841 | +/* |
---|
| 842 | + * Mark the filesystem clean by writing an unmount record to the head of the |
---|
| 843 | + * log. |
---|
| 844 | + */ |
---|
| 845 | +static void |
---|
| 846 | +xlog_unmount_write( |
---|
| 847 | + struct xlog *log) |
---|
| 848 | +{ |
---|
| 849 | + struct xfs_mount *mp = log->l_mp; |
---|
848 | 850 | struct xlog_in_core *iclog; |
---|
849 | 851 | struct xlog_ticket *tic = NULL; |
---|
850 | 852 | xfs_lsn_t lsn; |
---|
.. | .. |
---|
855 | 857 | if (error) |
---|
856 | 858 | goto out_err; |
---|
857 | 859 | |
---|
858 | | - /* |
---|
859 | | - * If we think the summary counters are bad, clear the unmount header |
---|
860 | | - * flag in the unmount record so that the summary counters will be |
---|
861 | | - * recalculated during log recovery at next mount. Refer to |
---|
862 | | - * xlog_check_unmount_rec for more details. |
---|
863 | | - */ |
---|
864 | | - if (XFS_TEST_ERROR((mp->m_flags & XFS_MOUNT_BAD_SUMMARY), mp, |
---|
865 | | - XFS_ERRTAG_FORCE_SUMMARY_RECALC)) { |
---|
866 | | - xfs_alert(mp, "%s: will fix summary counters at next mount", |
---|
867 | | - __func__); |
---|
868 | | - flags &= ~XLOG_UNMOUNT_TRANS; |
---|
869 | | - } |
---|
870 | | - |
---|
871 | | - /* remove inited flag, and account for space used */ |
---|
872 | | - tic->t_flags = 0; |
---|
873 | | - tic->t_curr_res -= sizeof(magic); |
---|
874 | | - error = xlog_write(log, &vec, tic, &lsn, NULL, flags); |
---|
| 860 | + error = xlog_write_unmount_record(log, tic, &lsn, flags); |
---|
875 | 861 | /* |
---|
876 | 862 | * At this point, we're umounting anyway, so there's no point in |
---|
877 | 863 | * transitioning log state to IOERROR. Just continue... |
---|
.. | .. |
---|
883 | 869 | spin_lock(&log->l_icloglock); |
---|
884 | 870 | iclog = log->l_iclog; |
---|
885 | 871 | atomic_inc(&iclog->ic_refcnt); |
---|
886 | | - xlog_state_want_sync(log, iclog); |
---|
887 | | - spin_unlock(&log->l_icloglock); |
---|
| 872 | + if (iclog->ic_state == XLOG_STATE_ACTIVE) |
---|
| 873 | + xlog_state_switch_iclogs(log, iclog, 0); |
---|
| 874 | + else |
---|
| 875 | + ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || |
---|
| 876 | + iclog->ic_state == XLOG_STATE_IOERROR); |
---|
888 | 877 | error = xlog_state_release_iclog(log, iclog); |
---|
889 | | - |
---|
890 | | - spin_lock(&log->l_icloglock); |
---|
891 | | - switch (iclog->ic_state) { |
---|
892 | | - default: |
---|
893 | | - if (!XLOG_FORCED_SHUTDOWN(log)) { |
---|
894 | | - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
---|
895 | | - break; |
---|
896 | | - } |
---|
897 | | - /* fall through */ |
---|
898 | | - case XLOG_STATE_ACTIVE: |
---|
899 | | - case XLOG_STATE_DIRTY: |
---|
900 | | - spin_unlock(&log->l_icloglock); |
---|
901 | | - break; |
---|
902 | | - } |
---|
| 878 | + xlog_wait_on_iclog(iclog); |
---|
903 | 879 | |
---|
904 | 880 | if (tic) { |
---|
905 | 881 | trace_xfs_log_umount_write(log, tic); |
---|
906 | | - xlog_ungrant_log_space(log, tic); |
---|
907 | | - xfs_log_ticket_put(tic); |
---|
| 882 | + xfs_log_ticket_ungrant(log, tic); |
---|
908 | 883 | } |
---|
| 884 | +} |
---|
| 885 | + |
---|
| 886 | +static void |
---|
| 887 | +xfs_log_unmount_verify_iclog( |
---|
| 888 | + struct xlog *log) |
---|
| 889 | +{ |
---|
| 890 | + struct xlog_in_core *iclog = log->l_iclog; |
---|
| 891 | + |
---|
| 892 | + do { |
---|
| 893 | + ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); |
---|
| 894 | + ASSERT(iclog->ic_offset == 0); |
---|
| 895 | + } while ((iclog = iclog->ic_next) != log->l_iclog); |
---|
909 | 896 | } |
---|
910 | 897 | |
---|
911 | 898 | /* |
---|
.. | .. |
---|
915 | 902 | * currently architecture converted and "Unmount" is a bit foo. |
---|
916 | 903 | * As far as I know, there weren't any dependencies on the old behaviour. |
---|
917 | 904 | */ |
---|
918 | | - |
---|
919 | | -static int |
---|
920 | | -xfs_log_unmount_write(xfs_mount_t *mp) |
---|
| 905 | +static void |
---|
| 906 | +xfs_log_unmount_write( |
---|
| 907 | + struct xfs_mount *mp) |
---|
921 | 908 | { |
---|
922 | | - struct xlog *log = mp->m_log; |
---|
923 | | - xlog_in_core_t *iclog; |
---|
924 | | -#ifdef DEBUG |
---|
925 | | - xlog_in_core_t *first_iclog; |
---|
926 | | -#endif |
---|
927 | | - int error; |
---|
| 909 | + struct xlog *log = mp->m_log; |
---|
| 910 | + |
---|
| 911 | + if (!xfs_log_writable(mp)) |
---|
| 912 | + return; |
---|
| 913 | + |
---|
| 914 | + xfs_log_force(mp, XFS_LOG_SYNC); |
---|
| 915 | + |
---|
| 916 | + if (XLOG_FORCED_SHUTDOWN(log)) |
---|
| 917 | + return; |
---|
928 | 918 | |
---|
929 | 919 | /* |
---|
930 | | - * Don't write out unmount record on norecovery mounts or ro devices. |
---|
931 | | - * Or, if we are doing a forced umount (typically because of IO errors). |
---|
| 920 | + * If we think the summary counters are bad, avoid writing the unmount |
---|
| 921 | + * record to force log recovery at next mount, after which the summary |
---|
| 922 | + * counters will be recalculated. Refer to xlog_check_unmount_rec for |
---|
| 923 | + * more details. |
---|
932 | 924 | */ |
---|
933 | | - if (mp->m_flags & XFS_MOUNT_NORECOVERY || |
---|
934 | | - xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { |
---|
935 | | - ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); |
---|
936 | | - return 0; |
---|
| 925 | + if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp, |
---|
| 926 | + XFS_ERRTAG_FORCE_SUMMARY_RECALC)) { |
---|
| 927 | + xfs_alert(mp, "%s: will fix summary counters at next mount", |
---|
| 928 | + __func__); |
---|
| 929 | + return; |
---|
937 | 930 | } |
---|
938 | 931 | |
---|
939 | | - error = xfs_log_force(mp, XFS_LOG_SYNC); |
---|
940 | | - ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); |
---|
941 | | - |
---|
942 | | -#ifdef DEBUG |
---|
943 | | - first_iclog = iclog = log->l_iclog; |
---|
944 | | - do { |
---|
945 | | - if (!(iclog->ic_state & XLOG_STATE_IOERROR)) { |
---|
946 | | - ASSERT(iclog->ic_state & XLOG_STATE_ACTIVE); |
---|
947 | | - ASSERT(iclog->ic_offset == 0); |
---|
948 | | - } |
---|
949 | | - iclog = iclog->ic_next; |
---|
950 | | - } while (iclog != first_iclog); |
---|
951 | | -#endif |
---|
952 | | - if (! (XLOG_FORCED_SHUTDOWN(log))) { |
---|
953 | | - xfs_log_write_unmount_record(mp); |
---|
954 | | - } else { |
---|
955 | | - /* |
---|
956 | | - * We're already in forced_shutdown mode, couldn't |
---|
957 | | - * even attempt to write out the unmount transaction. |
---|
958 | | - * |
---|
959 | | - * Go through the motions of sync'ing and releasing |
---|
960 | | - * the iclog, even though no I/O will actually happen, |
---|
961 | | - * we need to wait for other log I/Os that may already |
---|
962 | | - * be in progress. Do this as a separate section of |
---|
963 | | - * code so we'll know if we ever get stuck here that |
---|
964 | | - * we're in this odd situation of trying to unmount |
---|
965 | | - * a file system that went into forced_shutdown as |
---|
966 | | - * the result of an unmount.. |
---|
967 | | - */ |
---|
968 | | - spin_lock(&log->l_icloglock); |
---|
969 | | - iclog = log->l_iclog; |
---|
970 | | - atomic_inc(&iclog->ic_refcnt); |
---|
971 | | - |
---|
972 | | - xlog_state_want_sync(log, iclog); |
---|
973 | | - spin_unlock(&log->l_icloglock); |
---|
974 | | - error = xlog_state_release_iclog(log, iclog); |
---|
975 | | - |
---|
976 | | - spin_lock(&log->l_icloglock); |
---|
977 | | - |
---|
978 | | - if ( ! ( iclog->ic_state == XLOG_STATE_ACTIVE |
---|
979 | | - || iclog->ic_state == XLOG_STATE_DIRTY |
---|
980 | | - || iclog->ic_state == XLOG_STATE_IOERROR) ) { |
---|
981 | | - |
---|
982 | | - xlog_wait(&iclog->ic_force_wait, |
---|
983 | | - &log->l_icloglock); |
---|
984 | | - } else { |
---|
985 | | - spin_unlock(&log->l_icloglock); |
---|
986 | | - } |
---|
987 | | - } |
---|
988 | | - |
---|
989 | | - return error; |
---|
990 | | -} /* xfs_log_unmount_write */ |
---|
| 932 | + xfs_log_unmount_verify_iclog(log); |
---|
| 933 | + xlog_unmount_write(log); |
---|
| 934 | +} |
---|
991 | 935 | |
---|
992 | 936 | /* |
---|
993 | 937 | * Empty the log for unmount/freeze. |
---|
.. | .. |
---|
1243 | 1187 | } |
---|
1244 | 1188 | |
---|
1245 | 1189 | |
---|
1246 | | -/* |
---|
1247 | | - * Log function which is called when an io completes. |
---|
1248 | | - * |
---|
1249 | | - * The log manager needs its own routine, in order to control what |
---|
1250 | | - * happens with the buffer after the write completes. |
---|
1251 | | - */ |
---|
1252 | 1190 | static void |
---|
1253 | | -xlog_iodone(xfs_buf_t *bp) |
---|
| 1191 | +xlog_ioend_work( |
---|
| 1192 | + struct work_struct *work) |
---|
1254 | 1193 | { |
---|
1255 | | - struct xlog_in_core *iclog = bp->b_log_item; |
---|
1256 | | - struct xlog *l = iclog->ic_log; |
---|
1257 | | - int aborted = 0; |
---|
| 1194 | + struct xlog_in_core *iclog = |
---|
| 1195 | + container_of(work, struct xlog_in_core, ic_end_io_work); |
---|
| 1196 | + struct xlog *log = iclog->ic_log; |
---|
| 1197 | + int error; |
---|
| 1198 | + |
---|
| 1199 | + error = blk_status_to_errno(iclog->ic_bio.bi_status); |
---|
| 1200 | +#ifdef DEBUG |
---|
| 1201 | + /* treat writes with injected CRC errors as failed */ |
---|
| 1202 | + if (iclog->ic_fail_crc) |
---|
| 1203 | + error = -EIO; |
---|
| 1204 | +#endif |
---|
1258 | 1205 | |
---|
1259 | 1206 | /* |
---|
1260 | | - * Race to shutdown the filesystem if we see an error or the iclog is in |
---|
1261 | | - * IOABORT state. The IOABORT state is only set in DEBUG mode to inject |
---|
1262 | | - * CRC errors into log recovery. |
---|
| 1207 | + * Race to shutdown the filesystem if we see an error. |
---|
1263 | 1208 | */ |
---|
1264 | | - if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) || |
---|
1265 | | - iclog->ic_state & XLOG_STATE_IOABORT) { |
---|
1266 | | - if (iclog->ic_state & XLOG_STATE_IOABORT) |
---|
1267 | | - iclog->ic_state &= ~XLOG_STATE_IOABORT; |
---|
1268 | | - |
---|
1269 | | - xfs_buf_ioerror_alert(bp, __func__); |
---|
1270 | | - xfs_buf_stale(bp); |
---|
1271 | | - xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR); |
---|
1272 | | - /* |
---|
1273 | | - * This flag will be propagated to the trans-committed |
---|
1274 | | - * callback routines to let them know that the log-commit |
---|
1275 | | - * didn't succeed. |
---|
1276 | | - */ |
---|
1277 | | - aborted = XFS_LI_ABORTED; |
---|
1278 | | - } else if (iclog->ic_state & XLOG_STATE_IOERROR) { |
---|
1279 | | - aborted = XFS_LI_ABORTED; |
---|
| 1209 | + if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) { |
---|
| 1210 | + xfs_alert(log->l_mp, "log I/O error %d", error); |
---|
| 1211 | + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); |
---|
1280 | 1212 | } |
---|
1281 | 1213 | |
---|
1282 | | - /* log I/O is always issued ASYNC */ |
---|
1283 | | - ASSERT(bp->b_flags & XBF_ASYNC); |
---|
1284 | | - xlog_state_done_syncing(iclog, aborted); |
---|
| 1214 | + xlog_state_done_syncing(iclog); |
---|
| 1215 | + bio_uninit(&iclog->ic_bio); |
---|
1285 | 1216 | |
---|
1286 | 1217 | /* |
---|
1287 | | - * drop the buffer lock now that we are done. Nothing references |
---|
1288 | | - * the buffer after this, so an unmount waiting on this lock can now |
---|
1289 | | - * tear it down safely. As such, it is unsafe to reference the buffer |
---|
1290 | | - * (bp) after the unlock as we could race with it being freed. |
---|
| 1218 | + * Drop the lock to signal that we are done. Nothing references the |
---|
| 1219 | + * iclog after this, so an unmount waiting on this lock can now tear it |
---|
| 1220 | + * down safely. As such, it is unsafe to reference the iclog after the |
---|
| 1221 | + * unlock as we could race with it being freed. |
---|
1291 | 1222 | */ |
---|
1292 | | - xfs_buf_unlock(bp); |
---|
| 1223 | + up(&iclog->ic_sema); |
---|
1293 | 1224 | } |
---|
1294 | 1225 | |
---|
1295 | 1226 | /* |
---|
.. | .. |
---|
1300 | 1231 | * If the filesystem blocksize is too large, we may need to choose a |
---|
1301 | 1232 | * larger size since the directory code currently logs entire blocks. |
---|
1302 | 1233 | */ |
---|
1303 | | - |
---|
1304 | 1234 | STATIC void |
---|
1305 | 1235 | xlog_get_iclog_buffer_size( |
---|
1306 | 1236 | struct xfs_mount *mp, |
---|
1307 | 1237 | struct xlog *log) |
---|
1308 | 1238 | { |
---|
1309 | | - int size; |
---|
1310 | | - int xhdrs; |
---|
1311 | | - |
---|
1312 | 1239 | if (mp->m_logbufs <= 0) |
---|
1313 | | - log->l_iclog_bufs = XLOG_MAX_ICLOGS; |
---|
1314 | | - else |
---|
1315 | | - log->l_iclog_bufs = mp->m_logbufs; |
---|
| 1240 | + mp->m_logbufs = XLOG_MAX_ICLOGS; |
---|
| 1241 | + if (mp->m_logbsize <= 0) |
---|
| 1242 | + mp->m_logbsize = XLOG_BIG_RECORD_BSIZE; |
---|
| 1243 | + |
---|
| 1244 | + log->l_iclog_bufs = mp->m_logbufs; |
---|
| 1245 | + log->l_iclog_size = mp->m_logbsize; |
---|
1316 | 1246 | |
---|
1317 | 1247 | /* |
---|
1318 | | - * Buffer size passed in from mount system call. |
---|
| 1248 | + * # headers = size / 32k - one header holds cycles from 32k of data. |
---|
1319 | 1249 | */ |
---|
1320 | | - if (mp->m_logbsize > 0) { |
---|
1321 | | - size = log->l_iclog_size = mp->m_logbsize; |
---|
1322 | | - log->l_iclog_size_log = 0; |
---|
1323 | | - while (size != 1) { |
---|
1324 | | - log->l_iclog_size_log++; |
---|
1325 | | - size >>= 1; |
---|
1326 | | - } |
---|
1327 | | - |
---|
1328 | | - if (xfs_sb_version_haslogv2(&mp->m_sb)) { |
---|
1329 | | - /* # headers = size / 32k |
---|
1330 | | - * one header holds cycles from 32k of data |
---|
1331 | | - */ |
---|
1332 | | - |
---|
1333 | | - xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE; |
---|
1334 | | - if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE) |
---|
1335 | | - xhdrs++; |
---|
1336 | | - log->l_iclog_hsize = xhdrs << BBSHIFT; |
---|
1337 | | - log->l_iclog_heads = xhdrs; |
---|
1338 | | - } else { |
---|
1339 | | - ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE); |
---|
1340 | | - log->l_iclog_hsize = BBSIZE; |
---|
1341 | | - log->l_iclog_heads = 1; |
---|
1342 | | - } |
---|
1343 | | - goto done; |
---|
1344 | | - } |
---|
1345 | | - |
---|
1346 | | - /* All machines use 32kB buffers by default. */ |
---|
1347 | | - log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; |
---|
1348 | | - log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; |
---|
1349 | | - |
---|
1350 | | - /* the default log size is 16k or 32k which is one header sector */ |
---|
1351 | | - log->l_iclog_hsize = BBSIZE; |
---|
1352 | | - log->l_iclog_heads = 1; |
---|
1353 | | - |
---|
1354 | | -done: |
---|
1355 | | - /* are we being asked to make the sizes selected above visible? */ |
---|
1356 | | - if (mp->m_logbufs == 0) |
---|
1357 | | - mp->m_logbufs = log->l_iclog_bufs; |
---|
1358 | | - if (mp->m_logbsize == 0) |
---|
1359 | | - mp->m_logbsize = log->l_iclog_size; |
---|
1360 | | -} /* xlog_get_iclog_buffer_size */ |
---|
1361 | | - |
---|
| 1250 | + log->l_iclog_heads = |
---|
| 1251 | + DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE); |
---|
| 1252 | + log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT; |
---|
| 1253 | +} |
---|
1362 | 1254 | |
---|
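For reference, a worked example of the simplified sizing. The values assume the usual constants (XLOG_HEADER_CYCLE_SIZE is 32k, BBSHIFT is 9) and illustrative logbsize settings, not anything taken from the patch:

    /*
     * mp->m_logbsize = 32768 (the default):
     *   l_iclog_heads = DIV_ROUND_UP(32768, 32768)  = 1
     *   l_iclog_hsize = 1 << BBSHIFT                = 512 bytes
     *
     * mp->m_logbsize = 262144 (logbsize=256k mount option):
     *   l_iclog_heads = DIV_ROUND_UP(262144, 32768) = 8
     *   l_iclog_hsize = 8 << BBSHIFT                = 4096 bytes
     *
     * This matches what the removed v2-log branch computed with its
     * divide-and-remainder sequence, just in two lines.
     */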
1363 | 1255 | void |
---|
1364 | 1256 | xfs_log_work_queue( |
---|
.. | .. |
---|
1421 | 1313 | xlog_rec_header_t *head; |
---|
1422 | 1314 | xlog_in_core_t **iclogp; |
---|
1423 | 1315 | xlog_in_core_t *iclog, *prev_iclog=NULL; |
---|
1424 | | - xfs_buf_t *bp; |
---|
1425 | 1316 | int i; |
---|
1426 | 1317 | int error = -ENOMEM; |
---|
1427 | 1318 | uint log2_size = 0; |
---|
.. | .. |
---|
1479 | 1370 | |
---|
1480 | 1371 | xlog_get_iclog_buffer_size(mp, log); |
---|
1481 | 1372 | |
---|
1482 | | - /* |
---|
1483 | | - * Use a NULL block for the extra log buffer used during splits so that |
---|
1484 | | - * it will trigger errors if we ever try to do IO on it without first |
---|
1485 | | - * having set it up properly. |
---|
1486 | | - */ |
---|
1487 | | - error = -ENOMEM; |
---|
1488 | | - bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL, |
---|
1489 | | - BTOBB(log->l_iclog_size), XBF_NO_IOACCT); |
---|
1490 | | - if (!bp) |
---|
1491 | | - goto out_free_log; |
---|
1492 | | - |
---|
1493 | | - /* |
---|
1494 | | - * The iclogbuf buffer locks are held over IO but we are not going to do |
---|
1495 | | - * IO yet. Hence unlock the buffer so that the log IO path can grab it |
---|
1496 | | - * when appropriately. |
---|
1497 | | - */ |
---|
1498 | | - ASSERT(xfs_buf_islocked(bp)); |
---|
1499 | | - xfs_buf_unlock(bp); |
---|
1500 | | - |
---|
1501 | | - /* use high priority wq for log I/O completion */ |
---|
1502 | | - bp->b_ioend_wq = mp->m_log_workqueue; |
---|
1503 | | - bp->b_iodone = xlog_iodone; |
---|
1504 | | - log->l_xbuf = bp; |
---|
1505 | | - |
---|
1506 | 1373 | spin_lock_init(&log->l_icloglock); |
---|
1507 | 1374 | init_waitqueue_head(&log->l_flush_wait); |
---|
1508 | 1375 | |
---|
.. | .. |
---|
1515 | 1382 | * xlog_in_core_t in xfs_log_priv.h for details. |
---|
1516 | 1383 | */ |
---|
1517 | 1384 | ASSERT(log->l_iclog_size >= 4096); |
---|
1518 | | - for (i=0; i < log->l_iclog_bufs; i++) { |
---|
1519 | | - *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); |
---|
1520 | | - if (!*iclogp) |
---|
| 1385 | + for (i = 0; i < log->l_iclog_bufs; i++) { |
---|
| 1386 | + int align_mask = xfs_buftarg_dma_alignment(mp->m_logdev_targp); |
---|
| 1387 | + size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) * |
---|
| 1388 | + sizeof(struct bio_vec); |
---|
| 1389 | + |
---|
| 1390 | + iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL); |
---|
| 1391 | + if (!iclog) |
---|
1521 | 1392 | goto out_free_iclog; |
---|
1522 | 1393 | |
---|
1523 | | - iclog = *iclogp; |
---|
| 1394 | + *iclogp = iclog; |
---|
1524 | 1395 | iclog->ic_prev = prev_iclog; |
---|
1525 | 1396 | prev_iclog = iclog; |
---|
1526 | 1397 | |
---|
1527 | | - bp = xfs_buf_get_uncached(mp->m_logdev_targp, |
---|
1528 | | - BTOBB(log->l_iclog_size), |
---|
1529 | | - XBF_NO_IOACCT); |
---|
1530 | | - if (!bp) |
---|
| 1398 | + iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask, |
---|
| 1399 | + KM_MAYFAIL | KM_ZERO); |
---|
| 1400 | + if (!iclog->ic_data) |
---|
1531 | 1401 | goto out_free_iclog; |
---|
1532 | | - |
---|
1533 | | - ASSERT(xfs_buf_islocked(bp)); |
---|
1534 | | - xfs_buf_unlock(bp); |
---|
1535 | | - |
---|
1536 | | - /* use high priority wq for log I/O completion */ |
---|
1537 | | - bp->b_ioend_wq = mp->m_log_workqueue; |
---|
1538 | | - bp->b_iodone = xlog_iodone; |
---|
1539 | | - iclog->ic_bp = bp; |
---|
1540 | | - iclog->ic_data = bp->b_addr; |
---|
1541 | 1402 | #ifdef DEBUG |
---|
1542 | 1403 | log->l_iclog_bak[i] = &iclog->ic_header; |
---|
1543 | 1404 | #endif |
---|
.. | .. |
---|
1551 | 1412 | head->h_fmt = cpu_to_be32(XLOG_FMT); |
---|
1552 | 1413 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); |
---|
1553 | 1414 | |
---|
1554 | | - iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize; |
---|
| 1415 | + iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize; |
---|
1555 | 1416 | iclog->ic_state = XLOG_STATE_ACTIVE; |
---|
1556 | 1417 | iclog->ic_log = log; |
---|
1557 | 1418 | atomic_set(&iclog->ic_refcnt, 0); |
---|
1558 | 1419 | spin_lock_init(&iclog->ic_callback_lock); |
---|
1559 | | - iclog->ic_callback_tail = &(iclog->ic_callback); |
---|
| 1420 | + INIT_LIST_HEAD(&iclog->ic_callbacks); |
---|
1560 | 1421 | iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; |
---|
1561 | 1422 | |
---|
1562 | 1423 | init_waitqueue_head(&iclog->ic_force_wait); |
---|
1563 | 1424 | init_waitqueue_head(&iclog->ic_write_wait); |
---|
| 1425 | + INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work); |
---|
| 1426 | + sema_init(&iclog->ic_sema, 1); |
---|
1564 | 1427 | |
---|
1565 | 1428 | iclogp = &iclog->ic_next; |
---|
1566 | 1429 | } |
---|
1567 | 1430 | *iclogp = log->l_iclog; /* complete ring */ |
---|
1568 | 1431 | log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ |
---|
1569 | 1432 | |
---|
| 1433 | + log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s", |
---|
| 1434 | + WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0, |
---|
| 1435 | + mp->m_super->s_id); |
---|
| 1436 | + if (!log->l_ioend_workqueue) |
---|
| 1437 | + goto out_free_iclog; |
---|
| 1438 | + |
---|
1570 | 1439 | error = xlog_cil_init(log); |
---|
1571 | 1440 | if (error) |
---|
1572 | | - goto out_free_iclog; |
---|
| 1441 | + goto out_destroy_workqueue; |
---|
1573 | 1442 | return log; |
---|
1574 | 1443 | |
---|
| 1444 | +out_destroy_workqueue: |
---|
| 1445 | + destroy_workqueue(log->l_ioend_workqueue); |
---|
1575 | 1446 | out_free_iclog: |
---|
1576 | 1447 | for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { |
---|
1577 | 1448 | prev_iclog = iclog->ic_next; |
---|
1578 | | - if (iclog->ic_bp) |
---|
1579 | | - xfs_buf_free(iclog->ic_bp); |
---|
| 1449 | + kmem_free(iclog->ic_data); |
---|
1580 | 1450 | kmem_free(iclog); |
---|
1581 | 1451 | if (prev_iclog == log->l_iclog) |
---|
1582 | 1452 | break; |
---|
1583 | 1453 | } |
---|
1584 | | - spinlock_destroy(&log->l_icloglock); |
---|
1585 | | - xfs_buf_free(log->l_xbuf); |
---|
1586 | 1454 | out_free_log: |
---|
1587 | 1455 | kmem_free(log); |
---|
1588 | 1456 | out: |
---|
1589 | 1457 | return ERR_PTR(error); |
---|
1590 | 1458 | } /* xlog_alloc_log */ |
---|
1591 | 1459 | |
---|
1592 | | - |
---|
1593 | 1460 | /* |
---|
1594 | 1461 | * Write out the commit record of a transaction associated with the given |
---|
1595 | | - * ticket. Return the lsn of the commit record. |
---|
| 1462 | + * ticket to close off a running log write. Return the lsn of the commit record. |
---|
1596 | 1463 | */ |
---|
1597 | | -STATIC int |
---|
| 1464 | +int |
---|
1598 | 1465 | xlog_commit_record( |
---|
1599 | 1466 | struct xlog *log, |
---|
1600 | 1467 | struct xlog_ticket *ticket, |
---|
1601 | 1468 | struct xlog_in_core **iclog, |
---|
1602 | | - xfs_lsn_t *commitlsnp) |
---|
| 1469 | + xfs_lsn_t *lsn) |
---|
1603 | 1470 | { |
---|
1604 | | - struct xfs_mount *mp = log->l_mp; |
---|
1605 | | - int error; |
---|
1606 | 1471 | struct xfs_log_iovec reg = { |
---|
1607 | 1472 | .i_addr = NULL, |
---|
1608 | 1473 | .i_len = 0, |
---|
.. | .. |
---|
1612 | 1477 | .lv_niovecs = 1, |
---|
1613 | 1478 | .lv_iovecp = ®, |
---|
1614 | 1479 | }; |
---|
| 1480 | + int error; |
---|
1615 | 1481 | |
---|
1616 | | - ASSERT_ALWAYS(iclog); |
---|
1617 | | - error = xlog_write(log, &vec, ticket, commitlsnp, iclog, |
---|
1618 | | - XLOG_COMMIT_TRANS); |
---|
| 1482 | + if (XLOG_FORCED_SHUTDOWN(log)) |
---|
| 1483 | + return -EIO; |
---|
| 1484 | + |
---|
| 1485 | + error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS, |
---|
| 1486 | + false); |
---|
1619 | 1487 | if (error) |
---|
1620 | | - xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
---|
| 1488 | + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); |
---|
1621 | 1489 | return error; |
---|
1622 | 1490 | } |
---|
1623 | 1491 | |
---|
1624 | 1492 | /* |
---|
1625 | | - * Push on the buffer cache code if we ever use more than 75% of the on-disk |
---|
1626 | | - * log space. This code pushes on the lsn which would supposedly free up |
---|
1627 | | - * the 25% which we want to leave free. We may need to adopt a policy which |
---|
1628 | | - * pushes on an lsn which is further along in the log once we reach the high |
---|
1629 | | - * water mark. In this manner, we would be creating a low water mark. |
---|
| 1493 | + * Compute the LSN that we'd need to push the log tail towards in order to have |
---|
| 1494 | + * (a) enough on-disk log space to log the number of bytes specified, (b) at |
---|
| 1495 | + * least 25% of the log space free, and (c) at least 256 blocks free. If the |
---|
| 1496 | + * log free space already meets all three thresholds, this function returns |
---|
| 1497 | + * NULLCOMMITLSN. |
---|
1630 | 1498 | */ |
---|
1631 | | -STATIC void |
---|
1632 | | -xlog_grant_push_ail( |
---|
| 1499 | +xfs_lsn_t |
---|
| 1500 | +xlog_grant_push_threshold( |
---|
1633 | 1501 | struct xlog *log, |
---|
1634 | 1502 | int need_bytes) |
---|
1635 | 1503 | { |
---|
.. | .. |
---|
1655 | 1523 | free_threshold = max(free_threshold, (log->l_logBBsize >> 2)); |
---|
1656 | 1524 | free_threshold = max(free_threshold, 256); |
---|
1657 | 1525 | if (free_blocks >= free_threshold) |
---|
1658 | | - return; |
---|
| 1526 | + return NULLCOMMITLSN; |
---|
1659 | 1527 | |
---|
1660 | 1528 | xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle, |
---|
1661 | 1529 | &threshold_block); |
---|
.. | .. |
---|
1675 | 1543 | if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0) |
---|
1676 | 1544 | threshold_lsn = last_sync_lsn; |
---|
1677 | 1545 | |
---|
| 1546 | + return threshold_lsn; |
---|
| 1547 | +} |
---|
| 1548 | + |
---|
| 1549 | +/* |
---|
| 1550 | + * Push the tail of the log if we need to do so to maintain the free log space |
---|
| 1551 | + * thresholds set out by xlog_grant_push_threshold. We may need to adopt a |
---|
| 1552 | + * policy which pushes on an lsn which is further along in the log once we |
---|
| 1553 | + * reach the high water mark. In this manner, we would be creating a low water |
---|
| 1554 | + * mark. |
---|
| 1555 | + */ |
---|
| 1556 | +STATIC void |
---|
| 1557 | +xlog_grant_push_ail( |
---|
| 1558 | + struct xlog *log, |
---|
| 1559 | + int need_bytes) |
---|
| 1560 | +{ |
---|
| 1561 | + xfs_lsn_t threshold_lsn; |
---|
| 1562 | + |
---|
| 1563 | + threshold_lsn = xlog_grant_push_threshold(log, need_bytes); |
---|
| 1564 | + if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log)) |
---|
| 1565 | + return; |
---|
| 1566 | + |
---|
1678 | 1567 | /* |
---|
1679 | 1568 | * Get the transaction layer to kick the dirty buffers out to |
---|
1680 | 1569 | * disk asynchronously. No point in trying to do this if |
---|
1681 | 1570 | * the filesystem is shutting down. |
---|
1682 | 1571 | */ |
---|
1683 | | - if (!XLOG_FORCED_SHUTDOWN(log)) |
---|
1684 | | - xfs_ail_push(log->l_ailp, threshold_lsn); |
---|
| 1572 | + xfs_ail_push(log->l_ailp, threshold_lsn); |
---|
1685 | 1573 | } |
---|
1686 | 1574 | |
---|
1687 | 1575 | /* |
---|
.. | .. |
---|
1751 | 1639 | int i; |
---|
1752 | 1640 | int xheads; |
---|
1753 | 1641 | |
---|
1754 | | - xheads = size / XLOG_HEADER_CYCLE_SIZE; |
---|
1755 | | - if (size % XLOG_HEADER_CYCLE_SIZE) |
---|
1756 | | - xheads++; |
---|
| 1642 | + xheads = DIV_ROUND_UP(size, XLOG_HEADER_CYCLE_SIZE); |
---|
1757 | 1643 | |
---|
1758 | 1644 | for (i = 1; i < xheads; i++) { |
---|
1759 | 1645 | crc = crc32c(crc, &xhdr[i].hic_xheader, |
---|
.. | .. |
---|
1767 | 1653 | return xfs_end_cksum(crc); |
---|
1768 | 1654 | } |
---|
1769 | 1655 | |
---|
1770 | | -/* |
---|
1771 | | - * The bdstrat callback function for log bufs. This gives us a central |
---|
1772 | | - * place to trap bufs in case we get hit by a log I/O error and need to |
---|
1773 | | - * shutdown. Actually, in practice, even when we didn't get a log error, |
---|
1774 | | - * we transition the iclogs to IOERROR state *after* flushing all existing |
---|
1775 | | - * iclogs to disk. This is because we don't want anymore new transactions to be |
---|
1776 | | - * started or completed afterwards. |
---|
1777 | | - * |
---|
1778 | | - * We lock the iclogbufs here so that we can serialise against IO completion |
---|
1779 | | - * during unmount. We might be processing a shutdown triggered during unmount, |
---|
1780 | | - * and that can occur asynchronously to the unmount thread, and hence we need to |
---|
1781 | | - * ensure that completes before tearing down the iclogbufs. Hence we need to |
---|
1782 | | - * hold the buffer lock across the log IO to acheive that. |
---|
1783 | | - */ |
---|
1784 | | -STATIC int |
---|
1785 | | -xlog_bdstrat( |
---|
1786 | | - struct xfs_buf *bp) |
---|
| 1656 | +static void |
---|
| 1657 | +xlog_bio_end_io( |
---|
| 1658 | + struct bio *bio) |
---|
1787 | 1659 | { |
---|
1788 | | - struct xlog_in_core *iclog = bp->b_log_item; |
---|
| 1660 | + struct xlog_in_core *iclog = bio->bi_private; |
---|
1789 | 1661 | |
---|
1790 | | - xfs_buf_lock(bp); |
---|
1791 | | - if (iclog->ic_state & XLOG_STATE_IOERROR) { |
---|
1792 | | - xfs_buf_ioerror(bp, -EIO); |
---|
1793 | | - xfs_buf_stale(bp); |
---|
1794 | | - xfs_buf_ioend(bp); |
---|
| 1662 | + queue_work(iclog->ic_log->l_ioend_workqueue, |
---|
| 1663 | + &iclog->ic_end_io_work); |
---|
| 1664 | +} |
---|
| 1665 | + |
---|
| 1666 | +static int |
---|
| 1667 | +xlog_map_iclog_data( |
---|
| 1668 | + struct bio *bio, |
---|
| 1669 | + void *data, |
---|
| 1670 | + size_t count) |
---|
| 1671 | +{ |
---|
| 1672 | + do { |
---|
| 1673 | + struct page *page = kmem_to_page(data); |
---|
| 1674 | + unsigned int off = offset_in_page(data); |
---|
| 1675 | + size_t len = min_t(size_t, count, PAGE_SIZE - off); |
---|
| 1676 | + |
---|
| 1677 | + if (bio_add_page(bio, page, len, off) != len) |
---|
| 1678 | + return -EIO; |
---|
| 1679 | + |
---|
| 1680 | + data += len; |
---|
| 1681 | + count -= len; |
---|
| 1682 | + } while (count); |
---|
| 1683 | + |
---|
| 1684 | + return 0; |
---|
| 1685 | +} |
---|
| 1686 | + |
---|
| 1687 | +STATIC void |
---|
| 1688 | +xlog_write_iclog( |
---|
| 1689 | + struct xlog *log, |
---|
| 1690 | + struct xlog_in_core *iclog, |
---|
| 1691 | + uint64_t bno, |
---|
| 1692 | + unsigned int count, |
---|
| 1693 | + bool need_flush) |
---|
| 1694 | +{ |
---|
| 1695 | + ASSERT(bno < log->l_logBBsize); |
---|
| 1696 | + |
---|
| 1697 | + /* |
---|
| 1698 | + * We lock the iclogbufs here so that we can serialise against I/O |
---|
| 1699 | + * completion during unmount. We might be processing a shutdown |
---|
| 1700 | + * triggered during unmount, and that can occur asynchronously to the |
---|
| 1701 | + * unmount thread, and hence we need to ensure that completes before |
---|
| 1702 | + * tearing down the iclogbufs. Hence we need to hold the buffer lock |
---|
| 1703 | + * across the log IO to achieve that. |
---|
| 1704 | + */ |
---|
| 1705 | + down(&iclog->ic_sema); |
---|
| 1706 | + if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) { |
---|
1795 | 1707 | /* |
---|
1796 | 1708 | * It would seem logical to return EIO here, but we rely on |
---|
1797 | 1709 | * the log state machine to propagate I/O errors instead of |
---|
1798 | | - * doing it here. Similarly, IO completion will unlock the |
---|
1799 | | - * buffer, so we don't do it here. |
---|
| 1710 | + * doing it here. We kick off the state machine and unlock |
---|
| 1711 | + * the buffer manually; the code needs to be kept in sync |
---|
| 1712 | + * with the I/O completion path. |
---|
1800 | 1713 | */ |
---|
1801 | | - return 0; |
---|
| 1714 | + xlog_state_done_syncing(iclog); |
---|
| 1715 | + up(&iclog->ic_sema); |
---|
| 1716 | + return; |
---|
1802 | 1717 | } |
---|
1803 | 1718 | |
---|
1804 | | - xfs_buf_submit(bp); |
---|
1805 | | - return 0; |
---|
| 1719 | + bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE)); |
---|
| 1720 | + bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev); |
---|
| 1721 | + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; |
---|
| 1722 | + iclog->ic_bio.bi_end_io = xlog_bio_end_io; |
---|
| 1723 | + iclog->ic_bio.bi_private = iclog; |
---|
| 1724 | + |
---|
| 1725 | + /* |
---|
| 1726 | + * We use REQ_SYNC | REQ_IDLE here to tell the block layer the are more |
---|
| 1727 | + * IOs coming immediately after this one. This prevents the block layer |
---|
| 1728 | + * writeback throttle from throttling log writes behind background |
---|
| 1729 | + * metadata writeback and causing priority inversions. |
---|
| 1730 | + */ |
---|
| 1731 | + iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | |
---|
| 1732 | + REQ_IDLE | REQ_FUA; |
---|
| 1733 | + if (need_flush) |
---|
| 1734 | + iclog->ic_bio.bi_opf |= REQ_PREFLUSH; |
---|
| 1735 | + |
---|
| 1736 | + if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { |
---|
| 1737 | + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); |
---|
| 1738 | + return; |
---|
| 1739 | + } |
---|
| 1740 | + if (is_vmalloc_addr(iclog->ic_data)) |
---|
| 1741 | + flush_kernel_vmap_range(iclog->ic_data, count); |
---|
| 1742 | + |
---|
| 1743 | + /* |
---|
| 1744 | + * If this log buffer would straddle the end of the log we will have |
---|
| 1745 | + * to split it up into two bios, so that we can continue at the start. |
---|
| 1746 | + */ |
---|
| 1747 | + if (bno + BTOBB(count) > log->l_logBBsize) { |
---|
| 1748 | + struct bio *split; |
---|
| 1749 | + |
---|
| 1750 | + split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno, |
---|
| 1751 | + GFP_NOIO, &fs_bio_set); |
---|
| 1752 | + bio_chain(split, &iclog->ic_bio); |
---|
| 1753 | + submit_bio(split); |
---|
| 1754 | + |
---|
| 1755 | + /* restart at logical offset zero for the remainder */ |
---|
| 1756 | + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart; |
---|
| 1757 | + } |
---|
| 1758 | + |
---|
| 1759 | + submit_bio(&iclog->ic_bio); |
---|
| 1760 | +} |
---|
| 1761 | + |
---|
| 1762 | +/* |
---|
| 1763 | + * We need to bump cycle number for the part of the iclog that is |
---|
| 1764 | + * written to the start of the log. Watch out for the header magic |
---|
| 1765 | + * number case, though. |
---|
| 1766 | + */ |
---|
| 1767 | +static void |
---|
| 1768 | +xlog_split_iclog( |
---|
| 1769 | + struct xlog *log, |
---|
| 1770 | + void *data, |
---|
| 1771 | + uint64_t bno, |
---|
| 1772 | + unsigned int count) |
---|
| 1773 | +{ |
---|
| 1774 | + unsigned int split_offset = BBTOB(log->l_logBBsize - bno); |
---|
| 1775 | + unsigned int i; |
---|
| 1776 | + |
---|
| 1777 | + for (i = split_offset; i < count; i += BBSIZE) { |
---|
| 1778 | + uint32_t cycle = get_unaligned_be32(data + i); |
---|
| 1779 | + |
---|
| 1780 | + if (++cycle == XLOG_HEADER_MAGIC_NUM) |
---|
| 1781 | + cycle++; |
---|
| 1782 | + put_unaligned_be32(cycle, data + i); |
---|
| 1783 | + } |
---|
| 1784 | +} |
---|
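To make the wrap handling concrete, a small worked example; the block counts are made up for illustration, only the helpers named are from the patch:

    /*
     * Assume l_logBBsize = 100 basic blocks, bno = 98, count = 4096 bytes
     * (8 basic blocks), so bno + BTOBB(count) = 106 > 100 and the write wraps:
     *
     *   xlog_split_iclog(): split_offset = BBTOB(100 - 98) = 1024, so the
     *   cycle word at the start of every 512-byte sector from offset 1024
     *   onwards (the part that will land at block 0) is bumped by one,
     *   skipping XLOG_HEADER_MAGIC_NUM, so recovery can tell that data
     *   belongs to the next cycle.
     *
     *   xlog_write_iclog(): bio_split() carves off the first 2 basic blocks
     *   for the end of the log, bio_chain() ties that completion to the
     *   parent bio, and the remainder is submitted again starting at
     *   l_logBBstart (the physical start of the log).
     */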
| 1785 | + |
---|
| 1786 | +static int |
---|
| 1787 | +xlog_calc_iclog_size( |
---|
| 1788 | + struct xlog *log, |
---|
| 1789 | + struct xlog_in_core *iclog, |
---|
| 1790 | + uint32_t *roundoff) |
---|
| 1791 | +{ |
---|
| 1792 | + uint32_t count_init, count; |
---|
| 1793 | + bool use_lsunit; |
---|
| 1794 | + |
---|
| 1795 | + use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) && |
---|
| 1796 | + log->l_mp->m_sb.sb_logsunit > 1; |
---|
| 1797 | + |
---|
| 1798 | + /* Add for LR header */ |
---|
| 1799 | + count_init = log->l_iclog_hsize + iclog->ic_offset; |
---|
| 1800 | + |
---|
| 1801 | + /* Round out the log write size */ |
---|
| 1802 | + if (use_lsunit) { |
---|
| 1803 | + /* we have a v2 stripe unit to use */ |
---|
| 1804 | + count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); |
---|
| 1805 | + } else { |
---|
| 1806 | + count = BBTOB(BTOBB(count_init)); |
---|
| 1807 | + } |
---|
| 1808 | + |
---|
| 1809 | + ASSERT(count >= count_init); |
---|
| 1810 | + *roundoff = count - count_init; |
---|
| 1811 | + |
---|
| 1812 | + if (use_lsunit) |
---|
| 1813 | + ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit); |
---|
| 1814 | + else |
---|
| 1815 | + ASSERT(*roundoff < BBTOB(1)); |
---|
| 1816 | + return count; |
---|
1806 | 1817 | } |
---|
1807 | 1818 | |
---|
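A worked example of the rounding that xlog_calc_iclog_size() now isolates; the byte counts are illustrative, not from the patch:

    /*
     * Assume l_iclog_hsize = 512 and iclog->ic_offset = 7000, so
     * count_init = 7512:
     *
     *   v1 log, or sb_logsunit <= 1:
     *     count    = BBTOB(BTOBB(7512)) = 7680   (round up to 512 bytes)
     *     roundoff = 168                         (< BBTOB(1))
     *
     *   v2 log with sb_logsunit = 32768:
     *     count    = XLOG_LSUNITTOB(XLOG_BTOLSUNIT(7512)) = 32768
     *     roundoff = 25256                       (< sb_logsunit)
     *
     * xlog_sync() then moves the grant heads forward by roundoff so the
     * in-core space accounting matches the padded on-disk write.
     */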
1808 | 1819 | /* |
---|
.. | .. |
---|
1825 | 1836 | * log will require grabbing the lock though. |
---|
1826 | 1837 | * |
---|
1827 | 1838 | * The entire log manager uses a logical block numbering scheme. Only |
---|
1828 | | - * log_sync (and then only bwrite()) know about the fact that the log may |
---|
1829 | | - * not start with block zero on a given device. The log block start offset |
---|
1830 | | - * is added immediately before calling bwrite(). |
---|
| 1839 | + * xlog_write_iclog knows about the fact that the log may not start with |
---|
| 1840 | + * block zero on a given device. |
---|
1831 | 1841 | */ |
---|
1832 | | - |
---|
1833 | | -STATIC int |
---|
| 1842 | +STATIC void |
---|
1834 | 1843 | xlog_sync( |
---|
1835 | 1844 | struct xlog *log, |
---|
1836 | 1845 | struct xlog_in_core *iclog) |
---|
1837 | 1846 | { |
---|
1838 | | - xfs_buf_t *bp; |
---|
1839 | | - int i; |
---|
1840 | | - uint count; /* byte count of bwrite */ |
---|
1841 | | - uint count_init; /* initial count before roundup */ |
---|
1842 | | - int roundoff; /* roundoff to BB or stripe */ |
---|
1843 | | - int split = 0; /* split write into two regions */ |
---|
1844 | | - int error; |
---|
1845 | | - int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); |
---|
1846 | | - int size; |
---|
| 1847 | + unsigned int count; /* byte count of bwrite */ |
---|
| 1848 | + unsigned int roundoff; /* roundoff to BB or stripe */ |
---|
| 1849 | + uint64_t bno; |
---|
| 1850 | + unsigned int size; |
---|
| 1851 | + bool need_flush = true, split = false; |
---|
1847 | 1852 | |
---|
1848 | | - XFS_STATS_INC(log->l_mp, xs_log_writes); |
---|
1849 | 1853 | ASSERT(atomic_read(&iclog->ic_refcnt) == 0); |
---|
1850 | 1854 | |
---|
1851 | | - /* Add for LR header */ |
---|
1852 | | - count_init = log->l_iclog_hsize + iclog->ic_offset; |
---|
1853 | | - |
---|
1854 | | - /* Round out the log write size */ |
---|
1855 | | - if (v2 && log->l_mp->m_sb.sb_logsunit > 1) { |
---|
1856 | | - /* we have a v2 stripe unit to use */ |
---|
1857 | | - count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); |
---|
1858 | | - } else { |
---|
1859 | | - count = BBTOB(BTOBB(count_init)); |
---|
1860 | | - } |
---|
1861 | | - roundoff = count - count_init; |
---|
1862 | | - ASSERT(roundoff >= 0); |
---|
1863 | | - ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 && |
---|
1864 | | - roundoff < log->l_mp->m_sb.sb_logsunit) |
---|
1865 | | - || |
---|
1866 | | - (log->l_mp->m_sb.sb_logsunit <= 1 && |
---|
1867 | | - roundoff < BBTOB(1))); |
---|
| 1855 | + count = xlog_calc_iclog_size(log, iclog, &roundoff); |
---|
1868 | 1856 | |
---|
1869 | 1857 | /* move grant heads by roundoff in sync */ |
---|
1870 | 1858 | xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); |
---|
.. | .. |
---|
1875 | 1863 | |
---|
1876 | 1864 | /* real byte length */ |
---|
1877 | 1865 | size = iclog->ic_offset; |
---|
1878 | | - if (v2) |
---|
| 1866 | + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) |
---|
1879 | 1867 | size += roundoff; |
---|
1880 | 1868 | iclog->ic_header.h_len = cpu_to_be32(size); |
---|
1881 | 1869 | |
---|
1882 | | - bp = iclog->ic_bp; |
---|
1883 | | - XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); |
---|
1884 | | - |
---|
| 1870 | + XFS_STATS_INC(log->l_mp, xs_log_writes); |
---|
1885 | 1871 | XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); |
---|
1886 | 1872 | |
---|
| 1873 | + bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)); |
---|
| 1874 | + |
---|
1887 | 1875 | /* Do we need to split this write into 2 parts? */ |
---|
1888 | | - if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { |
---|
1889 | | - char *dptr; |
---|
1890 | | - |
---|
1891 | | - split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp))); |
---|
1892 | | - count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)); |
---|
1893 | | - iclog->ic_bwritecnt = 2; |
---|
1894 | | - |
---|
1895 | | - /* |
---|
1896 | | - * Bump the cycle numbers at the start of each block in the |
---|
1897 | | - * part of the iclog that ends up in the buffer that gets |
---|
1898 | | - * written to the start of the log. |
---|
1899 | | - * |
---|
1900 | | - * Watch out for the header magic number case, though. |
---|
1901 | | - */ |
---|
1902 | | - dptr = (char *)&iclog->ic_header + count; |
---|
1903 | | - for (i = 0; i < split; i += BBSIZE) { |
---|
1904 | | - uint32_t cycle = be32_to_cpu(*(__be32 *)dptr); |
---|
1905 | | - if (++cycle == XLOG_HEADER_MAGIC_NUM) |
---|
1906 | | - cycle++; |
---|
1907 | | - *(__be32 *)dptr = cpu_to_be32(cycle); |
---|
1908 | | - |
---|
1909 | | - dptr += BBSIZE; |
---|
1910 | | - } |
---|
1911 | | - } else { |
---|
1912 | | - iclog->ic_bwritecnt = 1; |
---|
| 1876 | + if (bno + BTOBB(count) > log->l_logBBsize) { |
---|
| 1877 | + xlog_split_iclog(log, &iclog->ic_header, bno, count); |
---|
| 1878 | + split = true; |
---|
1913 | 1879 | } |
---|
1914 | 1880 | |
---|
1915 | 1881 | /* calculcate the checksum */ |
---|
.. | .. |
---|
1922 | 1888 | * write on I/O completion and shutdown the fs. The subsequent mount |
---|
1923 | 1889 | * detects the bad CRC and attempts to recover. |
---|
1924 | 1890 | */ |
---|
| 1891 | +#ifdef DEBUG |
---|
1925 | 1892 | if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) { |
---|
1926 | 1893 | iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA); |
---|
1927 | | - iclog->ic_state |= XLOG_STATE_IOABORT; |
---|
| 1894 | + iclog->ic_fail_crc = true; |
---|
1928 | 1895 | xfs_warn(log->l_mp, |
---|
1929 | 1896 | "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.", |
---|
1930 | 1897 | be64_to_cpu(iclog->ic_header.h_lsn)); |
---|
1931 | 1898 | } |
---|
1932 | | - |
---|
1933 | | - bp->b_io_length = BTOBB(count); |
---|
1934 | | - bp->b_log_item = iclog; |
---|
1935 | | - bp->b_flags &= ~XBF_FLUSH; |
---|
1936 | | - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); |
---|
| 1899 | +#endif |
---|
1937 | 1900 | |
---|
1938 | 1901 | /* |
---|
1939 | 1902 | * Flush the data device before flushing the log to make sure all meta |
---|
.. | .. |
---|
1943 | 1906 | * synchronously here; for an internal log we can simply use the block |
---|
1944 | 1907 | * layer state machine for preflushes. |
---|
1945 | 1908 | */ |
---|
1946 | | - if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) |
---|
| 1909 | + if (log->l_targ != log->l_mp->m_ddev_targp || split) { |
---|
1947 | 1910 | xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); |
---|
1948 | | - else |
---|
1949 | | - bp->b_flags |= XBF_FLUSH; |
---|
1950 | | - |
---|
1951 | | - ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); |
---|
1952 | | - ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); |
---|
1953 | | - |
---|
1954 | | - xlog_verify_iclog(log, iclog, count, true); |
---|
1955 | | - |
---|
1956 | | - /* account for log which doesn't start at block #0 */ |
---|
1957 | | - XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); |
---|
1958 | | - |
---|
1959 | | - /* |
---|
1960 | | - * Don't call xfs_bwrite here. We do log-syncs even when the filesystem |
---|
1961 | | - * is shutting down. |
---|
1962 | | - */ |
---|
1963 | | - error = xlog_bdstrat(bp); |
---|
1964 | | - if (error) { |
---|
1965 | | - xfs_buf_ioerror_alert(bp, "xlog_sync"); |
---|
1966 | | - return error; |
---|
| 1911 | + need_flush = false; |
---|
1967 | 1912 | } |
---|
1968 | | - if (split) { |
---|
1969 | | - bp = iclog->ic_log->l_xbuf; |
---|
1970 | | - XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ |
---|
1971 | | - xfs_buf_associate_memory(bp, |
---|
1972 | | - (char *)&iclog->ic_header + count, split); |
---|
1973 | | - bp->b_log_item = iclog; |
---|
1974 | | - bp->b_flags &= ~XBF_FLUSH; |
---|
1975 | | - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); |
---|
1976 | 1913 | |
---|
1977 | | - ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); |
---|
1978 | | - ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); |
---|
1979 | | - |
---|
1980 | | - /* account for internal log which doesn't start at block #0 */ |
---|
1981 | | - XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); |
---|
1982 | | - error = xlog_bdstrat(bp); |
---|
1983 | | - if (error) { |
---|
1984 | | - xfs_buf_ioerror_alert(bp, "xlog_sync (split)"); |
---|
1985 | | - return error; |
---|
1986 | | - } |
---|
1987 | | - } |
---|
1988 | | - return 0; |
---|
1989 | | -} /* xlog_sync */ |
---|
| 1914 | + xlog_verify_iclog(log, iclog, count); |
---|
| 1915 | + xlog_write_iclog(log, iclog, bno, count, need_flush); |
---|
| 1916 | +} |
---|
1990 | 1917 | |
---|
1991 | 1918 | /* |
---|
1992 | 1919 | * Deallocate a log structure |
---|
.. | .. |
---|
2006 | 1933 | */ |
---|
2007 | 1934 | iclog = log->l_iclog; |
---|
2008 | 1935 | for (i = 0; i < log->l_iclog_bufs; i++) { |
---|
2009 | | - xfs_buf_lock(iclog->ic_bp); |
---|
2010 | | - xfs_buf_unlock(iclog->ic_bp); |
---|
| 1936 | + down(&iclog->ic_sema); |
---|
| 1937 | + up(&iclog->ic_sema); |
---|
2011 | 1938 | iclog = iclog->ic_next; |
---|
2012 | 1939 | } |
---|
2013 | 1940 | |
---|
2014 | | - /* |
---|
2015 | | - * Always need to ensure that the extra buffer does not point to memory |
---|
2016 | | - * owned by another log buffer before we free it. Also, cycle the lock |
---|
2017 | | - * first to ensure we've completed IO on it. |
---|
2018 | | - */ |
---|
2019 | | - xfs_buf_lock(log->l_xbuf); |
---|
2020 | | - xfs_buf_unlock(log->l_xbuf); |
---|
2021 | | - xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); |
---|
2022 | | - xfs_buf_free(log->l_xbuf); |
---|
2023 | | - |
---|
2024 | 1941 | iclog = log->l_iclog; |
---|
2025 | 1942 | for (i = 0; i < log->l_iclog_bufs; i++) { |
---|
2026 | | - xfs_buf_free(iclog->ic_bp); |
---|
2027 | 1943 | next_iclog = iclog->ic_next; |
---|
| 1944 | + kmem_free(iclog->ic_data); |
---|
2028 | 1945 | kmem_free(iclog); |
---|
2029 | 1946 | iclog = next_iclog; |
---|
2030 | 1947 | } |
---|
2031 | | - spinlock_destroy(&log->l_icloglock); |
---|
2032 | 1948 | |
---|
2033 | 1949 | log->l_mp->m_log = NULL; |
---|
| 1950 | + destroy_workqueue(log->l_ioend_workqueue); |
---|
2034 | 1951 | kmem_free(log); |
---|
2035 | | -} /* xlog_dealloc_log */ |
---|
| 1952 | +} |
---|
2036 | 1953 | |
---|
2037 | 1954 | /* |
---|
2038 | 1955 | * Update counters atomically now that memcpy is done. |
---|
2039 | 1956 | */ |
---|
2040 | | -/* ARGSUSED */ |
---|
2041 | 1957 | static inline void |
---|
2042 | 1958 | xlog_state_finish_copy( |
---|
2043 | 1959 | struct xlog *log, |
---|
.. | .. |
---|
2045 | 1961 | int record_cnt, |
---|
2046 | 1962 | int copy_bytes) |
---|
2047 | 1963 | { |
---|
2048 | | - spin_lock(&log->l_icloglock); |
---|
| 1964 | + lockdep_assert_held(&log->l_icloglock); |
---|
2049 | 1965 | |
---|
2050 | 1966 | be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt); |
---|
2051 | 1967 | iclog->ic_offset += copy_bytes; |
---|
2052 | | - |
---|
2053 | | - spin_unlock(&log->l_icloglock); |
---|
2054 | | -} /* xlog_state_finish_copy */ |
---|
2055 | | - |
---|
2056 | | - |
---|
2057 | | - |
---|
| 1968 | +} |
---|
2058 | 1969 | |
---|
2059 | 1970 | /* |
---|
2060 | 1971 | * print out info relating to regions written which consume |
---|
.. | .. |
---|
2070 | 1981 | |
---|
2071 | 1982 | /* match with XLOG_REG_TYPE_* in xfs_log.h */ |
---|
2072 | 1983 | #define REG_TYPE_STR(type, str) [XLOG_REG_TYPE_##type] = str |
---|
2073 | | - static char *res_type_str[XLOG_REG_TYPE_MAX + 1] = { |
---|
| 1984 | + static char *res_type_str[] = { |
---|
2074 | 1985 | REG_TYPE_STR(BFORMAT, "bformat"), |
---|
2075 | 1986 | REG_TYPE_STR(BCHUNK, "bchunk"), |
---|
2076 | 1987 | REG_TYPE_STR(EFI_FORMAT, "efi_format"), |
---|
.. | .. |
---|
2090 | 2001 | REG_TYPE_STR(UNMOUNT, "unmount"), |
---|
2091 | 2002 | REG_TYPE_STR(COMMIT, "commit"), |
---|
2092 | 2003 | REG_TYPE_STR(TRANSHDR, "trans header"), |
---|
2093 | | - REG_TYPE_STR(ICREATE, "inode create") |
---|
| 2004 | + REG_TYPE_STR(ICREATE, "inode create"), |
---|
| 2005 | + REG_TYPE_STR(RUI_FORMAT, "rui_format"), |
---|
| 2006 | + REG_TYPE_STR(RUD_FORMAT, "rud_format"), |
---|
| 2007 | + REG_TYPE_STR(CUI_FORMAT, "cui_format"), |
---|
| 2008 | + REG_TYPE_STR(CUD_FORMAT, "cud_format"), |
---|
| 2009 | + REG_TYPE_STR(BUI_FORMAT, "bui_format"), |
---|
| 2010 | + REG_TYPE_STR(BUD_FORMAT, "bud_format"), |
---|
2094 | 2011 | }; |
---|
| 2012 | + BUILD_BUG_ON(ARRAY_SIZE(res_type_str) != XLOG_REG_TYPE_MAX + 1); |
---|
2095 | 2013 | #undef REG_TYPE_STR |
---|
2096 | 2014 | |
---|
2097 | 2015 | xfs_warn(mp, "ticket reservation summary:"); |
---|
.. | .. |
---|
2168 | 2086 | } |
---|
2169 | 2087 | |
---|
2170 | 2088 | /* |
---|
2171 | | - * Calculate the potential space needed by the log vector. Each region gets |
---|
2172 | | - * its own xlog_op_header_t and may need to be double word aligned. |
---|
| 2089 | + * Calculate the potential space needed by the log vector. We may need a start |
---|
| 2090 | + * record, and each region gets its own struct xlog_op_header and may need to be |
---|
| 2091 | + * double word aligned. |
---|
2173 | 2092 | */ |
---|
2174 | 2093 | static int |
---|
2175 | 2094 | xlog_write_calc_vec_length( |
---|
2176 | 2095 | struct xlog_ticket *ticket, |
---|
2177 | | - struct xfs_log_vec *log_vector) |
---|
| 2096 | + struct xfs_log_vec *log_vector, |
---|
| 2097 | + bool need_start_rec) |
---|
2178 | 2098 | { |
---|
2179 | 2099 | struct xfs_log_vec *lv; |
---|
2180 | | - int headers = 0; |
---|
| 2100 | + int headers = need_start_rec ? 1 : 0; |
---|
2181 | 2101 | int len = 0; |
---|
2182 | 2102 | int i; |
---|
2183 | | - |
---|
2184 | | - /* acct for start rec of xact */ |
---|
2185 | | - if (ticket->t_flags & XLOG_TIC_INITED) |
---|
2186 | | - headers++; |
---|
2187 | 2103 | |
---|
2188 | 2104 | for (lv = log_vector; lv; lv = lv->lv_next) { |
---|
2189 | 2105 | /* we don't write ordered log vectors */ |
---|
.. | .. |
---|
2206 | 2122 | return len; |
---|
2207 | 2123 | } |
---|
2208 | 2124 | |
---|
2209 | | -/* |
---|
2210 | | - * If first write for transaction, insert start record We can't be trying to |
---|
2211 | | - * commit if we are inited. We can't have any "partial_copy" if we are inited. |
---|
2212 | | - */ |
---|
2213 | | -static int |
---|
| 2125 | +static void |
---|
2214 | 2126 | xlog_write_start_rec( |
---|
2215 | 2127 | struct xlog_op_header *ophdr, |
---|
2216 | 2128 | struct xlog_ticket *ticket) |
---|
2217 | 2129 | { |
---|
2218 | | - if (!(ticket->t_flags & XLOG_TIC_INITED)) |
---|
2219 | | - return 0; |
---|
2220 | | - |
---|
2221 | 2130 | ophdr->oh_tid = cpu_to_be32(ticket->t_tid); |
---|
2222 | 2131 | ophdr->oh_clientid = ticket->t_clientid; |
---|
2223 | 2132 | ophdr->oh_len = 0; |
---|
2224 | 2133 | ophdr->oh_flags = XLOG_START_TRANS; |
---|
2225 | 2134 | ophdr->oh_res2 = 0; |
---|
2226 | | - |
---|
2227 | | - ticket->t_flags &= ~XLOG_TIC_INITED; |
---|
2228 | | - |
---|
2229 | | - return sizeof(struct xlog_op_header); |
---|
2230 | 2135 | } |
---|
2231 | 2136 | |
---|
2232 | 2137 | static xlog_op_header_t * |
---|
.. | .. |
---|
2324 | 2229 | int log_offset, |
---|
2325 | 2230 | struct xlog_in_core **commit_iclog) |
---|
2326 | 2231 | { |
---|
| 2232 | + int error; |
---|
| 2233 | + |
---|
2327 | 2234 | if (*partial_copy) { |
---|
2328 | 2235 | /* |
---|
2329 | 2236 | * This iclog has already been marked WANT_SYNC by |
---|
2330 | 2237 | * xlog_state_get_iclog_space. |
---|
2331 | 2238 | */ |
---|
| 2239 | + spin_lock(&log->l_icloglock); |
---|
2332 | 2240 | xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); |
---|
2333 | 2241 | *record_cnt = 0; |
---|
2334 | 2242 | *data_cnt = 0; |
---|
2335 | | - return xlog_state_release_iclog(log, iclog); |
---|
| 2243 | + goto release_iclog; |
---|
2336 | 2244 | } |
---|
2337 | 2245 | |
---|
2338 | 2246 | *partial_copy = 0; |
---|
.. | .. |
---|
2340 | 2248 | |
---|
2341 | 2249 | if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { |
---|
2342 | 2250 | /* no more space in this iclog - push it. */ |
---|
| 2251 | + spin_lock(&log->l_icloglock); |
---|
2343 | 2252 | xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); |
---|
2344 | 2253 | *record_cnt = 0; |
---|
2345 | 2254 | *data_cnt = 0; |
---|
2346 | 2255 | |
---|
2347 | | - spin_lock(&log->l_icloglock); |
---|
2348 | | - xlog_state_want_sync(log, iclog); |
---|
2349 | | - spin_unlock(&log->l_icloglock); |
---|
2350 | | - |
---|
| 2256 | + if (iclog->ic_state == XLOG_STATE_ACTIVE) |
---|
| 2257 | + xlog_state_switch_iclogs(log, iclog, 0); |
---|
| 2258 | + else |
---|
| 2259 | + ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || |
---|
| 2260 | + iclog->ic_state == XLOG_STATE_IOERROR); |
---|
2351 | 2261 | if (!commit_iclog) |
---|
2352 | | - return xlog_state_release_iclog(log, iclog); |
---|
| 2262 | + goto release_iclog; |
---|
| 2263 | + spin_unlock(&log->l_icloglock); |
---|
2353 | 2264 | ASSERT(flags & XLOG_COMMIT_TRANS); |
---|
2354 | 2265 | *commit_iclog = iclog; |
---|
2355 | 2266 | } |
---|
2356 | 2267 | |
---|
2357 | 2268 | return 0; |
---|
| 2269 | + |
---|
| 2270 | +release_iclog: |
---|
| 2271 | + error = xlog_state_release_iclog(log, iclog); |
---|
| 2272 | + spin_unlock(&log->l_icloglock); |
---|
| 2273 | + return error; |
---|
2358 | 2274 | } |
---|
2359 | 2275 | |
---|
2360 | 2276 | /* |
---|
.. | .. |
---|
2404 | 2320 | struct xlog_ticket *ticket, |
---|
2405 | 2321 | xfs_lsn_t *start_lsn, |
---|
2406 | 2322 | struct xlog_in_core **commit_iclog, |
---|
2407 | | - uint flags) |
---|
| 2323 | + uint flags, |
---|
| 2324 | + bool need_start_rec) |
---|
2408 | 2325 | { |
---|
2409 | 2326 | struct xlog_in_core *iclog = NULL; |
---|
2410 | | - struct xfs_log_iovec *vecp; |
---|
2411 | | - struct xfs_log_vec *lv; |
---|
| 2327 | + struct xfs_log_vec *lv = log_vector; |
---|
| 2328 | + struct xfs_log_iovec *vecp = lv->lv_iovecp; |
---|
| 2329 | + int index = 0; |
---|
2412 | 2330 | int len; |
---|
2413 | | - int index; |
---|
2414 | 2331 | int partial_copy = 0; |
---|
2415 | 2332 | int partial_copy_len = 0; |
---|
2416 | 2333 | int contwr = 0; |
---|
2417 | 2334 | int record_cnt = 0; |
---|
2418 | 2335 | int data_cnt = 0; |
---|
2419 | | - int error; |
---|
2420 | | - |
---|
2421 | | - *start_lsn = 0; |
---|
2422 | | - |
---|
2423 | | - len = xlog_write_calc_vec_length(ticket, log_vector); |
---|
| 2336 | + int error = 0; |
---|
2424 | 2337 | |
---|
2425 | 2338 | /* |
---|
2426 | | - * Region headers and bytes are already accounted for. |
---|
2427 | | - * We only need to take into account start records and |
---|
2428 | | - * split regions in this function. |
---|
| 2339 | + * If this is a commit or unmount transaction, we don't need a start |
---|
| 2340 | + * record to be written. We do, however, have to account for the |
---|
| 2341 | + * commit or unmount header that gets written. Hence we always have |
---|
| 2342 | + * to account for an extra xlog_op_header here. |
---|
2429 | 2343 | */ |
---|
2430 | | - if (ticket->t_flags & XLOG_TIC_INITED) |
---|
2431 | | - ticket->t_curr_res -= sizeof(xlog_op_header_t); |
---|
2432 | | - |
---|
2433 | | - /* |
---|
2434 | | - * Commit record headers need to be accounted for. These |
---|
2435 | | - * come in as separate writes so are easy to detect. |
---|
2436 | | - */ |
---|
2437 | | - if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) |
---|
2438 | | - ticket->t_curr_res -= sizeof(xlog_op_header_t); |
---|
2439 | | - |
---|
| 2344 | + ticket->t_curr_res -= sizeof(struct xlog_op_header); |
---|
2440 | 2345 | if (ticket->t_curr_res < 0) { |
---|
2441 | 2346 | xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, |
---|
2442 | 2347 | "ctx ticket reservation ran out. Need to up reservation"); |
---|
.. | .. |
---|
2444 | 2349 | xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); |
---|
2445 | 2350 | } |
---|
2446 | 2351 | |
---|
2447 | | - index = 0; |
---|
2448 | | - lv = log_vector; |
---|
2449 | | - vecp = lv->lv_iovecp; |
---|
| 2352 | + len = xlog_write_calc_vec_length(ticket, log_vector, need_start_rec); |
---|
| 2353 | + *start_lsn = 0; |
---|
2450 | 2354 | while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { |
---|
2451 | 2355 | void *ptr; |
---|
2452 | 2356 | int log_offset; |
---|
.. | .. |
---|
2470 | 2374 | while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { |
---|
2471 | 2375 | struct xfs_log_iovec *reg; |
---|
2472 | 2376 | struct xlog_op_header *ophdr; |
---|
2473 | | - int start_rec_copy; |
---|
2474 | 2377 | int copy_len; |
---|
2475 | 2378 | int copy_off; |
---|
2476 | 2379 | bool ordered = false; |
---|
.. | .. |
---|
2486 | 2389 | ASSERT(reg->i_len % sizeof(int32_t) == 0); |
---|
2487 | 2390 | ASSERT((unsigned long)ptr % sizeof(int32_t) == 0); |
---|
2488 | 2391 | |
---|
2489 | | - start_rec_copy = xlog_write_start_rec(ptr, ticket); |
---|
2490 | | - if (start_rec_copy) { |
---|
2491 | | - record_cnt++; |
---|
| 2392 | + /* |
---|
| 2393 | + * Before we start formatting log vectors, we need to |
---|
| 2394 | + * write a start record. Only do this for the first |
---|
| 2395 | + * iclog we write to. |
---|
| 2396 | + */ |
---|
| 2397 | + if (need_start_rec) { |
---|
| 2398 | + xlog_write_start_rec(ptr, ticket); |
---|
2492 | 2399 | xlog_write_adv_cnt(&ptr, &len, &log_offset, |
---|
2493 | | - start_rec_copy); |
---|
| 2400 | + sizeof(struct xlog_op_header)); |
---|
2494 | 2401 | } |
---|
2495 | 2402 | |
---|
2496 | 2403 | ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags); |
---|
.. | .. |
---|
2522 | 2429 | xlog_write_adv_cnt(&ptr, &len, &log_offset, |
---|
2523 | 2430 | copy_len); |
---|
2524 | 2431 | } |
---|
2525 | | - copy_len += start_rec_copy + sizeof(xlog_op_header_t); |
---|
| 2432 | + copy_len += sizeof(struct xlog_op_header); |
---|
2526 | 2433 | record_cnt++; |
---|
| 2434 | + if (need_start_rec) { |
---|
| 2435 | + copy_len += sizeof(struct xlog_op_header); |
---|
| 2436 | + record_cnt++; |
---|
| 2437 | + need_start_rec = false; |
---|
| 2438 | + } |
---|
2527 | 2439 | data_cnt += contwr ? copy_len : 0; |
---|
2528 | 2440 | |
---|
2529 | 2441 | error = xlog_write_copy_finish(log, iclog, flags, |
---|
.. | .. |
---|
2567 | 2479 | |
---|
2568 | 2480 | ASSERT(len == 0); |
---|
2569 | 2481 | |
---|
| 2482 | + spin_lock(&log->l_icloglock); |
---|
2570 | 2483 | xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); |
---|
2571 | | - if (!commit_iclog) |
---|
2572 | | - return xlog_state_release_iclog(log, iclog); |
---|
| 2484 | + if (commit_iclog) { |
---|
| 2485 | + ASSERT(flags & XLOG_COMMIT_TRANS); |
---|
| 2486 | + *commit_iclog = iclog; |
---|
| 2487 | + } else { |
---|
| 2488 | + error = xlog_state_release_iclog(log, iclog); |
---|
| 2489 | + } |
---|
| 2490 | + spin_unlock(&log->l_icloglock); |
---|
2573 | 2491 | |
---|
2574 | | - ASSERT(flags & XLOG_COMMIT_TRANS); |
---|
2575 | | - *commit_iclog = iclog; |
---|
2576 | | - return 0; |
---|
| 2492 | + return error; |
---|
2577 | 2493 | } |
---|
2578 | 2494 | |
---|
2579 | | - |
---|
2580 | | -/***************************************************************************** |
---|
2581 | | - * |
---|
2582 | | - * State Machine functions |
---|
2583 | | - * |
---|
2584 | | - ***************************************************************************** |
---|
2585 | | - */ |
---|
2586 | | - |
---|
2587 | | -/* Clean iclogs starting from the head. This ordering must be |
---|
2588 | | - * maintained, so an iclog doesn't become ACTIVE beyond one that |
---|
2589 | | - * is SYNCING. This is also required to maintain the notion that we use |
---|
2590 | | - * a ordered wait queue to hold off would be writers to the log when every |
---|
2591 | | - * iclog is trying to sync to disk. |
---|
2592 | | - * |
---|
2593 | | - * State Change: DIRTY -> ACTIVE |
---|
2594 | | - */ |
---|
2595 | | -STATIC void |
---|
2596 | | -xlog_state_clean_log( |
---|
2597 | | - struct xlog *log) |
---|
| 2495 | +static void |
---|
| 2496 | +xlog_state_activate_iclog( |
---|
| 2497 | + struct xlog_in_core *iclog, |
---|
| 2498 | + int *iclogs_changed) |
---|
2598 | 2499 | { |
---|
2599 | | - xlog_in_core_t *iclog; |
---|
2600 | | - int changed = 0; |
---|
| 2500 | + ASSERT(list_empty_careful(&iclog->ic_callbacks)); |
---|
2601 | 2501 | |
---|
2602 | | - iclog = log->l_iclog; |
---|
2603 | | - do { |
---|
2604 | | - if (iclog->ic_state == XLOG_STATE_DIRTY) { |
---|
2605 | | - iclog->ic_state = XLOG_STATE_ACTIVE; |
---|
2606 | | - iclog->ic_offset = 0; |
---|
2607 | | - ASSERT(iclog->ic_callback == NULL); |
---|
2608 | | - /* |
---|
2609 | | - * If the number of ops in this iclog indicate it just |
---|
2610 | | - * contains the dummy transaction, we can |
---|
2611 | | - * change state into IDLE (the second time around). |
---|
2612 | | - * Otherwise we should change the state into |
---|
2613 | | - * NEED a dummy. |
---|
2614 | | - * We don't need to cover the dummy. |
---|
2615 | | - */ |
---|
2616 | | - if (!changed && |
---|
2617 | | - (be32_to_cpu(iclog->ic_header.h_num_logops) == |
---|
2618 | | - XLOG_COVER_OPS)) { |
---|
2619 | | - changed = 1; |
---|
2620 | | - } else { |
---|
2621 | | - /* |
---|
2622 | | - * We have two dirty iclogs so start over |
---|
2623 | | - * This could also be num of ops indicates |
---|
2624 | | - * this is not the dummy going out. |
---|
2625 | | - */ |
---|
2626 | | - changed = 2; |
---|
2627 | | - } |
---|
2628 | | - iclog->ic_header.h_num_logops = 0; |
---|
2629 | | - memset(iclog->ic_header.h_cycle_data, 0, |
---|
2630 | | - sizeof(iclog->ic_header.h_cycle_data)); |
---|
2631 | | - iclog->ic_header.h_lsn = 0; |
---|
2632 | | - } else if (iclog->ic_state == XLOG_STATE_ACTIVE) |
---|
2633 | | - /* do nothing */; |
---|
2634 | | - else |
---|
2635 | | - break; /* stop cleaning */ |
---|
2636 | | - iclog = iclog->ic_next; |
---|
2637 | | - } while (iclog != log->l_iclog); |
---|
2638 | | - |
---|
2639 | | - /* log is locked when we are called */ |
---|
2640 | 2502 | /* |
---|
2641 | | - * Change state for the dummy log recording. |
---|
2642 | | - * We usually go to NEED. But we go to NEED2 if the changed indicates |
---|
2643 | | - * we are done writing the dummy record. |
---|
2644 | | - * If we are done with the second dummy recored (DONE2), then |
---|
2645 | | - * we go to IDLE. |
---|
| 2503 | + * If the number of ops in this iclog indicate it just contains the |
---|
| 2504 | + * dummy transaction, we can change state into IDLE (the second time |
---|
| 2505 | + * around). Otherwise we should change the state into NEED a dummy. |
---|
| 2506 | + * We don't need to cover the dummy. |
---|
2646 | 2507 | */ |
---|
2647 | | - if (changed) { |
---|
2648 | | - switch (log->l_covered_state) { |
---|
2649 | | - case XLOG_STATE_COVER_IDLE: |
---|
2650 | | - case XLOG_STATE_COVER_NEED: |
---|
2651 | | - case XLOG_STATE_COVER_NEED2: |
---|
2652 | | - log->l_covered_state = XLOG_STATE_COVER_NEED; |
---|
2653 | | - break; |
---|
2654 | | - |
---|
2655 | | - case XLOG_STATE_COVER_DONE: |
---|
2656 | | - if (changed == 1) |
---|
2657 | | - log->l_covered_state = XLOG_STATE_COVER_NEED2; |
---|
2658 | | - else |
---|
2659 | | - log->l_covered_state = XLOG_STATE_COVER_NEED; |
---|
2660 | | - break; |
---|
2661 | | - |
---|
2662 | | - case XLOG_STATE_COVER_DONE2: |
---|
2663 | | - if (changed == 1) |
---|
2664 | | - log->l_covered_state = XLOG_STATE_COVER_IDLE; |
---|
2665 | | - else |
---|
2666 | | - log->l_covered_state = XLOG_STATE_COVER_NEED; |
---|
2667 | | - break; |
---|
2668 | | - |
---|
2669 | | - default: |
---|
2670 | | - ASSERT(0); |
---|
2671 | | - } |
---|
| 2508 | + if (*iclogs_changed == 0 && |
---|
| 2509 | + iclog->ic_header.h_num_logops == cpu_to_be32(XLOG_COVER_OPS)) { |
---|
| 2510 | + *iclogs_changed = 1; |
---|
| 2511 | + } else { |
---|
| 2512 | + /* |
---|
| 2513 | + * We have two dirty iclogs so start over. This could also be |
---|
| 2514 | + * num of ops indicating this is not the dummy going out. |
---|
| 2515 | + */ |
---|
| 2516 | + *iclogs_changed = 2; |
---|
2672 | 2517 | } |
---|
2673 | | -} /* xlog_state_clean_log */ |
---|
| 2518 | + |
---|
| 2519 | + iclog->ic_state = XLOG_STATE_ACTIVE; |
---|
| 2520 | + iclog->ic_offset = 0; |
---|
| 2521 | + iclog->ic_header.h_num_logops = 0; |
---|
| 2522 | + memset(iclog->ic_header.h_cycle_data, 0, |
---|
| 2523 | + sizeof(iclog->ic_header.h_cycle_data)); |
---|
| 2524 | + iclog->ic_header.h_lsn = 0; |
---|
| 2525 | +} |
---|
| 2526 | + |
---|
| 2527 | +/* |
---|
| 2528 | + * Loop through all iclogs and mark any that are currently DIRTY as |
---|
| 2529 | + * ACTIVE after iclog I/O has completed. |
---|
| 2530 | + */ |
---|
| 2531 | +static void |
---|
| 2532 | +xlog_state_activate_iclogs( |
---|
| 2533 | + struct xlog *log, |
---|
| 2534 | + int *iclogs_changed) |
---|
| 2535 | +{ |
---|
| 2536 | + struct xlog_in_core *iclog = log->l_iclog; |
---|
| 2537 | + |
---|
| 2538 | + do { |
---|
| 2539 | + if (iclog->ic_state == XLOG_STATE_DIRTY) |
---|
| 2540 | + xlog_state_activate_iclog(iclog, iclogs_changed); |
---|
| 2541 | + /* |
---|
| 2542 | + * The ordering of marking iclogs ACTIVE must be maintained, so |
---|
| 2543 | + * an iclog doesn't become ACTIVE beyond one that is SYNCING. |
---|
| 2544 | + */ |
---|
| 2545 | + else if (iclog->ic_state != XLOG_STATE_ACTIVE) |
---|
| 2546 | + break; |
---|
| 2547 | + } while ((iclog = iclog->ic_next) != log->l_iclog); |
---|
| 2548 | +} |
---|
| 2549 | + |
---|
| 2550 | +static int |
---|
| 2551 | +xlog_covered_state( |
---|
| 2552 | + int prev_state, |
---|
| 2553 | + int iclogs_changed) |
---|
| 2554 | +{ |
---|
| 2555 | + /* |
---|
| 2556 | + * We usually go to NEED. But we go to NEED2 if iclogs_changed indicates we |
---|
| 2557 | + * are done writing the dummy record. If we are done with the second |
---|
| 2558 | + * dummy record (DONE2), then we go to IDLE. |
---|
| 2559 | + */ |
---|
| 2560 | + switch (prev_state) { |
---|
| 2561 | + case XLOG_STATE_COVER_IDLE: |
---|
| 2562 | + case XLOG_STATE_COVER_NEED: |
---|
| 2563 | + case XLOG_STATE_COVER_NEED2: |
---|
| 2564 | + break; |
---|
| 2565 | + case XLOG_STATE_COVER_DONE: |
---|
| 2566 | + if (iclogs_changed == 1) |
---|
| 2567 | + return XLOG_STATE_COVER_NEED2; |
---|
| 2568 | + break; |
---|
| 2569 | + case XLOG_STATE_COVER_DONE2: |
---|
| 2570 | + if (iclogs_changed == 1) |
---|
| 2571 | + return XLOG_STATE_COVER_IDLE; |
---|
| 2572 | + break; |
---|
| 2573 | + default: |
---|
| 2574 | + ASSERT(0); |
---|
| 2575 | + } |
---|
| 2576 | + |
---|
| 2577 | + return XLOG_STATE_COVER_NEED; |
---|
| 2578 | +} |
---|
| 2579 | + |
---|
| 2580 | +STATIC void |
---|
| 2581 | +xlog_state_clean_iclog( |
---|
| 2582 | + struct xlog *log, |
---|
| 2583 | + struct xlog_in_core *dirty_iclog) |
---|
| 2584 | +{ |
---|
| 2585 | + int iclogs_changed = 0; |
---|
| 2586 | + |
---|
| 2587 | + dirty_iclog->ic_state = XLOG_STATE_DIRTY; |
---|
| 2588 | + |
---|
| 2589 | + xlog_state_activate_iclogs(log, &iclogs_changed); |
---|
| 2590 | + wake_up_all(&dirty_iclog->ic_force_wait); |
---|
| 2591 | + |
---|
| 2592 | + if (iclogs_changed) { |
---|
| 2593 | + log->l_covered_state = xlog_covered_state(log->l_covered_state, |
---|
| 2594 | + iclogs_changed); |
---|
| 2595 | + } |
---|
| 2596 | +} |
---|
2674 | 2597 | |
---|
2675 | 2598 | STATIC xfs_lsn_t |
---|
2676 | 2599 | xlog_get_lowest_lsn( |
---|
2677 | | - struct xlog *log) |
---|
| 2600 | + struct xlog *log) |
---|
2678 | 2601 | { |
---|
2679 | | - xlog_in_core_t *lsn_log; |
---|
2680 | | - xfs_lsn_t lowest_lsn, lsn; |
---|
| 2602 | + struct xlog_in_core *iclog = log->l_iclog; |
---|
| 2603 | + xfs_lsn_t lowest_lsn = 0, lsn; |
---|
2681 | 2604 | |
---|
2682 | | - lsn_log = log->l_iclog; |
---|
2683 | | - lowest_lsn = 0; |
---|
2684 | 2605 | do { |
---|
2685 | | - if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) { |
---|
2686 | | - lsn = be64_to_cpu(lsn_log->ic_header.h_lsn); |
---|
2687 | | - if ((lsn && !lowest_lsn) || |
---|
2688 | | - (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) { |
---|
| 2606 | + if (iclog->ic_state == XLOG_STATE_ACTIVE || |
---|
| 2607 | + iclog->ic_state == XLOG_STATE_DIRTY) |
---|
| 2608 | + continue; |
---|
| 2609 | + |
---|
| 2610 | + lsn = be64_to_cpu(iclog->ic_header.h_lsn); |
---|
| 2611 | + if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0) |
---|
2689 | 2612 | lowest_lsn = lsn; |
---|
2690 | | - } |
---|
2691 | | - } |
---|
2692 | | - lsn_log = lsn_log->ic_next; |
---|
2693 | | - } while (lsn_log != log->l_iclog); |
---|
| 2613 | + } while ((iclog = iclog->ic_next) != log->l_iclog); |
---|
| 2614 | + |
---|
2694 | 2615 | return lowest_lsn; |
---|
2695 | 2616 | } |
---|
2696 | 2617 | |
---|
| 2618 | +/* |
---|
| 2619 | + * Completion of an iclog IO does not imply that a transaction has completed, as |
---|
| 2620 | + * transactions can be large enough to span many iclogs. We cannot change the |
---|
| 2621 | + * tail of the log half way through a transaction as this may be the only |
---|
| 2622 | + * transaction in the log and moving the tail to point to the middle of it |
---|
| 2623 | + * will prevent recovery from finding the start of the transaction. Hence we |
---|
| 2624 | + * should only update the last_sync_lsn if this iclog contains transaction |
---|
| 2625 | + * completion callbacks on it. |
---|
| 2626 | + * |
---|
| 2627 | + * We have to do this before we drop the icloglock to ensure we are the only one |
---|
| 2628 | + * that can update it. |
---|
| 2629 | + * |
---|
| 2630 | + * If we are moving the last_sync_lsn forwards, we also need to ensure we kick |
---|
| 2631 | + * the reservation grant head pushing. This is due to the fact that the push |
---|
| 2632 | + * target is bound by the current last_sync_lsn value. Hence if we have a large |
---|
| 2633 | + * amount of log space bound up in this committing transaction then the |
---|
| 2634 | + * last_sync_lsn value may be the limiting factor preventing tail pushing from |
---|
| 2635 | + * freeing space in the log. Hence once we've updated the last_sync_lsn we |
---|
| 2636 | + * should push the AIL to ensure the push target (and hence the grant head) is |
---|
| 2637 | + * no longer bound by the old log head location and can move forwards and make |
---|
| 2638 | + * progress again. |
---|
| 2639 | + */ |
---|
| 2640 | +static void |
---|
| 2641 | +xlog_state_set_callback( |
---|
| 2642 | + struct xlog *log, |
---|
| 2643 | + struct xlog_in_core *iclog, |
---|
| 2644 | + xfs_lsn_t header_lsn) |
---|
| 2645 | +{ |
---|
| 2646 | + iclog->ic_state = XLOG_STATE_CALLBACK; |
---|
| 2647 | + |
---|
| 2648 | + ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), |
---|
| 2649 | + header_lsn) <= 0); |
---|
| 2650 | + |
---|
| 2651 | + if (list_empty_careful(&iclog->ic_callbacks)) |
---|
| 2652 | + return; |
---|
| 2653 | + |
---|
| 2654 | + atomic64_set(&log->l_last_sync_lsn, header_lsn); |
---|
| 2655 | + xlog_grant_push_ail(log, 0); |
---|
| 2656 | +} |
---|
| 2657 | + |
---|
| 2658 | +/* |
---|
| 2659 | + * Return true if we need to stop processing, false to continue to the next |
---|
| 2660 | + * iclog. The caller will need to run callbacks if the iclog is returned in the |
---|
| 2661 | + * XLOG_STATE_CALLBACK state. |
---|
| 2662 | + */ |
---|
| 2663 | +static bool |
---|
| 2664 | +xlog_state_iodone_process_iclog( |
---|
| 2665 | + struct xlog *log, |
---|
| 2666 | + struct xlog_in_core *iclog, |
---|
| 2667 | + bool *ioerror) |
---|
| 2668 | +{ |
---|
| 2669 | + xfs_lsn_t lowest_lsn; |
---|
| 2670 | + xfs_lsn_t header_lsn; |
---|
| 2671 | + |
---|
| 2672 | + switch (iclog->ic_state) { |
---|
| 2673 | + case XLOG_STATE_ACTIVE: |
---|
| 2674 | + case XLOG_STATE_DIRTY: |
---|
| 2675 | + /* |
---|
| 2676 | + * Skip all iclogs in the ACTIVE & DIRTY states: |
---|
| 2677 | + */ |
---|
| 2678 | + return false; |
---|
| 2679 | + case XLOG_STATE_IOERROR: |
---|
| 2680 | + /* |
---|
| 2681 | + * Between marking a filesystem SHUTDOWN and stopping the log, |
---|
| 2682 | + * we do flush all iclogs to disk (if there wasn't a log I/O |
---|
| 2683 | + * error). So, we do want things to go smoothly in case of just |
---|
| 2684 | + * a SHUTDOWN w/o a LOG_IO_ERROR. |
---|
| 2685 | + */ |
---|
| 2686 | + *ioerror = true; |
---|
| 2687 | + return false; |
---|
| 2688 | + case XLOG_STATE_DONE_SYNC: |
---|
| 2689 | + /* |
---|
| 2690 | + * Now that we have an iclog that is in the DONE_SYNC state, do |
---|
| 2691 | + * one more check here to see if we have chased our tail around. |
---|
| 2692 | + * If this is not the lowest lsn iclog, then we will leave it |
---|
| 2693 | + * for another completion to process. |
---|
| 2694 | + */ |
---|
| 2695 | + header_lsn = be64_to_cpu(iclog->ic_header.h_lsn); |
---|
| 2696 | + lowest_lsn = xlog_get_lowest_lsn(log); |
---|
| 2697 | + if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0) |
---|
| 2698 | + return false; |
---|
| 2699 | + xlog_state_set_callback(log, iclog, header_lsn); |
---|
| 2700 | + return false; |
---|
| 2701 | + default: |
---|
| 2702 | + /* |
---|
| 2703 | + * Can only perform callbacks in order. Since this iclog is not |
---|
| 2704 | + * in the DONE_SYNC state, we skip the rest and just try to |
---|
| 2705 | + * clean up. |
---|
| 2706 | + */ |
---|
| 2707 | + return true; |
---|
| 2708 | + } |
---|
| 2709 | +} |
---|
| 2710 | + |
---|
| 2711 | +/* |
---|
| 2712 | + * Keep processing entries in the iclog callback list until we come around and |
---|
| 2713 | + * it is empty. We need to atomically see that the list is empty and change the |
---|
| 2714 | + * state to DIRTY so that we don't miss any more callbacks being added. |
---|
| 2715 | + * |
---|
| 2716 | + * This function is called with the icloglock held and returns with it held. We |
---|
| 2717 | + * drop it while running callbacks, however, as holding it over thousands of |
---|
| 2718 | + * callbacks is unnecessary and causes excessive contention if we do. |
---|
| 2719 | + */ |
---|
| 2720 | +static void |
---|
| 2721 | +xlog_state_do_iclog_callbacks( |
---|
| 2722 | + struct xlog *log, |
---|
| 2723 | + struct xlog_in_core *iclog) |
---|
| 2724 | + __releases(&log->l_icloglock) |
---|
| 2725 | + __acquires(&log->l_icloglock) |
---|
| 2726 | +{ |
---|
| 2727 | + spin_unlock(&log->l_icloglock); |
---|
| 2728 | + spin_lock(&iclog->ic_callback_lock); |
---|
| 2729 | + while (!list_empty(&iclog->ic_callbacks)) { |
---|
| 2730 | + LIST_HEAD(tmp); |
---|
| 2731 | + |
---|
| 2732 | + list_splice_init(&iclog->ic_callbacks, &tmp); |
---|
| 2733 | + |
---|
| 2734 | + spin_unlock(&iclog->ic_callback_lock); |
---|
| 2735 | + xlog_cil_process_committed(&tmp); |
---|
| 2736 | + spin_lock(&iclog->ic_callback_lock); |
---|
| 2737 | + } |
---|
| 2738 | + |
---|
| 2739 | + /* |
---|
| 2740 | + * Pick up the icloglock while still holding the callback lock so we |
---|
| 2741 | + * serialise against anyone trying to add more callbacks to this iclog |
---|
| 2742 | + * now we've finished processing. |
---|
| 2743 | + */ |
---|
| 2744 | + spin_lock(&log->l_icloglock); |
---|
| 2745 | + spin_unlock(&iclog->ic_callback_lock); |
---|
| 2746 | +} |
---|
2697 | 2747 | |
---|
2698 | 2748 | STATIC void |
---|
2699 | 2749 | xlog_state_do_callback( |
---|
2700 | | - struct xlog *log, |
---|
2701 | | - int aborted, |
---|
2702 | | - struct xlog_in_core *ciclog) |
---|
| 2750 | + struct xlog *log) |
---|
2703 | 2751 | { |
---|
2704 | | - xlog_in_core_t *iclog; |
---|
2705 | | - xlog_in_core_t *first_iclog; /* used to know when we've |
---|
2706 | | - * processed all iclogs once */ |
---|
2707 | | - xfs_log_callback_t *cb, *cb_next; |
---|
2708 | | - int flushcnt = 0; |
---|
2709 | | - xfs_lsn_t lowest_lsn; |
---|
2710 | | - int ioerrors; /* counter: iclogs with errors */ |
---|
2711 | | - int loopdidcallbacks; /* flag: inner loop did callbacks*/ |
---|
2712 | | - int funcdidcallbacks; /* flag: function did callbacks */ |
---|
2713 | | - int repeats; /* for issuing console warnings if |
---|
2714 | | - * looping too many times */ |
---|
| 2752 | + struct xlog_in_core *iclog; |
---|
| 2753 | + struct xlog_in_core *first_iclog; |
---|
| 2754 | + bool cycled_icloglock; |
---|
| 2755 | + bool ioerror; |
---|
| 2756 | + int flushcnt = 0; |
---|
| 2757 | + int repeats = 0; |
---|
2715 | 2758 | |
---|
2716 | 2759 | spin_lock(&log->l_icloglock); |
---|
2717 | | - first_iclog = iclog = log->l_iclog; |
---|
2718 | | - ioerrors = 0; |
---|
2719 | | - funcdidcallbacks = 0; |
---|
2720 | | - repeats = 0; |
---|
2721 | | - |
---|
2722 | 2760 | do { |
---|
2723 | 2761 | /* |
---|
2724 | 2762 | * Scan all iclogs starting with the one pointed to by the |
---|
.. | .. |
---|
2730 | 2768 | */ |
---|
2731 | 2769 | first_iclog = log->l_iclog; |
---|
2732 | 2770 | iclog = log->l_iclog; |
---|
2733 | | - loopdidcallbacks = 0; |
---|
| 2771 | + cycled_icloglock = false; |
---|
| 2772 | + ioerror = false; |
---|
2734 | 2773 | repeats++; |
---|
2735 | 2774 | |
---|
2736 | 2775 | do { |
---|
| 2776 | + if (xlog_state_iodone_process_iclog(log, iclog, |
---|
| 2777 | + &ioerror)) |
---|
| 2778 | + break; |
---|
2737 | 2779 | |
---|
2738 | | - /* skip all iclogs in the ACTIVE & DIRTY states */ |
---|
2739 | | - if (iclog->ic_state & |
---|
2740 | | - (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY)) { |
---|
| 2780 | + if (iclog->ic_state != XLOG_STATE_CALLBACK && |
---|
| 2781 | + iclog->ic_state != XLOG_STATE_IOERROR) { |
---|
2741 | 2782 | iclog = iclog->ic_next; |
---|
2742 | 2783 | continue; |
---|
2743 | 2784 | } |
---|
2744 | 2785 | |
---|
2745 | 2786 | /* |
---|
2746 | | - * Between marking a filesystem SHUTDOWN and stopping |
---|
2747 | | - * the log, we do flush all iclogs to disk (if there |
---|
2748 | | - * wasn't a log I/O error). So, we do want things to |
---|
2749 | | - * go smoothly in case of just a SHUTDOWN w/o a |
---|
2750 | | - * LOG_IO_ERROR. |
---|
| 2787 | + * Running callbacks will drop the icloglock which means |
---|
| 2788 | + * we'll have to run at least one more complete loop. |
---|
2751 | 2789 | */ |
---|
2752 | | - if (!(iclog->ic_state & XLOG_STATE_IOERROR)) { |
---|
2753 | | - /* |
---|
2754 | | - * Can only perform callbacks in order. Since |
---|
2755 | | - * this iclog is not in the DONE_SYNC/ |
---|
2756 | | - * DO_CALLBACK state, we skip the rest and |
---|
2757 | | - * just try to clean up. If we set our iclog |
---|
2758 | | - * to DO_CALLBACK, we will not process it when |
---|
2759 | | - * we retry since a previous iclog is in the |
---|
2760 | | - * CALLBACK and the state cannot change since |
---|
2761 | | - * we are holding the l_icloglock. |
---|
2762 | | - */ |
---|
2763 | | - if (!(iclog->ic_state & |
---|
2764 | | - (XLOG_STATE_DONE_SYNC | |
---|
2765 | | - XLOG_STATE_DO_CALLBACK))) { |
---|
2766 | | - if (ciclog && (ciclog->ic_state == |
---|
2767 | | - XLOG_STATE_DONE_SYNC)) { |
---|
2768 | | - ciclog->ic_state = XLOG_STATE_DO_CALLBACK; |
---|
2769 | | - } |
---|
2770 | | - break; |
---|
2771 | | - } |
---|
2772 | | - /* |
---|
2773 | | - * We now have an iclog that is in either the |
---|
2774 | | - * DO_CALLBACK or DONE_SYNC states. The other |
---|
2775 | | - * states (WANT_SYNC, SYNCING, or CALLBACK were |
---|
2776 | | - * caught by the above if and are going to |
---|
2777 | | - * clean (i.e. we aren't doing their callbacks) |
---|
2778 | | - * see the above if. |
---|
2779 | | - */ |
---|
2780 | | - |
---|
2781 | | - /* |
---|
2782 | | - * We will do one more check here to see if we |
---|
2783 | | - * have chased our tail around. |
---|
2784 | | - */ |
---|
2785 | | - |
---|
2786 | | - lowest_lsn = xlog_get_lowest_lsn(log); |
---|
2787 | | - if (lowest_lsn && |
---|
2788 | | - XFS_LSN_CMP(lowest_lsn, |
---|
2789 | | - be64_to_cpu(iclog->ic_header.h_lsn)) < 0) { |
---|
2790 | | - iclog = iclog->ic_next; |
---|
2791 | | - continue; /* Leave this iclog for |
---|
2792 | | - * another thread */ |
---|
2793 | | - } |
---|
2794 | | - |
---|
2795 | | - iclog->ic_state = XLOG_STATE_CALLBACK; |
---|
2796 | | - |
---|
2797 | | - |
---|
2798 | | - /* |
---|
2799 | | - * Completion of a iclog IO does not imply that |
---|
2800 | | - * a transaction has completed, as transactions |
---|
2801 | | - * can be large enough to span many iclogs. We |
---|
2802 | | - * cannot change the tail of the log half way |
---|
2803 | | - * through a transaction as this may be the only |
---|
2804 | | - * transaction in the log and moving th etail to |
---|
2805 | | - * point to the middle of it will prevent |
---|
2806 | | - * recovery from finding the start of the |
---|
2807 | | - * transaction. Hence we should only update the |
---|
2808 | | - * last_sync_lsn if this iclog contains |
---|
2809 | | - * transaction completion callbacks on it. |
---|
2810 | | - * |
---|
2811 | | - * We have to do this before we drop the |
---|
2812 | | - * icloglock to ensure we are the only one that |
---|
2813 | | - * can update it. |
---|
2814 | | - */ |
---|
2815 | | - ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), |
---|
2816 | | - be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); |
---|
2817 | | - if (iclog->ic_callback) |
---|
2818 | | - atomic64_set(&log->l_last_sync_lsn, |
---|
2819 | | - be64_to_cpu(iclog->ic_header.h_lsn)); |
---|
2820 | | - |
---|
2821 | | - } else |
---|
2822 | | - ioerrors++; |
---|
2823 | | - |
---|
2824 | | - spin_unlock(&log->l_icloglock); |
---|
2825 | | - |
---|
2826 | | - /* |
---|
2827 | | - * Keep processing entries in the callback list until |
---|
2828 | | - * we come around and it is empty. We need to |
---|
2829 | | - * atomically see that the list is empty and change the |
---|
2830 | | - * state to DIRTY so that we don't miss any more |
---|
2831 | | - * callbacks being added. |
---|
2832 | | - */ |
---|
2833 | | - spin_lock(&iclog->ic_callback_lock); |
---|
2834 | | - cb = iclog->ic_callback; |
---|
2835 | | - while (cb) { |
---|
2836 | | - iclog->ic_callback_tail = &(iclog->ic_callback); |
---|
2837 | | - iclog->ic_callback = NULL; |
---|
2838 | | - spin_unlock(&iclog->ic_callback_lock); |
---|
2839 | | - |
---|
2840 | | - /* perform callbacks in the order given */ |
---|
2841 | | - for (; cb; cb = cb_next) { |
---|
2842 | | - cb_next = cb->cb_next; |
---|
2843 | | - cb->cb_func(cb->cb_arg, aborted); |
---|
2844 | | - } |
---|
2845 | | - spin_lock(&iclog->ic_callback_lock); |
---|
2846 | | - cb = iclog->ic_callback; |
---|
2847 | | - } |
---|
2848 | | - |
---|
2849 | | - loopdidcallbacks++; |
---|
2850 | | - funcdidcallbacks++; |
---|
2851 | | - |
---|
2852 | | - spin_lock(&log->l_icloglock); |
---|
2853 | | - ASSERT(iclog->ic_callback == NULL); |
---|
2854 | | - spin_unlock(&iclog->ic_callback_lock); |
---|
2855 | | - if (!(iclog->ic_state & XLOG_STATE_IOERROR)) |
---|
2856 | | - iclog->ic_state = XLOG_STATE_DIRTY; |
---|
2857 | | - |
---|
2858 | | - /* |
---|
2859 | | - * Transition from DIRTY to ACTIVE if applicable. |
---|
2860 | | - * NOP if STATE_IOERROR. |
---|
2861 | | - */ |
---|
2862 | | - xlog_state_clean_log(log); |
---|
2863 | | - |
---|
2864 | | - /* wake up threads waiting in xfs_log_force() */ |
---|
2865 | | - wake_up_all(&iclog->ic_force_wait); |
---|
2866 | | - |
---|
| 2790 | + cycled_icloglock = true; |
---|
| 2791 | + xlog_state_do_iclog_callbacks(log, iclog); |
---|
| 2792 | + if (XLOG_FORCED_SHUTDOWN(log)) |
---|
| 2793 | + wake_up_all(&iclog->ic_force_wait); |
---|
| 2794 | + else |
---|
| 2795 | + xlog_state_clean_iclog(log, iclog); |
---|
2867 | 2796 | iclog = iclog->ic_next; |
---|
2868 | 2797 | } while (first_iclog != iclog); |
---|
2869 | 2798 | |
---|
.. | .. |
---|
2874 | 2803 | "%s: possible infinite loop (%d iterations)", |
---|
2875 | 2804 | __func__, flushcnt); |
---|
2876 | 2805 | } |
---|
2877 | | - } while (!ioerrors && loopdidcallbacks); |
---|
| 2806 | + } while (!ioerror && cycled_icloglock); |
---|
2878 | 2807 | |
---|
2879 | | -#ifdef DEBUG |
---|
2880 | | - /* |
---|
2881 | | - * Make one last gasp attempt to see if iclogs are being left in limbo. |
---|
2882 | | - * If the above loop finds an iclog earlier than the current iclog and |
---|
2883 | | - * in one of the syncing states, the current iclog is put into |
---|
2884 | | - * DO_CALLBACK and the callbacks are deferred to the completion of the |
---|
2885 | | - * earlier iclog. Walk the iclogs in order and make sure that no iclog |
---|
2886 | | - * is in DO_CALLBACK unless an earlier iclog is in one of the syncing |
---|
2887 | | - * states. |
---|
2888 | | - * |
---|
2889 | | - * Note that SYNCING|IOABORT is a valid state so we cannot just check |
---|
2890 | | - * for ic_state == SYNCING. |
---|
2891 | | - */ |
---|
2892 | | - if (funcdidcallbacks) { |
---|
2893 | | - first_iclog = iclog = log->l_iclog; |
---|
2894 | | - do { |
---|
2895 | | - ASSERT(iclog->ic_state != XLOG_STATE_DO_CALLBACK); |
---|
2896 | | - /* |
---|
2897 | | - * Terminate the loop if iclogs are found in states |
---|
2898 | | - * which will cause other threads to clean up iclogs. |
---|
2899 | | - * |
---|
2900 | | - * SYNCING - i/o completion will go through logs |
---|
2901 | | - * DONE_SYNC - interrupt thread should be waiting for |
---|
2902 | | - * l_icloglock |
---|
2903 | | - * IOERROR - give up hope all ye who enter here |
---|
2904 | | - */ |
---|
2905 | | - if (iclog->ic_state == XLOG_STATE_WANT_SYNC || |
---|
2906 | | - iclog->ic_state & XLOG_STATE_SYNCING || |
---|
2907 | | - iclog->ic_state == XLOG_STATE_DONE_SYNC || |
---|
2908 | | - iclog->ic_state == XLOG_STATE_IOERROR ) |
---|
2909 | | - break; |
---|
2910 | | - iclog = iclog->ic_next; |
---|
2911 | | - } while (first_iclog != iclog); |
---|
2912 | | - } |
---|
2913 | | -#endif |
---|
2914 | | - |
---|
2915 | | - if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) |
---|
| 2808 | + if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE || |
---|
| 2809 | + log->l_iclog->ic_state == XLOG_STATE_IOERROR) |
---|
2916 | 2810 | wake_up_all(&log->l_flush_wait); |
---|
2917 | 2811 | |
---|
2918 | 2812 | spin_unlock(&log->l_icloglock); |
---|
.. | .. |
---|
2934 | 2828 | */ |
---|
2935 | 2829 | STATIC void |
---|
2936 | 2830 | xlog_state_done_syncing( |
---|
2937 | | - xlog_in_core_t *iclog, |
---|
2938 | | - int aborted) |
---|
| 2831 | + struct xlog_in_core *iclog) |
---|
2939 | 2832 | { |
---|
2940 | | - struct xlog *log = iclog->ic_log; |
---|
| 2833 | + struct xlog *log = iclog->ic_log; |
---|
2941 | 2834 | |
---|
2942 | 2835 | spin_lock(&log->l_icloglock); |
---|
2943 | | - |
---|
2944 | | - ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || |
---|
2945 | | - iclog->ic_state == XLOG_STATE_IOERROR); |
---|
2946 | 2836 | ASSERT(atomic_read(&iclog->ic_refcnt) == 0); |
---|
2947 | | - ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); |
---|
2948 | | - |
---|
2949 | 2837 | |
---|
2950 | 2838 | /* |
---|
2951 | 2839 | * If we got an error, either on the first buffer, or in the case of |
---|
2952 | | - * split log writes, on the second, we mark ALL iclogs STATE_IOERROR, |
---|
2953 | | - * and none should ever be attempted to be written to disk |
---|
2954 | | - * again. |
---|
| 2840 | + * split log writes, on the second, we shut down the file system and |
---|
| 2841 | + * no iclogs should ever be attempted to be written to disk again. |
---|
2955 | 2842 | */ |
---|
2956 | | - if (iclog->ic_state != XLOG_STATE_IOERROR) { |
---|
2957 | | - if (--iclog->ic_bwritecnt == 1) { |
---|
2958 | | - spin_unlock(&log->l_icloglock); |
---|
2959 | | - return; |
---|
2960 | | - } |
---|
| 2843 | + if (!XLOG_FORCED_SHUTDOWN(log)) { |
---|
| 2844 | + ASSERT(iclog->ic_state == XLOG_STATE_SYNCING); |
---|
2961 | 2845 | iclog->ic_state = XLOG_STATE_DONE_SYNC; |
---|
2962 | 2846 | } |
---|
2963 | 2847 | |
---|
.. | .. |
---|
2968 | 2852 | */ |
---|
2969 | 2853 | wake_up_all(&iclog->ic_write_wait); |
---|
2970 | 2854 | spin_unlock(&log->l_icloglock); |
---|
2971 | | - xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ |
---|
2972 | | -} /* xlog_state_done_syncing */ |
---|
2973 | | - |
---|
| 2855 | + xlog_state_do_callback(log); |
---|
| 2856 | +} |
---|
2974 | 2857 | |
---|
2975 | 2858 | /* |
---|
2976 | 2859 | * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must |
---|
.. | .. |
---|
3002 | 2885 | int log_offset; |
---|
3003 | 2886 | xlog_rec_header_t *head; |
---|
3004 | 2887 | xlog_in_core_t *iclog; |
---|
3005 | | - int error; |
---|
3006 | 2888 | |
---|
3007 | 2889 | restart: |
---|
3008 | 2890 | spin_lock(&log->l_icloglock); |
---|
.. | .. |
---|
3051 | 2933 | * can fit into remaining data section. |
---|
3052 | 2934 | */ |
---|
3053 | 2935 | if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) { |
---|
| 2936 | + int error = 0; |
---|
| 2937 | + |
---|
3054 | 2938 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); |
---|
3055 | 2939 | |
---|
3056 | 2940 | /* |
---|
3057 | | - * If I'm the only one writing to this iclog, sync it to disk. |
---|
3058 | | - * We need to do an atomic compare and decrement here to avoid |
---|
3059 | | - * racing with concurrent atomic_dec_and_lock() calls in |
---|
| 2941 | + * If we are the only one writing to this iclog, sync it to |
---|
| 2942 | + * disk. We need to do an atomic compare and decrement here to |
---|
| 2943 | + * avoid racing with concurrent atomic_dec_and_lock() calls in |
---|
3060 | 2944 | * xlog_state_release_iclog() when there is more than one |
---|
3061 | 2945 | * reference to the iclog. |
---|
3062 | 2946 | */ |
---|
3063 | | - if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) { |
---|
3064 | | - /* we are the only one */ |
---|
3065 | | - spin_unlock(&log->l_icloglock); |
---|
| 2947 | + if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) |
---|
3066 | 2948 | error = xlog_state_release_iclog(log, iclog); |
---|
3067 | | - if (error) |
---|
3068 | | - return error; |
---|
3069 | | - } else { |
---|
3070 | | - spin_unlock(&log->l_icloglock); |
---|
3071 | | - } |
---|
| 2949 | + spin_unlock(&log->l_icloglock); |
---|
| 2950 | + if (error) |
---|
| 2951 | + return error; |
---|
3072 | 2952 | goto restart; |
---|
3073 | 2953 | } |
---|
3074 | 2954 | |
---|
.. | .. |
---|
3092 | 2972 | |
---|
3093 | 2973 | *logoffsetp = log_offset; |
---|
3094 | 2974 | return 0; |
---|
3095 | | -} /* xlog_state_get_iclog_space */ |
---|
| 2975 | +} |
---|
3096 | 2976 | |
---|
3097 | | -/* The first cnt-1 times through here we don't need to |
---|
3098 | | - * move the grant write head because the permanent |
---|
3099 | | - * reservation has reserved cnt times the unit amount. |
---|
3100 | | - * Release part of current permanent unit reservation and |
---|
3101 | | - * reset current reservation to be one units worth. Also |
---|
3102 | | - * move grant reservation head forward. |
---|
| 2977 | +/* |
---|
| 2978 | + * The first cnt-1 times a ticket goes through here we don't need to move the |
---|
| 2979 | + * grant write head because the permanent reservation has reserved cnt times the |
---|
| 2980 | + * unit amount. Release part of the current permanent unit reservation and reset |
---|
| 2981 | + * the current reservation to be one unit's worth. Also move grant reservation head |
---|
| 2982 | + * forward. |
---|
3103 | 2983 | */ |
---|
3104 | | -STATIC void |
---|
3105 | | -xlog_regrant_reserve_log_space( |
---|
| 2984 | +void |
---|
| 2985 | +xfs_log_ticket_regrant( |
---|
3106 | 2986 | struct xlog *log, |
---|
3107 | 2987 | struct xlog_ticket *ticket) |
---|
3108 | 2988 | { |
---|
3109 | | - trace_xfs_log_regrant_reserve_enter(log, ticket); |
---|
| 2989 | + trace_xfs_log_ticket_regrant(log, ticket); |
---|
3110 | 2990 | |
---|
3111 | 2991 | if (ticket->t_cnt > 0) |
---|
3112 | 2992 | ticket->t_cnt--; |
---|
.. | .. |
---|
3118 | 2998 | ticket->t_curr_res = ticket->t_unit_res; |
---|
3119 | 2999 | xlog_tic_reset_res(ticket); |
---|
3120 | 3000 | |
---|
3121 | | - trace_xfs_log_regrant_reserve_sub(log, ticket); |
---|
| 3001 | + trace_xfs_log_ticket_regrant_sub(log, ticket); |
---|
3122 | 3002 | |
---|
3123 | 3003 | /* just return if we still have some of the pre-reserved space */ |
---|
3124 | | - if (ticket->t_cnt > 0) |
---|
3125 | | - return; |
---|
| 3004 | + if (!ticket->t_cnt) { |
---|
| 3005 | + xlog_grant_add_space(log, &log->l_reserve_head.grant, |
---|
| 3006 | + ticket->t_unit_res); |
---|
| 3007 | + trace_xfs_log_ticket_regrant_exit(log, ticket); |
---|
3126 | 3008 | |
---|
3127 | | - xlog_grant_add_space(log, &log->l_reserve_head.grant, |
---|
3128 | | - ticket->t_unit_res); |
---|
| 3009 | + ticket->t_curr_res = ticket->t_unit_res; |
---|
| 3010 | + xlog_tic_reset_res(ticket); |
---|
| 3011 | + } |
---|
3129 | 3012 | |
---|
3130 | | - trace_xfs_log_regrant_reserve_exit(log, ticket); |
---|
3131 | | - |
---|
3132 | | - ticket->t_curr_res = ticket->t_unit_res; |
---|
3133 | | - xlog_tic_reset_res(ticket); |
---|
3134 | | -} /* xlog_regrant_reserve_log_space */ |
---|
3135 | | - |
---|
| 3013 | + xfs_log_ticket_put(ticket); |
---|
| 3014 | +} |
---|
3136 | 3015 | |
---|
3137 | 3016 | /* |
---|
3138 | 3017 | * Give back the space left from a reservation. |
---|
.. | .. |
---|
3148 | 3027 | * space, the count will stay at zero and the only space remaining will be |
---|
3149 | 3028 | * in the current reservation field. |
---|
3150 | 3029 | */ |
---|
3151 | | -STATIC void |
---|
3152 | | -xlog_ungrant_log_space( |
---|
| 3030 | +void |
---|
| 3031 | +xfs_log_ticket_ungrant( |
---|
3153 | 3032 | struct xlog *log, |
---|
3154 | 3033 | struct xlog_ticket *ticket) |
---|
3155 | 3034 | { |
---|
3156 | | - int bytes; |
---|
| 3035 | + int bytes; |
---|
| 3036 | + |
---|
| 3037 | + trace_xfs_log_ticket_ungrant(log, ticket); |
---|
3157 | 3038 | |
---|
3158 | 3039 | if (ticket->t_cnt > 0) |
---|
3159 | 3040 | ticket->t_cnt--; |
---|
3160 | 3041 | |
---|
3161 | | - trace_xfs_log_ungrant_enter(log, ticket); |
---|
3162 | | - trace_xfs_log_ungrant_sub(log, ticket); |
---|
| 3042 | + trace_xfs_log_ticket_ungrant_sub(log, ticket); |
---|
3163 | 3043 | |
---|
3164 | 3044 | /* |
---|
3165 | 3045 | * If this is a permanent reservation ticket, we may be able to free |
---|
.. | .. |
---|
3174 | 3054 | xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes); |
---|
3175 | 3055 | xlog_grant_sub_space(log, &log->l_write_head.grant, bytes); |
---|
3176 | 3056 | |
---|
3177 | | - trace_xfs_log_ungrant_exit(log, ticket); |
---|
| 3057 | + trace_xfs_log_ticket_ungrant_exit(log, ticket); |
---|
3178 | 3058 | |
---|
3179 | 3059 | xfs_log_space_wake(log->l_mp); |
---|
| 3060 | + xfs_log_ticket_put(ticket); |
---|
3180 | 3061 | } |
---|
3181 | 3062 | |
---|
3182 | 3063 | /* |
---|
3183 | | - * Flush iclog to disk if this is the last reference to the given iclog and |
---|
3184 | | - * the WANT_SYNC bit is set. |
---|
3185 | | - * |
---|
3186 | | - * When this function is entered, the iclog is not necessarily in the |
---|
3187 | | - * WANT_SYNC state. It may be sitting around waiting to get filled. |
---|
3188 | | - * |
---|
3189 | | - * |
---|
3190 | | - */ |
---|
3191 | | -STATIC int |
---|
3192 | | -xlog_state_release_iclog( |
---|
3193 | | - struct xlog *log, |
---|
3194 | | - struct xlog_in_core *iclog) |
---|
3195 | | -{ |
---|
3196 | | - int sync = 0; /* do we sync? */ |
---|
3197 | | - |
---|
3198 | | - if (iclog->ic_state & XLOG_STATE_IOERROR) |
---|
3199 | | - return -EIO; |
---|
3200 | | - |
---|
3201 | | - ASSERT(atomic_read(&iclog->ic_refcnt) > 0); |
---|
3202 | | - if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) |
---|
3203 | | - return 0; |
---|
3204 | | - |
---|
3205 | | - if (iclog->ic_state & XLOG_STATE_IOERROR) { |
---|
3206 | | - spin_unlock(&log->l_icloglock); |
---|
3207 | | - return -EIO; |
---|
3208 | | - } |
---|
3209 | | - ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || |
---|
3210 | | - iclog->ic_state == XLOG_STATE_WANT_SYNC); |
---|
3211 | | - |
---|
3212 | | - if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { |
---|
3213 | | - /* update tail before writing to iclog */ |
---|
3214 | | - xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); |
---|
3215 | | - sync++; |
---|
3216 | | - iclog->ic_state = XLOG_STATE_SYNCING; |
---|
3217 | | - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); |
---|
3218 | | - xlog_verify_tail_lsn(log, iclog, tail_lsn); |
---|
3219 | | - /* cycle incremented when incrementing curr_block */ |
---|
3220 | | - } |
---|
3221 | | - spin_unlock(&log->l_icloglock); |
---|
3222 | | - |
---|
3223 | | - /* |
---|
3224 | | - * We let the log lock go, so it's possible that we hit a log I/O |
---|
3225 | | - * error or some other SHUTDOWN condition that marks the iclog |
---|
3226 | | - * as XLOG_STATE_IOERROR before the bwrite. However, we know that |
---|
3227 | | - * this iclog has consistent data, so we ignore IOERROR |
---|
3228 | | - * flags after this point. |
---|
3229 | | - */ |
---|
3230 | | - if (sync) |
---|
3231 | | - return xlog_sync(log, iclog); |
---|
3232 | | - return 0; |
---|
3233 | | -} /* xlog_state_release_iclog */ |
---|
3234 | | - |
---|
3235 | | - |
---|
3236 | | -/* |
---|
3237 | | - * This routine will mark the current iclog in the ring as WANT_SYNC |
---|
3238 | | - * and move the current iclog pointer to the next iclog in the ring. |
---|
3239 | | - * When this routine is called from xlog_state_get_iclog_space(), the |
---|
3240 | | - * exact size of the iclog has not yet been determined. All we know is |
---|
3241 | | - * that every data block. We have run out of space in this log record. |
---|
| 3064 | + * This routine will mark the current iclog in the ring as WANT_SYNC and move |
---|
| 3065 | + * the current iclog pointer to the next iclog in the ring. |
---|
3242 | 3066 | */ |
---|
3243 | 3067 | STATIC void |
---|
3244 | 3068 | xlog_state_switch_iclogs( |
---|
.. | .. |
---|
3247 | 3071 | int eventual_size) |
---|
3248 | 3072 | { |
---|
3249 | 3073 | ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); |
---|
| 3074 | + assert_spin_locked(&log->l_icloglock); |
---|
| 3075 | + |
---|
3250 | 3076 | if (!eventual_size) |
---|
3251 | 3077 | eventual_size = iclog->ic_offset; |
---|
3252 | 3078 | iclog->ic_state = XLOG_STATE_WANT_SYNC; |
---|
.. | .. |
---|
3281 | 3107 | } |
---|
3282 | 3108 | ASSERT(iclog == log->l_iclog); |
---|
3283 | 3109 | log->l_iclog = iclog->ic_next; |
---|
3284 | | -} /* xlog_state_switch_iclogs */ |
---|
| 3110 | +} |
---|
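As the rewritten comment says, switching iclogs only marks the current in-core log `WANT_SYNC` and advances the ring pointer; nothing is written at this point, and the added `assert_spin_locked()` makes explicit that the whole operation runs under `l_icloglock`. A toy model of the rotation (plain C with made-up names, not the kernel structures):

```c
/* Toy iclog ring: a circular singly linked list with a "current" pointer. */
enum toy_state { TOY_ACTIVE, TOY_WANT_SYNC };

struct toy_iclog {
	enum toy_state		state;
	struct toy_iclog	*next;	/* last element points back to the first */
};

/* Mark the current iclog for syncing and hand back the next one to fill. */
static struct toy_iclog *toy_switch_iclogs(struct toy_iclog *cur)
{
	cur->state = TOY_WANT_SYNC;
	return cur->next;
}
```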
3285 | 3111 | |
---|
3286 | 3112 | /* |
---|
3287 | 3113 | * Write out all data in the in-core log as of this exact moment in time. |
---|
.. | .. |
---|
3326 | 3152 | |
---|
3327 | 3153 | spin_lock(&log->l_icloglock); |
---|
3328 | 3154 | iclog = log->l_iclog; |
---|
3329 | | - if (iclog->ic_state & XLOG_STATE_IOERROR) |
---|
| 3155 | + if (iclog->ic_state == XLOG_STATE_IOERROR) |
---|
3330 | 3156 | goto out_error; |
---|
3331 | 3157 | |
---|
3332 | 3158 | if (iclog->ic_state == XLOG_STATE_DIRTY || |
---|
.. | .. |
---|
3341 | 3167 | * previous iclog and go to sleep. |
---|
3342 | 3168 | */ |
---|
3343 | 3169 | iclog = iclog->ic_prev; |
---|
3344 | | - if (iclog->ic_state == XLOG_STATE_ACTIVE || |
---|
3345 | | - iclog->ic_state == XLOG_STATE_DIRTY) |
---|
3346 | | - goto out_unlock; |
---|
3347 | 3170 | } else if (iclog->ic_state == XLOG_STATE_ACTIVE) { |
---|
3348 | 3171 | if (atomic_read(&iclog->ic_refcnt) == 0) { |
---|
3349 | 3172 | /* |
---|
.. | .. |
---|
3356 | 3179 | atomic_inc(&iclog->ic_refcnt); |
---|
3357 | 3180 | lsn = be64_to_cpu(iclog->ic_header.h_lsn); |
---|
3358 | 3181 | xlog_state_switch_iclogs(log, iclog, 0); |
---|
3359 | | - spin_unlock(&log->l_icloglock); |
---|
3360 | | - |
---|
3361 | 3182 | if (xlog_state_release_iclog(log, iclog)) |
---|
3362 | | - return -EIO; |
---|
| 3183 | + goto out_error; |
---|
3363 | 3184 | |
---|
3364 | | - spin_lock(&log->l_icloglock); |
---|
3365 | | - if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn || |
---|
3366 | | - iclog->ic_state == XLOG_STATE_DIRTY) |
---|
| 3185 | + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) |
---|
3367 | 3186 | goto out_unlock; |
---|
3368 | 3187 | } else { |
---|
3369 | 3188 | /* |
---|
.. | .. |
---|
3383 | 3202 | ; |
---|
3384 | 3203 | } |
---|
3385 | 3204 | |
---|
3386 | | - if (!(flags & XFS_LOG_SYNC)) |
---|
3387 | | - goto out_unlock; |
---|
3388 | | - |
---|
3389 | | - if (iclog->ic_state & XLOG_STATE_IOERROR) |
---|
3390 | | - goto out_error; |
---|
3391 | | - XFS_STATS_INC(mp, xs_log_force_sleep); |
---|
3392 | | - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
---|
3393 | | - if (iclog->ic_state & XLOG_STATE_IOERROR) |
---|
3394 | | - return -EIO; |
---|
3395 | | - return 0; |
---|
3396 | | - |
---|
| 3205 | + if (flags & XFS_LOG_SYNC) |
---|
| 3206 | + return xlog_wait_on_iclog(iclog); |
---|
3397 | 3207 | out_unlock: |
---|
3398 | 3208 | spin_unlock(&log->l_icloglock); |
---|
3399 | 3209 | return 0; |
---|
.. | .. |
---|
3403 | 3213 | } |
---|
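Both force paths now finish with `return xlog_wait_on_iclog(iclog);` in place of the open-coded sleep deleted above. Below is a hedged reconstruction of what that helper has to cover, pieced together from the removed sequence (the real helper may differ in detail); it is entered with `l_icloglock` held and releases it on every path:

```c
/* Sketch, not the actual helper: wait for an iclog force to complete. */
static int sketch_wait_on_iclog(struct xlog *log, struct xlog_in_core *iclog)
{
	if (iclog->ic_state != XLOG_STATE_ACTIVE &&
	    iclog->ic_state != XLOG_STATE_DIRTY) {
		/* Not on disk yet: sleep until I/O completion wakes ic_force_wait. */
		XFS_STATS_INC(log->l_mp, xs_log_force_sleep);
		xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
	} else {
		spin_unlock(&log->l_icloglock);
	}

	/* A shutdown while we slept means the data never made it to disk. */
	if (iclog->ic_state == XLOG_STATE_IOERROR)
		return -EIO;
	return 0;
}
```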
3404 | 3214 | |
---|
3405 | 3215 | static int |
---|
3406 | | -__xfs_log_force_lsn( |
---|
3407 | | - struct xfs_mount *mp, |
---|
| 3216 | +xlog_force_lsn( |
---|
| 3217 | + struct xlog *log, |
---|
3408 | 3218 | xfs_lsn_t lsn, |
---|
3409 | 3219 | uint flags, |
---|
3410 | 3220 | int *log_flushed, |
---|
3411 | 3221 | bool already_slept) |
---|
3412 | 3222 | { |
---|
3413 | | - struct xlog *log = mp->m_log; |
---|
3414 | 3223 | struct xlog_in_core *iclog; |
---|
3415 | 3224 | |
---|
3416 | 3225 | spin_lock(&log->l_icloglock); |
---|
3417 | 3226 | iclog = log->l_iclog; |
---|
3418 | | - if (iclog->ic_state & XLOG_STATE_IOERROR) |
---|
| 3227 | + if (iclog->ic_state == XLOG_STATE_IOERROR) |
---|
3419 | 3228 | goto out_error; |
---|
3420 | 3229 | |
---|
3421 | 3230 | while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) { |
---|
.. | .. |
---|
3423 | 3232 | if (iclog == log->l_iclog) |
---|
3424 | 3233 | goto out_unlock; |
---|
3425 | 3234 | } |
---|
3426 | | - |
---|
3427 | | - if (iclog->ic_state == XLOG_STATE_DIRTY) |
---|
3428 | | - goto out_unlock; |
---|
3429 | 3235 | |
---|
3430 | 3236 | if (iclog->ic_state == XLOG_STATE_ACTIVE) { |
---|
3431 | 3237 | /* |
---|
.. | .. |
---|
3444 | 3250 | * will go out then. |
---|
3445 | 3251 | */ |
---|
3446 | 3252 | if (!already_slept && |
---|
3447 | | - (iclog->ic_prev->ic_state & |
---|
3448 | | - (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { |
---|
3449 | | - ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); |
---|
3450 | | - |
---|
3451 | | - XFS_STATS_INC(mp, xs_log_force_sleep); |
---|
3452 | | - |
---|
| 3253 | + (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC || |
---|
| 3254 | + iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) { |
---|
3453 | 3255 | xlog_wait(&iclog->ic_prev->ic_write_wait, |
---|
3454 | 3256 | &log->l_icloglock); |
---|
3455 | 3257 | return -EAGAIN; |
---|
3456 | 3258 | } |
---|
3457 | 3259 | atomic_inc(&iclog->ic_refcnt); |
---|
3458 | 3260 | xlog_state_switch_iclogs(log, iclog, 0); |
---|
3459 | | - spin_unlock(&log->l_icloglock); |
---|
3460 | 3261 | if (xlog_state_release_iclog(log, iclog)) |
---|
3461 | | - return -EIO; |
---|
| 3262 | + goto out_error; |
---|
3462 | 3263 | if (log_flushed) |
---|
3463 | 3264 | *log_flushed = 1; |
---|
3464 | | - spin_lock(&log->l_icloglock); |
---|
3465 | 3265 | } |
---|
3466 | 3266 | |
---|
3467 | | - if (!(flags & XFS_LOG_SYNC) || |
---|
3468 | | - (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) |
---|
3469 | | - goto out_unlock; |
---|
3470 | | - |
---|
3471 | | - if (iclog->ic_state & XLOG_STATE_IOERROR) |
---|
3472 | | - goto out_error; |
---|
3473 | | - |
---|
3474 | | - XFS_STATS_INC(mp, xs_log_force_sleep); |
---|
3475 | | - xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); |
---|
3476 | | - if (iclog->ic_state & XLOG_STATE_IOERROR) |
---|
3477 | | - return -EIO; |
---|
3478 | | - return 0; |
---|
3479 | | - |
---|
| 3267 | + if (flags & XFS_LOG_SYNC) |
---|
| 3268 | + return xlog_wait_on_iclog(iclog); |
---|
3480 | 3269 | out_unlock: |
---|
3481 | 3270 | spin_unlock(&log->l_icloglock); |
---|
3482 | 3271 | return 0; |
---|
.. | .. |
---|
3500 | 3289 | * to disk, that thread will wake up all threads waiting on the queue. |
---|
3501 | 3290 | */ |
---|
3502 | 3291 | int |
---|
3503 | | -xfs_log_force_lsn( |
---|
| 3292 | +xfs_log_force_seq( |
---|
3504 | 3293 | struct xfs_mount *mp, |
---|
3505 | | - xfs_lsn_t lsn, |
---|
| 3294 | + xfs_csn_t seq, |
---|
3506 | 3295 | uint flags, |
---|
3507 | 3296 | int *log_flushed) |
---|
3508 | 3297 | { |
---|
| 3298 | + struct xlog *log = mp->m_log; |
---|
| 3299 | + xfs_lsn_t lsn; |
---|
3509 | 3300 | int ret; |
---|
3510 | | - ASSERT(lsn != 0); |
---|
| 3301 | + ASSERT(seq != 0); |
---|
3511 | 3302 | |
---|
3512 | 3303 | XFS_STATS_INC(mp, xs_log_force); |
---|
3513 | | - trace_xfs_log_force(mp, lsn, _RET_IP_); |
---|
| 3304 | + trace_xfs_log_force(mp, seq, _RET_IP_); |
---|
3514 | 3305 | |
---|
3515 | | - lsn = xlog_cil_force_lsn(mp->m_log, lsn); |
---|
| 3306 | + lsn = xlog_cil_force_seq(log, seq); |
---|
3516 | 3307 | if (lsn == NULLCOMMITLSN) |
---|
3517 | 3308 | return 0; |
---|
3518 | 3309 | |
---|
3519 | | - ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false); |
---|
3520 | | - if (ret == -EAGAIN) |
---|
3521 | | - ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true); |
---|
| 3310 | + ret = xlog_force_lsn(log, lsn, flags, log_flushed, false); |
---|
| 3311 | + if (ret == -EAGAIN) { |
---|
| 3312 | + XFS_STATS_INC(mp, xs_log_force_sleep); |
---|
| 3313 | + ret = xlog_force_lsn(log, lsn, flags, log_flushed, true); |
---|
| 3314 | + } |
---|
3522 | 3315 | return ret; |
---|
3523 | 3316 | } |
---|
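The renamed entry point takes a CIL commit sequence rather than an LSN and resolves it through `xlog_cil_force_seq()` before forcing the covering iclog. A hypothetical caller that wants a prior commit on stable storage might look like this (the function name and the source of `seq` are illustrative, not part of the patch):

```c
/* Hypothetical caller sketch: seq would come from an earlier CIL commit. */
static int example_flush_to_seq(struct xfs_mount *mp, xfs_csn_t seq)
{
	int	log_flushed = 0;

	if (!seq)	/* nothing committed yet, nothing to force */
		return 0;

	/* XFS_LOG_SYNC: wait until the iclog covering seq is on disk. */
	return xfs_log_force_seq(mp, seq, XFS_LOG_SYNC, &log_flushed);
}
```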
3524 | | - |
---|
3525 | | -/* |
---|
3526 | | - * Called when we want to mark the current iclog as being ready to sync to |
---|
3527 | | - * disk. |
---|
3528 | | - */ |
---|
3529 | | -STATIC void |
---|
3530 | | -xlog_state_want_sync( |
---|
3531 | | - struct xlog *log, |
---|
3532 | | - struct xlog_in_core *iclog) |
---|
3533 | | -{ |
---|
3534 | | - assert_spin_locked(&log->l_icloglock); |
---|
3535 | | - |
---|
3536 | | - if (iclog->ic_state == XLOG_STATE_ACTIVE) { |
---|
3537 | | - xlog_state_switch_iclogs(log, iclog, 0); |
---|
3538 | | - } else { |
---|
3539 | | - ASSERT(iclog->ic_state & |
---|
3540 | | - (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR)); |
---|
3541 | | - } |
---|
3542 | | -} |
---|
3543 | | - |
---|
3544 | | - |
---|
3545 | | -/***************************************************************************** |
---|
3546 | | - * |
---|
3547 | | - * TICKET functions |
---|
3548 | | - * |
---|
3549 | | - ***************************************************************************** |
---|
3550 | | - */ |
---|
3551 | 3317 | |
---|
3552 | 3318 | /* |
---|
3553 | 3319 | * Free a used ticket when its refcount falls to zero. |
---|
.. | .. |
---|
3558 | 3324 | { |
---|
3559 | 3325 | ASSERT(atomic_read(&ticket->t_ref) > 0); |
---|
3560 | 3326 | if (atomic_dec_and_test(&ticket->t_ref)) |
---|
3561 | | - kmem_zone_free(xfs_log_ticket_zone, ticket); |
---|
| 3327 | + kmem_cache_free(xfs_log_ticket_zone, ticket); |
---|
3562 | 3328 | } |
---|
3563 | 3329 | |
---|
3564 | 3330 | xlog_ticket_t * |
---|
.. | .. |
---|
3676 | 3442 | int unit_bytes, |
---|
3677 | 3443 | int cnt, |
---|
3678 | 3444 | char client, |
---|
3679 | | - bool permanent, |
---|
3680 | | - xfs_km_flags_t alloc_flags) |
---|
| 3445 | + bool permanent) |
---|
3681 | 3446 | { |
---|
3682 | 3447 | struct xlog_ticket *tic; |
---|
3683 | 3448 | int unit_res; |
---|
3684 | 3449 | |
---|
3685 | | - tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags); |
---|
3686 | | - if (!tic) |
---|
3687 | | - return NULL; |
---|
| 3450 | + tic = kmem_cache_zalloc(xfs_log_ticket_zone, GFP_NOFS | __GFP_NOFAIL); |
---|
3688 | 3451 | |
---|
3689 | 3452 | unit_res = xfs_log_calc_unit_res(log->l_mp, unit_bytes); |
---|
3690 | 3453 | |
---|
.. | .. |
---|
3697 | 3460 | tic->t_ocnt = cnt; |
---|
3698 | 3461 | tic->t_tid = prandom_u32(); |
---|
3699 | 3462 | tic->t_clientid = client; |
---|
3700 | | - tic->t_flags = XLOG_TIC_INITED; |
---|
3701 | 3463 | if (permanent) |
---|
3702 | 3464 | tic->t_flags |= XLOG_TIC_PERM_RESERV; |
---|
3703 | 3465 | |
---|
.. | .. |
---|
3706 | 3468 | return tic; |
---|
3707 | 3469 | } |
---|
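Because the ticket is now allocated with `GFP_NOFS | __GFP_NOFAIL`, the allocation cannot fail, which is why both the `alloc_flags` parameter and the `return NULL` path disappear. A hypothetical caller before and after (the old flags value shown is illustrative only):

```c
/* Before: allocation could fail, so callers had to cope with NULL. */
tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, 0);
if (!tic)
	return -ENOMEM;

/* After: __GFP_NOFAIL guarantees a ticket, so the error path goes away. */
tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent);
```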
3708 | 3470 | |
---|
3709 | | - |
---|
3710 | | -/****************************************************************************** |
---|
3711 | | - * |
---|
3712 | | - * Log debug routines |
---|
3713 | | - * |
---|
3714 | | - ****************************************************************************** |
---|
3715 | | - */ |
---|
3716 | 3471 | #if defined(DEBUG) |
---|
3717 | 3472 | /* |
---|
3718 | 3473 | * Make sure that the destination ptr is within the valid data region of |
---|
.. | .. |
---|
3798 | 3553 | if (blocks < BTOBB(iclog->ic_offset) + 1) |
---|
3799 | 3554 | xfs_emerg(log->l_mp, "%s: ran out of log space", __func__); |
---|
3800 | 3555 | } |
---|
3801 | | -} /* xlog_verify_tail_lsn */ |
---|
| 3556 | +} |
---|
3802 | 3557 | |
---|
3803 | 3558 | /* |
---|
3804 | 3559 | * Perform a number of checks on the iclog before writing to disk. |
---|
.. | .. |
---|
3819 | 3574 | xlog_verify_iclog( |
---|
3820 | 3575 | struct xlog *log, |
---|
3821 | 3576 | struct xlog_in_core *iclog, |
---|
3822 | | - int count, |
---|
3823 | | - bool syncing) |
---|
| 3577 | + int count) |
---|
3824 | 3578 | { |
---|
3825 | 3579 | xlog_op_header_t *ophead; |
---|
3826 | 3580 | xlog_in_core_t *icptr; |
---|
.. | .. |
---|
3864 | 3618 | /* clientid is only 1 byte */ |
---|
3865 | 3619 | p = &ophead->oh_clientid; |
---|
3866 | 3620 | field_offset = p - base_ptr; |
---|
3867 | | - if (!syncing || (field_offset & 0x1ff)) { |
---|
| 3621 | + if (field_offset & 0x1ff) { |
---|
3868 | 3622 | clientid = ophead->oh_clientid; |
---|
3869 | 3623 | } else { |
---|
3870 | 3624 | idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap); |
---|
.. | .. |
---|
3887 | 3641 | /* check length */ |
---|
3888 | 3642 | p = &ophead->oh_len; |
---|
3889 | 3643 | field_offset = p - base_ptr; |
---|
3890 | | - if (!syncing || (field_offset & 0x1ff)) { |
---|
| 3644 | + if (field_offset & 0x1ff) { |
---|
3891 | 3645 | op_len = be32_to_cpu(ophead->oh_len); |
---|
3892 | 3646 | } else { |
---|
3893 | 3647 | idx = BTOBBT((uintptr_t)&ophead->oh_len - |
---|
.. | .. |
---|
3902 | 3656 | } |
---|
3903 | 3657 | ptr += sizeof(xlog_op_header_t) + op_len; |
---|
3904 | 3658 | } |
---|
3905 | | -} /* xlog_verify_iclog */ |
---|
| 3659 | +} |
---|
3906 | 3660 | #endif |
---|
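One detail worth spelling out in the verifier changes above: the `field_offset & 0x1ff` test asks whether a field begins exactly on a 512-byte basic-block boundary. The first word of each such block is overwritten with the cycle stamp before the record is written (the originals are parked in the header's cycle data), so a boundary-aligned field has to be read from that saved copy rather than from the data region; everything else can be read in place. A tiny standalone illustration of the mask arithmetic (a 512-byte block size is assumed):

```c
#include <assert.h>

/* 0x1ff masks the offset within a 512-byte basic block. */
static int starts_on_block_boundary(unsigned long field_offset)
{
	return (field_offset & 0x1ff) == 0;
}

int main(void)
{
	assert(!starts_on_block_boundary(0x204)); /* mid-block: field readable in place */
	assert(starts_on_block_boundary(0x600));  /* block start: use the saved header copy */
	return 0;
}
```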
3907 | 3661 | |
---|
3908 | 3662 | /* |
---|
.. | .. |
---|
3915 | 3669 | xlog_in_core_t *iclog, *ic; |
---|
3916 | 3670 | |
---|
3917 | 3671 | iclog = log->l_iclog; |
---|
3918 | | - if (! (iclog->ic_state & XLOG_STATE_IOERROR)) { |
---|
| 3672 | + if (iclog->ic_state != XLOG_STATE_IOERROR) { |
---|
3919 | 3673 | /* |
---|
3920 | 3674 | * Mark all the incore logs IOERROR. |
---|
3921 | 3675 | * From now on, no log flushes will result. |
---|
.. | .. |
---|
3975 | 3729 | * Somebody could've already done the hard work for us. |
---|
3976 | 3730 | * No need to get locks for this. |
---|
3977 | 3731 | */ |
---|
3978 | | - if (logerror && log->l_iclog->ic_state & XLOG_STATE_IOERROR) { |
---|
| 3732 | + if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) { |
---|
3979 | 3733 | ASSERT(XLOG_FORCED_SHUTDOWN(log)); |
---|
3980 | 3734 | return 1; |
---|
3981 | 3735 | } |
---|
.. | .. |
---|
4026 | 3780 | spin_lock(&log->l_cilp->xc_push_lock); |
---|
4027 | 3781 | wake_up_all(&log->l_cilp->xc_commit_wait); |
---|
4028 | 3782 | spin_unlock(&log->l_cilp->xc_push_lock); |
---|
4029 | | - xlog_state_do_callback(log, XFS_LI_ABORTED, NULL); |
---|
| 3783 | + xlog_state_do_callback(log); |
---|
4030 | 3784 | |
---|
4031 | | -#ifdef XFSERRORDEBUG |
---|
4032 | | - { |
---|
4033 | | - xlog_in_core_t *iclog; |
---|
4034 | | - |
---|
4035 | | - spin_lock(&log->l_icloglock); |
---|
4036 | | - iclog = log->l_iclog; |
---|
4037 | | - do { |
---|
4038 | | - ASSERT(iclog->ic_callback == 0); |
---|
4039 | | - iclog = iclog->ic_next; |
---|
4040 | | - } while (iclog != log->l_iclog); |
---|
4041 | | - spin_unlock(&log->l_icloglock); |
---|
4042 | | - } |
---|
4043 | | -#endif |
---|
4044 | 3785 | /* return non-zero if log IOERROR transition had already happened */ |
---|
4045 | 3786 | return retval; |
---|
4046 | 3787 | } |
---|