2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/xfs/xfs_log.c
....@@ -16,24 +16,14 @@
1616 #include "xfs_trans_priv.h"
1717 #include "xfs_log.h"
1818 #include "xfs_log_priv.h"
19
-#include "xfs_log_recover.h"
20
-#include "xfs_inode.h"
2119 #include "xfs_trace.h"
22
-#include "xfs_fsops.h"
23
-#include "xfs_cksum.h"
2420 #include "xfs_sysfs.h"
2521 #include "xfs_sb.h"
22
+#include "xfs_health.h"
2623
2724 kmem_zone_t *xfs_log_ticket_zone;
2825
2926 /* Local miscellaneous function prototypes */
30
-STATIC int
31
-xlog_commit_record(
32
- struct xlog *log,
33
- struct xlog_ticket *ticket,
34
- struct xlog_in_core **iclog,
35
- xfs_lsn_t *commitlsnp);
36
-
3727 STATIC struct xlog *
3828 xlog_alloc_log(
3929 struct xfs_mount *mp,
....@@ -44,20 +34,12 @@
4434 xlog_space_left(
4535 struct xlog *log,
4636 atomic64_t *head);
47
-STATIC int
48
-xlog_sync(
49
- struct xlog *log,
50
- struct xlog_in_core *iclog);
5137 STATIC void
5238 xlog_dealloc_log(
5339 struct xlog *log);
5440
5541 /* local state machine functions */
56
-STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
57
-STATIC void
58
-xlog_state_do_callback(
59
- struct xlog *log,
60
- int aborted,
42
+STATIC void xlog_state_done_syncing(
6143 struct xlog_in_core *iclog);
6244 STATIC int
6345 xlog_state_get_iclog_space(
....@@ -67,33 +49,19 @@
6749 struct xlog_ticket *ticket,
6850 int *continued_write,
6951 int *logoffsetp);
70
-STATIC int
71
-xlog_state_release_iclog(
72
- struct xlog *log,
73
- struct xlog_in_core *iclog);
7452 STATIC void
7553 xlog_state_switch_iclogs(
7654 struct xlog *log,
7755 struct xlog_in_core *iclog,
7856 int eventual_size);
7957 STATIC void
80
-xlog_state_want_sync(
81
- struct xlog *log,
82
- struct xlog_in_core *iclog);
83
-
84
-STATIC void
8558 xlog_grant_push_ail(
8659 struct xlog *log,
8760 int need_bytes);
8861 STATIC void
89
-xlog_regrant_reserve_log_space(
62
+xlog_sync(
9063 struct xlog *log,
91
- struct xlog_ticket *ticket);
92
-STATIC void
93
-xlog_ungrant_log_space(
94
- struct xlog *log,
95
- struct xlog_ticket *ticket);
96
-
64
+ struct xlog_in_core *iclog);
9765 #if defined(DEBUG)
9866 STATIC void
9967 xlog_verify_dest_ptr(
....@@ -106,8 +74,7 @@
10674 xlog_verify_iclog(
10775 struct xlog *log,
10876 struct xlog_in_core *iclog,
109
- int count,
110
- bool syncing);
77
+ int count);
11178 STATIC void
11279 xlog_verify_tail_lsn(
11380 struct xlog *log,
....@@ -116,7 +83,7 @@
11683 #else
11784 #define xlog_verify_dest_ptr(a,b)
11885 #define xlog_verify_grant_tail(a)
119
-#define xlog_verify_iclog(a,b,c,d)
86
+#define xlog_verify_iclog(a,b,c)
12087 #define xlog_verify_tail_lsn(a,b,c)
12188 #endif
12289
....@@ -225,15 +192,42 @@
225192 {
226193 struct xlog_ticket *tic;
227194 int need_bytes;
195
+ bool woken_task = false;
228196
229197 list_for_each_entry(tic, &head->waiters, t_queue) {
198
+
199
+ /*
200
+ * There is a chance that the size of the CIL checkpoints in
201
+ * progress at the last AIL push target calculation resulted in
202
+ * limiting the target to the log head (l_last_sync_lsn) at the
203
+ * time. This may not reflect where the log head is now as the
204
+ * CIL checkpoints may have completed.
205
+ *
206
+ * Hence when we are woken here, it may be the head of the
207
+ * log that has moved rather than the tail. As the tail didn't
208
+ * move, there still won't be space available for the
209
+ * reservation we require. However, if the AIL has already
210
+ * pushed to the target defined by the old log head location, we
211
+ * will hang here waiting for something else to update the AIL
212
+ * push target.
213
+ *
214
+ * Therefore, if there isn't space to wake the first waiter on
215
+ * the grant head, we need to push the AIL again to ensure the
216
+ * target reflects both the current log tail and log head
217
+ * position before we wait for the tail to move again.
218
+ */
219
+
230220 need_bytes = xlog_ticket_reservation(log, head, tic);
231
- if (*free_bytes < need_bytes)
221
+ if (*free_bytes < need_bytes) {
222
+ if (!woken_task)
223
+ xlog_grant_push_ail(log, need_bytes);
232224 return false;
225
+ }
233226
234227 *free_bytes -= need_bytes;
235228 trace_xfs_log_grant_wake_up(log, tic);
236229 wake_up_process(tic->t_task);
230
+ woken_task = true;
237231 }
238232
239233 return true;
....@@ -353,6 +347,25 @@
353347 tic->t_res_num++;
354348 }
355349
350
+bool
351
+xfs_log_writable(
352
+ struct xfs_mount *mp)
353
+{
354
+ /*
355
+ * Never write to the log on norecovery mounts, if the block device is
356
+ * read-only, or if the filesystem is shut down. Read-only mounts still
357
+ * allow internal writes for log recovery and unmount purposes, so don't
358
+ * restrict that case here.
359
+ */
360
+ if (mp->m_flags & XFS_MOUNT_NORECOVERY)
361
+ return false;
362
+ if (xfs_readonly_buftarg(mp->m_log->l_targ))
363
+ return false;
364
+ if (XFS_FORCED_SHUTDOWN(mp))
365
+ return false;
366
+ return true;
367
+}
368
+
356369 /*
357370 * Replenish the byte reservation required by moving the grant write head.
358371 */
....@@ -439,11 +452,7 @@
439452 XFS_STATS_INC(mp, xs_try_logspace);
440453
441454 ASSERT(*ticp == NULL);
442
- tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
443
- KM_SLEEP | KM_MAYFAIL);
444
- if (!tic)
445
- return -ENOMEM;
446
-
455
+ tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent);
447456 *ticp = tic;
448457
449458 xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
....@@ -473,110 +482,67 @@
473482 return error;
474483 }
475484
476
-
477
-/*
478
- * NOTES:
479
- *
480
- * 1. currblock field gets updated at startup and after in-core logs
481
- * marked as with WANT_SYNC.
482
- */
483
-
484
-/*
485
- * This routine is called when a user of a log manager ticket is done with
486
- * the reservation. If the ticket was ever used, then a commit record for
487
- * the associated transaction is written out as a log operation header with
488
- * no data. The flag XLOG_TIC_INITED is set when the first write occurs with
489
- * a given ticket. If the ticket was one with a permanent reservation, then
490
- * a few operations are done differently. Permanent reservation tickets by
491
- * default don't release the reservation. They just commit the current
492
- * transaction with the belief that the reservation is still needed. A flag
493
- * must be passed in before permanent reservations are actually released.
494
- * When these type of tickets are not released, they need to be set into
495
- * the inited state again. By doing this, a start record will be written
496
- * out when the next write occurs.
497
- */
498
-xfs_lsn_t
499
-xfs_log_done(
500
- struct xfs_mount *mp,
501
- struct xlog_ticket *ticket,
502
- struct xlog_in_core **iclog,
503
- bool regrant)
504
-{
505
- struct xlog *log = mp->m_log;
506
- xfs_lsn_t lsn = 0;
507
-
508
- if (XLOG_FORCED_SHUTDOWN(log) ||
509
- /*
510
- * If nothing was ever written, don't write out commit record.
511
- * If we get an error, just continue and give back the log ticket.
512
- */
513
- (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
514
- (xlog_commit_record(log, ticket, iclog, &lsn)))) {
515
- lsn = (xfs_lsn_t) -1;
516
- regrant = false;
517
- }
518
-
519
-
520
- if (!regrant) {
521
- trace_xfs_log_done_nonperm(log, ticket);
522
-
523
- /*
524
- * Release ticket if not permanent reservation or a specific
525
- * request has been made to release a permanent reservation.
526
- */
527
- xlog_ungrant_log_space(log, ticket);
528
- } else {
529
- trace_xfs_log_done_perm(log, ticket);
530
-
531
- xlog_regrant_reserve_log_space(log, ticket);
532
- /* If this ticket was a permanent reservation and we aren't
533
- * trying to release it, reset the inited flags; so next time
534
- * we write, a start record will be written out.
535
- */
536
- ticket->t_flags |= XLOG_TIC_INITED;
537
- }
538
-
539
- xfs_log_ticket_put(ticket);
540
- return lsn;
541
-}
542
-
543
-/*
544
- * Attaches a new iclog I/O completion callback routine during
545
- * transaction commit. If the log is in error state, a non-zero
546
- * return code is handed back and the caller is responsible for
547
- * executing the callback at an appropriate time.
548
- */
549
-int
550
-xfs_log_notify(
551
- struct xlog_in_core *iclog,
552
- xfs_log_callback_t *cb)
553
-{
554
- int abortflg;
555
-
556
- spin_lock(&iclog->ic_callback_lock);
557
- abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
558
- if (!abortflg) {
559
- ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
560
- (iclog->ic_state == XLOG_STATE_WANT_SYNC));
561
- cb->cb_next = NULL;
562
- *(iclog->ic_callback_tail) = cb;
563
- iclog->ic_callback_tail = &(cb->cb_next);
564
- }
565
- spin_unlock(&iclog->ic_callback_lock);
566
- return abortflg;
567
-}
568
-
569
-int
570
-xfs_log_release_iclog(
571
- struct xfs_mount *mp,
485
+static bool
486
+__xlog_state_release_iclog(
487
+ struct xlog *log,
572488 struct xlog_in_core *iclog)
573489 {
574
- if (xlog_state_release_iclog(mp->m_log, iclog)) {
575
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
490
+ lockdep_assert_held(&log->l_icloglock);
491
+
492
+ if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
493
+ /* update tail before writing to iclog */
494
+ xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
495
+
496
+ iclog->ic_state = XLOG_STATE_SYNCING;
497
+ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
498
+ xlog_verify_tail_lsn(log, iclog, tail_lsn);
499
+ /* cycle incremented when incrementing curr_block */
500
+ return true;
501
+ }
502
+
503
+ ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
504
+ return false;
505
+}
506
+
507
+/*
508
+ * Flush iclog to disk if this is the last reference to the given iclog and
509
+ * it is in the WANT_SYNC state.
510
+ */
511
+static int
512
+xlog_state_release_iclog(
513
+ struct xlog *log,
514
+ struct xlog_in_core *iclog)
515
+{
516
+ lockdep_assert_held(&log->l_icloglock);
517
+
518
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
576519 return -EIO;
520
+
521
+ if (atomic_dec_and_test(&iclog->ic_refcnt) &&
522
+ __xlog_state_release_iclog(log, iclog)) {
523
+ spin_unlock(&log->l_icloglock);
524
+ xlog_sync(log, iclog);
525
+ spin_lock(&log->l_icloglock);
577526 }
578527
579528 return 0;
529
+}
530
+
531
+void
532
+xfs_log_release_iclog(
533
+ struct xlog_in_core *iclog)
534
+{
535
+ struct xlog *log = iclog->ic_log;
536
+ bool sync = false;
537
+
538
+ if (atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) {
539
+ if (iclog->ic_state != XLOG_STATE_IOERROR)
540
+ sync = __xlog_state_release_iclog(log, iclog);
541
+ spin_unlock(&log->l_icloglock);
542
+ }
543
+
544
+ if (sync)
545
+ xlog_sync(log, iclog);
580546 }
581547
582548 /*
....@@ -799,6 +765,9 @@
799765 if (readonly)
800766 mp->m_flags |= XFS_MOUNT_RDONLY;
801767
768
+ /* Make sure the log is dead if we're returning failure. */
769
+ ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR));
770
+
802771 return error;
803772 }
804773
....@@ -806,45 +775,78 @@
806775 * The mount has failed. Cancel the recovery if it hasn't completed and destroy
807776 * the log.
808777 */
809
-int
778
+void
810779 xfs_log_mount_cancel(
811780 struct xfs_mount *mp)
812781 {
813
- int error;
814
-
815
- error = xlog_recover_cancel(mp->m_log);
782
+ xlog_recover_cancel(mp->m_log);
816783 xfs_log_unmount(mp);
817
-
818
- return error;
819784 }
820785
821786 /*
822
- * Final log writes as part of unmount.
823
- *
824
- * Mark the filesystem clean as unmount happens. Note that during relocation
825
- * this routine needs to be executed as part of source-bag while the
826
- * deallocation must not be done until source-end.
787
+ * Wait for the iclog to be written to disk, or return an error if the log has been
788
+ * shut down.
827789 */
828
-
829
-/* Actually write the unmount record to disk. */
830
-static void
831
-xfs_log_write_unmount_record(
832
- struct xfs_mount *mp)
790
+static int
791
+xlog_wait_on_iclog(
792
+ struct xlog_in_core *iclog)
793
+ __releases(iclog->ic_log->l_icloglock)
833794 {
834
- /* the data section must be 32 bit size aligned */
835
- struct xfs_unmount_log_format magic = {
795
+ struct xlog *log = iclog->ic_log;
796
+
797
+ if (!XLOG_FORCED_SHUTDOWN(log) &&
798
+ iclog->ic_state != XLOG_STATE_ACTIVE &&
799
+ iclog->ic_state != XLOG_STATE_DIRTY) {
800
+ XFS_STATS_INC(log->l_mp, xs_log_force_sleep);
801
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
802
+ } else {
803
+ spin_unlock(&log->l_icloglock);
804
+ }
805
+
806
+ if (XLOG_FORCED_SHUTDOWN(log))
807
+ return -EIO;
808
+ return 0;
809
+}
810
+
811
+/*
812
+ * Write out an unmount record using the ticket provided. We have to account for
813
+ * the data space used in the unmount ticket as this write is not done from a
814
+ * transaction context that has already done the accounting for us.
815
+ */
816
+static int
817
+xlog_write_unmount_record(
818
+ struct xlog *log,
819
+ struct xlog_ticket *ticket,
820
+ xfs_lsn_t *lsn,
821
+ uint flags)
822
+{
823
+ struct xfs_unmount_log_format ulf = {
836824 .magic = XLOG_UNMOUNT_TYPE,
837825 };
838826 struct xfs_log_iovec reg = {
839
- .i_addr = &magic,
840
- .i_len = sizeof(magic),
827
+ .i_addr = &ulf,
828
+ .i_len = sizeof(ulf),
841829 .i_type = XLOG_REG_TYPE_UNMOUNT,
842830 };
843831 struct xfs_log_vec vec = {
844832 .lv_niovecs = 1,
845833 .lv_iovecp = &reg,
846834 };
847
- struct xlog *log = mp->m_log;
835
+
836
+ /* account for space used by record data */
837
+ ticket->t_curr_res -= sizeof(ulf);
838
+ return xlog_write(log, &vec, ticket, lsn, NULL, flags, false);
839
+}
840
+
841
+/*
842
+ * Mark the filesystem clean by writing an unmount record to the head of the
843
+ * log.
844
+ */
845
+static void
846
+xlog_unmount_write(
847
+ struct xlog *log)
848
+{
849
+ struct xfs_mount *mp = log->l_mp;
848850 struct xlog_in_core *iclog;
849851 struct xlog_ticket *tic = NULL;
850852 xfs_lsn_t lsn;
....@@ -855,23 +857,7 @@
855857 if (error)
856858 goto out_err;
857859
858
- /*
859
- * If we think the summary counters are bad, clear the unmount header
860
- * flag in the unmount record so that the summary counters will be
861
- * recalculated during log recovery at next mount. Refer to
862
- * xlog_check_unmount_rec for more details.
863
- */
864
- if (XFS_TEST_ERROR((mp->m_flags & XFS_MOUNT_BAD_SUMMARY), mp,
865
- XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
866
- xfs_alert(mp, "%s: will fix summary counters at next mount",
867
- __func__);
868
- flags &= ~XLOG_UNMOUNT_TRANS;
869
- }
870
-
871
- /* remove inited flag, and account for space used */
872
- tic->t_flags = 0;
873
- tic->t_curr_res -= sizeof(magic);
874
- error = xlog_write(log, &vec, tic, &lsn, NULL, flags);
860
+ error = xlog_write_unmount_record(log, tic, &lsn, flags);
875861 /*
876862 * At this point, we're umounting anyway, so there's no point in
877863 * transitioning log state to IOERROR. Just continue...
....@@ -883,29 +869,30 @@
883869 spin_lock(&log->l_icloglock);
884870 iclog = log->l_iclog;
885871 atomic_inc(&iclog->ic_refcnt);
886
- xlog_state_want_sync(log, iclog);
887
- spin_unlock(&log->l_icloglock);
872
+ if (iclog->ic_state == XLOG_STATE_ACTIVE)
873
+ xlog_state_switch_iclogs(log, iclog, 0);
874
+ else
875
+ ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
876
+ iclog->ic_state == XLOG_STATE_IOERROR);
888877 error = xlog_state_release_iclog(log, iclog);
889
-
890
- spin_lock(&log->l_icloglock);
891
- switch (iclog->ic_state) {
892
- default:
893
- if (!XLOG_FORCED_SHUTDOWN(log)) {
894
- xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
895
- break;
896
- }
897
- /* fall through */
898
- case XLOG_STATE_ACTIVE:
899
- case XLOG_STATE_DIRTY:
900
- spin_unlock(&log->l_icloglock);
901
- break;
902
- }
878
+ xlog_wait_on_iclog(iclog);
903879
904880 if (tic) {
905881 trace_xfs_log_umount_write(log, tic);
906
- xlog_ungrant_log_space(log, tic);
907
- xfs_log_ticket_put(tic);
882
+ xfs_log_ticket_ungrant(log, tic);
908883 }
884
+}
885
+
886
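+/*
+ * Sanity check that every iclog is idle (ACTIVE with no data pending) before
+ * we write the unmount record and tear down the log.
+ */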
+static void
887
+xfs_log_unmount_verify_iclog(
888
+ struct xlog *log)
889
+{
890
+ struct xlog_in_core *iclog = log->l_iclog;
891
+
892
+ do {
893
+ ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
894
+ ASSERT(iclog->ic_offset == 0);
895
+ } while ((iclog = iclog->ic_next) != log->l_iclog);
909896 }
910897
911898 /*
....@@ -915,79 +902,36 @@
915902 * currently architecture converted and "Unmount" is a bit foo.
916903 * As far as I know, there weren't any dependencies on the old behaviour.
917904 */
918
-
919
-static int
920
-xfs_log_unmount_write(xfs_mount_t *mp)
905
+static void
906
+xfs_log_unmount_write(
907
+ struct xfs_mount *mp)
921908 {
922
- struct xlog *log = mp->m_log;
923
- xlog_in_core_t *iclog;
924
-#ifdef DEBUG
925
- xlog_in_core_t *first_iclog;
926
-#endif
927
- int error;
909
+ struct xlog *log = mp->m_log;
910
+
911
+ if (!xfs_log_writable(mp))
912
+ return;
913
+
914
+ xfs_log_force(mp, XFS_LOG_SYNC);
915
+
916
+ if (XLOG_FORCED_SHUTDOWN(log))
917
+ return;
928918
929919 /*
930
- * Don't write out unmount record on norecovery mounts or ro devices.
931
- * Or, if we are doing a forced umount (typically because of IO errors).
920
+ * If we think the summary counters are bad, avoid writing the unmount
921
+ * record to force log recovery at next mount, after which the summary
922
+ * counters will be recalculated. Refer to xlog_check_unmount_rec for
923
+ * more details.
932924 */
933
- if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
934
- xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
935
- ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
936
- return 0;
925
+ if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
926
+ XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
927
+ xfs_alert(mp, "%s: will fix summary counters at next mount",
928
+ __func__);
929
+ return;
937930 }
938931
939
- error = xfs_log_force(mp, XFS_LOG_SYNC);
940
- ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
941
-
942
-#ifdef DEBUG
943
- first_iclog = iclog = log->l_iclog;
944
- do {
945
- if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
946
- ASSERT(iclog->ic_state & XLOG_STATE_ACTIVE);
947
- ASSERT(iclog->ic_offset == 0);
948
- }
949
- iclog = iclog->ic_next;
950
- } while (iclog != first_iclog);
951
-#endif
952
- if (! (XLOG_FORCED_SHUTDOWN(log))) {
953
- xfs_log_write_unmount_record(mp);
954
- } else {
955
- /*
956
- * We're already in forced_shutdown mode, couldn't
957
- * even attempt to write out the unmount transaction.
958
- *
959
- * Go through the motions of sync'ing and releasing
960
- * the iclog, even though no I/O will actually happen,
961
- * we need to wait for other log I/Os that may already
962
- * be in progress. Do this as a separate section of
963
- * code so we'll know if we ever get stuck here that
964
- * we're in this odd situation of trying to unmount
965
- * a file system that went into forced_shutdown as
966
- * the result of an unmount..
967
- */
968
- spin_lock(&log->l_icloglock);
969
- iclog = log->l_iclog;
970
- atomic_inc(&iclog->ic_refcnt);
971
-
972
- xlog_state_want_sync(log, iclog);
973
- spin_unlock(&log->l_icloglock);
974
- error = xlog_state_release_iclog(log, iclog);
975
-
976
- spin_lock(&log->l_icloglock);
977
-
978
- if ( ! ( iclog->ic_state == XLOG_STATE_ACTIVE
979
- || iclog->ic_state == XLOG_STATE_DIRTY
980
- || iclog->ic_state == XLOG_STATE_IOERROR) ) {
981
-
982
- xlog_wait(&iclog->ic_force_wait,
983
- &log->l_icloglock);
984
- } else {
985
- spin_unlock(&log->l_icloglock);
986
- }
987
- }
988
-
989
- return error;
990
-} /* xfs_log_unmount_write */
932
+ xfs_log_unmount_verify_iclog(log);
933
+ xlog_unmount_write(log);
934
+}
991935
992936 /*
993937 * Empty the log for unmount/freeze.
....@@ -1243,53 +1187,40 @@
12431187 }
12441188
12451189
1246
-/*
1247
- * Log function which is called when an io completes.
1248
- *
1249
- * The log manager needs its own routine, in order to control what
1250
- * happens with the buffer after the write completes.
1251
- */
12521190 static void
1253
-xlog_iodone(xfs_buf_t *bp)
1191
+xlog_ioend_work(
1192
+ struct work_struct *work)
12541193 {
1255
- struct xlog_in_core *iclog = bp->b_log_item;
1256
- struct xlog *l = iclog->ic_log;
1257
- int aborted = 0;
1194
+ struct xlog_in_core *iclog =
1195
+ container_of(work, struct xlog_in_core, ic_end_io_work);
1196
+ struct xlog *log = iclog->ic_log;
1197
+ int error;
1198
+
1199
+ error = blk_status_to_errno(iclog->ic_bio.bi_status);
1200
+#ifdef DEBUG
1201
+ /* treat writes with injected CRC errors as failed */
1202
+ if (iclog->ic_fail_crc)
1203
+ error = -EIO;
1204
+#endif
12581205
12591206 /*
1260
- * Race to shutdown the filesystem if we see an error or the iclog is in
1261
- * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
1262
- * CRC errors into log recovery.
1207
+ * Race to shutdown the filesystem if we see an error.
12631208 */
1264
- if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) ||
1265
- iclog->ic_state & XLOG_STATE_IOABORT) {
1266
- if (iclog->ic_state & XLOG_STATE_IOABORT)
1267
- iclog->ic_state &= ~XLOG_STATE_IOABORT;
1268
-
1269
- xfs_buf_ioerror_alert(bp, __func__);
1270
- xfs_buf_stale(bp);
1271
- xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
1272
- /*
1273
- * This flag will be propagated to the trans-committed
1274
- * callback routines to let them know that the log-commit
1275
- * didn't succeed.
1276
- */
1277
- aborted = XFS_LI_ABORTED;
1278
- } else if (iclog->ic_state & XLOG_STATE_IOERROR) {
1279
- aborted = XFS_LI_ABORTED;
1209
+ if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
1210
+ xfs_alert(log->l_mp, "log I/O error %d", error);
1211
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
12801212 }
12811213
1282
- /* log I/O is always issued ASYNC */
1283
- ASSERT(bp->b_flags & XBF_ASYNC);
1284
- xlog_state_done_syncing(iclog, aborted);
1214
+ xlog_state_done_syncing(iclog);
1215
+ bio_uninit(&iclog->ic_bio);
12851216
12861217 /*
1287
- * drop the buffer lock now that we are done. Nothing references
1288
- * the buffer after this, so an unmount waiting on this lock can now
1289
- * tear it down safely. As such, it is unsafe to reference the buffer
1290
- * (bp) after the unlock as we could race with it being freed.
1218
+ * Drop the lock to signal that we are done. Nothing references the
1219
+ * iclog after this, so an unmount waiting on this lock can now tear it
1220
+ * down safely. As such, it is unsafe to reference the iclog after the
1221
+ * unlock as we could race with it being freed.
12911222 */
1292
- xfs_buf_unlock(bp);
1223
+ up(&iclog->ic_sema);
12931224 }
12941225
12951226 /*
....@@ -1300,65 +1231,26 @@
13001231 * If the filesystem blocksize is too large, we may need to choose a
13011232 * larger size since the directory code currently logs entire blocks.
13021233 */
1303
-
13041234 STATIC void
13051235 xlog_get_iclog_buffer_size(
13061236 struct xfs_mount *mp,
13071237 struct xlog *log)
13081238 {
1309
- int size;
1310
- int xhdrs;
1311
-
13121239 if (mp->m_logbufs <= 0)
1313
- log->l_iclog_bufs = XLOG_MAX_ICLOGS;
1314
- else
1315
- log->l_iclog_bufs = mp->m_logbufs;
1240
+ mp->m_logbufs = XLOG_MAX_ICLOGS;
1241
+ if (mp->m_logbsize <= 0)
1242
+ mp->m_logbsize = XLOG_BIG_RECORD_BSIZE;
1243
+
1244
+ log->l_iclog_bufs = mp->m_logbufs;
1245
+ log->l_iclog_size = mp->m_logbsize;
13161246
13171247 /*
1318
- * Buffer size passed in from mount system call.
1248
+ * # headers = size / 32k - one header holds cycles from 32k of data.
13191249 */
1320
- if (mp->m_logbsize > 0) {
1321
- size = log->l_iclog_size = mp->m_logbsize;
1322
- log->l_iclog_size_log = 0;
1323
- while (size != 1) {
1324
- log->l_iclog_size_log++;
1325
- size >>= 1;
1326
- }
1327
-
1328
- if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1329
- /* # headers = size / 32k
1330
- * one header holds cycles from 32k of data
1331
- */
1332
-
1333
- xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
1334
- if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE)
1335
- xhdrs++;
1336
- log->l_iclog_hsize = xhdrs << BBSHIFT;
1337
- log->l_iclog_heads = xhdrs;
1338
- } else {
1339
- ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE);
1340
- log->l_iclog_hsize = BBSIZE;
1341
- log->l_iclog_heads = 1;
1342
- }
1343
- goto done;
1344
- }
1345
-
1346
- /* All machines use 32kB buffers by default. */
1347
- log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
1348
- log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
1349
-
1350
- /* the default log size is 16k or 32k which is one header sector */
1351
- log->l_iclog_hsize = BBSIZE;
1352
- log->l_iclog_heads = 1;
1353
-
1354
-done:
1355
- /* are we being asked to make the sizes selected above visible? */
1356
- if (mp->m_logbufs == 0)
1357
- mp->m_logbufs = log->l_iclog_bufs;
1358
- if (mp->m_logbsize == 0)
1359
- mp->m_logbsize = log->l_iclog_size;
1360
-} /* xlog_get_iclog_buffer_size */
1361
-
1250
+ log->l_iclog_heads =
1251
+ DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE);
1252
+ log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT;
1253
+}
13621254
13631255 void
13641256 xfs_log_work_queue(
....@@ -1421,7 +1313,6 @@
14211313 xlog_rec_header_t *head;
14221314 xlog_in_core_t **iclogp;
14231315 xlog_in_core_t *iclog, *prev_iclog=NULL;
1424
- xfs_buf_t *bp;
14251316 int i;
14261317 int error = -ENOMEM;
14271318 uint log2_size = 0;
....@@ -1479,30 +1370,6 @@
14791370
14801371 xlog_get_iclog_buffer_size(mp, log);
14811372
1482
- /*
1483
- * Use a NULL block for the extra log buffer used during splits so that
1484
- * it will trigger errors if we ever try to do IO on it without first
1485
- * having set it up properly.
1486
- */
1487
- error = -ENOMEM;
1488
- bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
1489
- BTOBB(log->l_iclog_size), XBF_NO_IOACCT);
1490
- if (!bp)
1491
- goto out_free_log;
1492
-
1493
- /*
1494
- * The iclogbuf buffer locks are held over IO but we are not going to do
1495
- * IO yet. Hence unlock the buffer so that the log IO path can grab it
1496
- * when appropriately.
1497
- */
1498
- ASSERT(xfs_buf_islocked(bp));
1499
- xfs_buf_unlock(bp);
1500
-
1501
- /* use high priority wq for log I/O completion */
1502
- bp->b_ioend_wq = mp->m_log_workqueue;
1503
- bp->b_iodone = xlog_iodone;
1504
- log->l_xbuf = bp;
1505
-
15061373 spin_lock_init(&log->l_icloglock);
15071374 init_waitqueue_head(&log->l_flush_wait);
15081375
....@@ -1515,29 +1382,23 @@
15151382 * xlog_in_core_t in xfs_log_priv.h for details.
15161383 */
15171384 ASSERT(log->l_iclog_size >= 4096);
1518
- for (i=0; i < log->l_iclog_bufs; i++) {
1519
- *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
1520
- if (!*iclogp)
1385
+ for (i = 0; i < log->l_iclog_bufs; i++) {
1386
+ int align_mask = xfs_buftarg_dma_alignment(mp->m_logdev_targp);
1387
+ size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
1388
+ sizeof(struct bio_vec);
1389
+
1390
+ iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL);
1391
+ if (!iclog)
15211392 goto out_free_iclog;
15221393
1523
- iclog = *iclogp;
1394
+ *iclogp = iclog;
15241395 iclog->ic_prev = prev_iclog;
15251396 prev_iclog = iclog;
15261397
1527
- bp = xfs_buf_get_uncached(mp->m_logdev_targp,
1528
- BTOBB(log->l_iclog_size),
1529
- XBF_NO_IOACCT);
1530
- if (!bp)
1398
+ iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask,
1399
+ KM_MAYFAIL | KM_ZERO);
1400
+ if (!iclog->ic_data)
15311401 goto out_free_iclog;
1532
-
1533
- ASSERT(xfs_buf_islocked(bp));
1534
- xfs_buf_unlock(bp);
1535
-
1536
- /* use high priority wq for log I/O completion */
1537
- bp->b_ioend_wq = mp->m_log_workqueue;
1538
- bp->b_iodone = xlog_iodone;
1539
- iclog->ic_bp = bp;
1540
- iclog->ic_data = bp->b_addr;
15411402 #ifdef DEBUG
15421403 log->l_iclog_bak[i] = &iclog->ic_header;
15431404 #endif
....@@ -1551,58 +1412,62 @@
15511412 head->h_fmt = cpu_to_be32(XLOG_FMT);
15521413 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
15531414
1554
- iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize;
1415
+ iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize;
15551416 iclog->ic_state = XLOG_STATE_ACTIVE;
15561417 iclog->ic_log = log;
15571418 atomic_set(&iclog->ic_refcnt, 0);
15581419 spin_lock_init(&iclog->ic_callback_lock);
1559
- iclog->ic_callback_tail = &(iclog->ic_callback);
1420
+ INIT_LIST_HEAD(&iclog->ic_callbacks);
15601421 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
15611422
15621423 init_waitqueue_head(&iclog->ic_force_wait);
15631424 init_waitqueue_head(&iclog->ic_write_wait);
1425
+ INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work);
1426
+ sema_init(&iclog->ic_sema, 1);
15641427
15651428 iclogp = &iclog->ic_next;
15661429 }
15671430 *iclogp = log->l_iclog; /* complete ring */
15681431 log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
15691432
1433
+ log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
1434
+ WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
1435
+ mp->m_super->s_id);
1436
+ if (!log->l_ioend_workqueue)
1437
+ goto out_free_iclog;
1438
+
15701439 error = xlog_cil_init(log);
15711440 if (error)
1572
- goto out_free_iclog;
1441
+ goto out_destroy_workqueue;
15731442 return log;
15741443
1444
+out_destroy_workqueue:
1445
+ destroy_workqueue(log->l_ioend_workqueue);
15751446 out_free_iclog:
15761447 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
15771448 prev_iclog = iclog->ic_next;
1578
- if (iclog->ic_bp)
1579
- xfs_buf_free(iclog->ic_bp);
1449
+ kmem_free(iclog->ic_data);
15801450 kmem_free(iclog);
15811451 if (prev_iclog == log->l_iclog)
15821452 break;
15831453 }
1584
- spinlock_destroy(&log->l_icloglock);
1585
- xfs_buf_free(log->l_xbuf);
15861454 out_free_log:
15871455 kmem_free(log);
15881456 out:
15891457 return ERR_PTR(error);
15901458 } /* xlog_alloc_log */
15911459
1592
-
15931460 /*
15941461 * Write out the commit record of a transaction associated with the given
1595
- * ticket. Return the lsn of the commit record.
1462
+ * ticket to close off a running log write. Return the lsn of the commit record.
15961463 */
1597
-STATIC int
1464
+int
15981465 xlog_commit_record(
15991466 struct xlog *log,
16001467 struct xlog_ticket *ticket,
16011468 struct xlog_in_core **iclog,
1602
- xfs_lsn_t *commitlsnp)
1469
+ xfs_lsn_t *lsn)
16031470 {
1604
- struct xfs_mount *mp = log->l_mp;
1605
- int error;
16061471 struct xfs_log_iovec reg = {
16071472 .i_addr = NULL,
16081473 .i_len = 0,
....@@ -1612,24 +1477,27 @@
16121477 .lv_niovecs = 1,
16131478 .lv_iovecp = &reg,
16141479 };
1480
+ int error;
16151481
1616
- ASSERT_ALWAYS(iclog);
1617
- error = xlog_write(log, &vec, ticket, commitlsnp, iclog,
1618
- XLOG_COMMIT_TRANS);
1482
+ if (XLOG_FORCED_SHUTDOWN(log))
1483
+ return -EIO;
1484
+
1485
+ error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS,
1486
+ false);
16191487 if (error)
1620
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1488
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
16211489 return error;
16221490 }
16231491
16241492 /*
1625
- * Push on the buffer cache code if we ever use more than 75% of the on-disk
1626
- * log space. This code pushes on the lsn which would supposedly free up
1627
- * the 25% which we want to leave free. We may need to adopt a policy which
1628
- * pushes on an lsn which is further along in the log once we reach the high
1629
- * water mark. In this manner, we would be creating a low water mark.
1493
+ * Compute the LSN that we'd need to push the log tail towards in order to have
1494
+ * (a) enough on-disk log space to log the number of bytes specified, (b) at
1495
+ * least 25% of the log space free, and (c) at least 256 blocks free. If the
1496
+ * log free space already meets all three thresholds, this function returns
1497
+ * NULLCOMMITLSN.
16301498 */
1631
-STATIC void
1632
-xlog_grant_push_ail(
1499
+xfs_lsn_t
1500
+xlog_grant_push_threshold(
16331501 struct xlog *log,
16341502 int need_bytes)
16351503 {
....@@ -1655,7 +1523,7 @@
16551523 free_threshold = max(free_threshold, (log->l_logBBsize >> 2));
16561524 free_threshold = max(free_threshold, 256);
16571525 if (free_blocks >= free_threshold)
1658
- return;
1526
+ return NULLCOMMITLSN;
16591527
16601528 xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
16611529 &threshold_block);
....@@ -1675,13 +1543,33 @@
16751543 if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
16761544 threshold_lsn = last_sync_lsn;
16771545
1546
+ return threshold_lsn;
1547
+}
1548
+
1549
+/*
1550
+ * Push the tail of the log if we need to do so to maintain the free log space
1551
+ * thresholds set out by xlog_grant_push_threshold. We may need to adopt a
1552
+ * policy which pushes on an lsn which is further along in the log once we
1553
+ * reach the high water mark. In this manner, we would be creating a low water
1554
+ * mark.
1555
+ */
1556
+STATIC void
1557
+xlog_grant_push_ail(
1558
+ struct xlog *log,
1559
+ int need_bytes)
1560
+{
1561
+ xfs_lsn_t threshold_lsn;
1562
+
1563
+ threshold_lsn = xlog_grant_push_threshold(log, need_bytes);
1564
+ if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log))
1565
+ return;
1566
+
16781567 /*
16791568 * Get the transaction layer to kick the dirty buffers out to
16801569 * disk asynchronously. No point in trying to do this if
16811570 * the filesystem is shutting down.
16821571 */
1683
- if (!XLOG_FORCED_SHUTDOWN(log))
1684
- xfs_ail_push(log->l_ailp, threshold_lsn);
1572
+ xfs_ail_push(log->l_ailp, threshold_lsn);
16851573 }
16861574
16871575 /*
....@@ -1751,9 +1639,7 @@
17511639 int i;
17521640 int xheads;
17531641
1754
- xheads = size / XLOG_HEADER_CYCLE_SIZE;
1755
- if (size % XLOG_HEADER_CYCLE_SIZE)
1756
- xheads++;
1642
+ xheads = DIV_ROUND_UP(size, XLOG_HEADER_CYCLE_SIZE);
17571643
17581644 for (i = 1; i < xheads; i++) {
17591645 crc = crc32c(crc, &xhdr[i].hic_xheader,
....@@ -1767,42 +1653,167 @@
17671653 return xfs_end_cksum(crc);
17681654 }
17691655
1770
-/*
1771
- * The bdstrat callback function for log bufs. This gives us a central
1772
- * place to trap bufs in case we get hit by a log I/O error and need to
1773
- * shutdown. Actually, in practice, even when we didn't get a log error,
1774
- * we transition the iclogs to IOERROR state *after* flushing all existing
1775
- * iclogs to disk. This is because we don't want anymore new transactions to be
1776
- * started or completed afterwards.
1777
- *
1778
- * We lock the iclogbufs here so that we can serialise against IO completion
1779
- * during unmount. We might be processing a shutdown triggered during unmount,
1780
- * and that can occur asynchronously to the unmount thread, and hence we need to
1781
- * ensure that completes before tearing down the iclogbufs. Hence we need to
1782
- * hold the buffer lock across the log IO to acheive that.
1783
- */
1784
-STATIC int
1785
-xlog_bdstrat(
1786
- struct xfs_buf *bp)
1656
+static void
1657
+xlog_bio_end_io(
1658
+ struct bio *bio)
17871659 {
1788
- struct xlog_in_core *iclog = bp->b_log_item;
1660
+ struct xlog_in_core *iclog = bio->bi_private;
17891661
1790
- xfs_buf_lock(bp);
1791
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
1792
- xfs_buf_ioerror(bp, -EIO);
1793
- xfs_buf_stale(bp);
1794
- xfs_buf_ioend(bp);
1662
+ queue_work(iclog->ic_log->l_ioend_workqueue,
1663
+ &iclog->ic_end_io_work);
1664
+}
1665
+
1666
+static int
1667
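+/*
+ * Add the iclog data to the bio a page at a time. The iclog buffer may be
+ * allocated from vmalloc space for large log buffer sizes, so each chunk has
+ * to be translated to its backing page before it can be attached to the bio.
+ */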
+xlog_map_iclog_data(
1668
+ struct bio *bio,
1669
+ void *data,
1670
+ size_t count)
1671
+{
1672
+ do {
1673
+ struct page *page = kmem_to_page(data);
1674
+ unsigned int off = offset_in_page(data);
1675
+ size_t len = min_t(size_t, count, PAGE_SIZE - off);
1676
+
1677
+ if (bio_add_page(bio, page, len, off) != len)
1678
+ return -EIO;
1679
+
1680
+ data += len;
1681
+ count -= len;
1682
+ } while (count);
1683
+
1684
+ return 0;
1685
+}
1686
+
1687
+STATIC void
1688
+xlog_write_iclog(
1689
+ struct xlog *log,
1690
+ struct xlog_in_core *iclog,
1691
+ uint64_t bno,
1692
+ unsigned int count,
1693
+ bool need_flush)
1694
+{
1695
+ ASSERT(bno < log->l_logBBsize);
1696
+
1697
+ /*
1698
+ * We lock the iclogbufs here so that we can serialise against I/O
1699
+ * completion during unmount. We might be processing a shutdown
1700
+ * triggered during unmount, and that can occur asynchronously to the
1701
+ * unmount thread, and hence we need to ensure that completes before
1702
+ * tearing down the iclogbufs. Hence we need to hold the buffer lock
1703
+ * across the log IO to achieve that.
1704
+ */
1705
+ down(&iclog->ic_sema);
1706
+ if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) {
17951707 /*
17961708 * It would seem logical to return EIO here, but we rely on
17971709 * the log state machine to propagate I/O errors instead of
1798
- * doing it here. Similarly, IO completion will unlock the
1799
- * buffer, so we don't do it here.
1710
+ * doing it here. We kick of the state machine and unlock
1711
+ * the buffer manually, the code needs to be kept in sync
1712
+ * with the I/O completion path.
18001713 */
1801
- return 0;
1714
+ xlog_state_done_syncing(iclog);
1715
+ up(&iclog->ic_sema);
1716
+ return;
18021717 }
18031718
1804
- xfs_buf_submit(bp);
1805
- return 0;
1719
+ bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
1720
+ bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
1721
+ iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
1722
+ iclog->ic_bio.bi_end_io = xlog_bio_end_io;
1723
+ iclog->ic_bio.bi_private = iclog;
1724
+
1725
+ /*
1726
+ * We use REQ_SYNC | REQ_IDLE here to tell the block layer there are more
1727
+ * IOs coming immediately after this one. This prevents the block layer
1728
+ * writeback throttle from throttling log writes behind background
1729
+ * metadata writeback and causing priority inversions.
1730
+ */
1731
+ iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC |
1732
+ REQ_IDLE | REQ_FUA;
1733
+ if (need_flush)
1734
+ iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
1735
+
1736
+ if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
1737
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
1738
+ return;
1739
+ }
1740
+ if (is_vmalloc_addr(iclog->ic_data))
1741
+ flush_kernel_vmap_range(iclog->ic_data, count);
1742
+
1743
+ /*
1744
+ * If this log buffer would straddle the end of the log we will have
1745
+ * to split it up into two bios, so that we can continue at the start.
1746
+ */
1747
+ if (bno + BTOBB(count) > log->l_logBBsize) {
1748
+ struct bio *split;
1749
+
1750
+ split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno,
1751
+ GFP_NOIO, &fs_bio_set);
1752
+ bio_chain(split, &iclog->ic_bio);
1753
+ submit_bio(split);
1754
+
1755
+ /* restart at logical offset zero for the remainder */
1756
+ iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart;
1757
+ }
1758
+
1759
+ submit_bio(&iclog->ic_bio);
1760
+}
1761
+
1762
+/*
1763
+ * We need to bump cycle number for the part of the iclog that is
1764
+ * written to the start of the log. Watch out for the header magic
1765
+ * number case, though.
1766
+ */
1767
+static void
1768
+xlog_split_iclog(
1769
+ struct xlog *log,
1770
+ void *data,
1771
+ uint64_t bno,
1772
+ unsigned int count)
1773
+{
1774
+ unsigned int split_offset = BBTOB(log->l_logBBsize - bno);
1775
+ unsigned int i;
1776
+
1777
+ for (i = split_offset; i < count; i += BBSIZE) {
1778
+ uint32_t cycle = get_unaligned_be32(data + i);
1779
+
1780
+ if (++cycle == XLOG_HEADER_MAGIC_NUM)
1781
+ cycle++;
1782
+ put_unaligned_be32(cycle, data + i);
1783
+ }
1784
+}
1785
+
1786
+static int
1787
+xlog_calc_iclog_size(
1788
+ struct xlog *log,
1789
+ struct xlog_in_core *iclog,
1790
+ uint32_t *roundoff)
1791
+{
1792
+ uint32_t count_init, count;
1793
+ bool use_lsunit;
1794
+
1795
+ use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
1796
+ log->l_mp->m_sb.sb_logsunit > 1;
1797
+
1798
+ /* Add for LR header */
1799
+ count_init = log->l_iclog_hsize + iclog->ic_offset;
1800
+
1801
+ /* Round out the log write size */
1802
+ if (use_lsunit) {
1803
+ /* we have a v2 stripe unit to use */
1804
+ count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
1805
+ } else {
1806
+ count = BBTOB(BTOBB(count_init));
1807
+ }
1808
+
1809
+ ASSERT(count >= count_init);
1810
+ *roundoff = count - count_init;
1811
+
1812
+ if (use_lsunit)
1813
+ ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit);
1814
+ else
1815
+ ASSERT(*roundoff < BBTOB(1));
1816
+ return count;
18061817 }
18071818
18081819 /*
....@@ -1825,46 +1836,23 @@
18251836 * log will require grabbing the lock though.
18261837 *
18271838 * The entire log manager uses a logical block numbering scheme. Only
1828
- * log_sync (and then only bwrite()) know about the fact that the log may
1829
- * not start with block zero on a given device. The log block start offset
1830
- * is added immediately before calling bwrite().
1839
+ * xlog_write_iclog knows about the fact that the log may not start with
1840
+ * block zero on a given device.
18311841 */
1832
-
1833
-STATIC int
1842
+STATIC void
18341843 xlog_sync(
18351844 struct xlog *log,
18361845 struct xlog_in_core *iclog)
18371846 {
1838
- xfs_buf_t *bp;
1839
- int i;
1840
- uint count; /* byte count of bwrite */
1841
- uint count_init; /* initial count before roundup */
1842
- int roundoff; /* roundoff to BB or stripe */
1843
- int split = 0; /* split write into two regions */
1844
- int error;
1845
- int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
1846
- int size;
1847
+ unsigned int count; /* byte count of bwrite */
1848
+ unsigned int roundoff; /* roundoff to BB or stripe */
1849
+ uint64_t bno;
1850
+ unsigned int size;
1851
+ bool need_flush = true, split = false;
18471852
1848
- XFS_STATS_INC(log->l_mp, xs_log_writes);
18491853 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
18501854
1851
- /* Add for LR header */
1852
- count_init = log->l_iclog_hsize + iclog->ic_offset;
1853
-
1854
- /* Round out the log write size */
1855
- if (v2 && log->l_mp->m_sb.sb_logsunit > 1) {
1856
- /* we have a v2 stripe unit to use */
1857
- count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
1858
- } else {
1859
- count = BBTOB(BTOBB(count_init));
1860
- }
1861
- roundoff = count - count_init;
1862
- ASSERT(roundoff >= 0);
1863
- ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 &&
1864
- roundoff < log->l_mp->m_sb.sb_logsunit)
1865
- ||
1866
- (log->l_mp->m_sb.sb_logsunit <= 1 &&
1867
- roundoff < BBTOB(1)));
1855
+ count = xlog_calc_iclog_size(log, iclog, &roundoff);
18681856
18691857 /* move grant heads by roundoff in sync */
18701858 xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
....@@ -1875,41 +1863,19 @@
18751863
18761864 /* real byte length */
18771865 size = iclog->ic_offset;
1878
- if (v2)
1866
+ if (xfs_sb_version_haslogv2(&log->l_mp->m_sb))
18791867 size += roundoff;
18801868 iclog->ic_header.h_len = cpu_to_be32(size);
18811869
1882
- bp = iclog->ic_bp;
1883
- XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
1884
-
1870
+ XFS_STATS_INC(log->l_mp, xs_log_writes);
18851871 XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
18861872
1873
+ bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn));
1874
+
18871875 /* Do we need to split this write into 2 parts? */
1888
- if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
1889
- char *dptr;
1890
-
1891
- split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)));
1892
- count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp));
1893
- iclog->ic_bwritecnt = 2;
1894
-
1895
- /*
1896
- * Bump the cycle numbers at the start of each block in the
1897
- * part of the iclog that ends up in the buffer that gets
1898
- * written to the start of the log.
1899
- *
1900
- * Watch out for the header magic number case, though.
1901
- */
1902
- dptr = (char *)&iclog->ic_header + count;
1903
- for (i = 0; i < split; i += BBSIZE) {
1904
- uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
1905
- if (++cycle == XLOG_HEADER_MAGIC_NUM)
1906
- cycle++;
1907
- *(__be32 *)dptr = cpu_to_be32(cycle);
1908
-
1909
- dptr += BBSIZE;
1910
- }
1911
- } else {
1912
- iclog->ic_bwritecnt = 1;
1876
+ if (bno + BTOBB(count) > log->l_logBBsize) {
1877
+ xlog_split_iclog(log, &iclog->ic_header, bno, count);
1878
+ split = true;
19131879 }
19141880
19151881 /* calculate the checksum */
....@@ -1922,18 +1888,15 @@
19221888 * write on I/O completion and shutdown the fs. The subsequent mount
19231889 * detects the bad CRC and attempts to recover.
19241890 */
1891
+#ifdef DEBUG
19251892 if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
19261893 iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
1927
- iclog->ic_state |= XLOG_STATE_IOABORT;
1894
+ iclog->ic_fail_crc = true;
19281895 xfs_warn(log->l_mp,
19291896 "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
19301897 be64_to_cpu(iclog->ic_header.h_lsn));
19311898 }
1932
-
1933
- bp->b_io_length = BTOBB(count);
1934
- bp->b_log_item = iclog;
1935
- bp->b_flags &= ~XBF_FLUSH;
1936
- bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
1899
+#endif
19371900
19381901 /*
19391902 * Flush the data device before flushing the log to make sure all meta
....@@ -1943,50 +1906,14 @@
19431906 * synchronously here; for an internal log we can simply use the block
19441907 * layer state machine for preflushes.
19451908 */
1946
- if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
1909
+ if (log->l_targ != log->l_mp->m_ddev_targp || split) {
19471910 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
1948
- else
1949
- bp->b_flags |= XBF_FLUSH;
1950
-
1951
- ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1952
- ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
1953
-
1954
- xlog_verify_iclog(log, iclog, count, true);
1955
-
1956
- /* account for log which doesn't start at block #0 */
1957
- XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
1958
-
1959
- /*
1960
- * Don't call xfs_bwrite here. We do log-syncs even when the filesystem
1961
- * is shutting down.
1962
- */
1963
- error = xlog_bdstrat(bp);
1964
- if (error) {
1965
- xfs_buf_ioerror_alert(bp, "xlog_sync");
1966
- return error;
1911
+ need_flush = false;
19671912 }
1968
- if (split) {
1969
- bp = iclog->ic_log->l_xbuf;
1970
- XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
1971
- xfs_buf_associate_memory(bp,
1972
- (char *)&iclog->ic_header + count, split);
1973
- bp->b_log_item = iclog;
1974
- bp->b_flags &= ~XBF_FLUSH;
1975
- bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
19761913
1977
- ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1978
- ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
1979
-
1980
- /* account for internal log which doesn't start at block #0 */
1981
- XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
1982
- error = xlog_bdstrat(bp);
1983
- if (error) {
1984
- xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
1985
- return error;
1986
- }
1987
- }
1988
- return 0;
1989
-} /* xlog_sync */
1914
+ xlog_verify_iclog(log, iclog, count);
1915
+ xlog_write_iclog(log, iclog, bno, count, need_flush);
1916
+}
19901917
19911918 /*
19921919 * Deallocate a log structure
....@@ -2006,38 +1933,27 @@
20061933 */
20071934 iclog = log->l_iclog;
20081935 for (i = 0; i < log->l_iclog_bufs; i++) {
2009
- xfs_buf_lock(iclog->ic_bp);
2010
- xfs_buf_unlock(iclog->ic_bp);
1936
+ down(&iclog->ic_sema);
1937
+ up(&iclog->ic_sema);
20111938 iclog = iclog->ic_next;
20121939 }
20131940
2014
- /*
2015
- * Always need to ensure that the extra buffer does not point to memory
2016
- * owned by another log buffer before we free it. Also, cycle the lock
2017
- * first to ensure we've completed IO on it.
2018
- */
2019
- xfs_buf_lock(log->l_xbuf);
2020
- xfs_buf_unlock(log->l_xbuf);
2021
- xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
2022
- xfs_buf_free(log->l_xbuf);
2023
-
20241941 iclog = log->l_iclog;
20251942 for (i = 0; i < log->l_iclog_bufs; i++) {
2026
- xfs_buf_free(iclog->ic_bp);
20271943 next_iclog = iclog->ic_next;
1944
+ kmem_free(iclog->ic_data);
20281945 kmem_free(iclog);
20291946 iclog = next_iclog;
20301947 }
2031
- spinlock_destroy(&log->l_icloglock);
20321948
20331949 log->l_mp->m_log = NULL;
1950
+ destroy_workqueue(log->l_ioend_workqueue);
20341951 kmem_free(log);
2035
-} /* xlog_dealloc_log */
1952
+}
20361953
20371954 /*
20381955 * Update counters atomically now that memcpy is done.
20391956 */
2040
-/* ARGSUSED */
20411957 static inline void
20421958 xlog_state_finish_copy(
20431959 struct xlog *log,
....@@ -2045,16 +1961,11 @@
20451961 int record_cnt,
20461962 int copy_bytes)
20471963 {
2048
- spin_lock(&log->l_icloglock);
1964
+ lockdep_assert_held(&log->l_icloglock);
20491965
20501966 be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt);
20511967 iclog->ic_offset += copy_bytes;
2052
-
2053
- spin_unlock(&log->l_icloglock);
2054
-} /* xlog_state_finish_copy */
2055
-
2056
-
2057
-
1968
+}
20581969
20591970 /*
20601971 * print out info relating to regions written which consume
....@@ -2070,7 +1981,7 @@
20701981
20711982 /* match with XLOG_REG_TYPE_* in xfs_log.h */
20721983 #define REG_TYPE_STR(type, str) [XLOG_REG_TYPE_##type] = str
2073
- static char *res_type_str[XLOG_REG_TYPE_MAX + 1] = {
1984
+ static char *res_type_str[] = {
20741985 REG_TYPE_STR(BFORMAT, "bformat"),
20751986 REG_TYPE_STR(BCHUNK, "bchunk"),
20761987 REG_TYPE_STR(EFI_FORMAT, "efi_format"),
....@@ -2090,8 +2001,15 @@
20902001 REG_TYPE_STR(UNMOUNT, "unmount"),
20912002 REG_TYPE_STR(COMMIT, "commit"),
20922003 REG_TYPE_STR(TRANSHDR, "trans header"),
2093
- REG_TYPE_STR(ICREATE, "inode create")
2004
+ REG_TYPE_STR(ICREATE, "inode create"),
2005
+ REG_TYPE_STR(RUI_FORMAT, "rui_format"),
2006
+ REG_TYPE_STR(RUD_FORMAT, "rud_format"),
2007
+ REG_TYPE_STR(CUI_FORMAT, "cui_format"),
2008
+ REG_TYPE_STR(CUD_FORMAT, "cud_format"),
2009
+ REG_TYPE_STR(BUI_FORMAT, "bui_format"),
2010
+ REG_TYPE_STR(BUD_FORMAT, "bud_format"),
20942011 };
2012
+ BUILD_BUG_ON(ARRAY_SIZE(res_type_str) != XLOG_REG_TYPE_MAX + 1);
20952013 #undef REG_TYPE_STR
20962014
20972015 xfs_warn(mp, "ticket reservation summary:");
....@@ -2168,22 +2086,20 @@
21682086 }
21692087
21702088 /*
2171
- * Calculate the potential space needed by the log vector. Each region gets
2172
- * its own xlog_op_header_t and may need to be double word aligned.
2089
+ * Calculate the potential space needed by the log vector. We may need a start
2090
+ * record, and each region gets its own struct xlog_op_header and may need to be
2091
+ * double word aligned.
21732092 */
21742093 static int
21752094 xlog_write_calc_vec_length(
21762095 struct xlog_ticket *ticket,
2177
- struct xfs_log_vec *log_vector)
2096
+ struct xfs_log_vec *log_vector,
2097
+ bool need_start_rec)
21782098 {
21792099 struct xfs_log_vec *lv;
2180
- int headers = 0;
2100
+ int headers = need_start_rec ? 1 : 0;
21812101 int len = 0;
21822102 int i;
2183
-
2184
- /* acct for start rec of xact */
2185
- if (ticket->t_flags & XLOG_TIC_INITED)
2186
- headers++;
21872103
21882104 for (lv = log_vector; lv; lv = lv->lv_next) {
21892105 /* we don't write ordered log vectors */
....@@ -2206,27 +2122,16 @@
22062122 return len;
22072123 }
22082124
2209
-/*
2210
- * If first write for transaction, insert start record We can't be trying to
2211
- * commit if we are inited. We can't have any "partial_copy" if we are inited.
2212
- */
2213
-static int
2125
+static void
22142126 xlog_write_start_rec(
22152127 struct xlog_op_header *ophdr,
22162128 struct xlog_ticket *ticket)
22172129 {
2218
- if (!(ticket->t_flags & XLOG_TIC_INITED))
2219
- return 0;
2220
-
22212130 ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
22222131 ophdr->oh_clientid = ticket->t_clientid;
22232132 ophdr->oh_len = 0;
22242133 ophdr->oh_flags = XLOG_START_TRANS;
22252134 ophdr->oh_res2 = 0;
2226
-
2227
- ticket->t_flags &= ~XLOG_TIC_INITED;
2228
-
2229
- return sizeof(struct xlog_op_header);
22302135 }
22312136
22322137 static xlog_op_header_t *
....@@ -2324,15 +2229,18 @@
23242229 int log_offset,
23252230 struct xlog_in_core **commit_iclog)
23262231 {
2232
+ int error;
2233
+
23272234 if (*partial_copy) {
23282235 /*
23292236 * This iclog has already been marked WANT_SYNC by
23302237 * xlog_state_get_iclog_space.
23312238 */
2239
+ spin_lock(&log->l_icloglock);
23322240 xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
23332241 *record_cnt = 0;
23342242 *data_cnt = 0;
2335
- return xlog_state_release_iclog(log, iclog);
2243
+ goto release_iclog;
23362244 }
23372245
23382246 *partial_copy = 0;
....@@ -2340,21 +2248,29 @@
23402248
23412249 if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
23422250 /* no more space in this iclog - push it. */
2251
+ spin_lock(&log->l_icloglock);
23432252 xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
23442253 *record_cnt = 0;
23452254 *data_cnt = 0;
23462255
2347
- spin_lock(&log->l_icloglock);
2348
- xlog_state_want_sync(log, iclog);
2349
- spin_unlock(&log->l_icloglock);
2350
-
2256
+ if (iclog->ic_state == XLOG_STATE_ACTIVE)
2257
+ xlog_state_switch_iclogs(log, iclog, 0);
2258
+ else
2259
+ ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
2260
+ iclog->ic_state == XLOG_STATE_IOERROR);
23512261 if (!commit_iclog)
2352
- return xlog_state_release_iclog(log, iclog);
2262
+ goto release_iclog;
2263
+ spin_unlock(&log->l_icloglock);
23532264 ASSERT(flags & XLOG_COMMIT_TRANS);
23542265 *commit_iclog = iclog;
23552266 }
23562267
23572268 return 0;
2269
+
2270
+release_iclog:
2271
+ error = xlog_state_release_iclog(log, iclog);
2272
+ spin_unlock(&log->l_icloglock);
2273
+ return error;
23582274 }
23592275
23602276 /*
....@@ -2404,39 +2320,28 @@
24042320 struct xlog_ticket *ticket,
24052321 xfs_lsn_t *start_lsn,
24062322 struct xlog_in_core **commit_iclog,
2407
- uint flags)
2323
+ uint flags,
2324
+ bool need_start_rec)
24082325 {
24092326 struct xlog_in_core *iclog = NULL;
2410
- struct xfs_log_iovec *vecp;
2411
- struct xfs_log_vec *lv;
2327
+ struct xfs_log_vec *lv = log_vector;
2328
+ struct xfs_log_iovec *vecp = lv->lv_iovecp;
2329
+ int index = 0;
24122330 int len;
2413
- int index;
24142331 int partial_copy = 0;
24152332 int partial_copy_len = 0;
24162333 int contwr = 0;
24172334 int record_cnt = 0;
24182335 int data_cnt = 0;
2419
- int error;
2420
-
2421
- *start_lsn = 0;
2422
-
2423
- len = xlog_write_calc_vec_length(ticket, log_vector);
2336
+ int error = 0;
24242337
24252338 /*
2426
- * Region headers and bytes are already accounted for.
2427
- * We only need to take into account start records and
2428
- * split regions in this function.
2339
+ * If this is a commit or unmount transaction, we don't need a start
2340
+ * record to be written. We do, however, have to account for the
2341
+ * commit or unmount header that gets written. Hence we always have
2342
+ * to account for an extra xlog_op_header here.
24292343 */
2430
- if (ticket->t_flags & XLOG_TIC_INITED)
2431
- ticket->t_curr_res -= sizeof(xlog_op_header_t);
2432
-
2433
- /*
2434
- * Commit record headers need to be accounted for. These
2435
- * come in as separate writes so are easy to detect.
2436
- */
2437
- if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
2438
- ticket->t_curr_res -= sizeof(xlog_op_header_t);
2439
-
2344
+ ticket->t_curr_res -= sizeof(struct xlog_op_header);
24402345 if (ticket->t_curr_res < 0) {
24412346 xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
24422347 "ctx ticket reservation ran out. Need to up reservation");
....@@ -2444,9 +2349,8 @@
24442349 xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
24452350 }
24462351
2447
- index = 0;
2448
- lv = log_vector;
2449
- vecp = lv->lv_iovecp;
2352
+ len = xlog_write_calc_vec_length(ticket, log_vector, need_start_rec);
2353
+ *start_lsn = 0;
24502354 while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
24512355 void *ptr;
24522356 int log_offset;
....@@ -2470,7 +2374,6 @@
24702374 while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
24712375 struct xfs_log_iovec *reg;
24722376 struct xlog_op_header *ophdr;
2473
- int start_rec_copy;
24742377 int copy_len;
24752378 int copy_off;
24762379 bool ordered = false;
....@@ -2486,11 +2389,15 @@
24862389 ASSERT(reg->i_len % sizeof(int32_t) == 0);
24872390 ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
24882391
2489
- start_rec_copy = xlog_write_start_rec(ptr, ticket);
2490
- if (start_rec_copy) {
2491
- record_cnt++;
2392
+ /*
2393
+ * Before we start formatting log vectors, we need to
2394
+ * write a start record. Only do this for the first
2395
+ * iclog we write to.
2396
+ */
2397
+ if (need_start_rec) {
2398
+ xlog_write_start_rec(ptr, ticket);
24922399 xlog_write_adv_cnt(&ptr, &len, &log_offset,
2493
- start_rec_copy);
2400
+ sizeof(struct xlog_op_header));
24942401 }
24952402
24962403 ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
....@@ -2522,8 +2429,13 @@
25222429 xlog_write_adv_cnt(&ptr, &len, &log_offset,
25232430 copy_len);
25242431 }
2525
- copy_len += start_rec_copy + sizeof(xlog_op_header_t);
2432
+ copy_len += sizeof(struct xlog_op_header);
25262433 record_cnt++;
2434
+ if (need_start_rec) {
2435
+ copy_len += sizeof(struct xlog_op_header);
2436
+ record_cnt++;
2437
+ need_start_rec = false;
2438
+ }
25272439 data_cnt += contwr ? copy_len : 0;
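			/*
			 * Editorial example, not part of the patch: the first
			 * region copied while need_start_rec is true accounts
			 * for two op headers (the start record's and its own),
			 * so record_cnt advances by two; every later region
			 * accounts for exactly one op header and one record.
			 */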
25282440
25292441 error = xlog_write_copy_finish(log, iclog, flags,
....@@ -2567,158 +2479,284 @@
25672479
25682480 ASSERT(len == 0);
25692481
2482
+ spin_lock(&log->l_icloglock);
25702483 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
2571
- if (!commit_iclog)
2572
- return xlog_state_release_iclog(log, iclog);
2484
+ if (commit_iclog) {
2485
+ ASSERT(flags & XLOG_COMMIT_TRANS);
2486
+ *commit_iclog = iclog;
2487
+ } else {
2488
+ error = xlog_state_release_iclog(log, iclog);
2489
+ }
2490
+ spin_unlock(&log->l_icloglock);
25732491
2574
- ASSERT(flags & XLOG_COMMIT_TRANS);
2575
- *commit_iclog = iclog;
2576
- return 0;
2492
+ return error;
25772493 }
25782494
2579
-
2580
-/*****************************************************************************
2581
- *
2582
- * State Machine functions
2583
- *
2584
- *****************************************************************************
2585
- */
2586
-
2587
-/* Clean iclogs starting from the head. This ordering must be
2588
- * maintained, so an iclog doesn't become ACTIVE beyond one that
2589
- * is SYNCING. This is also required to maintain the notion that we use
2590
- * a ordered wait queue to hold off would be writers to the log when every
2591
- * iclog is trying to sync to disk.
2592
- *
2593
- * State Change: DIRTY -> ACTIVE
2594
- */
2595
-STATIC void
2596
-xlog_state_clean_log(
2597
- struct xlog *log)
2495
+static void
2496
+xlog_state_activate_iclog(
2497
+ struct xlog_in_core *iclog,
2498
+ int *iclogs_changed)
25982499 {
2599
- xlog_in_core_t *iclog;
2600
- int changed = 0;
2500
+ ASSERT(list_empty_careful(&iclog->ic_callbacks));
26012501
2602
- iclog = log->l_iclog;
2603
- do {
2604
- if (iclog->ic_state == XLOG_STATE_DIRTY) {
2605
- iclog->ic_state = XLOG_STATE_ACTIVE;
2606
- iclog->ic_offset = 0;
2607
- ASSERT(iclog->ic_callback == NULL);
2608
- /*
2609
- * If the number of ops in this iclog indicate it just
2610
- * contains the dummy transaction, we can
2611
- * change state into IDLE (the second time around).
2612
- * Otherwise we should change the state into
2613
- * NEED a dummy.
2614
- * We don't need to cover the dummy.
2615
- */
2616
- if (!changed &&
2617
- (be32_to_cpu(iclog->ic_header.h_num_logops) ==
2618
- XLOG_COVER_OPS)) {
2619
- changed = 1;
2620
- } else {
2621
- /*
2622
- * We have two dirty iclogs so start over
2623
- * This could also be num of ops indicates
2624
- * this is not the dummy going out.
2625
- */
2626
- changed = 2;
2627
- }
2628
- iclog->ic_header.h_num_logops = 0;
2629
- memset(iclog->ic_header.h_cycle_data, 0,
2630
- sizeof(iclog->ic_header.h_cycle_data));
2631
- iclog->ic_header.h_lsn = 0;
2632
- } else if (iclog->ic_state == XLOG_STATE_ACTIVE)
2633
- /* do nothing */;
2634
- else
2635
- break; /* stop cleaning */
2636
- iclog = iclog->ic_next;
2637
- } while (iclog != log->l_iclog);
2638
-
2639
- /* log is locked when we are called */
26402502 /*
2641
- * Change state for the dummy log recording.
2642
- * We usually go to NEED. But we go to NEED2 if the changed indicates
2643
- * we are done writing the dummy record.
2644
- * If we are done with the second dummy recored (DONE2), then
2645
- * we go to IDLE.
2503
+ * If the number of ops in this iclog indicates it just contains the
2504
+ * dummy transaction, we can change state into IDLE (the second time
2505
+ * around). Otherwise we should change the state into NEED a dummy.
2506
+ * We don't need to cover the dummy.
26462507 */
2647
- if (changed) {
2648
- switch (log->l_covered_state) {
2649
- case XLOG_STATE_COVER_IDLE:
2650
- case XLOG_STATE_COVER_NEED:
2651
- case XLOG_STATE_COVER_NEED2:
2652
- log->l_covered_state = XLOG_STATE_COVER_NEED;
2653
- break;
2654
-
2655
- case XLOG_STATE_COVER_DONE:
2656
- if (changed == 1)
2657
- log->l_covered_state = XLOG_STATE_COVER_NEED2;
2658
- else
2659
- log->l_covered_state = XLOG_STATE_COVER_NEED;
2660
- break;
2661
-
2662
- case XLOG_STATE_COVER_DONE2:
2663
- if (changed == 1)
2664
- log->l_covered_state = XLOG_STATE_COVER_IDLE;
2665
- else
2666
- log->l_covered_state = XLOG_STATE_COVER_NEED;
2667
- break;
2668
-
2669
- default:
2670
- ASSERT(0);
2671
- }
2508
+ if (*iclogs_changed == 0 &&
2509
+ iclog->ic_header.h_num_logops == cpu_to_be32(XLOG_COVER_OPS)) {
2510
+ *iclogs_changed = 1;
2511
+ } else {
2512
+ /*
2513
+ * We have two dirty iclogs so start over. This could also be
2514
+ * that the number of ops indicates this is not the dummy going out.
2515
+ */
2516
+ *iclogs_changed = 2;
26722517 }
2673
-} /* xlog_state_clean_log */
2518
+
2519
+ iclog->ic_state = XLOG_STATE_ACTIVE;
2520
+ iclog->ic_offset = 0;
2521
+ iclog->ic_header.h_num_logops = 0;
2522
+ memset(iclog->ic_header.h_cycle_data, 0,
2523
+ sizeof(iclog->ic_header.h_cycle_data));
2524
+ iclog->ic_header.h_lsn = 0;
2525
+}
2526
+
2527
+/*
2528
+ * Loop through all iclogs and mark all iclogs currently marked DIRTY as
2529
+ * ACTIVE after iclog I/O has completed.
2530
+ */
2531
+static void
2532
+xlog_state_activate_iclogs(
2533
+ struct xlog *log,
2534
+ int *iclogs_changed)
2535
+{
2536
+ struct xlog_in_core *iclog = log->l_iclog;
2537
+
2538
+ do {
2539
+ if (iclog->ic_state == XLOG_STATE_DIRTY)
2540
+ xlog_state_activate_iclog(iclog, iclogs_changed);
2541
+ /*
2542
+ * The ordering of marking iclogs ACTIVE must be maintained, so
2543
+ * an iclog doesn't become ACTIVE beyond one that is SYNCING.
2544
+ */
2545
+ else if (iclog->ic_state != XLOG_STATE_ACTIVE)
2546
+ break;
2547
+ } while ((iclog = iclog->ic_next) != log->l_iclog);
2548
+}
2549
+
2550
+static int
2551
+xlog_covered_state(
2552
+ int prev_state,
2553
+ int iclogs_changed)
2554
+{
2555
+ /*
2556
+ * We usually go to NEED. But we go to NEED2 if iclogs_changed indicates we
2557
+ * are done writing the dummy record. If we are done with the second
2558
+ * dummy record (DONE2), then we go to IDLE.
2559
+ */
2560
+ switch (prev_state) {
2561
+ case XLOG_STATE_COVER_IDLE:
2562
+ case XLOG_STATE_COVER_NEED:
2563
+ case XLOG_STATE_COVER_NEED2:
2564
+ break;
2565
+ case XLOG_STATE_COVER_DONE:
2566
+ if (iclogs_changed == 1)
2567
+ return XLOG_STATE_COVER_NEED2;
2568
+ break;
2569
+ case XLOG_STATE_COVER_DONE2:
2570
+ if (iclogs_changed == 1)
2571
+ return XLOG_STATE_COVER_IDLE;
2572
+ break;
2573
+ default:
2574
+ ASSERT(0);
2575
+ }
2576
+
2577
+ return XLOG_STATE_COVER_NEED;
2578
+}
2579
+
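A summary of the covering-state transitions the function above implements,
derived directly from its body (editorial note, not additional patch content):

	/*
	 *   prev_state                 iclogs_changed == 1    otherwise
	 *   -------------------------  -------------------    ----------
	 *   COVER_IDLE/NEED/NEED2      COVER_NEED             COVER_NEED
	 *   COVER_DONE                 COVER_NEED2            COVER_NEED
	 *   COVER_DONE2                COVER_IDLE             COVER_NEED
	 */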
2580
+STATIC void
2581
+xlog_state_clean_iclog(
2582
+ struct xlog *log,
2583
+ struct xlog_in_core *dirty_iclog)
2584
+{
2585
+ int iclogs_changed = 0;
2586
+
2587
+ dirty_iclog->ic_state = XLOG_STATE_DIRTY;
2588
+
2589
+ xlog_state_activate_iclogs(log, &iclogs_changed);
2590
+ wake_up_all(&dirty_iclog->ic_force_wait);
2591
+
2592
+ if (iclogs_changed) {
2593
+ log->l_covered_state = xlog_covered_state(log->l_covered_state,
2594
+ iclogs_changed);
2595
+ }
2596
+}
26742597
26752598 STATIC xfs_lsn_t
26762599 xlog_get_lowest_lsn(
2677
- struct xlog *log)
2600
+ struct xlog *log)
26782601 {
2679
- xlog_in_core_t *lsn_log;
2680
- xfs_lsn_t lowest_lsn, lsn;
2602
+ struct xlog_in_core *iclog = log->l_iclog;
2603
+ xfs_lsn_t lowest_lsn = 0, lsn;
26812604
2682
- lsn_log = log->l_iclog;
2683
- lowest_lsn = 0;
26842605 do {
2685
- if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) {
2686
- lsn = be64_to_cpu(lsn_log->ic_header.h_lsn);
2687
- if ((lsn && !lowest_lsn) ||
2688
- (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) {
2606
+ if (iclog->ic_state == XLOG_STATE_ACTIVE ||
2607
+ iclog->ic_state == XLOG_STATE_DIRTY)
2608
+ continue;
2609
+
2610
+ lsn = be64_to_cpu(iclog->ic_header.h_lsn);
2611
+ if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0)
26892612 lowest_lsn = lsn;
2690
- }
2691
- }
2692
- lsn_log = lsn_log->ic_next;
2693
- } while (lsn_log != log->l_iclog);
2613
+ } while ((iclog = iclog->ic_next) != log->l_iclog);
2614
+
26942615 return lowest_lsn;
26952616 }
26962617
2618
+/*
2619
+ * Completion of an iclog IO does not imply that a transaction has completed, as
2620
+ * transactions can be large enough to span many iclogs. We cannot change the
2621
+ * tail of the log half way through a transaction as this may be the only
2622
+ * transaction in the log and moving the tail to point to the middle of it
2623
+ * will prevent recovery from finding the start of the transaction. Hence we
2624
+ * should only update the last_sync_lsn if this iclog contains transaction
2625
+ * completion callbacks on it.
2626
+ *
2627
+ * We have to do this before we drop the icloglock to ensure we are the only one
2628
+ * that can update it.
2629
+ *
2630
+ * If we are moving the last_sync_lsn forwards, we also need to ensure we kick
2631
+ * the reservation grant head pushing. This is due to the fact that the push
2632
+ * target is bound by the current last_sync_lsn value. Hence if we have a large
2633
+ * amount of log space bound up in this committing transaction then the
2634
+ * last_sync_lsn value may be the limiting factor preventing tail pushing from
2635
+ * freeing space in the log. Hence once we've updated the last_sync_lsn we
2636
+ * should push the AIL to ensure the push target (and hence the grant head) is
2637
+ * no longer bound by the old log head location and can move forwards and make
2638
+ * progress again.
2639
+ */
2640
+static void
2641
+xlog_state_set_callback(
2642
+ struct xlog *log,
2643
+ struct xlog_in_core *iclog,
2644
+ xfs_lsn_t header_lsn)
2645
+{
2646
+ iclog->ic_state = XLOG_STATE_CALLBACK;
2647
+
2648
+ ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
2649
+ header_lsn) <= 0);
2650
+
2651
+ if (list_empty_careful(&iclog->ic_callbacks))
2652
+ return;
2653
+
2654
+ atomic64_set(&log->l_last_sync_lsn, header_lsn);
2655
+ xlog_grant_push_ail(log, 0);
2656
+}
2657
+
2658
+/*
2659
+ * Return true if we need to stop processing, false to continue to the next
2660
+ * iclog. The caller will need to run callbacks if the iclog is returned in the
2661
+ * XLOG_STATE_CALLBACK state.
2662
+ */
2663
+static bool
2664
+xlog_state_iodone_process_iclog(
2665
+ struct xlog *log,
2666
+ struct xlog_in_core *iclog,
2667
+ bool *ioerror)
2668
+{
2669
+ xfs_lsn_t lowest_lsn;
2670
+ xfs_lsn_t header_lsn;
2671
+
2672
+ switch (iclog->ic_state) {
2673
+ case XLOG_STATE_ACTIVE:
2674
+ case XLOG_STATE_DIRTY:
2675
+ /*
2676
+ * Skip all iclogs in the ACTIVE & DIRTY states:
2677
+ */
2678
+ return false;
2679
+ case XLOG_STATE_IOERROR:
2680
+ /*
2681
+ * Between marking a filesystem SHUTDOWN and stopping the log,
2682
+ * we do flush all iclogs to disk (if there wasn't a log I/O
2683
+ * error). So, we do want things to go smoothly in case of just
2684
+ * a SHUTDOWN w/o a LOG_IO_ERROR.
2685
+ */
2686
+ *ioerror = true;
2687
+ return false;
2688
+ case XLOG_STATE_DONE_SYNC:
2689
+ /*
2690
+ * Now that we have an iclog that is in the DONE_SYNC state, do
2691
+ * one more check here to see if we have chased our tail around.
2692
+ * If this is not the lowest lsn iclog, then we will leave it
2693
+ * for another completion to process.
2694
+ */
2695
+ header_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
2696
+ lowest_lsn = xlog_get_lowest_lsn(log);
2697
+ if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0)
2698
+ return false;
2699
+ xlog_state_set_callback(log, iclog, header_lsn);
2700
+ return false;
2701
+ default:
2702
+ /*
2703
+ * Can only perform callbacks in order. Since this iclog is not
2704
+ * in the DONE_SYNC state, we skip the rest and just try to
2705
+ * clean up.
2706
+ */
2707
+ return true;
2708
+ }
2709
+}
2710
+
2711
+/*
2712
+ * Keep processing entries in the iclog callback list until we come around and
2713
+ * it is empty. We need to atomically see that the list is empty and change the
2714
+ * state to DIRTY so that we don't miss any more callbacks being added.
2715
+ *
2716
+ * This function is called with the icloglock held and returns with it held. We
2717
+ * drop it while running callbacks, however, as holding it over thousands of
2718
+ * callbacks is unnecessary and causes excessive contention if we do.
2719
+ */
2720
+static void
2721
+xlog_state_do_iclog_callbacks(
2722
+ struct xlog *log,
2723
+ struct xlog_in_core *iclog)
2724
+ __releases(&log->l_icloglock)
2725
+ __acquires(&log->l_icloglock)
2726
+{
2727
+ spin_unlock(&log->l_icloglock);
2728
+ spin_lock(&iclog->ic_callback_lock);
2729
+ while (!list_empty(&iclog->ic_callbacks)) {
2730
+ LIST_HEAD(tmp);
2731
+
2732
+ list_splice_init(&iclog->ic_callbacks, &tmp);
2733
+
2734
+ spin_unlock(&iclog->ic_callback_lock);
2735
+ xlog_cil_process_committed(&tmp);
2736
+ spin_lock(&iclog->ic_callback_lock);
2737
+ }
2738
+
2739
+ /*
2740
+ * Pick up the icloglock while still holding the callback lock so we
2741
+ * serialise against anyone trying to add more callbacks to this iclog
2742
+ * now that we've finished processing.
2743
+ */
2744
+ spin_lock(&log->l_icloglock);
2745
+ spin_unlock(&iclog->ic_callback_lock);
2746
+}
26972747
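Editorial aside, not part of the patch: __releases() and __acquires() are
sparse lock-context annotations recording that l_icloglock is deliberately
dropped and retaken inside the helper, so static checking does not flag the
imbalance. The general shape of the pattern, for reference:

	void helper(spinlock_t *lock)
		__releases(lock)
		__acquires(lock)
	{
		spin_unlock(lock);
		/* ... work that must not be done under the lock ... */
		spin_lock(lock);
	}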
26982748 STATIC void
26992749 xlog_state_do_callback(
2700
- struct xlog *log,
2701
- int aborted,
2702
- struct xlog_in_core *ciclog)
2750
+ struct xlog *log)
27032751 {
2704
- xlog_in_core_t *iclog;
2705
- xlog_in_core_t *first_iclog; /* used to know when we've
2706
- * processed all iclogs once */
2707
- xfs_log_callback_t *cb, *cb_next;
2708
- int flushcnt = 0;
2709
- xfs_lsn_t lowest_lsn;
2710
- int ioerrors; /* counter: iclogs with errors */
2711
- int loopdidcallbacks; /* flag: inner loop did callbacks*/
2712
- int funcdidcallbacks; /* flag: function did callbacks */
2713
- int repeats; /* for issuing console warnings if
2714
- * looping too many times */
2752
+ struct xlog_in_core *iclog;
2753
+ struct xlog_in_core *first_iclog;
2754
+ bool cycled_icloglock;
2755
+ bool ioerror;
2756
+ int flushcnt = 0;
2757
+ int repeats = 0;
27152758
27162759 spin_lock(&log->l_icloglock);
2717
- first_iclog = iclog = log->l_iclog;
2718
- ioerrors = 0;
2719
- funcdidcallbacks = 0;
2720
- repeats = 0;
2721
-
27222760 do {
27232761 /*
27242762 * Scan all iclogs starting with the one pointed to by the
....@@ -2730,140 +2768,31 @@
27302768 */
27312769 first_iclog = log->l_iclog;
27322770 iclog = log->l_iclog;
2733
- loopdidcallbacks = 0;
2771
+ cycled_icloglock = false;
2772
+ ioerror = false;
27342773 repeats++;
27352774
27362775 do {
2776
+ if (xlog_state_iodone_process_iclog(log, iclog,
2777
+ &ioerror))
2778
+ break;
27372779
2738
- /* skip all iclogs in the ACTIVE & DIRTY states */
2739
- if (iclog->ic_state &
2740
- (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY)) {
2780
+ if (iclog->ic_state != XLOG_STATE_CALLBACK &&
2781
+ iclog->ic_state != XLOG_STATE_IOERROR) {
27412782 iclog = iclog->ic_next;
27422783 continue;
27432784 }
27442785
27452786 /*
2746
- * Between marking a filesystem SHUTDOWN and stopping
2747
- * the log, we do flush all iclogs to disk (if there
2748
- * wasn't a log I/O error). So, we do want things to
2749
- * go smoothly in case of just a SHUTDOWN w/o a
2750
- * LOG_IO_ERROR.
2787
+ * Running callbacks will drop the icloglock which means
2788
+ * we'll have to run at least one more complete loop.
27512789 */
2752
- if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
2753
- /*
2754
- * Can only perform callbacks in order. Since
2755
- * this iclog is not in the DONE_SYNC/
2756
- * DO_CALLBACK state, we skip the rest and
2757
- * just try to clean up. If we set our iclog
2758
- * to DO_CALLBACK, we will not process it when
2759
- * we retry since a previous iclog is in the
2760
- * CALLBACK and the state cannot change since
2761
- * we are holding the l_icloglock.
2762
- */
2763
- if (!(iclog->ic_state &
2764
- (XLOG_STATE_DONE_SYNC |
2765
- XLOG_STATE_DO_CALLBACK))) {
2766
- if (ciclog && (ciclog->ic_state ==
2767
- XLOG_STATE_DONE_SYNC)) {
2768
- ciclog->ic_state = XLOG_STATE_DO_CALLBACK;
2769
- }
2770
- break;
2771
- }
2772
- /*
2773
- * We now have an iclog that is in either the
2774
- * DO_CALLBACK or DONE_SYNC states. The other
2775
- * states (WANT_SYNC, SYNCING, or CALLBACK were
2776
- * caught by the above if and are going to
2777
- * clean (i.e. we aren't doing their callbacks)
2778
- * see the above if.
2779
- */
2780
-
2781
- /*
2782
- * We will do one more check here to see if we
2783
- * have chased our tail around.
2784
- */
2785
-
2786
- lowest_lsn = xlog_get_lowest_lsn(log);
2787
- if (lowest_lsn &&
2788
- XFS_LSN_CMP(lowest_lsn,
2789
- be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
2790
- iclog = iclog->ic_next;
2791
- continue; /* Leave this iclog for
2792
- * another thread */
2793
- }
2794
-
2795
- iclog->ic_state = XLOG_STATE_CALLBACK;
2796
-
2797
-
2798
- /*
2799
- * Completion of a iclog IO does not imply that
2800
- * a transaction has completed, as transactions
2801
- * can be large enough to span many iclogs. We
2802
- * cannot change the tail of the log half way
2803
- * through a transaction as this may be the only
2804
- * transaction in the log and moving th etail to
2805
- * point to the middle of it will prevent
2806
- * recovery from finding the start of the
2807
- * transaction. Hence we should only update the
2808
- * last_sync_lsn if this iclog contains
2809
- * transaction completion callbacks on it.
2810
- *
2811
- * We have to do this before we drop the
2812
- * icloglock to ensure we are the only one that
2813
- * can update it.
2814
- */
2815
- ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
2816
- be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
2817
- if (iclog->ic_callback)
2818
- atomic64_set(&log->l_last_sync_lsn,
2819
- be64_to_cpu(iclog->ic_header.h_lsn));
2820
-
2821
- } else
2822
- ioerrors++;
2823
-
2824
- spin_unlock(&log->l_icloglock);
2825
-
2826
- /*
2827
- * Keep processing entries in the callback list until
2828
- * we come around and it is empty. We need to
2829
- * atomically see that the list is empty and change the
2830
- * state to DIRTY so that we don't miss any more
2831
- * callbacks being added.
2832
- */
2833
- spin_lock(&iclog->ic_callback_lock);
2834
- cb = iclog->ic_callback;
2835
- while (cb) {
2836
- iclog->ic_callback_tail = &(iclog->ic_callback);
2837
- iclog->ic_callback = NULL;
2838
- spin_unlock(&iclog->ic_callback_lock);
2839
-
2840
- /* perform callbacks in the order given */
2841
- for (; cb; cb = cb_next) {
2842
- cb_next = cb->cb_next;
2843
- cb->cb_func(cb->cb_arg, aborted);
2844
- }
2845
- spin_lock(&iclog->ic_callback_lock);
2846
- cb = iclog->ic_callback;
2847
- }
2848
-
2849
- loopdidcallbacks++;
2850
- funcdidcallbacks++;
2851
-
2852
- spin_lock(&log->l_icloglock);
2853
- ASSERT(iclog->ic_callback == NULL);
2854
- spin_unlock(&iclog->ic_callback_lock);
2855
- if (!(iclog->ic_state & XLOG_STATE_IOERROR))
2856
- iclog->ic_state = XLOG_STATE_DIRTY;
2857
-
2858
- /*
2859
- * Transition from DIRTY to ACTIVE if applicable.
2860
- * NOP if STATE_IOERROR.
2861
- */
2862
- xlog_state_clean_log(log);
2863
-
2864
- /* wake up threads waiting in xfs_log_force() */
2865
- wake_up_all(&iclog->ic_force_wait);
2866
-
2790
+ cycled_icloglock = true;
2791
+ xlog_state_do_iclog_callbacks(log, iclog);
2792
+ if (XLOG_FORCED_SHUTDOWN(log))
2793
+ wake_up_all(&iclog->ic_force_wait);
2794
+ else
2795
+ xlog_state_clean_iclog(log, iclog);
28672796 iclog = iclog->ic_next;
28682797 } while (first_iclog != iclog);
28692798
....@@ -2874,45 +2803,10 @@
28742803 "%s: possible infinite loop (%d iterations)",
28752804 __func__, flushcnt);
28762805 }
2877
- } while (!ioerrors && loopdidcallbacks);
2806
+ } while (!ioerror && cycled_icloglock);
28782807
2879
-#ifdef DEBUG
2880
- /*
2881
- * Make one last gasp attempt to see if iclogs are being left in limbo.
2882
- * If the above loop finds an iclog earlier than the current iclog and
2883
- * in one of the syncing states, the current iclog is put into
2884
- * DO_CALLBACK and the callbacks are deferred to the completion of the
2885
- * earlier iclog. Walk the iclogs in order and make sure that no iclog
2886
- * is in DO_CALLBACK unless an earlier iclog is in one of the syncing
2887
- * states.
2888
- *
2889
- * Note that SYNCING|IOABORT is a valid state so we cannot just check
2890
- * for ic_state == SYNCING.
2891
- */
2892
- if (funcdidcallbacks) {
2893
- first_iclog = iclog = log->l_iclog;
2894
- do {
2895
- ASSERT(iclog->ic_state != XLOG_STATE_DO_CALLBACK);
2896
- /*
2897
- * Terminate the loop if iclogs are found in states
2898
- * which will cause other threads to clean up iclogs.
2899
- *
2900
- * SYNCING - i/o completion will go through logs
2901
- * DONE_SYNC - interrupt thread should be waiting for
2902
- * l_icloglock
2903
- * IOERROR - give up hope all ye who enter here
2904
- */
2905
- if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
2906
- iclog->ic_state & XLOG_STATE_SYNCING ||
2907
- iclog->ic_state == XLOG_STATE_DONE_SYNC ||
2908
- iclog->ic_state == XLOG_STATE_IOERROR )
2909
- break;
2910
- iclog = iclog->ic_next;
2911
- } while (first_iclog != iclog);
2912
- }
2913
-#endif
2914
-
2915
- if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
2808
+ if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE ||
2809
+ log->l_iclog->ic_state == XLOG_STATE_IOERROR)
29162810 wake_up_all(&log->l_flush_wait);
29172811
29182812 spin_unlock(&log->l_icloglock);
....@@ -2934,30 +2828,20 @@
29342828 */
29352829 STATIC void
29362830 xlog_state_done_syncing(
2937
- xlog_in_core_t *iclog,
2938
- int aborted)
2831
+ struct xlog_in_core *iclog)
29392832 {
2940
- struct xlog *log = iclog->ic_log;
2833
+ struct xlog *log = iclog->ic_log;
29412834
29422835 spin_lock(&log->l_icloglock);
2943
-
2944
- ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
2945
- iclog->ic_state == XLOG_STATE_IOERROR);
29462836 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
2947
- ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
2948
-
29492837
29502838 /*
29512839 * If we got an error, either on the first buffer, or in the case of
2952
- * split log writes, on the second, we mark ALL iclogs STATE_IOERROR,
2953
- * and none should ever be attempted to be written to disk
2954
- * again.
2840
+ * split log writes, on the second, we shut down the file system and
2841
+ * no iclogs should ever be attempted to be written to disk again.
29552842 */
2956
- if (iclog->ic_state != XLOG_STATE_IOERROR) {
2957
- if (--iclog->ic_bwritecnt == 1) {
2958
- spin_unlock(&log->l_icloglock);
2959
- return;
2960
- }
2843
+ if (!XLOG_FORCED_SHUTDOWN(log)) {
2844
+ ASSERT(iclog->ic_state == XLOG_STATE_SYNCING);
29612845 iclog->ic_state = XLOG_STATE_DONE_SYNC;
29622846 }
29632847
....@@ -2968,9 +2852,8 @@
29682852 */
29692853 wake_up_all(&iclog->ic_write_wait);
29702854 spin_unlock(&log->l_icloglock);
2971
- xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
2972
-} /* xlog_state_done_syncing */
2973
-
2855
+ xlog_state_do_callback(log);
2856
+}
29742857
29752858 /*
29762859 * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
....@@ -3002,7 +2885,6 @@
30022885 int log_offset;
30032886 xlog_rec_header_t *head;
30042887 xlog_in_core_t *iclog;
3005
- int error;
30062888
30072889 restart:
30082890 spin_lock(&log->l_icloglock);
....@@ -3051,24 +2933,22 @@
30512933 * can fit into remaining data section.
30522934 */
30532935 if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
2936
+ int error = 0;
2937
+
30542938 xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
30552939
30562940 /*
3057
- * If I'm the only one writing to this iclog, sync it to disk.
3058
- * We need to do an atomic compare and decrement here to avoid
3059
- * racing with concurrent atomic_dec_and_lock() calls in
2941
+ * If we are the only one writing to this iclog, sync it to
2942
+ * disk. We need to do an atomic compare and decrement here to
2943
+ * avoid racing with concurrent atomic_dec_and_lock() calls in
30602944 * xlog_state_release_iclog() when there is more than one
30612945 * reference to the iclog.
30622946 */
3063
- if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) {
3064
- /* we are the only one */
3065
- spin_unlock(&log->l_icloglock);
2947
+ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
30662948 error = xlog_state_release_iclog(log, iclog);
3067
- if (error)
3068
- return error;
3069
- } else {
3070
- spin_unlock(&log->l_icloglock);
3071
- }
2949
+ spin_unlock(&log->l_icloglock);
2950
+ if (error)
2951
+ return error;
30722952 goto restart;
30732953 }
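	/*
	 * Editorial note, not part of the patch: atomic_add_unless(v, -1, 1)
	 * decrements the counter only if it is not already 1 and returns
	 * non-zero when it did.  A zero return therefore means we held the
	 * only reference, so xlog_state_release_iclog() above is called to
	 * drop it and push the iclog out; otherwise we merely released one
	 * of several references and nothing more is needed here.
	 */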
30742954
....@@ -3092,21 +2972,21 @@
30922972
30932973 *logoffsetp = log_offset;
30942974 return 0;
3095
-} /* xlog_state_get_iclog_space */
2975
+}
30962976
3097
-/* The first cnt-1 times through here we don't need to
3098
- * move the grant write head because the permanent
3099
- * reservation has reserved cnt times the unit amount.
3100
- * Release part of current permanent unit reservation and
3101
- * reset current reservation to be one units worth. Also
3102
- * move grant reservation head forward.
2977
+/*
2978
+ * The first cnt-1 times a ticket goes through here we don't need to move the
2979
+ * grant write head because the permanent reservation has reserved cnt times the
2980
+ * unit amount. Release part of current permanent unit reservation and reset
2981
+ * current reservation to be one unit's worth. Also move grant reservation head
2982
+ * forward.
31032983 */
3104
-STATIC void
3105
-xlog_regrant_reserve_log_space(
2984
+void
2985
+xfs_log_ticket_regrant(
31062986 struct xlog *log,
31072987 struct xlog_ticket *ticket)
31082988 {
3109
- trace_xfs_log_regrant_reserve_enter(log, ticket);
2989
+ trace_xfs_log_ticket_regrant(log, ticket);
31102990
31112991 if (ticket->t_cnt > 0)
31122992 ticket->t_cnt--;
....@@ -3118,21 +2998,20 @@
31182998 ticket->t_curr_res = ticket->t_unit_res;
31192999 xlog_tic_reset_res(ticket);
31203000
3121
- trace_xfs_log_regrant_reserve_sub(log, ticket);
3001
+ trace_xfs_log_ticket_regrant_sub(log, ticket);
31223002
31233003 /* just return if we still have some of the pre-reserved space */
3124
- if (ticket->t_cnt > 0)
3125
- return;
3004
+ if (!ticket->t_cnt) {
3005
+ xlog_grant_add_space(log, &log->l_reserve_head.grant,
3006
+ ticket->t_unit_res);
3007
+ trace_xfs_log_ticket_regrant_exit(log, ticket);
31263008
3127
- xlog_grant_add_space(log, &log->l_reserve_head.grant,
3128
- ticket->t_unit_res);
3009
+ ticket->t_curr_res = ticket->t_unit_res;
3010
+ xlog_tic_reset_res(ticket);
3011
+ }
31293012
3130
- trace_xfs_log_regrant_reserve_exit(log, ticket);
3131
-
3132
- ticket->t_curr_res = ticket->t_unit_res;
3133
- xlog_tic_reset_res(ticket);
3134
-} /* xlog_regrant_reserve_log_space */
3135
-
3013
+ xfs_log_ticket_put(ticket);
3014
+}
31363015
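An illustrative walk-through of the regrant accounting above (editorial, values
hypothetical): a permanent ticket taken with cnt == 3 behaves as follows on
successive regrants:

	/*
	 *   regrant #1: t_cnt 3 -> 2, t_curr_res reset, grant heads untouched
	 *   regrant #2: t_cnt 2 -> 1, t_curr_res reset, grant heads untouched
	 *   regrant #3 and later: t_cnt is 0 afterwards, so each regrant also
	 *                moves the reserve grant head forward by t_unit_res
	 *
	 * Note that every regrant now also drops a ticket reference via
	 * xfs_log_ticket_put().
	 */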
31373016 /*
31383017 * Give back the space left from a reservation.
....@@ -3148,18 +3027,19 @@
31483027 * space, the count will stay at zero and the only space remaining will be
31493028 * in the current reservation field.
31503029 */
3151
-STATIC void
3152
-xlog_ungrant_log_space(
3030
+void
3031
+xfs_log_ticket_ungrant(
31533032 struct xlog *log,
31543033 struct xlog_ticket *ticket)
31553034 {
3156
- int bytes;
3035
+ int bytes;
3036
+
3037
+ trace_xfs_log_ticket_ungrant(log, ticket);
31573038
31583039 if (ticket->t_cnt > 0)
31593040 ticket->t_cnt--;
31603041
3161
- trace_xfs_log_ungrant_enter(log, ticket);
3162
- trace_xfs_log_ungrant_sub(log, ticket);
3042
+ trace_xfs_log_ticket_ungrant_sub(log, ticket);
31633043
31643044 /*
31653045 * If this is a permanent reservation ticket, we may be able to free
....@@ -3174,71 +3054,15 @@
31743054 xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
31753055 xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
31763056
3177
- trace_xfs_log_ungrant_exit(log, ticket);
3057
+ trace_xfs_log_ticket_ungrant_exit(log, ticket);
31783058
31793059 xfs_log_space_wake(log->l_mp);
3060
+ xfs_log_ticket_put(ticket);
31803061 }
31813062
31823063 /*
3183
- * Flush iclog to disk if this is the last reference to the given iclog and
3184
- * the WANT_SYNC bit is set.
3185
- *
3186
- * When this function is entered, the iclog is not necessarily in the
3187
- * WANT_SYNC state. It may be sitting around waiting to get filled.
3188
- *
3189
- *
3190
- */
3191
-STATIC int
3192
-xlog_state_release_iclog(
3193
- struct xlog *log,
3194
- struct xlog_in_core *iclog)
3195
-{
3196
- int sync = 0; /* do we sync? */
3197
-
3198
- if (iclog->ic_state & XLOG_STATE_IOERROR)
3199
- return -EIO;
3200
-
3201
- ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
3202
- if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
3203
- return 0;
3204
-
3205
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
3206
- spin_unlock(&log->l_icloglock);
3207
- return -EIO;
3208
- }
3209
- ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
3210
- iclog->ic_state == XLOG_STATE_WANT_SYNC);
3211
-
3212
- if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
3213
- /* update tail before writing to iclog */
3214
- xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
3215
- sync++;
3216
- iclog->ic_state = XLOG_STATE_SYNCING;
3217
- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
3218
- xlog_verify_tail_lsn(log, iclog, tail_lsn);
3219
- /* cycle incremented when incrementing curr_block */
3220
- }
3221
- spin_unlock(&log->l_icloglock);
3222
-
3223
- /*
3224
- * We let the log lock go, so it's possible that we hit a log I/O
3225
- * error or some other SHUTDOWN condition that marks the iclog
3226
- * as XLOG_STATE_IOERROR before the bwrite. However, we know that
3227
- * this iclog has consistent data, so we ignore IOERROR
3228
- * flags after this point.
3229
- */
3230
- if (sync)
3231
- return xlog_sync(log, iclog);
3232
- return 0;
3233
-} /* xlog_state_release_iclog */
3234
-
3235
-
3236
-/*
3237
- * This routine will mark the current iclog in the ring as WANT_SYNC
3238
- * and move the current iclog pointer to the next iclog in the ring.
3239
- * When this routine is called from xlog_state_get_iclog_space(), the
3240
- * exact size of the iclog has not yet been determined. All we know is
3241
- * that every data block. We have run out of space in this log record.
3064
+ * This routine will mark the current iclog in the ring as WANT_SYNC and move
3065
+ * the current iclog pointer to the next iclog in the ring.
32423066 */
32433067 STATIC void
32443068 xlog_state_switch_iclogs(
....@@ -3247,6 +3071,8 @@
32473071 int eventual_size)
32483072 {
32493073 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
3074
+ assert_spin_locked(&log->l_icloglock);
3075
+
32503076 if (!eventual_size)
32513077 eventual_size = iclog->ic_offset;
32523078 iclog->ic_state = XLOG_STATE_WANT_SYNC;
....@@ -3281,7 +3107,7 @@
32813107 }
32823108 ASSERT(iclog == log->l_iclog);
32833109 log->l_iclog = iclog->ic_next;
3284
-} /* xlog_state_switch_iclogs */
3110
+}
32853111
32863112 /*
32873113 * Write out all data in the in-core log as of this exact moment in time.
....@@ -3326,7 +3152,7 @@
33263152
33273153 spin_lock(&log->l_icloglock);
33283154 iclog = log->l_iclog;
3329
- if (iclog->ic_state & XLOG_STATE_IOERROR)
3155
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
33303156 goto out_error;
33313157
33323158 if (iclog->ic_state == XLOG_STATE_DIRTY ||
....@@ -3341,9 +3167,6 @@
33413167 * previous iclog and go to sleep.
33423168 */
33433169 iclog = iclog->ic_prev;
3344
- if (iclog->ic_state == XLOG_STATE_ACTIVE ||
3345
- iclog->ic_state == XLOG_STATE_DIRTY)
3346
- goto out_unlock;
33473170 } else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
33483171 if (atomic_read(&iclog->ic_refcnt) == 0) {
33493172 /*
....@@ -3356,14 +3179,10 @@
33563179 atomic_inc(&iclog->ic_refcnt);
33573180 lsn = be64_to_cpu(iclog->ic_header.h_lsn);
33583181 xlog_state_switch_iclogs(log, iclog, 0);
3359
- spin_unlock(&log->l_icloglock);
3360
-
33613182 if (xlog_state_release_iclog(log, iclog))
3362
- return -EIO;
3183
+ goto out_error;
33633184
3364
- spin_lock(&log->l_icloglock);
3365
- if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn ||
3366
- iclog->ic_state == XLOG_STATE_DIRTY)
3185
+ if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
33673186 goto out_unlock;
33683187 } else {
33693188 /*
....@@ -3383,17 +3202,8 @@
33833202 ;
33843203 }
33853204
3386
- if (!(flags & XFS_LOG_SYNC))
3387
- goto out_unlock;
3388
-
3389
- if (iclog->ic_state & XLOG_STATE_IOERROR)
3390
- goto out_error;
3391
- XFS_STATS_INC(mp, xs_log_force_sleep);
3392
- xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3393
- if (iclog->ic_state & XLOG_STATE_IOERROR)
3394
- return -EIO;
3395
- return 0;
3396
-
3205
+ if (flags & XFS_LOG_SYNC)
3206
+ return xlog_wait_on_iclog(iclog);
33973207 out_unlock:
33983208 spin_unlock(&log->l_icloglock);
33993209 return 0;
....@@ -3403,19 +3213,18 @@
34033213 }
34043214
34053215 static int
3406
-__xfs_log_force_lsn(
3407
- struct xfs_mount *mp,
3216
+xlog_force_lsn(
3217
+ struct xlog *log,
34083218 xfs_lsn_t lsn,
34093219 uint flags,
34103220 int *log_flushed,
34113221 bool already_slept)
34123222 {
3413
- struct xlog *log = mp->m_log;
34143223 struct xlog_in_core *iclog;
34153224
34163225 spin_lock(&log->l_icloglock);
34173226 iclog = log->l_iclog;
3418
- if (iclog->ic_state & XLOG_STATE_IOERROR)
3227
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
34193228 goto out_error;
34203229
34213230 while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
....@@ -3423,9 +3232,6 @@
34233232 if (iclog == log->l_iclog)
34243233 goto out_unlock;
34253234 }
3426
-
3427
- if (iclog->ic_state == XLOG_STATE_DIRTY)
3428
- goto out_unlock;
34293235
34303236 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
34313237 /*
....@@ -3444,39 +3250,22 @@
34443250 * will go out then.
34453251 */
34463252 if (!already_slept &&
3447
- (iclog->ic_prev->ic_state &
3448
- (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
3449
- ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3450
-
3451
- XFS_STATS_INC(mp, xs_log_force_sleep);
3452
-
3253
+ (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
3254
+ iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
34533255 xlog_wait(&iclog->ic_prev->ic_write_wait,
34543256 &log->l_icloglock);
34553257 return -EAGAIN;
34563258 }
34573259 atomic_inc(&iclog->ic_refcnt);
34583260 xlog_state_switch_iclogs(log, iclog, 0);
3459
- spin_unlock(&log->l_icloglock);
34603261 if (xlog_state_release_iclog(log, iclog))
3461
- return -EIO;
3262
+ goto out_error;
34623263 if (log_flushed)
34633264 *log_flushed = 1;
3464
- spin_lock(&log->l_icloglock);
34653265 }
34663266
3467
- if (!(flags & XFS_LOG_SYNC) ||
3468
- (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)))
3469
- goto out_unlock;
3470
-
3471
- if (iclog->ic_state & XLOG_STATE_IOERROR)
3472
- goto out_error;
3473
-
3474
- XFS_STATS_INC(mp, xs_log_force_sleep);
3475
- xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3476
- if (iclog->ic_state & XLOG_STATE_IOERROR)
3477
- return -EIO;
3478
- return 0;
3479
-
3267
+ if (flags & XFS_LOG_SYNC)
3268
+ return xlog_wait_on_iclog(iclog);
34803269 out_unlock:
34813270 spin_unlock(&log->l_icloglock);
34823271 return 0;
....@@ -3500,54 +3289,31 @@
35003289 * to disk, that thread will wake up all threads waiting on the queue.
35013290 */
35023291 int
3503
-xfs_log_force_lsn(
3292
+xfs_log_force_seq(
35043293 struct xfs_mount *mp,
3505
- xfs_lsn_t lsn,
3294
+ xfs_csn_t seq,
35063295 uint flags,
35073296 int *log_flushed)
35083297 {
3298
+ struct xlog *log = mp->m_log;
3299
+ xfs_lsn_t lsn;
35093300 int ret;
3510
- ASSERT(lsn != 0);
3301
+ ASSERT(seq != 0);
35113302
35123303 XFS_STATS_INC(mp, xs_log_force);
3513
- trace_xfs_log_force(mp, lsn, _RET_IP_);
3304
+ trace_xfs_log_force(mp, seq, _RET_IP_);
35143305
3515
- lsn = xlog_cil_force_lsn(mp->m_log, lsn);
3306
+ lsn = xlog_cil_force_seq(log, seq);
35163307 if (lsn == NULLCOMMITLSN)
35173308 return 0;
35183309
3519
- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
3520
- if (ret == -EAGAIN)
3521
- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
3310
+ ret = xlog_force_lsn(log, lsn, flags, log_flushed, false);
3311
+ if (ret == -EAGAIN) {
3312
+ XFS_STATS_INC(mp, xs_log_force_sleep);
3313
+ ret = xlog_force_lsn(log, lsn, flags, log_flushed, true);
3314
+ }
35223315 return ret;
35233316 }
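A hypothetical usage sketch, not taken from the patch: callers that used to
pass a commit LSN now pass the checkpoint sequence handed back at transaction
commit time (the caller-side plumbing shown here is an assumption):

	xfs_csn_t	seq;	/* checkpoint sequence from transaction commit */
	int		error;

	error = xfs_log_force_seq(mp, seq, XFS_LOG_SYNC, NULL);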
3524
-
3525
-/*
3526
- * Called when we want to mark the current iclog as being ready to sync to
3527
- * disk.
3528
- */
3529
-STATIC void
3530
-xlog_state_want_sync(
3531
- struct xlog *log,
3532
- struct xlog_in_core *iclog)
3533
-{
3534
- assert_spin_locked(&log->l_icloglock);
3535
-
3536
- if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3537
- xlog_state_switch_iclogs(log, iclog, 0);
3538
- } else {
3539
- ASSERT(iclog->ic_state &
3540
- (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR));
3541
- }
3542
-}
3543
-
3544
-
3545
-/*****************************************************************************
3546
- *
3547
- * TICKET functions
3548
- *
3549
- *****************************************************************************
3550
- */
35513317
35523318 /*
35533319 * Free a used ticket when its refcount falls to zero.
....@@ -3558,7 +3324,7 @@
35583324 {
35593325 ASSERT(atomic_read(&ticket->t_ref) > 0);
35603326 if (atomic_dec_and_test(&ticket->t_ref))
3561
- kmem_zone_free(xfs_log_ticket_zone, ticket);
3327
+ kmem_cache_free(xfs_log_ticket_zone, ticket);
35623328 }
35633329
35643330 xlog_ticket_t *
....@@ -3676,15 +3442,12 @@
36763442 int unit_bytes,
36773443 int cnt,
36783444 char client,
3679
- bool permanent,
3680
- xfs_km_flags_t alloc_flags)
3445
+ bool permanent)
36813446 {
36823447 struct xlog_ticket *tic;
36833448 int unit_res;
36843449
3685
- tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
3686
- if (!tic)
3687
- return NULL;
3450
+ tic = kmem_cache_zalloc(xfs_log_ticket_zone, GFP_NOFS | __GFP_NOFAIL);
36883451
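	/*
	 * Editorial note, not part of the patch: __GFP_NOFAIL makes the
	 * allocation retry until it succeeds rather than return NULL, which
	 * is why the old "if (!tic) return NULL" check could be removed.
	 */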
36893452 unit_res = xfs_log_calc_unit_res(log->l_mp, unit_bytes);
36903453
....@@ -3697,7 +3460,6 @@
36973460 tic->t_ocnt = cnt;
36983461 tic->t_tid = prandom_u32();
36993462 tic->t_clientid = client;
3700
- tic->t_flags = XLOG_TIC_INITED;
37013463 if (permanent)
37023464 tic->t_flags |= XLOG_TIC_PERM_RESERV;
37033465
....@@ -3706,13 +3468,6 @@
37063468 return tic;
37073469 }
37083470
3709
-
3710
-/******************************************************************************
3711
- *
3712
- * Log debug routines
3713
- *
3714
- ******************************************************************************
3715
- */
37163471 #if defined(DEBUG)
37173472 /*
37183473 * Make sure that the destination ptr is within the valid data region of
....@@ -3798,7 +3553,7 @@
37983553 if (blocks < BTOBB(iclog->ic_offset) + 1)
37993554 xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
38003555 }
3801
-} /* xlog_verify_tail_lsn */
3556
+}
38023557
38033558 /*
38043559 * Perform a number of checks on the iclog before writing to disk.
....@@ -3819,8 +3574,7 @@
38193574 xlog_verify_iclog(
38203575 struct xlog *log,
38213576 struct xlog_in_core *iclog,
3822
- int count,
3823
- bool syncing)
3577
+ int count)
38243578 {
38253579 xlog_op_header_t *ophead;
38263580 xlog_in_core_t *icptr;
....@@ -3864,7 +3618,7 @@
38643618 /* clientid is only 1 byte */
38653619 p = &ophead->oh_clientid;
38663620 field_offset = p - base_ptr;
3867
- if (!syncing || (field_offset & 0x1ff)) {
3621
+ if (field_offset & 0x1ff) {
38683622 clientid = ophead->oh_clientid;
38693623 } else {
38703624 idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
....@@ -3887,7 +3641,7 @@
38873641 /* check length */
38883642 p = &ophead->oh_len;
38893643 field_offset = p - base_ptr;
3890
- if (!syncing || (field_offset & 0x1ff)) {
3644
+ if (field_offset & 0x1ff) {
38913645 op_len = be32_to_cpu(ophead->oh_len);
38923646 } else {
38933647 idx = BTOBBT((uintptr_t)&ophead->oh_len -
....@@ -3902,7 +3656,7 @@
39023656 }
39033657 ptr += sizeof(xlog_op_header_t) + op_len;
39043658 }
3905
-} /* xlog_verify_iclog */
3659
+}
39063660 #endif
39073661
39083662 /*
....@@ -3915,7 +3669,7 @@
39153669 xlog_in_core_t *iclog, *ic;
39163670
39173671 iclog = log->l_iclog;
3918
- if (! (iclog->ic_state & XLOG_STATE_IOERROR)) {
3672
+ if (iclog->ic_state != XLOG_STATE_IOERROR) {
39193673 /*
39203674 * Mark all the incore logs IOERROR.
39213675 * From now on, no log flushes will result.
....@@ -3975,7 +3729,7 @@
39753729 * Somebody could've already done the hard work for us.
39763730 * No need to get locks for this.
39773731 */
3978
- if (logerror && log->l_iclog->ic_state & XLOG_STATE_IOERROR) {
3732
+ if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) {
39793733 ASSERT(XLOG_FORCED_SHUTDOWN(log));
39803734 return 1;
39813735 }
....@@ -4026,21 +3780,8 @@
40263780 spin_lock(&log->l_cilp->xc_push_lock);
40273781 wake_up_all(&log->l_cilp->xc_commit_wait);
40283782 spin_unlock(&log->l_cilp->xc_push_lock);
4029
- xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
3783
+ xlog_state_do_callback(log);
40303784
4031
-#ifdef XFSERRORDEBUG
4032
- {
4033
- xlog_in_core_t *iclog;
4034
-
4035
- spin_lock(&log->l_icloglock);
4036
- iclog = log->l_iclog;
4037
- do {
4038
- ASSERT(iclog->ic_callback == 0);
4039
- iclog = iclog->ic_next;
4040
- } while (iclog != log->l_iclog);
4041
- spin_unlock(&log->l_icloglock);
4042
- }
4043
-#endif
40443785 /* return non-zero if log IOERROR transition had already happened */
40453786 return retval;
40463787 }