hc
2023-12-06 08f87f769b595151be1afeff53e144f543faa614
kernel/fs/gfs2/log.c
....@@ -1,10 +1,7 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
34 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
4
- *
5
- * This copyrighted material is made available to anyone wishing to use,
6
- * modify, copy, or redistribute it subject to the terms and conditions
7
- * of the GNU General Public License version 2.
85 */
96
107 #include <linux/sched.h>
....@@ -33,12 +30,14 @@
3330 #include "util.h"
3431 #include "dir.h"
3532 #include "trace_gfs2.h"
33
+#include "trans.h"
34
+
35
+static void gfs2_log_shutdown(struct gfs2_sbd *sdp);
3636
3737 /**
3838 * gfs2_struct2blk - compute stuff
3939 * @sdp: the filesystem
4040 * @nstruct: the number of structures
41
- * @ssize: the size of the structures
4241 *
4342 * Compute the number of log descriptor blocks needed to hold a certain number
4443 * of structures of a certain size.
....@@ -46,18 +45,16 @@
4645 * Returns: the number of blocks needed (minimum is always 1)
4746 */
4847
49
-unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
50
- unsigned int ssize)
48
+unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct)
5149 {
5250 unsigned int blks;
5351 unsigned int first, second;
5452
5553 blks = 1;
56
- first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;
54
+ first = sdp->sd_ldptrs;
5755
5856 if (nstruct > first) {
59
- second = (sdp->sd_sb.sb_bsize -
60
- sizeof(struct gfs2_meta_header)) / ssize;
57
+ second = sdp->sd_inptrs;
6158 blks += DIV_ROUND_UP(nstruct - first, second);
6259 }
6360
....@@ -73,7 +70,7 @@
7370 *
7471 */
7572
76
-static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
73
+void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
7774 {
7875 bd->bd_tr = NULL;
7976 list_del_init(&bd->bd_ail_st_list);
....@@ -92,8 +89,7 @@
9289
9390 static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
9491 struct writeback_control *wbc,
95
- struct gfs2_trans *tr,
96
- bool *withdraw)
92
+ struct gfs2_trans *tr)
9793 __releases(&sdp->sd_ail_lock)
9894 __acquires(&sdp->sd_ail_lock)
9995 {
....@@ -101,6 +97,7 @@
10197 struct address_space *mapping;
10298 struct gfs2_bufdata *bd, *s;
10399 struct buffer_head *bh;
100
+ int ret = 0;
104101
105102 list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
106103 bh = bd->bd_bh;
....@@ -108,16 +105,21 @@
108105 gfs2_assert(sdp, bd->bd_tr == tr);
109106
110107 if (!buffer_busy(bh)) {
111
- if (!buffer_uptodate(bh) &&
112
- !test_and_set_bit(SDF_AIL1_IO_ERROR,
113
- &sdp->sd_flags)) {
114
- gfs2_io_error_bh(sdp, bh);
115
- *withdraw = true;
108
+ if (buffer_uptodate(bh)) {
109
+ list_move(&bd->bd_ail_st_list,
110
+ &tr->tr_ail2_list);
111
+ continue;
116112 }
117
- list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
118
- continue;
113
+ if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
114
+ gfs2_io_error_bh(sdp, bh);
115
+ gfs2_withdraw_delayed(sdp);
116
+ }
119117 }
120118
119
+ if (gfs2_withdrawn(sdp)) {
120
+ gfs2_remove_from_ail(bd);
121
+ continue;
122
+ }
121123 if (!buffer_dirty(bh))
122124 continue;
123125 if (gl == bd->bd_gl)
....@@ -128,16 +130,49 @@
128130 if (!mapping)
129131 continue;
130132 spin_unlock(&sdp->sd_ail_lock);
131
- generic_writepages(mapping, wbc);
133
+ ret = generic_writepages(mapping, wbc);
132134 spin_lock(&sdp->sd_ail_lock);
133
- if (wbc->nr_to_write <= 0)
135
+ if (ret == -ENODATA) /* if a jdata write into a new hole */
136
+ ret = 0; /* ignore it */
137
+ if (ret || wbc->nr_to_write <= 0)
134138 break;
135
- return 1;
139
+ return -EBUSY;
136140 }
137141
138
- return 0;
142
+ return ret;
139143 }
140144
145
+static void dump_ail_list(struct gfs2_sbd *sdp)
146
+{
147
+ struct gfs2_trans *tr;
148
+ struct gfs2_bufdata *bd;
149
+ struct buffer_head *bh;
150
+
151
+ list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
152
+ list_for_each_entry_reverse(bd, &tr->tr_ail1_list,
153
+ bd_ail_st_list) {
154
+ bh = bd->bd_bh;
155
+ fs_err(sdp, "bd %p: blk:0x%llx bh=%p ", bd,
156
+ (unsigned long long)bd->bd_blkno, bh);
157
+ if (!bh) {
158
+ fs_err(sdp, "\n");
159
+ continue;
160
+ }
161
+ fs_err(sdp, "0x%llx up2:%d dirt:%d lkd:%d req:%d "
162
+ "map:%d new:%d ar:%d aw:%d delay:%d "
163
+ "io err:%d unwritten:%d dfr:%d pin:%d esc:%d\n",
164
+ (unsigned long long)bh->b_blocknr,
165
+ buffer_uptodate(bh), buffer_dirty(bh),
166
+ buffer_locked(bh), buffer_req(bh),
167
+ buffer_mapped(bh), buffer_new(bh),
168
+ buffer_async_read(bh), buffer_async_write(bh),
169
+ buffer_delay(bh), buffer_write_io_error(bh),
170
+ buffer_unwritten(bh),
171
+ buffer_defer_completion(bh),
172
+ buffer_pinned(bh), buffer_escaped(bh));
173
+ }
174
+ }
175
+}
141176
142177 /**
143178 * gfs2_ail1_flush - start writeback of some ail1 entries
....@@ -153,22 +188,38 @@
153188 struct list_head *head = &sdp->sd_ail1_list;
154189 struct gfs2_trans *tr;
155190 struct blk_plug plug;
156
- bool withdraw = false;
191
+ int ret;
192
+ unsigned long flush_start = jiffies;
157193
158194 trace_gfs2_ail_flush(sdp, wbc, 1);
159195 blk_start_plug(&plug);
160196 spin_lock(&sdp->sd_ail_lock);
161197 restart:
198
+ ret = 0;
199
+ if (time_after(jiffies, flush_start + (HZ * 600))) {
200
+ fs_err(sdp, "Error: In %s for ten minutes! t=%d\n",
201
+ __func__, current->journal_info ? 1 : 0);
202
+ dump_ail_list(sdp);
203
+ goto out;
204
+ }
162205 list_for_each_entry_reverse(tr, head, tr_list) {
163206 if (wbc->nr_to_write <= 0)
164207 break;
165
- if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw))
166
- goto restart;
208
+ ret = gfs2_ail1_start_one(sdp, wbc, tr);
209
+ if (ret) {
210
+ if (ret == -EBUSY)
211
+ goto restart;
212
+ break;
213
+ }
167214 }
215
+out:
168216 spin_unlock(&sdp->sd_ail_lock);
169217 blk_finish_plug(&plug);
170
- if (withdraw)
171
- gfs2_lm_withdraw(sdp, NULL);
218
+ if (ret) {
219
+ gfs2_lm(sdp, "gfs2_ail1_start_one (generic_writepages) "
220
+ "returned: %d\n", ret);
221
+ gfs2_withdraw(sdp);
222
+ }
172223 trace_gfs2_ail_flush(sdp, wbc, 0);
173224 }
174225
....@@ -192,49 +243,74 @@
192243 /**
193244 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
194245 * @sdp: the filesystem
195
- * @ai: the AIL entry
246
+ * @tr: the transaction
247
+ * @max_revokes: If nonzero, issue revokes for the bd items for written buffers
196248 *
249
+ * returns: the transaction's count of remaining active items
197250 */
198251
199
-static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
200
- bool *withdraw)
252
+static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
253
+ int *max_revokes)
201254 {
202255 struct gfs2_bufdata *bd, *s;
203256 struct buffer_head *bh;
257
+ int active_count = 0;
204258
205259 list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
206260 bd_ail_st_list) {
207261 bh = bd->bd_bh;
208262 gfs2_assert(sdp, bd->bd_tr == tr);
209
- if (buffer_busy(bh))
263
+ /*
264
+ * If another process flagged an io error, e.g. writing to the
265
+ * journal, error all other bhs and move them off the ail1 to
266
+ * prevent a tight loop when unmount tries to flush ail1,
267
+ * regardless of whether they're still busy. If no outside
268
+ * errors were found and the buffer is busy, move to the next.
269
+ * If the ail buffer is not busy and caught an error, flag it
270
+ * for others.
271
+ */
272
+ if (!sdp->sd_log_error && buffer_busy(bh)) {
273
+ active_count++;
210274 continue;
275
+ }
211276 if (!buffer_uptodate(bh) &&
212
- !test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) {
277
+ !cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
213278 gfs2_io_error_bh(sdp, bh);
214
- *withdraw = true;
279
+ gfs2_withdraw_delayed(sdp);
280
+ }
281
+ /*
282
+ * If we have space for revokes and the bd is no longer on any
283
+ * buf list, we can just add a revoke for it immediately and
284
+ * avoid having to put it on the ail2 list, where it would need
285
+ * to be revoked later.
286
+ */
287
+ if (*max_revokes && list_empty(&bd->bd_list)) {
288
+ gfs2_add_revoke(sdp, bd);
289
+ (*max_revokes)--;
290
+ continue;
215291 }
216292 list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
217293 }
294
+ return active_count;
218295 }
219296
220297 /**
221298 * gfs2_ail1_empty - Try to empty the ail1 lists
222299 * @sdp: The superblock
300
+ * @max_revokes: If non-zero, add revokes where appropriate
223301 *
224302 * Tries to empty the ail1 lists, starting with the oldest first
225303 */
226304
227
-static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
305
+static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
228306 {
229307 struct gfs2_trans *tr, *s;
230308 int oldest_tr = 1;
231309 int ret;
232
- bool withdraw = false;
233310
234311 spin_lock(&sdp->sd_ail_lock);
235312 list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
236
- gfs2_ail1_empty_one(sdp, tr, &withdraw);
237
- if (list_empty(&tr->tr_ail1_list) && oldest_tr)
313
+ if (!gfs2_ail1_empty_one(sdp, tr, &max_revokes) && oldest_tr)
238314 list_move(&tr->tr_list, &sdp->sd_ail2_list);
239315 else
240316 oldest_tr = 0;
....@@ -242,8 +318,10 @@
242318 ret = list_empty(&sdp->sd_ail1_list);
243319 spin_unlock(&sdp->sd_ail_lock);
244320
245
- if (withdraw)
246
- gfs2_lm_withdraw(sdp, "fatal: I/O error(s)\n");
321
+ if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) {
322
+ gfs2_lm(sdp, "fatal: I/O error(s)\n");
323
+ gfs2_withdraw(sdp);
324
+ }
247325
248326 return ret;
249327 }
....@@ -271,20 +349,17 @@
271349 }
272350
273351 /**
274
- * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
275
- * @sdp: the filesystem
276
- * @ai: the AIL entry
277
- *
352
+ * gfs2_ail_empty_tr - empty one of the ail lists for a transaction
278353 */
279354
280
-static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
355
+static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
356
+ struct list_head *head)
281357 {
282
- struct list_head *head = &tr->tr_ail2_list;
283358 struct gfs2_bufdata *bd;
284359
285360 while (!list_empty(head)) {
286
- bd = list_entry(head->prev, struct gfs2_bufdata,
287
- bd_ail_st_list);
361
+ bd = list_first_entry(head, struct gfs2_bufdata,
362
+ bd_ail_st_list);
288363 gfs2_assert(sdp, bd->bd_tr == tr);
289364 gfs2_remove_from_ail(bd);
290365 }
....@@ -306,11 +381,11 @@
306381 if (!rm)
307382 continue;
308383
309
- gfs2_ail2_empty_one(sdp, tr);
384
+ gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
310385 list_del(&tr->tr_list);
311386 gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
312387 gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
313
- kfree(tr);
388
+ gfs2_trans_free(sdp, tr);
314389 }
315390
316391 spin_unlock(&sdp->sd_ail_lock);
....@@ -472,9 +547,8 @@
472547 reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp));
473548 }
474549
475
- if (sdp->sd_log_commited_revoke > 0)
476
- reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
477
- sizeof(u64));
550
+ if (sdp->sd_log_committed_revoke > 0)
551
+ reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke);
478552 /* One for the overall header */
479553 if (reserved)
480554 reserved++;
....@@ -491,7 +565,7 @@
491565 if (list_empty(&sdp->sd_ail1_list)) {
492566 tail = sdp->sd_log_head;
493567 } else {
494
- tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans,
568
+ tr = list_last_entry(&sdp->sd_ail1_list, struct gfs2_trans,
495569 tr_list);
496570 tail = tr->tr_first;
497571 }
....@@ -516,7 +590,7 @@
516590 }
517591
518592
519
-static void log_flush_wait(struct gfs2_sbd *sdp)
593
+void log_flush_wait(struct gfs2_sbd *sdp)
520594 {
521595 DEFINE_WAIT(wait);
522596
....@@ -545,18 +619,23 @@
545619 return 0;
546620 }
547621
622
+static void __ordered_del_inode(struct gfs2_inode *ip)
623
+{
624
+ if (!list_empty(&ip->i_ordered))
625
+ list_del_init(&ip->i_ordered);
626
+}
627
+
548628 static void gfs2_ordered_write(struct gfs2_sbd *sdp)
549629 {
550630 struct gfs2_inode *ip;
551631 LIST_HEAD(written);
552632
553633 spin_lock(&sdp->sd_ordered_lock);
554
- list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
555
- while (!list_empty(&sdp->sd_log_le_ordered)) {
556
- ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
634
+ list_sort(NULL, &sdp->sd_log_ordered, &ip_cmp);
635
+ while (!list_empty(&sdp->sd_log_ordered)) {
636
+ ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered);
557637 if (ip->i_inode.i_mapping->nrpages == 0) {
558
- test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
559
- list_del(&ip->i_ordered);
638
+ __ordered_del_inode(ip);
560639 continue;
561640 }
562641 list_move(&ip->i_ordered, &written);
....@@ -564,7 +643,7 @@
564643 filemap_fdatawrite(ip->i_inode.i_mapping);
565644 spin_lock(&sdp->sd_ordered_lock);
566645 }
567
- list_splice(&written, &sdp->sd_log_le_ordered);
646
+ list_splice(&written, &sdp->sd_log_ordered);
568647 spin_unlock(&sdp->sd_ordered_lock);
569648 }
570649
....@@ -573,10 +652,9 @@
573652 struct gfs2_inode *ip;
574653
575654 spin_lock(&sdp->sd_ordered_lock);
576
- while (!list_empty(&sdp->sd_log_le_ordered)) {
577
- ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
578
- list_del(&ip->i_ordered);
579
- WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
655
+ while (!list_empty(&sdp->sd_log_ordered)) {
656
+ ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered);
657
+ __ordered_del_inode(ip);
580658 if (ip->i_inode.i_mapping->nrpages == 0)
581659 continue;
582660 spin_unlock(&sdp->sd_ordered_lock);
....@@ -591,8 +669,7 @@
591669 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
592670
593671 spin_lock(&sdp->sd_ordered_lock);
594
- if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
595
- list_del(&ip->i_ordered);
672
+ __ordered_del_inode(ip);
596673 spin_unlock(&sdp->sd_ordered_lock);
597674 }
598675
....@@ -601,16 +678,15 @@
601678 struct buffer_head *bh = bd->bd_bh;
602679 struct gfs2_glock *gl = bd->bd_gl;
603680
681
+ sdp->sd_log_num_revoke++;
682
+ if (atomic_inc_return(&gl->gl_revokes) == 1)
683
+ gfs2_glock_hold(gl);
604684 bh->b_private = NULL;
605685 bd->bd_blkno = bh->b_blocknr;
606686 gfs2_remove_from_ail(bd); /* drops ref on bh */
607687 bd->bd_bh = NULL;
608
- bd->bd_ops = &gfs2_revoke_lops;
609
- sdp->sd_log_num_revoke++;
610
- if (atomic_inc_return(&gl->gl_revokes) == 1)
611
- gfs2_glock_hold(gl);
612688 set_bit(GLF_LFLUSH, &gl->gl_flags);
613
- list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
689
+ list_add(&bd->bd_list, &sdp->sd_log_revokes);
614690 }
615691
616692 void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
....@@ -621,27 +697,24 @@
621697 }
622698 }
623699
700
+/**
701
+ * gfs2_write_revokes - Add as many revokes to the system transaction as we can
702
+ * @sdp: The GFS2 superblock
703
+ *
704
+ * Our usual strategy is to defer writing revokes as much as we can in the hope
705
+ * that we'll eventually overwrite the journal, which will make those revokes
706
+ * go away. This changes when we flush the log: at that point, there will
707
+ * likely be some left-over space in the last revoke block of that transaction.
708
+ * We can fill that space with additional revokes for blocks that have already
709
+ * been written back. This will basically come at no cost now, and will save
710
+ * us from having to keep track of those blocks on the AIL2 list later.
711
+ */
624712 void gfs2_write_revokes(struct gfs2_sbd *sdp)
625713 {
626
- struct gfs2_trans *tr;
627
- struct gfs2_bufdata *bd, *tmp;
628
- int have_revokes = 0;
714
+ /* number of revokes we still have room for */
629715 int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
630716
631
- gfs2_ail1_empty(sdp);
632
- spin_lock(&sdp->sd_ail_lock);
633
- list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
634
- list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) {
635
- if (list_empty(&bd->bd_list)) {
636
- have_revokes = 1;
637
- goto done;
638
- }
639
- }
640
- }
641
-done:
642
- spin_unlock(&sdp->sd_ail_lock);
643
- if (have_revokes == 0)
644
- return;
717
+ gfs2_log_lock(sdp);
645718 while (sdp->sd_log_num_revoke > max_revokes)
646719 max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
647720 max_revokes -= sdp->sd_log_num_revoke;
....@@ -649,38 +722,34 @@
649722 atomic_dec(&sdp->sd_log_blks_free);
650723 /* If no blocks have been reserved, we need to also
651724 * reserve a block for the header */
652
- if (!sdp->sd_log_blks_reserved)
725
+ if (!sdp->sd_log_blks_reserved) {
653726 atomic_dec(&sdp->sd_log_blks_free);
654
- }
655
- gfs2_log_lock(sdp);
656
- spin_lock(&sdp->sd_ail_lock);
657
- list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
658
- list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) {
659
- if (max_revokes == 0)
660
- goto out_of_blocks;
661
- if (!list_empty(&bd->bd_list))
662
- continue;
663
- gfs2_add_revoke(sdp, bd);
664
- max_revokes--;
727
+ trace_gfs2_log_blocks(sdp, -2);
728
+ } else {
729
+ trace_gfs2_log_blocks(sdp, -1);
665730 }
666731 }
667
-out_of_blocks:
668
- spin_unlock(&sdp->sd_ail_lock);
732
+ gfs2_ail1_empty(sdp, max_revokes);
669733 gfs2_log_unlock(sdp);
670734
671735 if (!sdp->sd_log_num_revoke) {
672736 atomic_inc(&sdp->sd_log_blks_free);
673
- if (!sdp->sd_log_blks_reserved)
737
+ if (!sdp->sd_log_blks_reserved) {
674738 atomic_inc(&sdp->sd_log_blks_free);
739
+ trace_gfs2_log_blocks(sdp, 2);
740
+ } else {
741
+ trace_gfs2_log_blocks(sdp, 1);
742
+ }
675743 }
676744 }
677745
678746 /**
679
- * write_log_header - Write a journal log header buffer at sd_log_flush_head
747
+ * gfs2_write_log_header - Write a journal log header buffer at lblock
680748 * @sdp: The GFS2 superblock
681749 * @jd: journal descriptor of the journal to which we are writing
682750 * @seq: sequence number
683751 * @tail: tail of the log
752
+ * @lblock: value for lh_blkno (block number relative to start of journal)
684753 * @flags: log header flags GFS2_LOG_HEAD_*
685754 * @op_flags: flags to pass to the bio
686755 *
....@@ -688,16 +757,21 @@
688757 */
689758
690759 void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
691
- u64 seq, u32 tail, u32 flags, int op_flags)
760
+ u64 seq, u32 tail, u32 lblock, u32 flags,
761
+ int op_flags)
692762 {
693763 struct gfs2_log_header *lh;
694764 u32 hash, crc;
695
- struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
765
+ struct page *page;
696766 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
697767 struct timespec64 tv;
698768 struct super_block *sb = sdp->sd_vfs;
699
- u64 addr;
769
+ u64 dblock;
700770
771
+ if (gfs2_withdrawn(sdp))
772
+ goto out;
773
+
774
+ page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
701775 lh = page_address(page);
702776 clear_page(lh);
703777
....@@ -709,15 +783,21 @@
709783 lh->lh_sequence = cpu_to_be64(seq);
710784 lh->lh_flags = cpu_to_be32(flags);
711785 lh->lh_tail = cpu_to_be32(tail);
712
- lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
786
+ lh->lh_blkno = cpu_to_be32(lblock);
713787 hash = ~crc32(~0, lh, LH_V1_SIZE);
714788 lh->lh_hash = cpu_to_be32(hash);
715789
716790 ktime_get_coarse_real_ts64(&tv);
717791 lh->lh_nsec = cpu_to_be32(tv.tv_nsec);
718792 lh->lh_sec = cpu_to_be64(tv.tv_sec);
719
- addr = gfs2_log_bmap(sdp);
720
- lh->lh_addr = cpu_to_be64(addr);
793
+ if (!list_empty(&jd->extent_list))
794
+ dblock = gfs2_log_bmap(jd, lblock);
795
+ else {
796
+ int ret = gfs2_lblk_to_dblk(jd->jd_inode, lblock, &dblock);
797
+ if (gfs2_assert_withdraw(sdp, ret == 0))
798
+ return;
799
+ }
800
+ lh->lh_addr = cpu_to_be64(dblock);
721801 lh->lh_jinode = cpu_to_be64(GFS2_I(jd->jd_inode)->i_no_addr);
722802
723803 /* We may only write local statfs, quota, etc., when writing to our
....@@ -742,8 +822,9 @@
742822 sb->s_blocksize - LH_V1_SIZE - 4);
743823 lh->lh_crc = cpu_to_be32(crc);
744824
745
- gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
746
- gfs2_log_flush_bio(sdp, REQ_OP_WRITE, op_flags);
825
+ gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock);
826
+ gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags);
827
+out:
747828 log_flush_wait(sdp);
748829 }
749830
....@@ -771,10 +852,101 @@
771852 }
772853 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
773854 gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail,
774
- flags, op_flags);
855
+ sdp->sd_log_flush_head, flags, op_flags);
856
+ gfs2_log_incr_head(sdp);
775857
776858 if (sdp->sd_log_tail != tail)
777859 log_pull_tail(sdp, tail);
860
+}
861
+
862
+/**
863
+ * ail_drain - drain the ail lists after a withdraw
864
+ * @sdp: Pointer to GFS2 superblock
865
+ */
866
+static void ail_drain(struct gfs2_sbd *sdp)
867
+{
868
+ struct gfs2_trans *tr;
869
+
870
+ spin_lock(&sdp->sd_ail_lock);
871
+ /*
872
+ * For transactions on the sd_ail1_list we need to drain both the
873
+ * ail1 and ail2 lists. That's because function gfs2_ail1_start_one
874
+ * (temporarily) moves items from its tr_ail1 list to tr_ail2 list
875
+ * before revokes are sent for that block. Items on the sd_ail2_list
876
+ * should have already gotten beyond that point, so no need.
877
+ */
878
+ while (!list_empty(&sdp->sd_ail1_list)) {
879
+ tr = list_first_entry(&sdp->sd_ail1_list, struct gfs2_trans,
880
+ tr_list);
881
+ gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list);
882
+ gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
883
+ list_del(&tr->tr_list);
884
+ gfs2_trans_free(sdp, tr);
885
+ }
886
+ while (!list_empty(&sdp->sd_ail2_list)) {
887
+ tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans,
888
+ tr_list);
889
+ gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
890
+ list_del(&tr->tr_list);
891
+ gfs2_trans_free(sdp, tr);
892
+ }
893
+ spin_unlock(&sdp->sd_ail_lock);
894
+}
895
+
896
+/**
897
+ * empty_ail1_list - try to start IO and empty the ail1 list
898
+ * @sdp: Pointer to GFS2 superblock
899
+ */
900
+static void empty_ail1_list(struct gfs2_sbd *sdp)
901
+{
902
+ unsigned long start = jiffies;
903
+
904
+ for (;;) {
905
+ if (time_after(jiffies, start + (HZ * 600))) {
906
+ fs_err(sdp, "Error: In %s for 10 minutes! t=%d\n",
907
+ __func__, current->journal_info ? 1 : 0);
908
+ dump_ail_list(sdp);
909
+ return;
910
+ }
911
+ gfs2_ail1_start(sdp);
912
+ gfs2_ail1_wait(sdp);
913
+ if (gfs2_ail1_empty(sdp, 0))
914
+ return;
915
+ }
916
+}
917
+
918
+/**
919
+ * trans_drain - drain the buf and databuf queue for a failed transaction
920
+ * @tr: the transaction to drain
921
+ *
922
+ * When this is called, we're taking an error exit for a log write that failed
923
+ * but since we bypassed the after_commit functions, we need to remove the
924
+ * items from the buf and databuf queue.
925
+ */
926
+static void trans_drain(struct gfs2_trans *tr)
927
+{
928
+ struct gfs2_bufdata *bd;
929
+ struct list_head *head;
930
+
931
+ if (!tr)
932
+ return;
933
+
934
+ head = &tr->tr_buf;
935
+ while (!list_empty(head)) {
936
+ bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
937
+ list_del_init(&bd->bd_list);
938
+ if (!list_empty(&bd->bd_ail_st_list))
939
+ gfs2_remove_from_ail(bd);
940
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
941
+ }
942
+ head = &tr->tr_databuf;
943
+ while (!list_empty(head)) {
944
+ bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
945
+ list_del_init(&bd->bd_list);
946
+ if (!list_empty(&bd->bd_ail_st_list))
947
+ gfs2_remove_from_ail(bd);
948
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
949
+ }
778950 }
779951
780952 /**
....@@ -787,16 +959,21 @@
787959
788960 void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
789961 {
790
- struct gfs2_trans *tr;
962
+ struct gfs2_trans *tr = NULL;
791963 enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
792964
793965 down_write(&sdp->sd_log_flush_lock);
794966
967
+ /*
968
+ * Do this check while holding the log_flush_lock to prevent new
969
+ * buffers from being added to the ail via gfs2_pin()
970
+ */
971
+ if (gfs2_withdrawn(sdp))
972
+ goto out;
973
+
795974 /* Log might have been flushed while we waited for the flush lock */
796
- if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
797
- up_write(&sdp->sd_log_flush_lock);
798
- return;
799
- }
975
+ if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags))
976
+ goto out;
800977 trace_gfs2_log_flush(sdp, 1, flags);
801978
802979 if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
....@@ -808,17 +985,27 @@
808985 sdp->sd_log_tr = NULL;
809986 tr->tr_first = sdp->sd_log_flush_head;
810987 if (unlikely (state == SFS_FROZEN))
811
- gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new);
988
+ if (gfs2_assert_withdraw_delayed(sdp,
989
+ !tr->tr_num_buf_new && !tr->tr_num_databuf_new))
990
+ goto out_withdraw;
812991 }
813992
814993 if (unlikely(state == SFS_FROZEN))
815
- gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
816
- gfs2_assert_withdraw(sdp,
817
- sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
994
+ if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke))
995
+ goto out_withdraw;
996
+ if (gfs2_assert_withdraw_delayed(sdp,
997
+ sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke))
998
+ goto out_withdraw;
818999
8191000 gfs2_ordered_write(sdp);
1001
+ if (gfs2_withdrawn(sdp))
1002
+ goto out_withdraw;
8201003 lops_before_commit(sdp, tr);
821
- gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);
1004
+ if (gfs2_withdrawn(sdp))
1005
+ goto out_withdraw;
1006
+ gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
1007
+ if (gfs2_withdrawn(sdp))
1008
+ goto out_withdraw;
8221009
8231010 if (sdp->sd_log_head != sdp->sd_log_flush_head) {
8241011 log_flush_wait(sdp);
....@@ -828,12 +1015,14 @@
8281015 trace_gfs2_log_blocks(sdp, -1);
8291016 log_write_header(sdp, flags);
8301017 }
1018
+ if (gfs2_withdrawn(sdp))
1019
+ goto out_withdraw;
8311020 lops_after_commit(sdp, tr);
8321021
8331022 gfs2_log_lock(sdp);
8341023 sdp->sd_log_head = sdp->sd_log_flush_head;
8351024 sdp->sd_log_blks_reserved = 0;
836
- sdp->sd_log_commited_revoke = 0;
1025
+ sdp->sd_log_committed_revoke = 0;
8371026
8381027 spin_lock(&sdp->sd_ail_lock);
8391028 if (tr && !list_empty(&tr->tr_ail1_list)) {
....@@ -845,12 +1034,9 @@
8451034
8461035 if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
8471036 if (!sdp->sd_log_idle) {
848
- for (;;) {
849
- gfs2_ail1_start(sdp);
850
- gfs2_ail1_wait(sdp);
851
- if (gfs2_ail1_empty(sdp))
852
- break;
853
- }
1037
+ empty_ail1_list(sdp);
1038
+ if (gfs2_withdrawn(sdp))
1039
+ goto out_withdraw;
8541040 atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
8551041 trace_gfs2_log_blocks(sdp, -1);
8561042 log_write_header(sdp, flags);
....@@ -863,10 +1049,30 @@
8631049 atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
8641050 }
8651051
1052
+out_end:
8661053 trace_gfs2_log_flush(sdp, 0, flags);
1054
+out:
8671055 up_write(&sdp->sd_log_flush_lock);
1056
+ gfs2_trans_free(sdp, tr);
1057
+ if (gfs2_withdrawing(sdp))
1058
+ gfs2_withdraw(sdp);
1059
+ return;
8681060
869
- kfree(tr);
1061
+out_withdraw:
1062
+ trans_drain(tr);
1063
+ /**
1064
+ * If the tr_list is empty, we're withdrawing during a log
1065
+ * flush that targets a transaction, but the transaction was
1066
+ * never queued onto any of the ail lists. Here we add it to
1067
+ * ail1 just so that ail_drain() will find and free it.
1068
+ */
1069
+ spin_lock(&sdp->sd_ail_lock);
1070
+ if (tr && list_empty(&tr->tr_list))
1071
+ list_add(&tr->tr_list, &sdp->sd_ail1_list);
1072
+ spin_unlock(&sdp->sd_ail_lock);
1073
+ ail_drain(sdp); /* frees all transactions */
1074
+ tr = NULL;
1075
+ goto out_end;
8701076 }
8711077
8721078 /**
....@@ -913,7 +1119,7 @@
9131119 set_bit(TR_ATTACHED, &tr->tr_flags);
9141120 }
9151121
916
- sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
1122
+ sdp->sd_log_committed_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
9171123 reserved = calc_reserved(sdp);
9181124 maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
9191125 gfs2_assert_withdraw(sdp, maxres >= reserved);
....@@ -936,7 +1142,7 @@
9361142 * or the total number of used blocks (pinned blocks plus AIL blocks)
9371143 * is greater than thresh2.
9381144 *
939
- * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
1145
+ * At mount time thresh1 is 2/5ths of journal size, thresh2 is 4/5ths of
9401146 * journal size.
9411147 *
9421148 * Returns: errno
....@@ -958,7 +1164,7 @@
9581164 *
9591165 */
9601166
961
-void gfs2_log_shutdown(struct gfs2_sbd *sdp)
1167
+static void gfs2_log_shutdown(struct gfs2_sbd *sdp)
9621168 {
9631169 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
9641170 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
....@@ -1010,18 +1216,24 @@
10101216
10111217 while (!kthread_should_stop()) {
10121218
1219
+ if (gfs2_withdrawn(sdp)) {
1220
+ msleep_interruptible(HZ);
1221
+ continue;
1222
+ }
10131223 /* Check for errors writing to the journal */
10141224 if (sdp->sd_log_error) {
1015
- gfs2_lm_withdraw(sdp,
1016
- "GFS2: fsid=%s: error %d: "
1017
- "withdrawing the file system to "
1018
- "prevent further damage.\n",
1019
- sdp->sd_fsname, sdp->sd_log_error);
1225
+ gfs2_lm(sdp,
1226
+ "GFS2: fsid=%s: error %d: "
1227
+ "withdrawing the file system to "
1228
+ "prevent further damage.\n",
1229
+ sdp->sd_fsname, sdp->sd_log_error);
1230
+ gfs2_withdraw(sdp);
1231
+ continue;
10201232 }
10211233
10221234 did_flush = false;
10231235 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
1024
- gfs2_ail1_empty(sdp);
1236
+ gfs2_ail1_empty(sdp, 0);
10251237 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
10261238 GFS2_LFC_LOGD_JFLUSH_REQD);
10271239 did_flush = true;
....@@ -1030,7 +1242,7 @@
10301242 if (gfs2_ail_flush_reqd(sdp)) {
10311243 gfs2_ail1_start(sdp);
10321244 gfs2_ail1_wait(sdp);
1033
- gfs2_ail1_empty(sdp);
1245
+ gfs2_ail1_empty(sdp, 0);
10341246 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
10351247 GFS2_LFC_LOGD_AIL_FLUSH_REQD);
10361248 did_flush = true;