hc
2024-01-31 f9004dbfff8a3fbbd7e2a88c8a4327c7f2f8e5b2
kernel/fs/gfs2/recovery.c
....@@ -1,10 +1,7 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
34 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4
- *
5
- * This copyrighted material is made available to anyone wishing to use,
6
- * modify, copy, or redistribute it subject to the terms and conditions
7
- * of the GNU General Public License version 2.
85 */
96
107 #include <linux/module.h>
....@@ -114,12 +111,45 @@
114111 struct gfs2_revoke_replay *rr;
115112
116113 while (!list_empty(head)) {
117
- rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
114
+ rr = list_first_entry(head, struct gfs2_revoke_replay, rr_list);
118115 list_del(&rr->rr_list);
119116 kfree(rr);
120117 }
121118 }
122119
120
+int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
121
+ unsigned int blkno, struct gfs2_log_header_host *head)
122
+{
123
+ u32 hash, crc;
124
+
125
+ if (lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
126
+ lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
127
+ (blkno && be32_to_cpu(lh->lh_blkno) != blkno))
128
+ return 1;
129
+
130
+ hash = crc32(~0, lh, LH_V1_SIZE - 4);
131
+ hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
132
+
133
+ if (be32_to_cpu(lh->lh_hash) != hash)
134
+ return 1;
135
+
136
+ crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
137
+ sdp->sd_sb.sb_bsize - LH_V1_SIZE - 4);
138
+
139
+ if ((lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc))
140
+ return 1;
141
+
142
+ head->lh_sequence = be64_to_cpu(lh->lh_sequence);
143
+ head->lh_flags = be32_to_cpu(lh->lh_flags);
144
+ head->lh_tail = be32_to_cpu(lh->lh_tail);
145
+ head->lh_blkno = be32_to_cpu(lh->lh_blkno);
146
+
147
+ head->lh_local_total = be64_to_cpu(lh->lh_local_total);
148
+ head->lh_local_free = be64_to_cpu(lh->lh_local_free);
149
+ head->lh_local_dinodes = be64_to_cpu(lh->lh_local_dinodes);
150
+
151
+ return 0;
152
+}
123153 /**
124154 * get_log_header - read the log header for a given segment
125155 * @jd: the journal
....@@ -137,158 +167,17 @@
137167 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
138168 struct gfs2_log_header_host *head)
139169 {
140
- struct gfs2_log_header *lh;
170
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
141171 struct buffer_head *bh;
142
- u32 hash, crc;
143172 int error;
144173
145174 error = gfs2_replay_read_block(jd, blk, &bh);
146175 if (error)
147176 return error;
148
- lh = (void *)bh->b_data;
149177
150
- hash = crc32(~0, lh, LH_V1_SIZE - 4);
151
- hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
152
-
153
- crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
154
- bh->b_size - LH_V1_SIZE - 4);
155
-
156
- error = lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
157
- lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
158
- be32_to_cpu(lh->lh_blkno) != blk ||
159
- be32_to_cpu(lh->lh_hash) != hash ||
160
- (lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc);
161
-
178
+ error = __get_log_header(sdp, (const struct gfs2_log_header *)bh->b_data,
179
+ blk, head);
162180 brelse(bh);
163
-
164
- if (!error) {
165
- head->lh_sequence = be64_to_cpu(lh->lh_sequence);
166
- head->lh_flags = be32_to_cpu(lh->lh_flags);
167
- head->lh_tail = be32_to_cpu(lh->lh_tail);
168
- head->lh_blkno = be32_to_cpu(lh->lh_blkno);
169
- }
170
- return error;
171
-}
172
-
173
-/**
174
- * find_good_lh - find a good log header
175
- * @jd: the journal
176
- * @blk: the segment to start searching from
177
- * @lh: the log header to fill in
178
- * @forward: if true search forward in the log, else search backward
179
- *
180
- * Call get_log_header() to get a log header for a segment, but if the
181
- * segment is bad, either scan forward or backward until we find a good one.
182
- *
183
- * Returns: errno
184
- */
185
-
186
-static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
187
- struct gfs2_log_header_host *head)
188
-{
189
- unsigned int orig_blk = *blk;
190
- int error;
191
-
192
- for (;;) {
193
- error = get_log_header(jd, *blk, head);
194
- if (error <= 0)
195
- return error;
196
-
197
- if (++*blk == jd->jd_blocks)
198
- *blk = 0;
199
-
200
- if (*blk == orig_blk) {
201
- gfs2_consist_inode(GFS2_I(jd->jd_inode));
202
- return -EIO;
203
- }
204
- }
205
-}
206
-
207
-/**
208
- * jhead_scan - make sure we've found the head of the log
209
- * @jd: the journal
210
- * @head: this is filled in with the log descriptor of the head
211
- *
212
- * At this point, seg and lh should be either the head of the log or just
213
- * before. Scan forward until we find the head.
214
- *
215
- * Returns: errno
216
- */
217
-
218
-static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
219
-{
220
- unsigned int blk = head->lh_blkno;
221
- struct gfs2_log_header_host lh;
222
- int error;
223
-
224
- for (;;) {
225
- if (++blk == jd->jd_blocks)
226
- blk = 0;
227
-
228
- error = get_log_header(jd, blk, &lh);
229
- if (error < 0)
230
- return error;
231
- if (error == 1)
232
- continue;
233
-
234
- if (lh.lh_sequence == head->lh_sequence) {
235
- gfs2_consist_inode(GFS2_I(jd->jd_inode));
236
- return -EIO;
237
- }
238
- if (lh.lh_sequence < head->lh_sequence)
239
- break;
240
-
241
- *head = lh;
242
- }
243
-
244
- return 0;
245
-}
246
-
247
-/**
248
- * gfs2_find_jhead - find the head of a log
249
- * @jd: the journal
250
- * @head: the log descriptor for the head of the log is returned here
251
- *
252
- * Do a binary search of a journal and find the valid log entry with the
253
- * highest sequence number. (i.e. the log head)
254
- *
255
- * Returns: errno
256
- */
257
-
258
-int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
259
-{
260
- struct gfs2_log_header_host lh_1, lh_m;
261
- u32 blk_1, blk_2, blk_m;
262
- int error;
263
-
264
- blk_1 = 0;
265
- blk_2 = jd->jd_blocks - 1;
266
-
267
- for (;;) {
268
- blk_m = (blk_1 + blk_2) / 2;
269
-
270
- error = find_good_lh(jd, &blk_1, &lh_1);
271
- if (error)
272
- return error;
273
-
274
- error = find_good_lh(jd, &blk_m, &lh_m);
275
- if (error)
276
- return error;
277
-
278
- if (blk_1 == blk_m || blk_m == blk_2)
279
- break;
280
-
281
- if (lh_1.lh_sequence <= lh_m.lh_sequence)
282
- blk_1 = blk_m;
283
- else
284
- blk_2 = blk_m;
285
- }
286
-
287
- error = jhead_scan(jd, &lh_1);
288
- if (error)
289
- return error;
290
-
291
- *head = lh_1;
292181
293182 return error;
294183 }
....@@ -305,7 +194,7 @@
305194 * Returns: errno
306195 */
307196
308
-static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
197
+static int foreach_descriptor(struct gfs2_jdesc *jd, u32 start,
309198 unsigned int end, int pass)
310199 {
311200 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
....@@ -375,12 +264,16 @@
375264 struct gfs2_log_header_host *head)
376265 {
377266 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
267
+ u32 lblock = head->lh_blkno;
378268
379
- sdp->sd_log_flush_head = head->lh_blkno;
380
- gfs2_replay_incr_blk(jd, &sdp->sd_log_flush_head);
381
- gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0,
269
+ gfs2_replay_incr_blk(jd, &lblock);
270
+ gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0, lblock,
382271 GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
383272 REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
273
+ if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
274
+ sdp->sd_log_flush_head = lblock;
275
+ gfs2_log_incr_head(sdp);
276
+ }
384277 }
385278
386279
....@@ -403,6 +296,109 @@
403296 sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
404297 }
405298
299
+/**
300
+ * update_statfs_inode - Update the master statfs inode or zero out the local
301
+ * statfs inode for a given journal.
302
+ * @jd: The journal
303
+ * @head: If NULL, @inode is the local statfs inode and we need to zero it out.
304
+ * Otherwise, @head contains the statfs change info that needs to be
305
+ * synced to the master statfs inode (pointed to by @inode).
306
+ * @inode: statfs inode to update.
307
+ */
308
+static int update_statfs_inode(struct gfs2_jdesc *jd,
309
+ struct gfs2_log_header_host *head,
310
+ struct inode *inode)
311
+{
312
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
313
+ struct gfs2_inode *ip;
314
+ struct buffer_head *bh;
315
+ struct gfs2_statfs_change_host sc;
316
+ int error = 0;
317
+
318
+ BUG_ON(!inode);
319
+ ip = GFS2_I(inode);
320
+
321
+ error = gfs2_meta_inode_buffer(ip, &bh);
322
+ if (error)
323
+ goto out;
324
+
325
+ spin_lock(&sdp->sd_statfs_spin);
326
+
327
+ if (head) { /* Update the master statfs inode */
328
+ gfs2_statfs_change_in(&sc, bh->b_data + sizeof(struct gfs2_dinode));
329
+ sc.sc_total += head->lh_local_total;
330
+ sc.sc_free += head->lh_local_free;
331
+ sc.sc_dinodes += head->lh_local_dinodes;
332
+ gfs2_statfs_change_out(&sc, bh->b_data + sizeof(struct gfs2_dinode));
333
+
334
+ fs_info(sdp, "jid=%u: Updated master statfs Total:%lld, "
335
+ "Free:%lld, Dinodes:%lld after change "
336
+ "[%+lld,%+lld,%+lld]\n", jd->jd_jid, sc.sc_total,
337
+ sc.sc_free, sc.sc_dinodes, head->lh_local_total,
338
+ head->lh_local_free, head->lh_local_dinodes);
339
+ } else { /* Zero out the local statfs inode */
340
+ memset(bh->b_data + sizeof(struct gfs2_dinode), 0,
341
+ sizeof(struct gfs2_statfs_change));
342
+ /* If it's our own journal, reset any in-memory changes too */
343
+ if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
344
+ memset(&sdp->sd_statfs_local, 0,
345
+ sizeof(struct gfs2_statfs_change_host));
346
+ }
347
+ }
348
+ spin_unlock(&sdp->sd_statfs_spin);
349
+
350
+ mark_buffer_dirty(bh);
351
+ brelse(bh);
352
+ gfs2_inode_metasync(ip->i_gl);
353
+
354
+out:
355
+ return error;
356
+}
357
+
358
+/**
359
+ * recover_local_statfs - Update the master and local statfs changes for this
360
+ * journal.
361
+ *
362
+ * Previously, statfs updates would be read in from the local statfs inode and
363
+ * synced to the master statfs inode during recovery.
364
+ *
365
+ * We now use the statfs updates in the journal head to update the master statfs
366
+ * inode instead of reading in from the local statfs inode. To preserve backward
367
+ * compatibility with kernels that can't do this, we still need to keep the
368
+ * local statfs inode up to date by writing changes to it. At some point in the
369
+ * future, we can do away with the local statfs inodes altogether and keep the
370
+ * statfs changes solely in the journal.
371
+ *
372
+ * @jd: the journal
373
+ * @head: the journal head
374
+ *
375
+ * Returns: nothing; a failed statfs update is not reported to the caller
376
+ */
377
+static void recover_local_statfs(struct gfs2_jdesc *jd,
378
+ struct gfs2_log_header_host *head)
379
+{
380
+ int error;
381
+ struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
382
+
383
+ if (!head->lh_local_total && !head->lh_local_free
384
+ && !head->lh_local_dinodes) /* No change */
385
+ goto zero_local;
386
+
387
+ /* First update the master statfs inode with the changes we
388
+ * found in the journal. */
389
+ error = update_statfs_inode(jd, head, sdp->sd_statfs_inode);
390
+ if (error)
391
+ goto out;
392
+
393
+zero_local:
394
+ /* Zero out the local statfs inode so any changes in there
395
+ * are not re-recovered. */
396
+ error = update_statfs_inode(jd, NULL,
397
+ find_local_statfs_inode(sdp, jd->jd_jid));
398
+out:
399
+ return;
400
+}
401
+
406402 void gfs2_recover_func(struct work_struct *work)
407403 {
408404 struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
....@@ -416,6 +412,11 @@
416412 int error = 0;
417413 int jlocked = 0;
418414
415
+ if (gfs2_withdrawn(sdp)) {
416
+ fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n",
417
+ jd->jd_jid);
418
+ goto fail;
419
+ }
419420 t_start = ktime_get();
420421 if (sdp->sd_args.ar_spectator)
421422 goto fail;
....@@ -439,7 +440,7 @@
439440
440441 default:
441442 goto fail;
442
- };
443
+ }
443444
444445 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
445446 LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
....@@ -456,10 +457,12 @@
456457 if (error)
457458 goto fail_gunlock_ji;
458459
459
- error = gfs2_find_jhead(jd, &head);
460
+ error = gfs2_find_jhead(jd, &head, true);
460461 if (error)
461462 goto fail_gunlock_ji;
462463 t_jhd = ktime_get();
464
+ fs_info(sdp, "jid=%u: Journal head lookup took %lldms\n", jd->jd_jid,
465
+ ktime_ms_delta(t_jhd, t_jlck));
463466
464467 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
465468 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
....@@ -467,9 +470,7 @@
467470
468471 /* Acquire a shared hold on the freeze lock */
469472
470
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
471
- LM_FLAG_NOEXP | LM_FLAG_PRIORITY,
472
- &thaw_gh);
473
+ error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY);
473474 if (error)
474475 goto fail_gunlock_ji;
475476
....@@ -499,20 +500,29 @@
499500 }
500501
501502 t_tlck = ktime_get();
502
- fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
503
+ fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n",
504
+ jd->jd_jid, head.lh_tail, head.lh_blkno);
503505
506
+ /* We take the sd_log_flush_lock here primarily to prevent log
507
+ * flushes and simultaneous journal replays from stomping on
508
+ * each other wrt sd_log_bio. */
509
+ down_read(&sdp->sd_log_flush_lock);
504510 for (pass = 0; pass < 2; pass++) {
505511 lops_before_scan(jd, &head, pass);
506512 error = foreach_descriptor(jd, head.lh_tail,
507513 head.lh_blkno, pass);
508514 lops_after_scan(jd, error, pass);
509
- if (error)
515
+ if (error) {
516
+ up_read(&sdp->sd_log_flush_lock);
510517 goto fail_gunlock_thaw;
518
+ }
511519 }
512520
521
+ recover_local_statfs(jd, &head);
513522 clean_journal(jd, &head);
523
+ up_read(&sdp->sd_log_flush_lock);
514524
515
- gfs2_glock_dq_uninit(&thaw_gh);
525
+ gfs2_freeze_unlock(&thaw_gh);
516526 t_rep = ktime_get();
517527 fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
518528 "jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
....@@ -534,7 +544,7 @@
534544 goto done;
535545
536546 fail_gunlock_thaw:
537
- gfs2_glock_dq_uninit(&thaw_gh);
547
+ gfs2_freeze_unlock(&thaw_gh);
538548 fail_gunlock_ji:
539549 if (jlocked) {
540550 gfs2_glock_dq_uninit(&ji_gh);