.. | .. |
---|
13 | 13 | #include "xfs_sb.h" |
---|
14 | 14 | #include "xfs_mount.h" |
---|
15 | 15 | #include "xfs_defer.h" |
---|
16 | | -#include "xfs_da_format.h" |
---|
17 | | -#include "xfs_da_btree.h" |
---|
18 | 16 | #include "xfs_inode.h" |
---|
19 | 17 | #include "xfs_trans.h" |
---|
20 | 18 | #include "xfs_log.h" |
---|
21 | 19 | #include "xfs_log_priv.h" |
---|
22 | 20 | #include "xfs_log_recover.h" |
---|
23 | | -#include "xfs_inode_item.h" |
---|
24 | | -#include "xfs_extfree_item.h" |
---|
25 | 21 | #include "xfs_trans_priv.h" |
---|
26 | 22 | #include "xfs_alloc.h" |
---|
27 | 23 | #include "xfs_ialloc.h" |
---|
28 | | -#include "xfs_quota.h" |
---|
29 | | -#include "xfs_cksum.h" |
---|
30 | 24 | #include "xfs_trace.h" |
---|
31 | 25 | #include "xfs_icache.h" |
---|
32 | | -#include "xfs_bmap_btree.h" |
---|
33 | 26 | #include "xfs_error.h" |
---|
34 | | -#include "xfs_dir2.h" |
---|
35 | | -#include "xfs_rmap_item.h" |
---|
36 | 27 | #include "xfs_buf_item.h" |
---|
37 | | -#include "xfs_refcount_item.h" |
---|
38 | | -#include "xfs_bmap_item.h" |
---|
39 | 28 | |
---|
40 | 29 | #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) |
---|
41 | 30 | |
---|
.. | .. |
---|
59 | 48 | struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *); |
---|
60 | 49 | |
---|
61 | 50 | /* |
---|
62 | | - * This structure is used during recovery to record the buf log items which |
---|
63 | | - * have been canceled and should not be replayed. |
---|
64 | | - */ |
---|
65 | | -struct xfs_buf_cancel { |
---|
66 | | - xfs_daddr_t bc_blkno; |
---|
67 | | - uint bc_len; |
---|
68 | | - int bc_refcount; |
---|
69 | | - struct list_head bc_list; |
---|
70 | | -}; |
---|
71 | | - |
---|
72 | | -/* |
---|
73 | 51 | * Sector aligned buffer routines for buffer create/read/write/access |
---|
74 | 52 | */ |
---|
75 | 53 | |
---|
.. | .. |
---|
79 | 57 | * are valid, false otherwise. |
---|
80 | 58 | */ |
---|
81 | 59 | static inline bool |
---|
82 | | -xlog_verify_bp( |
---|
| 60 | +xlog_verify_bno( |
---|
83 | 61 | struct xlog *log, |
---|
84 | 62 | xfs_daddr_t blk_no, |
---|
85 | 63 | int bbcount) |
---|
.. | .. |
---|
92 | 70 | } |
---|
93 | 71 | |
---|
94 | 72 | /* |
---|
95 | | - * Allocate a buffer to hold log data. The buffer needs to be able |
---|
96 | | - * to map to a range of nbblks basic blocks at any valid (basic |
---|
97 | | - * block) offset within the log. |
---|
| 73 | + * Allocate a buffer to hold log data. The buffer needs to be able to map to |
---|
| 74 | + * a range of nbblks basic blocks at any valid offset within the log. |
---|
98 | 75 | */ |
---|
99 | | -STATIC xfs_buf_t * |
---|
100 | | -xlog_get_bp( |
---|
| 76 | +static char * |
---|
| 77 | +xlog_alloc_buffer( |
---|
101 | 78 | struct xlog *log, |
---|
102 | 79 | int nbblks) |
---|
103 | 80 | { |
---|
104 | | - struct xfs_buf *bp; |
---|
| 81 | + int align_mask = xfs_buftarg_dma_alignment(log->l_targ); |
---|
105 | 82 | |
---|
106 | 83 | /* |
---|
107 | 84 | * Pass log block 0 since we don't have an addr yet, buffer will be |
---|
108 | 85 | * verified on read. |
---|
109 | 86 | */ |
---|
110 | | - if (!xlog_verify_bp(log, 0, nbblks)) { |
---|
| 87 | + if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, 0, nbblks))) { |
---|
111 | 88 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
---|
112 | 89 | nbblks); |
---|
113 | | - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
---|
114 | 90 | return NULL; |
---|
115 | 91 | } |
---|
116 | 92 | |
---|
117 | 93 | /* |
---|
118 | | - * We do log I/O in units of log sectors (a power-of-2 |
---|
119 | | - * multiple of the basic block size), so we round up the |
---|
120 | | - * requested size to accommodate the basic blocks required |
---|
121 | | - * for complete log sectors. |
---|
| 94 | + * We do log I/O in units of log sectors (a power-of-2 multiple of the |
---|
| 95 | + * basic block size), so we round up the requested size to accommodate |
---|
| 96 | + * the basic blocks required for complete log sectors. |
---|
122 | 97 | * |
---|
123 | | - * In addition, the buffer may be used for a non-sector- |
---|
124 | | - * aligned block offset, in which case an I/O of the |
---|
125 | | - * requested size could extend beyond the end of the |
---|
126 | | - * buffer. If the requested size is only 1 basic block it |
---|
127 | | - * will never straddle a sector boundary, so this won't be |
---|
128 | | - * an issue. Nor will this be a problem if the log I/O is |
---|
129 | | - * done in basic blocks (sector size 1). But otherwise we |
---|
130 | | - * extend the buffer by one extra log sector to ensure |
---|
131 | | - * there's space to accommodate this possibility. |
---|
| 98 | + * In addition, the buffer may be used for a non-sector-aligned block |
---|
| 99 | + * offset, in which case an I/O of the requested size could extend |
---|
| 100 | + * beyond the end of the buffer. If the requested size is only 1 basic |
---|
| 101 | + * block it will never straddle a sector boundary, so this won't be an |
---|
| 102 | + * issue. Nor will this be a problem if the log I/O is done in basic |
---|
| 103 | + * blocks (sector size 1). But otherwise we extend the buffer by one |
---|
| 104 | + * extra log sector to ensure there's space to accommodate this |
---|
| 105 | + * possibility. |
---|
132 | 106 | */ |
---|
133 | 107 | if (nbblks > 1 && log->l_sectBBsize > 1) |
---|
134 | 108 | nbblks += log->l_sectBBsize; |
---|
135 | 109 | nbblks = round_up(nbblks, log->l_sectBBsize); |
---|
136 | | - |
---|
137 | | - bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); |
---|
138 | | - if (bp) |
---|
139 | | - xfs_buf_unlock(bp); |
---|
140 | | - return bp; |
---|
141 | | -} |
---|
142 | | - |
---|
143 | | -STATIC void |
---|
144 | | -xlog_put_bp( |
---|
145 | | - xfs_buf_t *bp) |
---|
146 | | -{ |
---|
147 | | - xfs_buf_free(bp); |
---|
| 110 | + return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO); |
---|
148 | 111 | } |
---|
149 | 112 | |
---|
150 | 113 | /* |
---|
151 | 114 | * Return the address of the start of the given block number's data |
---|
152 | 115 | * in a log buffer. The buffer covers a log sector-aligned region. |
---|
153 | 116 | */ |
---|
154 | | -STATIC char * |
---|
| 117 | +static inline unsigned int |
---|
155 | 118 | xlog_align( |
---|
156 | 119 | struct xlog *log, |
---|
157 | | - xfs_daddr_t blk_no, |
---|
158 | | - int nbblks, |
---|
159 | | - struct xfs_buf *bp) |
---|
| 120 | + xfs_daddr_t blk_no) |
---|
160 | 121 | { |
---|
161 | | - xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); |
---|
162 | | - |
---|
163 | | - ASSERT(offset + nbblks <= bp->b_length); |
---|
164 | | - return bp->b_addr + BBTOB(offset); |
---|
| 122 | + return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1)); |
---|
165 | 123 | } |
---|
166 | 124 | |
---|
167 | | - |
---|
168 | | -/* |
---|
169 | | - * nbblks should be uint, but oh well. Just want to catch that 32-bit length. |
---|
170 | | - */ |
---|
171 | | -STATIC int |
---|
172 | | -xlog_bread_noalign( |
---|
173 | | - struct xlog *log, |
---|
174 | | - xfs_daddr_t blk_no, |
---|
175 | | - int nbblks, |
---|
176 | | - struct xfs_buf *bp) |
---|
| 125 | +static int |
---|
| 126 | +xlog_do_io( |
---|
| 127 | + struct xlog *log, |
---|
| 128 | + xfs_daddr_t blk_no, |
---|
| 129 | + unsigned int nbblks, |
---|
| 130 | + char *data, |
---|
| 131 | + unsigned int op) |
---|
177 | 132 | { |
---|
178 | | - int error; |
---|
| 133 | + int error; |
---|
179 | 134 | |
---|
180 | | - if (!xlog_verify_bp(log, blk_no, nbblks)) { |
---|
| 135 | + if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, blk_no, nbblks))) { |
---|
181 | 136 | xfs_warn(log->l_mp, |
---|
182 | 137 | "Invalid log block/length (0x%llx, 0x%x) for buffer", |
---|
183 | 138 | blk_no, nbblks); |
---|
184 | | - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
---|
185 | 139 | return -EFSCORRUPTED; |
---|
186 | 140 | } |
---|
187 | 141 | |
---|
188 | 142 | blk_no = round_down(blk_no, log->l_sectBBsize); |
---|
189 | 143 | nbblks = round_up(nbblks, log->l_sectBBsize); |
---|
190 | | - |
---|
191 | 144 | ASSERT(nbblks > 0); |
---|
192 | | - ASSERT(nbblks <= bp->b_length); |
---|
193 | 145 | |
---|
194 | | - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
---|
195 | | - bp->b_flags |= XBF_READ; |
---|
196 | | - bp->b_io_length = nbblks; |
---|
197 | | - bp->b_error = 0; |
---|
198 | | - |
---|
199 | | - error = xfs_buf_submit(bp); |
---|
200 | | - if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) |
---|
201 | | - xfs_buf_ioerror_alert(bp, __func__); |
---|
| 146 | + error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, |
---|
| 147 | + BBTOB(nbblks), data, op); |
---|
| 148 | + if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) { |
---|
| 149 | + xfs_alert(log->l_mp, |
---|
| 150 | + "log recovery %s I/O error at daddr 0x%llx len %d error %d", |
---|
| 151 | + op == REQ_OP_WRITE ? "write" : "read", |
---|
| 152 | + blk_no, nbblks, error); |
---|
| 153 | + } |
---|
202 | 154 | return error; |
---|
| 155 | +} |
---|
| 156 | + |
---|
| 157 | +STATIC int |
---|
| 158 | +xlog_bread_noalign( |
---|
| 159 | + struct xlog *log, |
---|
| 160 | + xfs_daddr_t blk_no, |
---|
| 161 | + int nbblks, |
---|
| 162 | + char *data) |
---|
| 163 | +{ |
---|
| 164 | + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); |
---|
203 | 165 | } |
---|
204 | 166 | |
---|
205 | 167 | STATIC int |
---|
.. | .. |
---|
207 | 169 | struct xlog *log, |
---|
208 | 170 | xfs_daddr_t blk_no, |
---|
209 | 171 | int nbblks, |
---|
210 | | - struct xfs_buf *bp, |
---|
| 172 | + char *data, |
---|
211 | 173 | char **offset) |
---|
212 | 174 | { |
---|
213 | 175 | int error; |
---|
214 | 176 | |
---|
215 | | - error = xlog_bread_noalign(log, blk_no, nbblks, bp); |
---|
216 | | - if (error) |
---|
217 | | - return error; |
---|
218 | | - |
---|
219 | | - *offset = xlog_align(log, blk_no, nbblks, bp); |
---|
220 | | - return 0; |
---|
| 177 | + error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); |
---|
| 178 | + if (!error) |
---|
| 179 | + *offset = data + xlog_align(log, blk_no); |
---|
| 180 | + return error; |
---|
221 | 181 | } |
---|
222 | 182 | |
---|
223 | | -/* |
---|
224 | | - * Read at an offset into the buffer. Returns with the buffer in it's original |
---|
225 | | - * state regardless of the result of the read. |
---|
226 | | - */ |
---|
227 | | -STATIC int |
---|
228 | | -xlog_bread_offset( |
---|
229 | | - struct xlog *log, |
---|
230 | | - xfs_daddr_t blk_no, /* block to read from */ |
---|
231 | | - int nbblks, /* blocks to read */ |
---|
232 | | - struct xfs_buf *bp, |
---|
233 | | - char *offset) |
---|
234 | | -{ |
---|
235 | | - char *orig_offset = bp->b_addr; |
---|
236 | | - int orig_len = BBTOB(bp->b_length); |
---|
237 | | - int error, error2; |
---|
238 | | - |
---|
239 | | - error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); |
---|
240 | | - if (error) |
---|
241 | | - return error; |
---|
242 | | - |
---|
243 | | - error = xlog_bread_noalign(log, blk_no, nbblks, bp); |
---|
244 | | - |
---|
245 | | - /* must reset buffer pointer even on error */ |
---|
246 | | - error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); |
---|
247 | | - if (error) |
---|
248 | | - return error; |
---|
249 | | - return error2; |
---|
250 | | -} |
---|
251 | | - |
---|
252 | | -/* |
---|
253 | | - * Write out the buffer at the given block for the given number of blocks. |
---|
254 | | - * The buffer is kept locked across the write and is returned locked. |
---|
255 | | - * This can only be used for synchronous log writes. |
---|
256 | | - */ |
---|
257 | 183 | STATIC int |
---|
258 | 184 | xlog_bwrite( |
---|
259 | 185 | struct xlog *log, |
---|
260 | 186 | xfs_daddr_t blk_no, |
---|
261 | 187 | int nbblks, |
---|
262 | | - struct xfs_buf *bp) |
---|
| 188 | + char *data) |
---|
263 | 189 | { |
---|
264 | | - int error; |
---|
265 | | - |
---|
266 | | - if (!xlog_verify_bp(log, blk_no, nbblks)) { |
---|
267 | | - xfs_warn(log->l_mp, |
---|
268 | | - "Invalid log block/length (0x%llx, 0x%x) for buffer", |
---|
269 | | - blk_no, nbblks); |
---|
270 | | - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
---|
271 | | - return -EFSCORRUPTED; |
---|
272 | | - } |
---|
273 | | - |
---|
274 | | - blk_no = round_down(blk_no, log->l_sectBBsize); |
---|
275 | | - nbblks = round_up(nbblks, log->l_sectBBsize); |
---|
276 | | - |
---|
277 | | - ASSERT(nbblks > 0); |
---|
278 | | - ASSERT(nbblks <= bp->b_length); |
---|
279 | | - |
---|
280 | | - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
---|
281 | | - xfs_buf_hold(bp); |
---|
282 | | - xfs_buf_lock(bp); |
---|
283 | | - bp->b_io_length = nbblks; |
---|
284 | | - bp->b_error = 0; |
---|
285 | | - |
---|
286 | | - error = xfs_bwrite(bp); |
---|
287 | | - if (error) |
---|
288 | | - xfs_buf_ioerror_alert(bp, __func__); |
---|
289 | | - xfs_buf_relse(bp); |
---|
290 | | - return error; |
---|
| 190 | + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE); |
---|
291 | 191 | } |
---|
292 | 192 | |
---|
293 | 193 | #ifdef DEBUG |
---|
.. | .. |
---|
323 | 223 | * (XLOG_FMT_UNKNOWN). This stops us from trying to recover |
---|
324 | 224 | * a dirty log created in IRIX. |
---|
325 | 225 | */ |
---|
326 | | - if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) { |
---|
| 226 | + if (XFS_IS_CORRUPT(mp, head->h_fmt != cpu_to_be32(XLOG_FMT))) { |
---|
327 | 227 | xfs_warn(mp, |
---|
328 | 228 | "dirty log written in incompatible format - can't recover"); |
---|
329 | 229 | xlog_header_check_dump(mp, head); |
---|
330 | | - XFS_ERROR_REPORT("xlog_header_check_recover(1)", |
---|
331 | | - XFS_ERRLEVEL_HIGH, mp); |
---|
332 | 230 | return -EFSCORRUPTED; |
---|
333 | | - } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
---|
| 231 | + } |
---|
| 232 | + if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid, |
---|
| 233 | + &head->h_fs_uuid))) { |
---|
334 | 234 | xfs_warn(mp, |
---|
335 | 235 | "dirty log entry has mismatched uuid - can't recover"); |
---|
336 | 236 | xlog_header_check_dump(mp, head); |
---|
337 | | - XFS_ERROR_REPORT("xlog_header_check_recover(2)", |
---|
338 | | - XFS_ERRLEVEL_HIGH, mp); |
---|
339 | 237 | return -EFSCORRUPTED; |
---|
340 | 238 | } |
---|
341 | 239 | return 0; |
---|
.. | .. |
---|
358 | 256 | * by IRIX and continue. |
---|
359 | 257 | */ |
---|
360 | 258 | xfs_warn(mp, "null uuid in log - IRIX style log"); |
---|
361 | | - } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
---|
| 259 | + } else if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid, |
---|
| 260 | + &head->h_fs_uuid))) { |
---|
362 | 261 | xfs_warn(mp, "log has mismatched uuid - can't recover"); |
---|
363 | 262 | xlog_header_check_dump(mp, head); |
---|
364 | | - XFS_ERROR_REPORT("xlog_header_check_mount", |
---|
365 | | - XFS_ERRLEVEL_HIGH, mp); |
---|
366 | 263 | return -EFSCORRUPTED; |
---|
367 | 264 | } |
---|
368 | 265 | return 0; |
---|
369 | | -} |
---|
370 | | - |
---|
371 | | -STATIC void |
---|
372 | | -xlog_recover_iodone( |
---|
373 | | - struct xfs_buf *bp) |
---|
374 | | -{ |
---|
375 | | - if (bp->b_error) { |
---|
376 | | - /* |
---|
377 | | - * We're not going to bother about retrying |
---|
378 | | - * this during recovery. One strike! |
---|
379 | | - */ |
---|
380 | | - if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { |
---|
381 | | - xfs_buf_ioerror_alert(bp, __func__); |
---|
382 | | - xfs_force_shutdown(bp->b_target->bt_mount, |
---|
383 | | - SHUTDOWN_META_IO_ERROR); |
---|
384 | | - } |
---|
385 | | - } |
---|
386 | | - |
---|
387 | | - /* |
---|
388 | | - * On v5 supers, a bli could be attached to update the metadata LSN. |
---|
389 | | - * Clean it up. |
---|
390 | | - */ |
---|
391 | | - if (bp->b_log_item) |
---|
392 | | - xfs_buf_item_relse(bp); |
---|
393 | | - ASSERT(bp->b_log_item == NULL); |
---|
394 | | - |
---|
395 | | - bp->b_iodone = NULL; |
---|
396 | | - xfs_buf_ioend(bp); |
---|
397 | 266 | } |
---|
398 | 267 | |
---|
399 | 268 | /* |
---|
.. | .. |
---|
405 | 274 | STATIC int |
---|
406 | 275 | xlog_find_cycle_start( |
---|
407 | 276 | struct xlog *log, |
---|
408 | | - struct xfs_buf *bp, |
---|
| 277 | + char *buffer, |
---|
409 | 278 | xfs_daddr_t first_blk, |
---|
410 | 279 | xfs_daddr_t *last_blk, |
---|
411 | 280 | uint cycle) |
---|
.. | .. |
---|
419 | 288 | end_blk = *last_blk; |
---|
420 | 289 | mid_blk = BLK_AVG(first_blk, end_blk); |
---|
421 | 290 | while (mid_blk != first_blk && mid_blk != end_blk) { |
---|
422 | | - error = xlog_bread(log, mid_blk, 1, bp, &offset); |
---|
| 291 | + error = xlog_bread(log, mid_blk, 1, buffer, &offset); |
---|
423 | 292 | if (error) |
---|
424 | 293 | return error; |
---|
425 | 294 | mid_cycle = xlog_get_cycle(offset); |
---|
.. | .. |
---|
455 | 324 | { |
---|
456 | 325 | xfs_daddr_t i, j; |
---|
457 | 326 | uint cycle; |
---|
458 | | - xfs_buf_t *bp; |
---|
| 327 | + char *buffer; |
---|
459 | 328 | xfs_daddr_t bufblks; |
---|
460 | 329 | char *buf = NULL; |
---|
461 | 330 | int error = 0; |
---|
.. | .. |
---|
469 | 338 | bufblks = 1 << ffs(nbblks); |
---|
470 | 339 | while (bufblks > log->l_logBBsize) |
---|
471 | 340 | bufblks >>= 1; |
---|
472 | | - while (!(bp = xlog_get_bp(log, bufblks))) { |
---|
| 341 | + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { |
---|
473 | 342 | bufblks >>= 1; |
---|
474 | 343 | if (bufblks < log->l_sectBBsize) |
---|
475 | 344 | return -ENOMEM; |
---|
.. | .. |
---|
480 | 349 | |
---|
481 | 350 | bcount = min(bufblks, (start_blk + nbblks - i)); |
---|
482 | 351 | |
---|
483 | | - error = xlog_bread(log, i, bcount, bp, &buf); |
---|
| 352 | + error = xlog_bread(log, i, bcount, buffer, &buf); |
---|
484 | 353 | if (error) |
---|
485 | 354 | goto out; |
---|
486 | 355 | |
---|
.. | .. |
---|
498 | 367 | *new_blk = -1; |
---|
499 | 368 | |
---|
500 | 369 | out: |
---|
501 | | - xlog_put_bp(bp); |
---|
| 370 | + kmem_free(buffer); |
---|
502 | 371 | return error; |
---|
| 372 | +} |
---|
| 373 | + |
---|
| 374 | +static inline int |
---|
| 375 | +xlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh) |
---|
| 376 | +{ |
---|
| 377 | + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
---|
| 378 | + int h_size = be32_to_cpu(rh->h_size); |
---|
| 379 | + |
---|
| 380 | + if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) && |
---|
| 381 | + h_size > XLOG_HEADER_CYCLE_SIZE) |
---|
| 382 | + return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE); |
---|
| 383 | + } |
---|
| 384 | + return 1; |
---|
503 | 385 | } |
---|
504 | 386 | |
---|
505 | 387 | /* |
---|
.. | .. |
---|
522 | 404 | int extra_bblks) |
---|
523 | 405 | { |
---|
524 | 406 | xfs_daddr_t i; |
---|
525 | | - xfs_buf_t *bp; |
---|
| 407 | + char *buffer; |
---|
526 | 408 | char *offset = NULL; |
---|
527 | 409 | xlog_rec_header_t *head = NULL; |
---|
528 | 410 | int error = 0; |
---|
.. | .. |
---|
532 | 414 | |
---|
533 | 415 | ASSERT(start_blk != 0 || *last_blk != start_blk); |
---|
534 | 416 | |
---|
535 | | - if (!(bp = xlog_get_bp(log, num_blks))) { |
---|
536 | | - if (!(bp = xlog_get_bp(log, 1))) |
---|
| 417 | + buffer = xlog_alloc_buffer(log, num_blks); |
---|
| 418 | + if (!buffer) { |
---|
| 419 | + buffer = xlog_alloc_buffer(log, 1); |
---|
| 420 | + if (!buffer) |
---|
537 | 421 | return -ENOMEM; |
---|
538 | 422 | smallmem = 1; |
---|
539 | 423 | } else { |
---|
540 | | - error = xlog_bread(log, start_blk, num_blks, bp, &offset); |
---|
| 424 | + error = xlog_bread(log, start_blk, num_blks, buffer, &offset); |
---|
541 | 425 | if (error) |
---|
542 | 426 | goto out; |
---|
543 | 427 | offset += ((num_blks - 1) << BBSHIFT); |
---|
.. | .. |
---|
549 | 433 | xfs_warn(log->l_mp, |
---|
550 | 434 | "Log inconsistent (didn't find previous header)"); |
---|
551 | 435 | ASSERT(0); |
---|
552 | | - error = -EIO; |
---|
| 436 | + error = -EFSCORRUPTED; |
---|
553 | 437 | goto out; |
---|
554 | 438 | } |
---|
555 | 439 | |
---|
556 | 440 | if (smallmem) { |
---|
557 | | - error = xlog_bread(log, i, 1, bp, &offset); |
---|
| 441 | + error = xlog_bread(log, i, 1, buffer, &offset); |
---|
558 | 442 | if (error) |
---|
559 | 443 | goto out; |
---|
560 | 444 | } |
---|
.. | .. |
---|
592 | 476 | * reset last_blk. Only when last_blk points in the middle of a log |
---|
593 | 477 | * record do we update last_blk. |
---|
594 | 478 | */ |
---|
595 | | - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
---|
596 | | - uint h_size = be32_to_cpu(head->h_size); |
---|
597 | | - |
---|
598 | | - xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; |
---|
599 | | - if (h_size % XLOG_HEADER_CYCLE_SIZE) |
---|
600 | | - xhdrs++; |
---|
601 | | - } else { |
---|
602 | | - xhdrs = 1; |
---|
603 | | - } |
---|
| 479 | + xhdrs = xlog_logrec_hblks(log, head); |
---|
604 | 480 | |
---|
605 | 481 | if (*last_blk - i + extra_bblks != |
---|
606 | 482 | BTOBB(be32_to_cpu(head->h_len)) + xhdrs) |
---|
607 | 483 | *last_blk = i; |
---|
608 | 484 | |
---|
609 | 485 | out: |
---|
610 | | - xlog_put_bp(bp); |
---|
| 486 | + kmem_free(buffer); |
---|
611 | 487 | return error; |
---|
612 | 488 | } |
---|
613 | 489 | |
---|
.. | .. |
---|
629 | 505 | struct xlog *log, |
---|
630 | 506 | xfs_daddr_t *return_head_blk) |
---|
631 | 507 | { |
---|
632 | | - xfs_buf_t *bp; |
---|
| 508 | + char *buffer; |
---|
633 | 509 | char *offset; |
---|
634 | 510 | xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; |
---|
635 | 511 | int num_scan_bblks; |
---|
.. | .. |
---|
659 | 535 | } |
---|
660 | 536 | |
---|
661 | 537 | first_blk = 0; /* get cycle # of 1st block */ |
---|
662 | | - bp = xlog_get_bp(log, 1); |
---|
663 | | - if (!bp) |
---|
| 538 | + buffer = xlog_alloc_buffer(log, 1); |
---|
| 539 | + if (!buffer) |
---|
664 | 540 | return -ENOMEM; |
---|
665 | 541 | |
---|
666 | | - error = xlog_bread(log, 0, 1, bp, &offset); |
---|
| 542 | + error = xlog_bread(log, 0, 1, buffer, &offset); |
---|
667 | 543 | if (error) |
---|
668 | | - goto bp_err; |
---|
| 544 | + goto out_free_buffer; |
---|
669 | 545 | |
---|
670 | 546 | first_half_cycle = xlog_get_cycle(offset); |
---|
671 | 547 | |
---|
672 | 548 | last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ |
---|
673 | | - error = xlog_bread(log, last_blk, 1, bp, &offset); |
---|
| 549 | + error = xlog_bread(log, last_blk, 1, buffer, &offset); |
---|
674 | 550 | if (error) |
---|
675 | | - goto bp_err; |
---|
| 551 | + goto out_free_buffer; |
---|
676 | 552 | |
---|
677 | 553 | last_half_cycle = xlog_get_cycle(offset); |
---|
678 | 554 | ASSERT(last_half_cycle != 0); |
---|
.. | .. |
---|
740 | 616 | * ^ we want to locate this spot |
---|
741 | 617 | */ |
---|
742 | 618 | stop_on_cycle = last_half_cycle; |
---|
743 | | - if ((error = xlog_find_cycle_start(log, bp, first_blk, |
---|
744 | | - &head_blk, last_half_cycle))) |
---|
745 | | - goto bp_err; |
---|
| 619 | + error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk, |
---|
| 620 | + last_half_cycle); |
---|
| 621 | + if (error) |
---|
| 622 | + goto out_free_buffer; |
---|
746 | 623 | } |
---|
747 | 624 | |
---|
748 | 625 | /* |
---|
.. | .. |
---|
762 | 639 | if ((error = xlog_find_verify_cycle(log, |
---|
763 | 640 | start_blk, num_scan_bblks, |
---|
764 | 641 | stop_on_cycle, &new_blk))) |
---|
765 | | - goto bp_err; |
---|
| 642 | + goto out_free_buffer; |
---|
766 | 643 | if (new_blk != -1) |
---|
767 | 644 | head_blk = new_blk; |
---|
768 | 645 | } else { /* need to read 2 parts of log */ |
---|
.. | .. |
---|
799 | 676 | if ((error = xlog_find_verify_cycle(log, start_blk, |
---|
800 | 677 | num_scan_bblks - (int)head_blk, |
---|
801 | 678 | (stop_on_cycle - 1), &new_blk))) |
---|
802 | | - goto bp_err; |
---|
| 679 | + goto out_free_buffer; |
---|
803 | 680 | if (new_blk != -1) { |
---|
804 | 681 | head_blk = new_blk; |
---|
805 | 682 | goto validate_head; |
---|
.. | .. |
---|
815 | 692 | if ((error = xlog_find_verify_cycle(log, |
---|
816 | 693 | start_blk, (int)head_blk, |
---|
817 | 694 | stop_on_cycle, &new_blk))) |
---|
818 | | - goto bp_err; |
---|
| 695 | + goto out_free_buffer; |
---|
819 | 696 | if (new_blk != -1) |
---|
820 | 697 | head_blk = new_blk; |
---|
821 | 698 | } |
---|
.. | .. |
---|
834 | 711 | if (error == 1) |
---|
835 | 712 | error = -EIO; |
---|
836 | 713 | if (error) |
---|
837 | | - goto bp_err; |
---|
| 714 | + goto out_free_buffer; |
---|
838 | 715 | } else { |
---|
839 | 716 | start_blk = 0; |
---|
840 | 717 | ASSERT(head_blk <= INT_MAX); |
---|
841 | 718 | error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); |
---|
842 | 719 | if (error < 0) |
---|
843 | | - goto bp_err; |
---|
| 720 | + goto out_free_buffer; |
---|
844 | 721 | if (error == 1) { |
---|
845 | 722 | /* We hit the beginning of the log during our search */ |
---|
846 | 723 | start_blk = log_bbnum - (num_scan_bblks - head_blk); |
---|
.. | .. |
---|
853 | 730 | if (error == 1) |
---|
854 | 731 | error = -EIO; |
---|
855 | 732 | if (error) |
---|
856 | | - goto bp_err; |
---|
| 733 | + goto out_free_buffer; |
---|
857 | 734 | if (new_blk != log_bbnum) |
---|
858 | 735 | head_blk = new_blk; |
---|
859 | 736 | } else if (error) |
---|
860 | | - goto bp_err; |
---|
| 737 | + goto out_free_buffer; |
---|
861 | 738 | } |
---|
862 | 739 | |
---|
863 | | - xlog_put_bp(bp); |
---|
| 740 | + kmem_free(buffer); |
---|
864 | 741 | if (head_blk == log_bbnum) |
---|
865 | 742 | *return_head_blk = 0; |
---|
866 | 743 | else |
---|
.. | .. |
---|
873 | 750 | */ |
---|
874 | 751 | return 0; |
---|
875 | 752 | |
---|
876 | | - bp_err: |
---|
877 | | - xlog_put_bp(bp); |
---|
878 | | - |
---|
| 753 | +out_free_buffer: |
---|
| 754 | + kmem_free(buffer); |
---|
879 | 755 | if (error) |
---|
880 | 756 | xfs_warn(log->l_mp, "failed to find log head"); |
---|
881 | 757 | return error; |
---|
.. | .. |
---|
895 | 771 | xfs_daddr_t head_blk, |
---|
896 | 772 | xfs_daddr_t tail_blk, |
---|
897 | 773 | int count, |
---|
898 | | - struct xfs_buf *bp, |
---|
| 774 | + char *buffer, |
---|
899 | 775 | xfs_daddr_t *rblk, |
---|
900 | 776 | struct xlog_rec_header **rhead, |
---|
901 | 777 | bool *wrapped) |
---|
.. | .. |
---|
914 | 790 | */ |
---|
915 | 791 | end_blk = head_blk > tail_blk ? tail_blk : 0; |
---|
916 | 792 | for (i = (int) head_blk - 1; i >= end_blk; i--) { |
---|
917 | | - error = xlog_bread(log, i, 1, bp, &offset); |
---|
| 793 | + error = xlog_bread(log, i, 1, buffer, &offset); |
---|
918 | 794 | if (error) |
---|
919 | 795 | goto out_error; |
---|
920 | 796 | |
---|
.. | .. |
---|
933 | 809 | */ |
---|
934 | 810 | if (tail_blk >= head_blk && found != count) { |
---|
935 | 811 | for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) { |
---|
936 | | - error = xlog_bread(log, i, 1, bp, &offset); |
---|
| 812 | + error = xlog_bread(log, i, 1, buffer, &offset); |
---|
937 | 813 | if (error) |
---|
938 | 814 | goto out_error; |
---|
939 | 815 | |
---|
.. | .. |
---|
969 | 845 | xfs_daddr_t head_blk, |
---|
970 | 846 | xfs_daddr_t tail_blk, |
---|
971 | 847 | int count, |
---|
972 | | - struct xfs_buf *bp, |
---|
| 848 | + char *buffer, |
---|
973 | 849 | xfs_daddr_t *rblk, |
---|
974 | 850 | struct xlog_rec_header **rhead, |
---|
975 | 851 | bool *wrapped) |
---|
.. | .. |
---|
988 | 864 | */ |
---|
989 | 865 | end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1; |
---|
990 | 866 | for (i = (int) tail_blk; i <= end_blk; i++) { |
---|
991 | | - error = xlog_bread(log, i, 1, bp, &offset); |
---|
| 867 | + error = xlog_bread(log, i, 1, buffer, &offset); |
---|
992 | 868 | if (error) |
---|
993 | 869 | goto out_error; |
---|
994 | 870 | |
---|
.. | .. |
---|
1006 | 882 | */ |
---|
1007 | 883 | if (tail_blk > head_blk && found != count) { |
---|
1008 | 884 | for (i = 0; i < (int) head_blk; i++) { |
---|
1009 | | - error = xlog_bread(log, i, 1, bp, &offset); |
---|
| 885 | + error = xlog_bread(log, i, 1, buffer, &offset); |
---|
1010 | 886 | if (error) |
---|
1011 | 887 | goto out_error; |
---|
1012 | 888 | |
---|
.. | .. |
---|
1069 | 945 | int hsize) |
---|
1070 | 946 | { |
---|
1071 | 947 | struct xlog_rec_header *thead; |
---|
1072 | | - struct xfs_buf *bp; |
---|
| 948 | + char *buffer; |
---|
1073 | 949 | xfs_daddr_t first_bad; |
---|
1074 | 950 | int error = 0; |
---|
1075 | 951 | bool wrapped; |
---|
1076 | 952 | xfs_daddr_t tmp_tail; |
---|
1077 | 953 | xfs_daddr_t orig_tail = *tail_blk; |
---|
1078 | 954 | |
---|
1079 | | - bp = xlog_get_bp(log, 1); |
---|
1080 | | - if (!bp) |
---|
| 955 | + buffer = xlog_alloc_buffer(log, 1); |
---|
| 956 | + if (!buffer) |
---|
1081 | 957 | return -ENOMEM; |
---|
1082 | 958 | |
---|
1083 | 959 | /* |
---|
1084 | 960 | * Make sure the tail points to a record (returns positive count on |
---|
1085 | 961 | * success). |
---|
1086 | 962 | */ |
---|
1087 | | - error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp, |
---|
| 963 | + error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer, |
---|
1088 | 964 | &tmp_tail, &thead, &wrapped); |
---|
1089 | 965 | if (error < 0) |
---|
1090 | 966 | goto out; |
---|
.. | .. |
---|
1113 | 989 | break; |
---|
1114 | 990 | |
---|
1115 | 991 | /* skip to the next record; returns positive count on success */ |
---|
1116 | | - error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp, |
---|
1117 | | - &tmp_tail, &thead, &wrapped); |
---|
| 992 | + error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, |
---|
| 993 | + buffer, &tmp_tail, &thead, &wrapped); |
---|
1118 | 994 | if (error < 0) |
---|
1119 | 995 | goto out; |
---|
1120 | 996 | |
---|
.. | .. |
---|
1129 | 1005 | "Tail block (0x%llx) overwrite detected. Updated to 0x%llx", |
---|
1130 | 1006 | orig_tail, *tail_blk); |
---|
1131 | 1007 | out: |
---|
1132 | | - xlog_put_bp(bp); |
---|
| 1008 | + kmem_free(buffer); |
---|
1133 | 1009 | return error; |
---|
1134 | 1010 | } |
---|
1135 | 1011 | |
---|
.. | .. |
---|
1151 | 1027 | struct xlog *log, |
---|
1152 | 1028 | xfs_daddr_t *head_blk, /* in/out: unverified head */ |
---|
1153 | 1029 | xfs_daddr_t *tail_blk, /* out: tail block */ |
---|
1154 | | - struct xfs_buf *bp, |
---|
| 1030 | + char *buffer, |
---|
1155 | 1031 | xfs_daddr_t *rhead_blk, /* start blk of last record */ |
---|
1156 | 1032 | struct xlog_rec_header **rhead, /* ptr to last record */ |
---|
1157 | 1033 | bool *wrapped) /* last rec. wraps phys. log */ |
---|
1158 | 1034 | { |
---|
1159 | 1035 | struct xlog_rec_header *tmp_rhead; |
---|
1160 | | - struct xfs_buf *tmp_bp; |
---|
| 1036 | + char *tmp_buffer; |
---|
1161 | 1037 | xfs_daddr_t first_bad; |
---|
1162 | 1038 | xfs_daddr_t tmp_rhead_blk; |
---|
1163 | 1039 | int found; |
---|
.. | .. |
---|
1168 | 1044 | * Check the head of the log for torn writes. Search backwards from the |
---|
1169 | 1045 | * head until we hit the tail or the maximum number of log record I/Os |
---|
1170 | 1046 | * that could have been in flight at one time. Use a temporary buffer so |
---|
1171 | | - * we don't trash the rhead/bp pointers from the caller. |
---|
| 1047 | + * we don't trash the rhead/buffer pointers from the caller. |
---|
1172 | 1048 | */ |
---|
1173 | | - tmp_bp = xlog_get_bp(log, 1); |
---|
1174 | | - if (!tmp_bp) |
---|
| 1049 | + tmp_buffer = xlog_alloc_buffer(log, 1); |
---|
| 1050 | + if (!tmp_buffer) |
---|
1175 | 1051 | return -ENOMEM; |
---|
1176 | 1052 | error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, |
---|
1177 | | - XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk, |
---|
1178 | | - &tmp_rhead, &tmp_wrapped); |
---|
1179 | | - xlog_put_bp(tmp_bp); |
---|
| 1053 | + XLOG_MAX_ICLOGS, tmp_buffer, |
---|
| 1054 | + &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped); |
---|
| 1055 | + kmem_free(tmp_buffer); |
---|
1180 | 1056 | if (error < 0) |
---|
1181 | 1057 | return error; |
---|
1182 | 1058 | |
---|
.. | .. |
---|
1203 | 1079 | * |
---|
1204 | 1080 | * Note that xlog_find_tail() clears the blocks at the new head |
---|
1205 | 1081 | * (i.e., the records with invalid CRC) if the cycle number |
---|
1206 | | - * matches the the current cycle. |
---|
| 1082 | + * matches the current cycle. |
---|
1207 | 1083 | */ |
---|
1208 | | - found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp, |
---|
1209 | | - rhead_blk, rhead, wrapped); |
---|
| 1084 | + found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, |
---|
| 1085 | + buffer, rhead_blk, rhead, wrapped); |
---|
1210 | 1086 | if (found < 0) |
---|
1211 | 1087 | return found; |
---|
1212 | 1088 | if (found == 0) /* XXX: right thing to do here? */ |
---|
.. | .. |
---|
1266 | 1142 | xfs_daddr_t *tail_blk, |
---|
1267 | 1143 | struct xlog_rec_header *rhead, |
---|
1268 | 1144 | xfs_daddr_t rhead_blk, |
---|
1269 | | - struct xfs_buf *bp, |
---|
| 1145 | + char *buffer, |
---|
1270 | 1146 | bool *clean) |
---|
1271 | 1147 | { |
---|
1272 | 1148 | struct xlog_op_header *op_head; |
---|
.. | .. |
---|
1287 | 1163 | * below. We won't want to clear the unmount record if there is one, so |
---|
1288 | 1164 | * we pass the lsn of the unmount record rather than the block after it. |
---|
1289 | 1165 | */ |
---|
1290 | | - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
---|
1291 | | - int h_size = be32_to_cpu(rhead->h_size); |
---|
1292 | | - int h_version = be32_to_cpu(rhead->h_version); |
---|
1293 | | - |
---|
1294 | | - if ((h_version & XLOG_VERSION_2) && |
---|
1295 | | - (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
---|
1296 | | - hblks = h_size / XLOG_HEADER_CYCLE_SIZE; |
---|
1297 | | - if (h_size % XLOG_HEADER_CYCLE_SIZE) |
---|
1298 | | - hblks++; |
---|
1299 | | - } else { |
---|
1300 | | - hblks = 1; |
---|
1301 | | - } |
---|
1302 | | - } else { |
---|
1303 | | - hblks = 1; |
---|
1304 | | - } |
---|
1305 | | - |
---|
| 1166 | + hblks = xlog_logrec_hblks(log, rhead); |
---|
1306 | 1167 | after_umount_blk = xlog_wrap_logbno(log, |
---|
1307 | 1168 | rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len))); |
---|
1308 | 1169 | |
---|
1309 | 1170 | if (*head_blk == after_umount_blk && |
---|
1310 | 1171 | be32_to_cpu(rhead->h_num_logops) == 1) { |
---|
1311 | 1172 | umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks); |
---|
1312 | | - error = xlog_bread(log, umount_data_blk, 1, bp, &offset); |
---|
| 1173 | + error = xlog_bread(log, umount_data_blk, 1, buffer, &offset); |
---|
1313 | 1174 | if (error) |
---|
1314 | 1175 | return error; |
---|
1315 | 1176 | |
---|
.. | .. |
---|
1388 | 1249 | { |
---|
1389 | 1250 | xlog_rec_header_t *rhead; |
---|
1390 | 1251 | char *offset = NULL; |
---|
1391 | | - xfs_buf_t *bp; |
---|
| 1252 | + char *buffer; |
---|
1392 | 1253 | int error; |
---|
1393 | 1254 | xfs_daddr_t rhead_blk; |
---|
1394 | 1255 | xfs_lsn_t tail_lsn; |
---|
.. | .. |
---|
1402 | 1263 | return error; |
---|
1403 | 1264 | ASSERT(*head_blk < INT_MAX); |
---|
1404 | 1265 | |
---|
1405 | | - bp = xlog_get_bp(log, 1); |
---|
1406 | | - if (!bp) |
---|
| 1266 | + buffer = xlog_alloc_buffer(log, 1); |
---|
| 1267 | + if (!buffer) |
---|
1407 | 1268 | return -ENOMEM; |
---|
1408 | 1269 | if (*head_blk == 0) { /* special case */ |
---|
1409 | | - error = xlog_bread(log, 0, 1, bp, &offset); |
---|
| 1270 | + error = xlog_bread(log, 0, 1, buffer, &offset); |
---|
1410 | 1271 | if (error) |
---|
1411 | 1272 | goto done; |
---|
1412 | 1273 | |
---|
.. | .. |
---|
1422 | 1283 | * block. This wraps all the way back around to the head so something is |
---|
1423 | 1284 | * seriously wrong if we can't find it. |
---|
1424 | 1285 | */ |
---|
1425 | | - error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, |
---|
| 1286 | + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, |
---|
1426 | 1287 | &rhead_blk, &rhead, &wrapped); |
---|
1427 | 1288 | if (error < 0) |
---|
1428 | | - return error; |
---|
| 1289 | + goto done; |
---|
1429 | 1290 | if (!error) { |
---|
1430 | 1291 | xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); |
---|
1431 | | - return -EIO; |
---|
| 1292 | + error = -EFSCORRUPTED; |
---|
| 1293 | + goto done; |
---|
1432 | 1294 | } |
---|
1433 | 1295 | *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); |
---|
1434 | 1296 | |
---|
.. | .. |
---|
1443 | 1305 | * state to determine whether recovery is necessary. |
---|
1444 | 1306 | */ |
---|
1445 | 1307 | error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, |
---|
1446 | | - rhead_blk, bp, &clean); |
---|
| 1308 | + rhead_blk, buffer, &clean); |
---|
1447 | 1309 | if (error) |
---|
1448 | 1310 | goto done; |
---|
1449 | 1311 | |
---|
.. | .. |
---|
1460 | 1322 | if (!clean) { |
---|
1461 | 1323 | xfs_daddr_t orig_head = *head_blk; |
---|
1462 | 1324 | |
---|
1463 | | - error = xlog_verify_head(log, head_blk, tail_blk, bp, |
---|
| 1325 | + error = xlog_verify_head(log, head_blk, tail_blk, buffer, |
---|
1464 | 1326 | &rhead_blk, &rhead, &wrapped); |
---|
1465 | 1327 | if (error) |
---|
1466 | 1328 | goto done; |
---|
.. | .. |
---|
1471 | 1333 | wrapped); |
---|
1472 | 1334 | tail_lsn = atomic64_read(&log->l_tail_lsn); |
---|
1473 | 1335 | error = xlog_check_unmount_rec(log, head_blk, tail_blk, |
---|
1474 | | - rhead, rhead_blk, bp, |
---|
| 1336 | + rhead, rhead_blk, buffer, |
---|
1475 | 1337 | &clean); |
---|
1476 | 1338 | if (error) |
---|
1477 | 1339 | goto done; |
---|
.. | .. |
---|
1505 | 1367 | * But... if the -device- itself is readonly, just skip this. |
---|
1506 | 1368 | * We can't recover this device anyway, so it won't matter. |
---|
1507 | 1369 | */ |
---|
1508 | | - if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) |
---|
| 1370 | + if (!xfs_readonly_buftarg(log->l_targ)) |
---|
1509 | 1371 | error = xlog_clear_stale_blocks(log, tail_lsn); |
---|
1510 | 1372 | |
---|
1511 | 1373 | done: |
---|
1512 | | - xlog_put_bp(bp); |
---|
| 1374 | + kmem_free(buffer); |
---|
1513 | 1375 | |
---|
1514 | 1376 | if (error) |
---|
1515 | 1377 | xfs_warn(log->l_mp, "failed to locate log tail"); |
---|
.. | .. |
---|
1537 | 1399 | struct xlog *log, |
---|
1538 | 1400 | xfs_daddr_t *blk_no) |
---|
1539 | 1401 | { |
---|
1540 | | - xfs_buf_t *bp; |
---|
| 1402 | + char *buffer; |
---|
1541 | 1403 | char *offset; |
---|
1542 | 1404 | uint first_cycle, last_cycle; |
---|
1543 | 1405 | xfs_daddr_t new_blk, last_blk, start_blk; |
---|
.. | .. |
---|
1547 | 1409 | *blk_no = 0; |
---|
1548 | 1410 | |
---|
1549 | 1411 | /* check totally zeroed log */ |
---|
1550 | | - bp = xlog_get_bp(log, 1); |
---|
1551 | | - if (!bp) |
---|
| 1412 | + buffer = xlog_alloc_buffer(log, 1); |
---|
| 1413 | + if (!buffer) |
---|
1552 | 1414 | return -ENOMEM; |
---|
1553 | | - error = xlog_bread(log, 0, 1, bp, &offset); |
---|
| 1415 | + error = xlog_bread(log, 0, 1, buffer, &offset); |
---|
1554 | 1416 | if (error) |
---|
1555 | | - goto bp_err; |
---|
| 1417 | + goto out_free_buffer; |
---|
1556 | 1418 | |
---|
1557 | 1419 | first_cycle = xlog_get_cycle(offset); |
---|
1558 | 1420 | if (first_cycle == 0) { /* completely zeroed log */ |
---|
1559 | 1421 | *blk_no = 0; |
---|
1560 | | - xlog_put_bp(bp); |
---|
| 1422 | + kmem_free(buffer); |
---|
1561 | 1423 | return 1; |
---|
1562 | 1424 | } |
---|
1563 | 1425 | |
---|
1564 | 1426 | /* check partially zeroed log */ |
---|
1565 | | - error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); |
---|
| 1427 | + error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset); |
---|
1566 | 1428 | if (error) |
---|
1567 | | - goto bp_err; |
---|
| 1429 | + goto out_free_buffer; |
---|
1568 | 1430 | |
---|
1569 | 1431 | last_cycle = xlog_get_cycle(offset); |
---|
1570 | 1432 | if (last_cycle != 0) { /* log completely written to */ |
---|
1571 | | - xlog_put_bp(bp); |
---|
| 1433 | + kmem_free(buffer); |
---|
1572 | 1434 | return 0; |
---|
1573 | 1435 | } |
---|
1574 | 1436 | |
---|
1575 | 1437 | /* we have a partially zeroed log */ |
---|
1576 | 1438 | last_blk = log_bbnum-1; |
---|
1577 | | - if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) |
---|
1578 | | - goto bp_err; |
---|
| 1439 | + error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0); |
---|
| 1440 | + if (error) |
---|
| 1441 | + goto out_free_buffer; |
---|
1579 | 1442 | |
---|
1580 | 1443 | /* |
---|
1581 | 1444 | * Validate the answer. Because there is no way to guarantee that |
---|
.. | .. |
---|
1598 | 1461 | */ |
---|
1599 | 1462 | if ((error = xlog_find_verify_cycle(log, start_blk, |
---|
1600 | 1463 | (int)num_scan_bblks, 0, &new_blk))) |
---|
1601 | | - goto bp_err; |
---|
| 1464 | + goto out_free_buffer; |
---|
1602 | 1465 | if (new_blk != -1) |
---|
1603 | 1466 | last_blk = new_blk; |
---|
1604 | 1467 | |
---|
.. | .. |
---|
1610 | 1473 | if (error == 1) |
---|
1611 | 1474 | error = -EIO; |
---|
1612 | 1475 | if (error) |
---|
1613 | | - goto bp_err; |
---|
| 1476 | + goto out_free_buffer; |
---|
1614 | 1477 | |
---|
1615 | 1478 | *blk_no = last_blk; |
---|
1616 | | -bp_err: |
---|
1617 | | - xlog_put_bp(bp); |
---|
| 1479 | +out_free_buffer: |
---|
| 1480 | + kmem_free(buffer); |
---|
1618 | 1481 | if (error) |
---|
1619 | 1482 | return error; |
---|
1620 | 1483 | return 1; |
---|
.. | .. |
---|
1657 | 1520 | int tail_block) |
---|
1658 | 1521 | { |
---|
1659 | 1522 | char *offset; |
---|
1660 | | - xfs_buf_t *bp; |
---|
| 1523 | + char *buffer; |
---|
1661 | 1524 | int balign, ealign; |
---|
1662 | 1525 | int sectbb = log->l_sectBBsize; |
---|
1663 | 1526 | int end_block = start_block + blocks; |
---|
.. | .. |
---|
1674 | 1537 | bufblks = 1 << ffs(blocks); |
---|
1675 | 1538 | while (bufblks > log->l_logBBsize) |
---|
1676 | 1539 | bufblks >>= 1; |
---|
1677 | | - while (!(bp = xlog_get_bp(log, bufblks))) { |
---|
| 1540 | + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { |
---|
1678 | 1541 | bufblks >>= 1; |
---|
1679 | 1542 | if (bufblks < sectbb) |
---|
1680 | 1543 | return -ENOMEM; |
---|
.. | .. |
---|
1686 | 1549 | */ |
---|
1687 | 1550 | balign = round_down(start_block, sectbb); |
---|
1688 | 1551 | if (balign != start_block) { |
---|
1689 | | - error = xlog_bread_noalign(log, start_block, 1, bp); |
---|
| 1552 | + error = xlog_bread_noalign(log, start_block, 1, buffer); |
---|
1690 | 1553 | if (error) |
---|
1691 | | - goto out_put_bp; |
---|
| 1554 | + goto out_free_buffer; |
---|
1692 | 1555 | |
---|
1693 | 1556 | j = start_block - balign; |
---|
1694 | 1557 | } |
---|
.. | .. |
---|
1705 | 1568 | */ |
---|
1706 | 1569 | ealign = round_down(end_block, sectbb); |
---|
1707 | 1570 | if (j == 0 && (start_block + endcount > ealign)) { |
---|
1708 | | - offset = bp->b_addr + BBTOB(ealign - start_block); |
---|
1709 | | - error = xlog_bread_offset(log, ealign, sectbb, |
---|
1710 | | - bp, offset); |
---|
| 1571 | + error = xlog_bread_noalign(log, ealign, sectbb, |
---|
| 1572 | + buffer + BBTOB(ealign - start_block)); |
---|
1711 | 1573 | if (error) |
---|
1712 | 1574 | break; |
---|
1713 | 1575 | |
---|
1714 | 1576 | } |
---|
1715 | 1577 | |
---|
1716 | | - offset = xlog_align(log, start_block, endcount, bp); |
---|
| 1578 | + offset = buffer + xlog_align(log, start_block); |
---|
1717 | 1579 | for (; j < endcount; j++) { |
---|
1718 | 1580 | xlog_add_record(log, offset, cycle, i+j, |
---|
1719 | 1581 | tail_cycle, tail_block); |
---|
1720 | 1582 | offset += BBSIZE; |
---|
1721 | 1583 | } |
---|
1722 | | - error = xlog_bwrite(log, start_block, endcount, bp); |
---|
| 1584 | + error = xlog_bwrite(log, start_block, endcount, buffer); |
---|
1723 | 1585 | if (error) |
---|
1724 | 1586 | break; |
---|
1725 | 1587 | start_block += endcount; |
---|
1726 | 1588 | j = 0; |
---|
1727 | 1589 | } |
---|
1728 | 1590 | |
---|
1729 | | - out_put_bp: |
---|
1730 | | - xlog_put_bp(bp); |
---|
| 1591 | +out_free_buffer: |
---|
| 1592 | + kmem_free(buffer); |
---|
1731 | 1593 | return error; |
---|
1732 | 1594 | } |
---|
1733 | 1595 | |
---|
.. | .. |
---|
1777 | 1639 | * the distance from the beginning of the log to the |
---|
1778 | 1640 | * tail. |
---|
1779 | 1641 | */ |
---|
1780 | | - if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) { |
---|
1781 | | - XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)", |
---|
1782 | | - XFS_ERRLEVEL_LOW, log->l_mp); |
---|
| 1642 | + if (XFS_IS_CORRUPT(log->l_mp, |
---|
| 1643 | + head_block < tail_block || |
---|
| 1644 | + head_block >= log->l_logBBsize)) |
---|
1783 | 1645 | return -EFSCORRUPTED; |
---|
1784 | | - } |
---|
1785 | 1646 | tail_distance = tail_block + (log->l_logBBsize - head_block); |
---|
1786 | 1647 | } else { |
---|
1787 | 1648 | /* |
---|
.. | .. |
---|
1789 | 1650 | * so the distance from the head to the tail is just |
---|
1790 | 1651 | * the tail block minus the head block. |
---|
1791 | 1652 | */ |
---|
1792 | | - if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){ |
---|
1793 | | - XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)", |
---|
1794 | | - XFS_ERRLEVEL_LOW, log->l_mp); |
---|
| 1653 | + if (XFS_IS_CORRUPT(log->l_mp, |
---|
| 1654 | + head_block >= tail_block || |
---|
| 1655 | + head_cycle != tail_cycle + 1)) |
---|
1795 | 1656 | return -EFSCORRUPTED; |
---|
1796 | | - } |
---|
1797 | 1657 | tail_distance = tail_block - head_block; |
---|
1798 | 1658 | } |
---|
1799 | 1659 | |
---|
.. | .. |
---|
1863 | 1723 | return 0; |
---|
1864 | 1724 | } |
---|
1865 | 1725 | |
---|
| 1726 | +/* |
---|
| 1727 | + * Release the recovered intent item in the AIL that matches the given intent |
---|
| 1728 | + * type and intent id. |
---|
| 1729 | + */ |
---|
| 1730 | +void |
---|
| 1731 | +xlog_recover_release_intent( |
---|
| 1732 | + struct xlog *log, |
---|
| 1733 | + unsigned short intent_type, |
---|
| 1734 | + uint64_t intent_id) |
---|
| 1735 | +{ |
---|
| 1736 | + struct xfs_ail_cursor cur; |
---|
| 1737 | + struct xfs_log_item *lip; |
---|
| 1738 | + struct xfs_ail *ailp = log->l_ailp; |
---|
| 1739 | + |
---|
| 1740 | + spin_lock(&ailp->ail_lock); |
---|
| 1741 | + for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL; |
---|
| 1742 | + lip = xfs_trans_ail_cursor_next(ailp, &cur)) { |
---|
| 1743 | + if (lip->li_type != intent_type) |
---|
| 1744 | + continue; |
---|
| 1745 | + if (!lip->li_ops->iop_match(lip, intent_id)) |
---|
| 1746 | + continue; |
---|
| 1747 | + |
---|
| 1748 | + spin_unlock(&ailp->ail_lock); |
---|
| 1749 | + lip->li_ops->iop_release(lip); |
---|
| 1750 | + spin_lock(&ailp->ail_lock); |
---|
| 1751 | + break; |
---|
| 1752 | + } |
---|
| 1753 | + |
---|
| 1754 | + xfs_trans_ail_cursor_done(&cur); |
---|
| 1755 | + spin_unlock(&ailp->ail_lock); |
---|
| 1756 | +} |
---|
| 1757 | + |
---|
1866 | 1758 | /****************************************************************************** |
---|
1867 | 1759 | * |
---|
1868 | 1760 | * Log recover routines |
---|
1869 | 1761 | * |
---|
1870 | 1762 | ****************************************************************************** |
---|
1871 | 1763 | */ |
---|
| 1764 | +static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = { |
---|
| 1765 | + &xlog_buf_item_ops, |
---|
| 1766 | + &xlog_inode_item_ops, |
---|
| 1767 | + &xlog_dquot_item_ops, |
---|
| 1768 | + &xlog_quotaoff_item_ops, |
---|
| 1769 | + &xlog_icreate_item_ops, |
---|
| 1770 | + &xlog_efi_item_ops, |
---|
| 1771 | + &xlog_efd_item_ops, |
---|
| 1772 | + &xlog_rui_item_ops, |
---|
| 1773 | + &xlog_rud_item_ops, |
---|
| 1774 | + &xlog_cui_item_ops, |
---|
| 1775 | + &xlog_cud_item_ops, |
---|
| 1776 | + &xlog_bui_item_ops, |
---|
| 1777 | + &xlog_bud_item_ops, |
---|
| 1778 | +}; |
---|
| 1779 | + |
---|
| 1780 | +static const struct xlog_recover_item_ops * |
---|
| 1781 | +xlog_find_item_ops( |
---|
| 1782 | + struct xlog_recover_item *item) |
---|
| 1783 | +{ |
---|
| 1784 | + unsigned int i; |
---|
| 1785 | + |
---|
| 1786 | + for (i = 0; i < ARRAY_SIZE(xlog_recover_item_ops); i++) |
---|
| 1787 | + if (ITEM_TYPE(item) == xlog_recover_item_ops[i]->item_type) |
---|
| 1788 | + return xlog_recover_item_ops[i]; |
---|
| 1789 | + |
---|
| 1790 | + return NULL; |
---|
| 1791 | +} |
---|
1872 | 1792 | |
---|
1873 | 1793 | /* |
---|
1874 | 1794 | * Sort the log items in the transaction. |
---|
.. | .. |
---|
1925 | 1845 | struct xlog_recover *trans, |
---|
1926 | 1846 | int pass) |
---|
1927 | 1847 | { |
---|
1928 | | - xlog_recover_item_t *item, *n; |
---|
| 1848 | + struct xlog_recover_item *item, *n; |
---|
1929 | 1849 | int error = 0; |
---|
1930 | 1850 | LIST_HEAD(sort_list); |
---|
1931 | 1851 | LIST_HEAD(cancel_list); |
---|
1932 | 1852 | LIST_HEAD(buffer_list); |
---|
1933 | 1853 | LIST_HEAD(inode_buffer_list); |
---|
1934 | | - LIST_HEAD(inode_list); |
---|
| 1854 | + LIST_HEAD(item_list); |
---|
1935 | 1855 | |
---|
1936 | 1856 | list_splice_init(&trans->r_itemq, &sort_list); |
---|
1937 | 1857 | list_for_each_entry_safe(item, n, &sort_list, ri_list) { |
---|
1938 | | - xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
---|
| 1858 | + enum xlog_recover_reorder fate = XLOG_REORDER_ITEM_LIST; |
---|
1939 | 1859 | |
---|
1940 | | - switch (ITEM_TYPE(item)) { |
---|
1941 | | - case XFS_LI_ICREATE: |
---|
1942 | | - list_move_tail(&item->ri_list, &buffer_list); |
---|
1943 | | - break; |
---|
1944 | | - case XFS_LI_BUF: |
---|
1945 | | - if (buf_f->blf_flags & XFS_BLF_CANCEL) { |
---|
1946 | | - trace_xfs_log_recover_item_reorder_head(log, |
---|
1947 | | - trans, item, pass); |
---|
1948 | | - list_move(&item->ri_list, &cancel_list); |
---|
1949 | | - break; |
---|
1950 | | - } |
---|
1951 | | - if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
---|
1952 | | - list_move(&item->ri_list, &inode_buffer_list); |
---|
1953 | | - break; |
---|
1954 | | - } |
---|
1955 | | - list_move_tail(&item->ri_list, &buffer_list); |
---|
1956 | | - break; |
---|
1957 | | - case XFS_LI_INODE: |
---|
1958 | | - case XFS_LI_DQUOT: |
---|
1959 | | - case XFS_LI_QUOTAOFF: |
---|
1960 | | - case XFS_LI_EFD: |
---|
1961 | | - case XFS_LI_EFI: |
---|
1962 | | - case XFS_LI_RUI: |
---|
1963 | | - case XFS_LI_RUD: |
---|
1964 | | - case XFS_LI_CUI: |
---|
1965 | | - case XFS_LI_CUD: |
---|
1966 | | - case XFS_LI_BUI: |
---|
1967 | | - case XFS_LI_BUD: |
---|
1968 | | - trace_xfs_log_recover_item_reorder_tail(log, |
---|
1969 | | - trans, item, pass); |
---|
1970 | | - list_move_tail(&item->ri_list, &inode_list); |
---|
1971 | | - break; |
---|
1972 | | - default: |
---|
| 1860 | + item->ri_ops = xlog_find_item_ops(item); |
---|
| 1861 | + if (!item->ri_ops) { |
---|
1973 | 1862 | xfs_warn(log->l_mp, |
---|
1974 | | - "%s: unrecognized type of log operation", |
---|
1975 | | - __func__); |
---|
| 1863 | + "%s: unrecognized type of log operation (%d)", |
---|
| 1864 | + __func__, ITEM_TYPE(item)); |
---|
1976 | 1865 | ASSERT(0); |
---|
1977 | 1866 | /* |
---|
1978 | 1867 | * return the remaining items back to the transaction |
---|
.. | .. |
---|
1980 | 1869 | */ |
---|
1981 | 1870 | if (!list_empty(&sort_list)) |
---|
1982 | 1871 | list_splice_init(&sort_list, &trans->r_itemq); |
---|
1983 | | - error = -EIO; |
---|
1984 | | - goto out; |
---|
| 1872 | + error = -EFSCORRUPTED; |
---|
| 1873 | + break; |
---|
| 1874 | + } |
---|
| 1875 | + |
---|
| 1876 | + if (item->ri_ops->reorder) |
---|
| 1877 | + fate = item->ri_ops->reorder(item); |
---|
| 1878 | + |
---|
| 1879 | + switch (fate) { |
---|
| 1880 | + case XLOG_REORDER_BUFFER_LIST: |
---|
| 1881 | + list_move_tail(&item->ri_list, &buffer_list); |
---|
| 1882 | + break; |
---|
| 1883 | + case XLOG_REORDER_CANCEL_LIST: |
---|
| 1884 | + trace_xfs_log_recover_item_reorder_head(log, |
---|
| 1885 | + trans, item, pass); |
---|
| 1886 | + list_move(&item->ri_list, &cancel_list); |
---|
| 1887 | + break; |
---|
| 1888 | + case XLOG_REORDER_INODE_BUFFER_LIST: |
---|
| 1889 | + list_move(&item->ri_list, &inode_buffer_list); |
---|
| 1890 | + break; |
---|
| 1891 | + case XLOG_REORDER_ITEM_LIST: |
---|
| 1892 | + trace_xfs_log_recover_item_reorder_tail(log, |
---|
| 1893 | + trans, item, pass); |
---|
| 1894 | + list_move_tail(&item->ri_list, &item_list); |
---|
| 1895 | + break; |
---|
1985 | 1896 | } |
---|
1986 | 1897 | } |
---|
1987 | | -out: |
---|
| 1898 | + |
---|
1988 | 1899 | ASSERT(list_empty(&sort_list)); |
---|
1989 | 1900 | if (!list_empty(&buffer_list)) |
---|
1990 | 1901 | list_splice(&buffer_list, &trans->r_itemq); |
---|
1991 | | - if (!list_empty(&inode_list)) |
---|
1992 | | - list_splice_tail(&inode_list, &trans->r_itemq); |
---|
| 1902 | + if (!list_empty(&item_list)) |
---|
| 1903 | + list_splice_tail(&item_list, &trans->r_itemq); |
---|
1993 | 1904 | if (!list_empty(&inode_buffer_list)) |
---|
1994 | 1905 | list_splice_tail(&inode_buffer_list, &trans->r_itemq); |
---|
1995 | 1906 | if (!list_empty(&cancel_list)) |
---|
.. | .. |
---|
1997 | 1908 | return error; |
---|
1998 | 1909 | } |
---|
1999 | 1910 | |
---|
2000 | | -/* |
---|
2001 | | - * Build up the table of buf cancel records so that we don't replay |
---|
2002 | | - * cancelled data in the second pass. For buffer records that are |
---|
2003 | | - * not cancel records, there is nothing to do here so we just return. |
---|
2004 | | - * |
---|
2005 | | - * If we get a cancel record which is already in the table, this indicates |
---|
2006 | | - * that the buffer was cancelled multiple times. In order to ensure |
---|
2007 | | - * that during pass 2 we keep the record in the table until we reach its |
---|
2008 | | - * last occurrence in the log, we keep a reference count in the cancel |
---|
2009 | | - * record in the table to tell us how many times we expect to see this |
---|
2010 | | - * record during the second pass. |
---|
2011 | | - */ |
---|
2012 | | -STATIC int |
---|
2013 | | -xlog_recover_buffer_pass1( |
---|
2014 | | - struct xlog *log, |
---|
2015 | | - struct xlog_recover_item *item) |
---|
2016 | | -{ |
---|
2017 | | - xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
---|
2018 | | - struct list_head *bucket; |
---|
2019 | | - struct xfs_buf_cancel *bcp; |
---|
2020 | | - |
---|
2021 | | - /* |
---|
2022 | | - * If this isn't a cancel buffer item, then just return. |
---|
2023 | | - */ |
---|
2024 | | - if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
---|
2025 | | - trace_xfs_log_recover_buf_not_cancel(log, buf_f); |
---|
2026 | | - return 0; |
---|
2027 | | - } |
---|
2028 | | - |
---|
2029 | | - /* |
---|
2030 | | - * Insert an xfs_buf_cancel record into the hash table of them. |
---|
2031 | | - * If there is already an identical record, bump its reference count. |
---|
2032 | | - */ |
---|
2033 | | - bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); |
---|
2034 | | - list_for_each_entry(bcp, bucket, bc_list) { |
---|
2035 | | - if (bcp->bc_blkno == buf_f->blf_blkno && |
---|
2036 | | - bcp->bc_len == buf_f->blf_len) { |
---|
2037 | | - bcp->bc_refcount++; |
---|
2038 | | - trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); |
---|
2039 | | - return 0; |
---|
2040 | | - } |
---|
2041 | | - } |
---|
2042 | | - |
---|
2043 | | - bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); |
---|
2044 | | - bcp->bc_blkno = buf_f->blf_blkno; |
---|
2045 | | - bcp->bc_len = buf_f->blf_len; |
---|
2046 | | - bcp->bc_refcount = 1; |
---|
2047 | | - list_add_tail(&bcp->bc_list, bucket); |
---|
2048 | | - |
---|
2049 | | - trace_xfs_log_recover_buf_cancel_add(log, buf_f); |
---|
2050 | | - return 0; |
---|
2051 | | -} |
---|
2052 | | - |
---|
2053 | | -/* |
---|
2054 | | - * Check to see whether the buffer being recovered has a corresponding |
---|
2055 | | - * entry in the buffer cancel record table. If it is, return the cancel |
---|
2056 | | - * buffer structure to the caller. |
---|
2057 | | - */ |
---|
2058 | | -STATIC struct xfs_buf_cancel * |
---|
2059 | | -xlog_peek_buffer_cancelled( |
---|
| 1911 | +void |
---|
| 1912 | +xlog_buf_readahead( |
---|
2060 | 1913 | struct xlog *log, |
---|
2061 | 1914 | xfs_daddr_t blkno, |
---|
2062 | 1915 | uint len, |
---|
2063 | | - unsigned short flags) |
---|
| 1916 | + const struct xfs_buf_ops *ops) |
---|
2064 | 1917 | { |
---|
2065 | | - struct list_head *bucket; |
---|
2066 | | - struct xfs_buf_cancel *bcp; |
---|
2067 | | - |
---|
2068 | | - if (!log->l_buf_cancel_table) { |
---|
2069 | | - /* empty table means no cancelled buffers in the log */ |
---|
2070 | | - ASSERT(!(flags & XFS_BLF_CANCEL)); |
---|
2071 | | - return NULL; |
---|
2072 | | - } |
---|
2073 | | - |
---|
2074 | | - bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); |
---|
2075 | | - list_for_each_entry(bcp, bucket, bc_list) { |
---|
2076 | | - if (bcp->bc_blkno == blkno && bcp->bc_len == len) |
---|
2077 | | - return bcp; |
---|
2078 | | - } |
---|
2079 | | - |
---|
2080 | | - /* |
---|
2081 | | - * We didn't find a corresponding entry in the table, so return 0 so |
---|
2082 | | - * that the buffer is NOT cancelled. |
---|
2083 | | - */ |
---|
2084 | | - ASSERT(!(flags & XFS_BLF_CANCEL)); |
---|
2085 | | - return NULL; |
---|
2086 | | -} |
---|
2087 | | - |
---|
2088 | | -/* |
---|
2089 | | - * If the buffer is being cancelled then return 1 so that it will be cancelled, |
---|
2090 | | - * otherwise return 0. If the buffer is actually a buffer cancel item |
---|
2091 | | - * (XFS_BLF_CANCEL is set), then decrement the refcount on the entry in the |
---|
2092 | | - * table and remove it from the table if this is the last reference. |
---|
2093 | | - * |
---|
2094 | | - * We remove the cancel record from the table when we encounter its last |
---|
2095 | | - * occurrence in the log so that if the same buffer is re-used again after its |
---|
2096 | | - * last cancellation we actually replay the changes made at that point. |
---|
2097 | | - */ |
---|
2098 | | -STATIC int |
---|
2099 | | -xlog_check_buffer_cancelled( |
---|
2100 | | - struct xlog *log, |
---|
2101 | | - xfs_daddr_t blkno, |
---|
2102 | | - uint len, |
---|
2103 | | - unsigned short flags) |
---|
2104 | | -{ |
---|
2105 | | - struct xfs_buf_cancel *bcp; |
---|
2106 | | - |
---|
2107 | | - bcp = xlog_peek_buffer_cancelled(log, blkno, len, flags); |
---|
2108 | | - if (!bcp) |
---|
2109 | | - return 0; |
---|
2110 | | - |
---|
2111 | | - /* |
---|
2112 | | - * We've go a match, so return 1 so that the recovery of this buffer |
---|
2113 | | - * is cancelled. If this buffer is actually a buffer cancel log |
---|
2114 | | - * item, then decrement the refcount on the one in the table and |
---|
2115 | | - * remove it if this is the last reference. |
---|
2116 | | - */ |
---|
2117 | | - if (flags & XFS_BLF_CANCEL) { |
---|
2118 | | - if (--bcp->bc_refcount == 0) { |
---|
2119 | | - list_del(&bcp->bc_list); |
---|
2120 | | - kmem_free(bcp); |
---|
2121 | | - } |
---|
2122 | | - } |
---|
2123 | | - return 1; |
---|
2124 | | -} |
---|
2125 | | - |
---|
2126 | | -/* |
---|
2127 | | - * Perform recovery for a buffer full of inodes. In these buffers, the only |
---|
2128 | | - * data which should be recovered is that which corresponds to the |
---|
2129 | | - * di_next_unlinked pointers in the on disk inode structures. The rest of the |
---|
2130 | | - * data for the inodes is always logged through the inodes themselves rather |
---|
2131 | | - * than the inode buffer and is recovered in xlog_recover_inode_pass2(). |
---|
2132 | | - * |
---|
2133 | | - * The only time when buffers full of inodes are fully recovered is when the |
---|
2134 | | - * buffer is full of newly allocated inodes. In this case the buffer will |
---|
2135 | | - * not be marked as an inode buffer and so will be sent to |
---|
2136 | | - * xlog_recover_do_reg_buffer() below during recovery. |
---|
2137 | | - */ |
---|
2138 | | -STATIC int |
---|
2139 | | -xlog_recover_do_inode_buffer( |
---|
2140 | | - struct xfs_mount *mp, |
---|
2141 | | - xlog_recover_item_t *item, |
---|
2142 | | - struct xfs_buf *bp, |
---|
2143 | | - xfs_buf_log_format_t *buf_f) |
---|
2144 | | -{ |
---|
2145 | | - int i; |
---|
2146 | | - int item_index = 0; |
---|
2147 | | - int bit = 0; |
---|
2148 | | - int nbits = 0; |
---|
2149 | | - int reg_buf_offset = 0; |
---|
2150 | | - int reg_buf_bytes = 0; |
---|
2151 | | - int next_unlinked_offset; |
---|
2152 | | - int inodes_per_buf; |
---|
2153 | | - xfs_agino_t *logged_nextp; |
---|
2154 | | - xfs_agino_t *buffer_nextp; |
---|
2155 | | - |
---|
2156 | | - trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); |
---|
2157 | | - |
---|
2158 | | - /* |
---|
2159 | | - * Post recovery validation only works properly on CRC enabled |
---|
2160 | | - * filesystems. |
---|
2161 | | - */ |
---|
2162 | | - if (xfs_sb_version_hascrc(&mp->m_sb)) |
---|
2163 | | - bp->b_ops = &xfs_inode_buf_ops; |
---|
2164 | | - |
---|
2165 | | - inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; |
---|
2166 | | - for (i = 0; i < inodes_per_buf; i++) { |
---|
2167 | | - next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + |
---|
2168 | | - offsetof(xfs_dinode_t, di_next_unlinked); |
---|
2169 | | - |
---|
2170 | | - while (next_unlinked_offset >= |
---|
2171 | | - (reg_buf_offset + reg_buf_bytes)) { |
---|
2172 | | - /* |
---|
2173 | | - * The next di_next_unlinked field is beyond |
---|
2174 | | - * the current logged region. Find the next |
---|
2175 | | - * logged region that contains or is beyond |
---|
2176 | | - * the current di_next_unlinked field. |
---|
2177 | | - */ |
---|
2178 | | - bit += nbits; |
---|
2179 | | - bit = xfs_next_bit(buf_f->blf_data_map, |
---|
2180 | | - buf_f->blf_map_size, bit); |
---|
2181 | | - |
---|
2182 | | - /* |
---|
2183 | | - * If there are no more logged regions in the |
---|
2184 | | - * buffer, then we're done. |
---|
2185 | | - */ |
---|
2186 | | - if (bit == -1) |
---|
2187 | | - return 0; |
---|
2188 | | - |
---|
2189 | | - nbits = xfs_contig_bits(buf_f->blf_data_map, |
---|
2190 | | - buf_f->blf_map_size, bit); |
---|
2191 | | - ASSERT(nbits > 0); |
---|
2192 | | - reg_buf_offset = bit << XFS_BLF_SHIFT; |
---|
2193 | | - reg_buf_bytes = nbits << XFS_BLF_SHIFT; |
---|
2194 | | - item_index++; |
---|
2195 | | - } |
---|
2196 | | - |
---|
2197 | | - /* |
---|
2198 | | - * If the current logged region starts after the current |
---|
2199 | | - * di_next_unlinked field, then move on to the next |
---|
2200 | | - * di_next_unlinked field. |
---|
2201 | | - */ |
---|
2202 | | - if (next_unlinked_offset < reg_buf_offset) |
---|
2203 | | - continue; |
---|
2204 | | - |
---|
2205 | | - ASSERT(item->ri_buf[item_index].i_addr != NULL); |
---|
2206 | | - ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
---|
2207 | | - ASSERT((reg_buf_offset + reg_buf_bytes) <= |
---|
2208 | | - BBTOB(bp->b_io_length)); |
---|
2209 | | - |
---|
2210 | | - /* |
---|
2211 | | - * The current logged region contains a copy of the |
---|
2212 | | - * current di_next_unlinked field. Extract its value |
---|
2213 | | - * and copy it to the buffer copy. |
---|
2214 | | - */ |
---|
2215 | | - logged_nextp = item->ri_buf[item_index].i_addr + |
---|
2216 | | - next_unlinked_offset - reg_buf_offset; |
---|
2217 | | - if (unlikely(*logged_nextp == 0)) { |
---|
2218 | | - xfs_alert(mp, |
---|
2219 | | - "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). " |
---|
2220 | | - "Trying to replay bad (0) inode di_next_unlinked field.", |
---|
2221 | | - item, bp); |
---|
2222 | | - XFS_ERROR_REPORT("xlog_recover_do_inode_buf", |
---|
2223 | | - XFS_ERRLEVEL_LOW, mp); |
---|
2224 | | - return -EFSCORRUPTED; |
---|
2225 | | - } |
---|
2226 | | - |
---|
2227 | | - buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset); |
---|
2228 | | - *buffer_nextp = *logged_nextp; |
---|
2229 | | - |
---|
2230 | | - /* |
---|
2231 | | - * If necessary, recalculate the CRC in the on-disk inode. We |
---|
2232 | | - * have to leave the inode in a consistent state for whoever |
---|
2233 | | - * reads it next.... |
---|
2234 | | - */ |
---|
2235 | | - xfs_dinode_calc_crc(mp, |
---|
2236 | | - xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); |
---|
2237 | | - |
---|
2238 | | - } |
---|
2239 | | - |
---|
2240 | | - return 0; |
---|
2241 | | -} |
---|
2242 | | - |
---|
2243 | | -/* |
---|
2244 | | - * V5 filesystems know the age of the buffer on disk being recovered. We can |
---|
2245 | | - * have newer objects on disk than we are replaying, and so for these cases we |
---|
2246 | | - * don't want to replay the current change as that will make the buffer contents |
---|
2247 | | - * temporarily invalid on disk. |
---|
2248 | | - * |
---|
2249 | | - * The magic number might not match the buffer type we are going to recover |
---|
2250 | | - * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence |
---|
2251 | | - * extract the LSN of the existing object in the buffer based on its current |
---|
2252 | | - * magic number. If we don't recognise the magic number in the buffer, then |
---|
2253 | | - * return a LSN of -1 so that the caller knows it was an unrecognised block and |
---|
2254 | | - * so can recover the buffer. |
---|
2255 | | - * |
---|
2256 | | - * Note: we cannot rely solely on magic number matches to determine that the |
---|
2257 | | - * buffer has a valid LSN - we also need to verify that it belongs to this |
---|
2258 | | - * filesystem, so we need to extract the object's UUID and compare it to that |
---|
2259 | | - * which we read from the superblock. If the UUIDs don't match, then we've got a |
---|
2260 | | - * stale metadata block from an old filesystem instance that we need to recover |
---|
2261 | | - * over the top of. |
---|
2262 | | - */ |
---|
2263 | | -static xfs_lsn_t |
---|
2264 | | -xlog_recover_get_buf_lsn( |
---|
2265 | | - struct xfs_mount *mp, |
---|
2266 | | - struct xfs_buf *bp) |
---|
2267 | | -{ |
---|
2268 | | - uint32_t magic32; |
---|
2269 | | - uint16_t magic16; |
---|
2270 | | - uint16_t magicda; |
---|
2271 | | - void *blk = bp->b_addr; |
---|
2272 | | - uuid_t *uuid; |
---|
2273 | | - xfs_lsn_t lsn = -1; |
---|
2274 | | - |
---|
2275 | | - /* v4 filesystems always recover immediately */ |
---|
2276 | | - if (!xfs_sb_version_hascrc(&mp->m_sb)) |
---|
2277 | | - goto recover_immediately; |
---|
2278 | | - |
---|
2279 | | - magic32 = be32_to_cpu(*(__be32 *)blk); |
---|
2280 | | - switch (magic32) { |
---|
2281 | | - case XFS_ABTB_CRC_MAGIC: |
---|
2282 | | - case XFS_ABTC_CRC_MAGIC: |
---|
2283 | | - case XFS_ABTB_MAGIC: |
---|
2284 | | - case XFS_ABTC_MAGIC: |
---|
2285 | | - case XFS_RMAP_CRC_MAGIC: |
---|
2286 | | - case XFS_REFC_CRC_MAGIC: |
---|
2287 | | - case XFS_IBT_CRC_MAGIC: |
---|
2288 | | - case XFS_IBT_MAGIC: { |
---|
2289 | | - struct xfs_btree_block *btb = blk; |
---|
2290 | | - |
---|
2291 | | - lsn = be64_to_cpu(btb->bb_u.s.bb_lsn); |
---|
2292 | | - uuid = &btb->bb_u.s.bb_uuid; |
---|
2293 | | - break; |
---|
2294 | | - } |
---|
2295 | | - case XFS_BMAP_CRC_MAGIC: |
---|
2296 | | - case XFS_BMAP_MAGIC: { |
---|
2297 | | - struct xfs_btree_block *btb = blk; |
---|
2298 | | - |
---|
2299 | | - lsn = be64_to_cpu(btb->bb_u.l.bb_lsn); |
---|
2300 | | - uuid = &btb->bb_u.l.bb_uuid; |
---|
2301 | | - break; |
---|
2302 | | - } |
---|
2303 | | - case XFS_AGF_MAGIC: |
---|
2304 | | - lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn); |
---|
2305 | | - uuid = &((struct xfs_agf *)blk)->agf_uuid; |
---|
2306 | | - break; |
---|
2307 | | - case XFS_AGFL_MAGIC: |
---|
2308 | | - lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn); |
---|
2309 | | - uuid = &((struct xfs_agfl *)blk)->agfl_uuid; |
---|
2310 | | - break; |
---|
2311 | | - case XFS_AGI_MAGIC: |
---|
2312 | | - lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn); |
---|
2313 | | - uuid = &((struct xfs_agi *)blk)->agi_uuid; |
---|
2314 | | - break; |
---|
2315 | | - case XFS_SYMLINK_MAGIC: |
---|
2316 | | - lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn); |
---|
2317 | | - uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid; |
---|
2318 | | - break; |
---|
2319 | | - case XFS_DIR3_BLOCK_MAGIC: |
---|
2320 | | - case XFS_DIR3_DATA_MAGIC: |
---|
2321 | | - case XFS_DIR3_FREE_MAGIC: |
---|
2322 | | - lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn); |
---|
2323 | | - uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; |
---|
2324 | | - break; |
---|
2325 | | - case XFS_ATTR3_RMT_MAGIC: |
---|
2326 | | - /* |
---|
2327 | | - * Remote attr blocks are written synchronously, rather than |
---|
2328 | | - * being logged. That means they do not contain a valid LSN |
---|
2329 | | - * (i.e. transactionally ordered) in them, and hence any time we |
---|
2330 | | - * see a buffer to replay over the top of a remote attribute |
---|
2331 | | - * block we should simply do so. |
---|
2332 | | - */ |
---|
2333 | | - goto recover_immediately; |
---|
2334 | | - case XFS_SB_MAGIC: |
---|
2335 | | - /* |
---|
2336 | | - * superblock uuids are magic. We may or may not have a |
---|
2337 | | - * sb_meta_uuid on disk, but it will be set in the in-core |
---|
2338 | | - * superblock. We set the uuid pointer for verification |
---|
2339 | | - * according to the superblock feature mask to ensure we check |
---|
2340 | | - * the relevant UUID in the superblock. |
---|
2341 | | - */ |
---|
2342 | | - lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); |
---|
2343 | | - if (xfs_sb_version_hasmetauuid(&mp->m_sb)) |
---|
2344 | | - uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid; |
---|
2345 | | - else |
---|
2346 | | - uuid = &((struct xfs_dsb *)blk)->sb_uuid; |
---|
2347 | | - break; |
---|
2348 | | - default: |
---|
2349 | | - break; |
---|
2350 | | - } |
---|
2351 | | - |
---|
2352 | | - if (lsn != (xfs_lsn_t)-1) { |
---|
2353 | | - if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid)) |
---|
2354 | | - goto recover_immediately; |
---|
2355 | | - return lsn; |
---|
2356 | | - } |
---|
2357 | | - |
---|
2358 | | - magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic); |
---|
2359 | | - switch (magicda) { |
---|
2360 | | - case XFS_DIR3_LEAF1_MAGIC: |
---|
2361 | | - case XFS_DIR3_LEAFN_MAGIC: |
---|
2362 | | - case XFS_DA3_NODE_MAGIC: |
---|
2363 | | - lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); |
---|
2364 | | - uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; |
---|
2365 | | - break; |
---|
2366 | | - default: |
---|
2367 | | - break; |
---|
2368 | | - } |
---|
2369 | | - |
---|
2370 | | - if (lsn != (xfs_lsn_t)-1) { |
---|
2371 | | - if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) |
---|
2372 | | - goto recover_immediately; |
---|
2373 | | - return lsn; |
---|
2374 | | - } |
---|
2375 | | - |
---|
2376 | | - /* |
---|
2377 | | - * We do individual object checks on dquot and inode buffers as they |
---|
2378 | | - * have their own individual LSN records. Also, we could have a stale |
---|
2379 | | - * buffer here, so we have to at least recognise these buffer types. |
---|
2380 | | - * |
---|
2381 | | - * A noted complexity here is inode unlinked list processing - it logs |
---|
2382 | | - * the inode directly in the buffer, but we don't know which inodes have |
---|
2383 | | - * been modified, and there is no global buffer LSN. Hence we need to |
---|
2384 | | - * recover all inode buffer types immediately. This problem will be |
---|
2385 | | - * fixed by logical logging of the unlinked list modifications. |
---|
2386 | | - */ |
---|
2387 | | - magic16 = be16_to_cpu(*(__be16 *)blk); |
---|
2388 | | - switch (magic16) { |
---|
2389 | | - case XFS_DQUOT_MAGIC: |
---|
2390 | | - case XFS_DINODE_MAGIC: |
---|
2391 | | - goto recover_immediately; |
---|
2392 | | - default: |
---|
2393 | | - break; |
---|
2394 | | - } |
---|
2395 | | - |
---|
2396 | | - /* unknown buffer contents, recover immediately */ |
---|
2397 | | - |
---|
2398 | | -recover_immediately: |
---|
2399 | | - return (xfs_lsn_t)-1; |
---|
2400 | | - |
---|
2401 | | -} |
---|
2402 | | - |
---|
2403 | | -/* |
---|
2404 | | - * Validate the recovered buffer is of the correct type and attach the |
---|
2405 | | - * appropriate buffer operations to them for writeback. Magic numbers are in a |
---|
2406 | | - * few places: |
---|
2407 | | - * the first 16 bits of the buffer (inode buffer, dquot buffer), |
---|
2408 | | - * the first 32 bits of the buffer (most blocks), |
---|
2409 | | - * inside a struct xfs_da_blkinfo at the start of the buffer. |
---|
2410 | | - */ |
---|
2411 | | -static void |
---|
2412 | | -xlog_recover_validate_buf_type( |
---|
2413 | | - struct xfs_mount *mp, |
---|
2414 | | - struct xfs_buf *bp, |
---|
2415 | | - xfs_buf_log_format_t *buf_f, |
---|
2416 | | - xfs_lsn_t current_lsn) |
---|
2417 | | -{ |
---|
2418 | | - struct xfs_da_blkinfo *info = bp->b_addr; |
---|
2419 | | - uint32_t magic32; |
---|
2420 | | - uint16_t magic16; |
---|
2421 | | - uint16_t magicda; |
---|
2422 | | - char *warnmsg = NULL; |
---|
2423 | | - |
---|
2424 | | - /* |
---|
2425 | | - * We can only do post recovery validation on items on CRC enabled |
---|
2426 | | - * filesystems as we need to know when the buffer was written to be able |
---|
2427 | | - * to determine if we should have replayed the item. If we replay old |
---|
2428 | | - * metadata over a newer buffer, then it will enter a temporarily |
---|
2429 | | - * inconsistent state resulting in verification failures. Hence for now |
---|
2430 | | - * just avoid the verification stage for non-crc filesystems |
---|
2431 | | - */ |
---|
2432 | | - if (!xfs_sb_version_hascrc(&mp->m_sb)) |
---|
2433 | | - return; |
---|
2434 | | - |
---|
2435 | | - magic32 = be32_to_cpu(*(__be32 *)bp->b_addr); |
---|
2436 | | - magic16 = be16_to_cpu(*(__be16*)bp->b_addr); |
---|
2437 | | - magicda = be16_to_cpu(info->magic); |
---|
2438 | | - switch (xfs_blft_from_flags(buf_f)) { |
---|
2439 | | - case XFS_BLFT_BTREE_BUF: |
---|
2440 | | - switch (magic32) { |
---|
2441 | | - case XFS_ABTB_CRC_MAGIC: |
---|
2442 | | - case XFS_ABTC_CRC_MAGIC: |
---|
2443 | | - case XFS_ABTB_MAGIC: |
---|
2444 | | - case XFS_ABTC_MAGIC: |
---|
2445 | | - bp->b_ops = &xfs_allocbt_buf_ops; |
---|
2446 | | - break; |
---|
2447 | | - case XFS_IBT_CRC_MAGIC: |
---|
2448 | | - case XFS_FIBT_CRC_MAGIC: |
---|
2449 | | - case XFS_IBT_MAGIC: |
---|
2450 | | - case XFS_FIBT_MAGIC: |
---|
2451 | | - bp->b_ops = &xfs_inobt_buf_ops; |
---|
2452 | | - break; |
---|
2453 | | - case XFS_BMAP_CRC_MAGIC: |
---|
2454 | | - case XFS_BMAP_MAGIC: |
---|
2455 | | - bp->b_ops = &xfs_bmbt_buf_ops; |
---|
2456 | | - break; |
---|
2457 | | - case XFS_RMAP_CRC_MAGIC: |
---|
2458 | | - bp->b_ops = &xfs_rmapbt_buf_ops; |
---|
2459 | | - break; |
---|
2460 | | - case XFS_REFC_CRC_MAGIC: |
---|
2461 | | - bp->b_ops = &xfs_refcountbt_buf_ops; |
---|
2462 | | - break; |
---|
2463 | | - default: |
---|
2464 | | - warnmsg = "Bad btree block magic!"; |
---|
2465 | | - break; |
---|
2466 | | - } |
---|
2467 | | - break; |
---|
2468 | | - case XFS_BLFT_AGF_BUF: |
---|
2469 | | - if (magic32 != XFS_AGF_MAGIC) { |
---|
2470 | | - warnmsg = "Bad AGF block magic!"; |
---|
2471 | | - break; |
---|
2472 | | - } |
---|
2473 | | - bp->b_ops = &xfs_agf_buf_ops; |
---|
2474 | | - break; |
---|
2475 | | - case XFS_BLFT_AGFL_BUF: |
---|
2476 | | - if (magic32 != XFS_AGFL_MAGIC) { |
---|
2477 | | - warnmsg = "Bad AGFL block magic!"; |
---|
2478 | | - break; |
---|
2479 | | - } |
---|
2480 | | - bp->b_ops = &xfs_agfl_buf_ops; |
---|
2481 | | - break; |
---|
2482 | | - case XFS_BLFT_AGI_BUF: |
---|
2483 | | - if (magic32 != XFS_AGI_MAGIC) { |
---|
2484 | | - warnmsg = "Bad AGI block magic!"; |
---|
2485 | | - break; |
---|
2486 | | - } |
---|
2487 | | - bp->b_ops = &xfs_agi_buf_ops; |
---|
2488 | | - break; |
---|
2489 | | - case XFS_BLFT_UDQUOT_BUF: |
---|
2490 | | - case XFS_BLFT_PDQUOT_BUF: |
---|
2491 | | - case XFS_BLFT_GDQUOT_BUF: |
---|
2492 | | -#ifdef CONFIG_XFS_QUOTA |
---|
2493 | | - if (magic16 != XFS_DQUOT_MAGIC) { |
---|
2494 | | - warnmsg = "Bad DQUOT block magic!"; |
---|
2495 | | - break; |
---|
2496 | | - } |
---|
2497 | | - bp->b_ops = &xfs_dquot_buf_ops; |
---|
2498 | | -#else |
---|
2499 | | - xfs_alert(mp, |
---|
2500 | | - "Trying to recover dquots without QUOTA support built in!"); |
---|
2501 | | - ASSERT(0); |
---|
2502 | | -#endif |
---|
2503 | | - break; |
---|
2504 | | - case XFS_BLFT_DINO_BUF: |
---|
2505 | | - if (magic16 != XFS_DINODE_MAGIC) { |
---|
2506 | | - warnmsg = "Bad INODE block magic!"; |
---|
2507 | | - break; |
---|
2508 | | - } |
---|
2509 | | - bp->b_ops = &xfs_inode_buf_ops; |
---|
2510 | | - break; |
---|
2511 | | - case XFS_BLFT_SYMLINK_BUF: |
---|
2512 | | - if (magic32 != XFS_SYMLINK_MAGIC) { |
---|
2513 | | - warnmsg = "Bad symlink block magic!"; |
---|
2514 | | - break; |
---|
2515 | | - } |
---|
2516 | | - bp->b_ops = &xfs_symlink_buf_ops; |
---|
2517 | | - break; |
---|
2518 | | - case XFS_BLFT_DIR_BLOCK_BUF: |
---|
2519 | | - if (magic32 != XFS_DIR2_BLOCK_MAGIC && |
---|
2520 | | - magic32 != XFS_DIR3_BLOCK_MAGIC) { |
---|
2521 | | - warnmsg = "Bad dir block magic!"; |
---|
2522 | | - break; |
---|
2523 | | - } |
---|
2524 | | - bp->b_ops = &xfs_dir3_block_buf_ops; |
---|
2525 | | - break; |
---|
2526 | | - case XFS_BLFT_DIR_DATA_BUF: |
---|
2527 | | - if (magic32 != XFS_DIR2_DATA_MAGIC && |
---|
2528 | | - magic32 != XFS_DIR3_DATA_MAGIC) { |
---|
2529 | | - warnmsg = "Bad dir data magic!"; |
---|
2530 | | - break; |
---|
2531 | | - } |
---|
2532 | | - bp->b_ops = &xfs_dir3_data_buf_ops; |
---|
2533 | | - break; |
---|
2534 | | - case XFS_BLFT_DIR_FREE_BUF: |
---|
2535 | | - if (magic32 != XFS_DIR2_FREE_MAGIC && |
---|
2536 | | - magic32 != XFS_DIR3_FREE_MAGIC) { |
---|
2537 | | - warnmsg = "Bad dir3 free magic!"; |
---|
2538 | | - break; |
---|
2539 | | - } |
---|
2540 | | - bp->b_ops = &xfs_dir3_free_buf_ops; |
---|
2541 | | - break; |
---|
2542 | | - case XFS_BLFT_DIR_LEAF1_BUF: |
---|
2543 | | - if (magicda != XFS_DIR2_LEAF1_MAGIC && |
---|
2544 | | - magicda != XFS_DIR3_LEAF1_MAGIC) { |
---|
2545 | | - warnmsg = "Bad dir leaf1 magic!"; |
---|
2546 | | - break; |
---|
2547 | | - } |
---|
2548 | | - bp->b_ops = &xfs_dir3_leaf1_buf_ops; |
---|
2549 | | - break; |
---|
2550 | | - case XFS_BLFT_DIR_LEAFN_BUF: |
---|
2551 | | - if (magicda != XFS_DIR2_LEAFN_MAGIC && |
---|
2552 | | - magicda != XFS_DIR3_LEAFN_MAGIC) { |
---|
2553 | | - warnmsg = "Bad dir leafn magic!"; |
---|
2554 | | - break; |
---|
2555 | | - } |
---|
2556 | | - bp->b_ops = &xfs_dir3_leafn_buf_ops; |
---|
2557 | | - break; |
---|
2558 | | - case XFS_BLFT_DA_NODE_BUF: |
---|
2559 | | - if (magicda != XFS_DA_NODE_MAGIC && |
---|
2560 | | - magicda != XFS_DA3_NODE_MAGIC) { |
---|
2561 | | - warnmsg = "Bad da node magic!"; |
---|
2562 | | - break; |
---|
2563 | | - } |
---|
2564 | | - bp->b_ops = &xfs_da3_node_buf_ops; |
---|
2565 | | - break; |
---|
2566 | | - case XFS_BLFT_ATTR_LEAF_BUF: |
---|
2567 | | - if (magicda != XFS_ATTR_LEAF_MAGIC && |
---|
2568 | | - magicda != XFS_ATTR3_LEAF_MAGIC) { |
---|
2569 | | - warnmsg = "Bad attr leaf magic!"; |
---|
2570 | | - break; |
---|
2571 | | - } |
---|
2572 | | - bp->b_ops = &xfs_attr3_leaf_buf_ops; |
---|
2573 | | - break; |
---|
2574 | | - case XFS_BLFT_ATTR_RMT_BUF: |
---|
2575 | | - if (magic32 != XFS_ATTR3_RMT_MAGIC) { |
---|
2576 | | - warnmsg = "Bad attr remote magic!"; |
---|
2577 | | - break; |
---|
2578 | | - } |
---|
2579 | | - bp->b_ops = &xfs_attr3_rmt_buf_ops; |
---|
2580 | | - break; |
---|
2581 | | - case XFS_BLFT_SB_BUF: |
---|
2582 | | - if (magic32 != XFS_SB_MAGIC) { |
---|
2583 | | - warnmsg = "Bad SB block magic!"; |
---|
2584 | | - break; |
---|
2585 | | - } |
---|
2586 | | - bp->b_ops = &xfs_sb_buf_ops; |
---|
2587 | | - break; |
---|
2588 | | -#ifdef CONFIG_XFS_RT |
---|
2589 | | - case XFS_BLFT_RTBITMAP_BUF: |
---|
2590 | | - case XFS_BLFT_RTSUMMARY_BUF: |
---|
2591 | | - /* no magic numbers for verification of RT buffers */ |
---|
2592 | | - bp->b_ops = &xfs_rtbuf_ops; |
---|
2593 | | - break; |
---|
2594 | | -#endif /* CONFIG_XFS_RT */ |
---|
2595 | | - default: |
---|
2596 | | - xfs_warn(mp, "Unknown buffer type %d!", |
---|
2597 | | - xfs_blft_from_flags(buf_f)); |
---|
2598 | | - break; |
---|
2599 | | - } |
---|
2600 | | - |
---|
2601 | | - /* |
---|
2602 | | - * Nothing else to do in the case of a NULL current LSN as this means |
---|
2603 | | - * the buffer is more recent than the change in the log and will be |
---|
2604 | | - * skipped. |
---|
2605 | | - */ |
---|
2606 | | - if (current_lsn == NULLCOMMITLSN) |
---|
2607 | | - return; |
---|
2608 | | - |
---|
2609 | | - if (warnmsg) { |
---|
2610 | | - xfs_warn(mp, warnmsg); |
---|
2611 | | - ASSERT(0); |
---|
2612 | | - } |
---|
2613 | | - |
---|
2614 | | - /* |
---|
2615 | | - * We must update the metadata LSN of the buffer as it is written out to |
---|
2616 | | - * ensure that older transactions never replay over this one and corrupt |
---|
2617 | | - * the buffer. This can occur if log recovery is interrupted at some |
---|
2618 | | - * point after the current transaction completes, at which point a |
---|
2619 | | - * subsequent mount starts recovery from the beginning. |
---|
2620 | | - * |
---|
2621 | | - * Write verifiers update the metadata LSN from log items attached to |
---|
2622 | | - * the buffer. Therefore, initialize a bli purely to carry the LSN to |
---|
2623 | | - * the verifier. We'll clean it up in our ->iodone() callback. |
---|
2624 | | - */ |
---|
2625 | | - if (bp->b_ops) { |
---|
2626 | | - struct xfs_buf_log_item *bip; |
---|
2627 | | - |
---|
2628 | | - ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone); |
---|
2629 | | - bp->b_iodone = xlog_recover_iodone; |
---|
2630 | | - xfs_buf_item_init(bp, mp); |
---|
2631 | | - bip = bp->b_log_item; |
---|
2632 | | - bip->bli_item.li_lsn = current_lsn; |
---|
2633 | | - } |
---|
2634 | | -} |
---|
2635 | | - |
---|
2636 | | -/* |
---|
2637 | | - * Perform a 'normal' buffer recovery. Each logged region of the |
---|
2638 | | - * buffer should be copied over the corresponding region in the |
---|
2639 | | - * given buffer. The bitmap in the buf log format structure indicates |
---|
2640 | | - * where to place the logged data. |
---|
2641 | | - */ |
---|
2642 | | -STATIC void |
---|
2643 | | -xlog_recover_do_reg_buffer( |
---|
2644 | | - struct xfs_mount *mp, |
---|
2645 | | - xlog_recover_item_t *item, |
---|
2646 | | - struct xfs_buf *bp, |
---|
2647 | | - xfs_buf_log_format_t *buf_f, |
---|
2648 | | - xfs_lsn_t current_lsn) |
---|
2649 | | -{ |
---|
2650 | | - int i; |
---|
2651 | | - int bit; |
---|
2652 | | - int nbits; |
---|
2653 | | - xfs_failaddr_t fa; |
---|
2654 | | - |
---|
2655 | | - trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); |
---|
2656 | | - |
---|
2657 | | - bit = 0; |
---|
2658 | | - i = 1; /* 0 is the buf format structure */ |
---|
2659 | | - while (1) { |
---|
2660 | | - bit = xfs_next_bit(buf_f->blf_data_map, |
---|
2661 | | - buf_f->blf_map_size, bit); |
---|
2662 | | - if (bit == -1) |
---|
2663 | | - break; |
---|
2664 | | - nbits = xfs_contig_bits(buf_f->blf_data_map, |
---|
2665 | | - buf_f->blf_map_size, bit); |
---|
2666 | | - ASSERT(nbits > 0); |
---|
2667 | | - ASSERT(item->ri_buf[i].i_addr != NULL); |
---|
2668 | | - ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
---|
2669 | | - ASSERT(BBTOB(bp->b_io_length) >= |
---|
2670 | | - ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); |
---|
2671 | | - |
---|
2672 | | - /* |
---|
2673 | | - * The dirty regions logged in the buffer, even though |
---|
2674 | | - * contiguous, may span multiple chunks. This is because the |
---|
2675 | | - * dirty region may span a physical page boundary in a buffer |
---|
2676 | | - * and hence be split into two separate vectors for writing into |
---|
2677 | | - * the log. Hence we need to trim nbits back to the length of |
---|
2678 | | - * the current region being copied out of the log. |
---|
2679 | | - */ |
---|
2680 | | - if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT)) |
---|
2681 | | - nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT; |
---|
2682 | | - |
---|
2683 | | - /* |
---|
2684 | | - * Do a sanity check if this is a dquot buffer. Just checking |
---|
2685 | | - * the first dquot in the buffer should do. XXXThis is |
---|
2686 | | - * probably a good thing to do for other buf types also. |
---|
2687 | | - */ |
---|
2688 | | - fa = NULL; |
---|
2689 | | - if (buf_f->blf_flags & |
---|
2690 | | - (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
---|
2691 | | - if (item->ri_buf[i].i_addr == NULL) { |
---|
2692 | | - xfs_alert(mp, |
---|
2693 | | - "XFS: NULL dquot in %s.", __func__); |
---|
2694 | | - goto next; |
---|
2695 | | - } |
---|
2696 | | - if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { |
---|
2697 | | - xfs_alert(mp, |
---|
2698 | | - "XFS: dquot too small (%d) in %s.", |
---|
2699 | | - item->ri_buf[i].i_len, __func__); |
---|
2700 | | - goto next; |
---|
2701 | | - } |
---|
2702 | | - fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr, |
---|
2703 | | - -1, 0); |
---|
2704 | | - if (fa) { |
---|
2705 | | - xfs_alert(mp, |
---|
2706 | | - "dquot corrupt at %pS trying to replay into block 0x%llx", |
---|
2707 | | - fa, bp->b_bn); |
---|
2708 | | - goto next; |
---|
2709 | | - } |
---|
2710 | | - } |
---|
2711 | | - |
---|
2712 | | - memcpy(xfs_buf_offset(bp, |
---|
2713 | | - (uint)bit << XFS_BLF_SHIFT), /* dest */ |
---|
2714 | | - item->ri_buf[i].i_addr, /* source */ |
---|
2715 | | - nbits<<XFS_BLF_SHIFT); /* length */ |
---|
2716 | | - next: |
---|
2717 | | - i++; |
---|
2718 | | - bit += nbits; |
---|
2719 | | - } |
---|
2720 | | - |
---|
2721 | | - /* Shouldn't be any more regions */ |
---|
2722 | | - ASSERT(i == item->ri_total); |
---|
2723 | | - |
---|
2724 | | - xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn); |
---|
2725 | | -} |
---|
2726 | | - |
---|
2727 | | -/* |
---|
2728 | | - * Perform a dquot buffer recovery. |
---|
2729 | | - * Simple algorithm: if we have found a QUOTAOFF log item of the same type |
---|
2730 | | - * (ie. USR or GRP), then just toss this buffer away; don't recover it. |
---|
2731 | | - * Else, treat it as a regular buffer and do recovery. |
---|
2732 | | - * |
---|
2733 | | - * Return false if the buffer was tossed and true if we recovered the buffer to |
---|
2734 | | - * indicate to the caller if the buffer needs writing. |
---|
2735 | | - */ |
---|
2736 | | -STATIC bool |
---|
2737 | | -xlog_recover_do_dquot_buffer( |
---|
2738 | | - struct xfs_mount *mp, |
---|
2739 | | - struct xlog *log, |
---|
2740 | | - struct xlog_recover_item *item, |
---|
2741 | | - struct xfs_buf *bp, |
---|
2742 | | - struct xfs_buf_log_format *buf_f) |
---|
2743 | | -{ |
---|
2744 | | - uint type; |
---|
2745 | | - |
---|
2746 | | - trace_xfs_log_recover_buf_dquot_buf(log, buf_f); |
---|
2747 | | - |
---|
2748 | | - /* |
---|
2749 | | - * Filesystems are required to send in quota flags at mount time. |
---|
2750 | | - */ |
---|
2751 | | - if (!mp->m_qflags) |
---|
2752 | | - return false; |
---|
2753 | | - |
---|
2754 | | - type = 0; |
---|
2755 | | - if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) |
---|
2756 | | - type |= XFS_DQ_USER; |
---|
2757 | | - if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF) |
---|
2758 | | - type |= XFS_DQ_PROJ; |
---|
2759 | | - if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF) |
---|
2760 | | - type |= XFS_DQ_GROUP; |
---|
2761 | | - /* |
---|
2762 | | - * This type of quotas was turned off, so ignore this buffer |
---|
2763 | | - */ |
---|
2764 | | - if (log->l_quotaoffs_flag & type) |
---|
2765 | | - return false; |
---|
2766 | | - |
---|
2767 | | - xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN); |
---|
2768 | | - return true; |
---|
2769 | | -} |
---|
2770 | | - |
---|
2771 | | -/* |
---|
2772 | | - * This routine replays a modification made to a buffer at runtime. |
---|
2773 | | - * There are actually two types of buffer, regular and inode, which |
---|
2774 | | - * are handled differently. Inode buffers are handled differently |
---|
2775 | | - * in that we only recover a specific set of data from them, namely |
---|
2776 | | - * the inode di_next_unlinked fields. This is because all other inode |
---|
2777 | | - * data is actually logged via inode records and any data we replay |
---|
2778 | | - * here which overlaps that may be stale. |
---|
2779 | | - * |
---|
2780 | | - * When meta-data buffers are freed at run time we log a buffer item |
---|
2781 | | - * with the XFS_BLF_CANCEL bit set to indicate that previous copies |
---|
2782 | | - * of the buffer in the log should not be replayed at recovery time. |
---|
2783 | | - * This is so that if the blocks covered by the buffer are reused for |
---|
2784 | | - * file data before we crash we don't end up replaying old, freed |
---|
2785 | | - * meta-data into a user's file. |
---|
2786 | | - * |
---|
2787 | | - * To handle the cancellation of buffer log items, we make two passes |
---|
2788 | | - * over the log during recovery. During the first we build a table of |
---|
2789 | | - * those buffers which have been cancelled, and during the second we |
---|
2790 | | - * only replay those buffers which do not have corresponding cancel |
---|
2791 | | - * records in the table. See xlog_recover_buffer_pass[1,2] above |
---|
2792 | | - * for more details on the implementation of the table of cancel records. |
---|
2793 | | - */ |
---|
2794 | | -STATIC int |
---|
2795 | | -xlog_recover_buffer_pass2( |
---|
2796 | | - struct xlog *log, |
---|
2797 | | - struct list_head *buffer_list, |
---|
2798 | | - struct xlog_recover_item *item, |
---|
2799 | | - xfs_lsn_t current_lsn) |
---|
2800 | | -{ |
---|
2801 | | - xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
---|
2802 | | - xfs_mount_t *mp = log->l_mp; |
---|
2803 | | - xfs_buf_t *bp; |
---|
2804 | | - int error; |
---|
2805 | | - uint buf_flags; |
---|
2806 | | - xfs_lsn_t lsn; |
---|
2807 | | - |
---|
2808 | | - /* |
---|
2809 | | - * In this pass we only want to recover all the buffers which have |
---|
2810 | | - * not been cancelled and are not cancellation buffers themselves. |
---|
2811 | | - */ |
---|
2812 | | - if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, |
---|
2813 | | - buf_f->blf_len, buf_f->blf_flags)) { |
---|
2814 | | - trace_xfs_log_recover_buf_cancel(log, buf_f); |
---|
2815 | | - return 0; |
---|
2816 | | - } |
---|
2817 | | - |
---|
2818 | | - trace_xfs_log_recover_buf_recover(log, buf_f); |
---|
2819 | | - |
---|
2820 | | - buf_flags = 0; |
---|
2821 | | - if (buf_f->blf_flags & XFS_BLF_INODE_BUF) |
---|
2822 | | - buf_flags |= XBF_UNMAPPED; |
---|
2823 | | - |
---|
2824 | | - bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
---|
2825 | | - buf_flags, NULL); |
---|
2826 | | - if (!bp) |
---|
2827 | | - return -ENOMEM; |
---|
2828 | | - error = bp->b_error; |
---|
2829 | | - if (error) { |
---|
2830 | | - xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); |
---|
2831 | | - goto out_release; |
---|
2832 | | - } |
---|
2833 | | - |
---|
2834 | | - /* |
---|
2835 | | - * Recover the buffer only if we get an LSN from it and it's less than |
---|
2836 | | - * the lsn of the transaction we are replaying. |
---|
2837 | | - * |
---|
2838 | | - * Note that we have to be extremely careful of readahead here. |
---|
2839 | | - * Readahead does not attach verifiers to the buffers, so if we don't |
---|
2840 | | - * actually do any replay after readahead because the LSN we found |
---|
2841 | | - * in the buffer is more recent than the current transaction, then we |
---|
2842 | | - * need to attach the verifier directly. Failure to do so means that |
---|
2843 | | - * future recovery actions (e.g. EFI and unlinked list recovery) can |
---|
2844 | | - * operate on the buffers without the verifier attached. This |
---|
2845 | | - * can lead to blocks on disk having the correct content but a stale |
---|
2846 | | - * CRC. |
---|
2847 | | - * |
---|
2848 | | - * It is safe to assume these clean buffers are currently up to date. |
---|
2849 | | - * If the buffer is dirtied by a later transaction being replayed, then |
---|
2850 | | - * the verifier will be reset to match whatever recovery turns that |
---|
2851 | | - * buffer into. |
---|
2852 | | - */ |
---|
2853 | | - lsn = xlog_recover_get_buf_lsn(mp, bp); |
---|
2854 | | - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { |
---|
2855 | | - trace_xfs_log_recover_buf_skip(log, buf_f); |
---|
2856 | | - xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); |
---|
2857 | | - goto out_release; |
---|
2858 | | - } |
---|
2859 | | - |
---|
2860 | | - if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
---|
2861 | | - error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
---|
2862 | | - if (error) |
---|
2863 | | - goto out_release; |
---|
2864 | | - } else if (buf_f->blf_flags & |
---|
2865 | | - (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
---|
2866 | | - bool dirty; |
---|
2867 | | - |
---|
2868 | | - dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
---|
2869 | | - if (!dirty) |
---|
2870 | | - goto out_release; |
---|
2871 | | - } else { |
---|
2872 | | - xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn); |
---|
2873 | | - } |
---|
2874 | | - |
---|
2875 | | - /* |
---|
2876 | | - * Perform delayed write on the buffer. Asynchronous writes will be |
---|
2877 | | - * slower when taking into account all the buffers to be flushed. |
---|
2878 | | - * |
---|
2879 | | - * Also make sure that only inode buffers with good sizes stay in |
---|
2880 | | - * the buffer cache. The kernel moves inodes in buffers of 1 block |
---|
2881 | | - * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode |
---|
2882 | | - * buffers in the log can be a different size if the log was generated |
---|
2883 | | - * by an older kernel using unclustered inode buffers or a newer kernel |
---|
2884 | | - * running with a different inode cluster size. Regardless, if the |
---|
2885 | | - * inode buffer size isn't max(blocksize, mp->m_inode_cluster_size) |
---|
2886 | | - * for *our* value of mp->m_inode_cluster_size, then we need to keep |
---|
2887 | | - * the buffer out of the buffer cache so that the buffer won't |
---|
2888 | | - * overlap with future reads of those inodes. |
---|
2889 | | - */ |
---|
2890 | | - if (XFS_DINODE_MAGIC == |
---|
2891 | | - be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && |
---|
2892 | | - (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize, |
---|
2893 | | - (uint32_t)log->l_mp->m_inode_cluster_size))) { |
---|
2894 | | - xfs_buf_stale(bp); |
---|
2895 | | - error = xfs_bwrite(bp); |
---|
2896 | | - } else { |
---|
2897 | | - ASSERT(bp->b_target->bt_mount == mp); |
---|
2898 | | - bp->b_iodone = xlog_recover_iodone; |
---|
2899 | | - xfs_buf_delwri_queue(bp, buffer_list); |
---|
2900 | | - } |
---|
2901 | | - |
---|
2902 | | -out_release: |
---|
2903 | | - xfs_buf_relse(bp); |
---|
2904 | | - return error; |
---|
2905 | | -} |
---|
2906 | | - |
---|
2907 | | -/* |
---|
2908 | | - * Inode fork owner changes |
---|
2909 | | - * |
---|
2910 | | - * If we have been told that we have to reparent the inode fork, it's because an |
---|
2911 | | - * extent swap operation on a CRC enabled filesystem has been done and we are |
---|
2912 | | - * replaying it. We need to walk the BMBT of the appropriate fork and change the |
---|
2913 | | - * owners of it. |
---|
2914 | | - * |
---|
2915 | | - * The complexity here is that we don't have an inode context to work with, so |
---|
2916 | | - * after we've replayed the inode we need to instantiate one. This is where the |
---|
2917 | | - * fun begins. |
---|
2918 | | - * |
---|
2919 | | - * We are in the middle of log recovery, so we can't run transactions. That |
---|
2920 | | - * means we cannot use cache coherent inode instantiation via xfs_iget(), as |
---|
2921 | | - * that will result in the corresponding iput() running the inode through |
---|
2922 | | - * xfs_inactive(). If we've just replayed an inode core that changes the link |
---|
2923 | | - * count to zero (i.e. it's been unlinked), then xfs_inactive() will run |
---|
2924 | | - * transactions (bad!). |
---|
2925 | | - * |
---|
2926 | | - * So, to avoid this, we instantiate an inode directly from the inode core we've |
---|
2927 | | - * just recovered. We have the buffer still locked, and all we really need to |
---|
2928 | | - * instantiate is the inode core and the forks being modified. We can do this |
---|
2929 | | - * manually, then run the inode btree owner change, and then tear down the |
---|
2930 | | - * xfs_inode without having to run any transactions at all. |
---|
2931 | | - * |
---|
2932 | | - * Also, because we don't have a transaction context available here but need to |
---|
2933 | | - * gather all the buffers we modify for writeback so we pass the buffer_list |
---|
2934 | | - * instead for the operation to use. |
---|
2935 | | - */ |
---|
2936 | | - |
---|
2937 | | -STATIC int |
---|
2938 | | -xfs_recover_inode_owner_change( |
---|
2939 | | - struct xfs_mount *mp, |
---|
2940 | | - struct xfs_dinode *dip, |
---|
2941 | | - struct xfs_inode_log_format *in_f, |
---|
2942 | | - struct list_head *buffer_list) |
---|
2943 | | -{ |
---|
2944 | | - struct xfs_inode *ip; |
---|
2945 | | - int error; |
---|
2946 | | - |
---|
2947 | | - ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)); |
---|
2948 | | - |
---|
2949 | | - ip = xfs_inode_alloc(mp, in_f->ilf_ino); |
---|
2950 | | - if (!ip) |
---|
2951 | | - return -ENOMEM; |
---|
2952 | | - |
---|
2953 | | - /* instantiate the inode */ |
---|
2954 | | - xfs_inode_from_disk(ip, dip); |
---|
2955 | | - ASSERT(ip->i_d.di_version >= 3); |
---|
2956 | | - |
---|
2957 | | - error = xfs_iformat_fork(ip, dip); |
---|
2958 | | - if (error) |
---|
2959 | | - goto out_free_ip; |
---|
2960 | | - |
---|
2961 | | - if (!xfs_inode_verify_forks(ip)) { |
---|
2962 | | - error = -EFSCORRUPTED; |
---|
2963 | | - goto out_free_ip; |
---|
2964 | | - } |
---|
2965 | | - |
---|
2966 | | - if (in_f->ilf_fields & XFS_ILOG_DOWNER) { |
---|
2967 | | - ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); |
---|
2968 | | - error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK, |
---|
2969 | | - ip->i_ino, buffer_list); |
---|
2970 | | - if (error) |
---|
2971 | | - goto out_free_ip; |
---|
2972 | | - } |
---|
2973 | | - |
---|
2974 | | - if (in_f->ilf_fields & XFS_ILOG_AOWNER) { |
---|
2975 | | - ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT); |
---|
2976 | | - error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK, |
---|
2977 | | - ip->i_ino, buffer_list); |
---|
2978 | | - if (error) |
---|
2979 | | - goto out_free_ip; |
---|
2980 | | - } |
---|
2981 | | - |
---|
2982 | | -out_free_ip: |
---|
2983 | | - xfs_inode_free(ip); |
---|
2984 | | - return error; |
---|
2985 | | -} |
---|
2986 | | - |
---|
2987 | | -STATIC int |
---|
2988 | | -xlog_recover_inode_pass2( |
---|
2989 | | - struct xlog *log, |
---|
2990 | | - struct list_head *buffer_list, |
---|
2991 | | - struct xlog_recover_item *item, |
---|
2992 | | - xfs_lsn_t current_lsn) |
---|
2993 | | -{ |
---|
2994 | | - struct xfs_inode_log_format *in_f; |
---|
2995 | | - xfs_mount_t *mp = log->l_mp; |
---|
2996 | | - xfs_buf_t *bp; |
---|
2997 | | - xfs_dinode_t *dip; |
---|
2998 | | - int len; |
---|
2999 | | - char *src; |
---|
3000 | | - char *dest; |
---|
3001 | | - int error; |
---|
3002 | | - int attr_index; |
---|
3003 | | - uint fields; |
---|
3004 | | - struct xfs_log_dinode *ldip; |
---|
3005 | | - uint isize; |
---|
3006 | | - int need_free = 0; |
---|
3007 | | - |
---|
3008 | | - if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { |
---|
3009 | | - in_f = item->ri_buf[0].i_addr; |
---|
3010 | | - } else { |
---|
3011 | | - in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP); |
---|
3012 | | - need_free = 1; |
---|
3013 | | - error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); |
---|
3014 | | - if (error) |
---|
3015 | | - goto error; |
---|
3016 | | - } |
---|
3017 | | - |
---|
3018 | | - /* |
---|
3019 | | - * Inode buffers can be freed, look out for it, |
---|
3020 | | - * and do not replay the inode. |
---|
3021 | | - */ |
---|
3022 | | - if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, |
---|
3023 | | - in_f->ilf_len, 0)) { |
---|
3024 | | - error = 0; |
---|
3025 | | - trace_xfs_log_recover_inode_cancel(log, in_f); |
---|
3026 | | - goto error; |
---|
3027 | | - } |
---|
3028 | | - trace_xfs_log_recover_inode_recover(log, in_f); |
---|
3029 | | - |
---|
3030 | | - bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, |
---|
3031 | | - &xfs_inode_buf_ops); |
---|
3032 | | - if (!bp) { |
---|
3033 | | - error = -ENOMEM; |
---|
3034 | | - goto error; |
---|
3035 | | - } |
---|
3036 | | - error = bp->b_error; |
---|
3037 | | - if (error) { |
---|
3038 | | - xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); |
---|
3039 | | - goto out_release; |
---|
3040 | | - } |
---|
3041 | | - ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); |
---|
3042 | | - dip = xfs_buf_offset(bp, in_f->ilf_boffset); |
---|
3043 | | - |
---|
3044 | | - /* |
---|
3045 | | - * Make sure the place we're flushing out to really looks |
---|
3046 | | - * like an inode! |
---|
3047 | | - */ |
---|
3048 | | - if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { |
---|
3049 | | - xfs_alert(mp, |
---|
3050 | | - "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld", |
---|
3051 | | - __func__, dip, bp, in_f->ilf_ino); |
---|
3052 | | - XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", |
---|
3053 | | - XFS_ERRLEVEL_LOW, mp); |
---|
3054 | | - error = -EFSCORRUPTED; |
---|
3055 | | - goto out_release; |
---|
3056 | | - } |
---|
3057 | | - ldip = item->ri_buf[1].i_addr; |
---|
3058 | | - if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) { |
---|
3059 | | - xfs_alert(mp, |
---|
3060 | | - "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld", |
---|
3061 | | - __func__, item, in_f->ilf_ino); |
---|
3062 | | - XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", |
---|
3063 | | - XFS_ERRLEVEL_LOW, mp); |
---|
3064 | | - error = -EFSCORRUPTED; |
---|
3065 | | - goto out_release; |
---|
3066 | | - } |
---|
3067 | | - |
---|
3068 | | - /* |
---|
3069 | | - * If the inode has an LSN in it, recover the inode only if it's less |
---|
3070 | | - * than the lsn of the transaction we are replaying. Note: we still |
---|
3071 | | - * need to replay an owner change even though the inode is more recent |
---|
3072 | | - * than the transaction as there is no guarantee that all the btree |
---|
3073 | | - * blocks are more recent than this transaction, too. |
---|
3074 | | - */ |
---|
3075 | | - if (dip->di_version >= 3) { |
---|
3076 | | - xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); |
---|
3077 | | - |
---|
3078 | | - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { |
---|
3079 | | - trace_xfs_log_recover_inode_skip(log, in_f); |
---|
3080 | | - error = 0; |
---|
3081 | | - goto out_owner_change; |
---|
3082 | | - } |
---|
3083 | | - } |
---|
3084 | | - |
---|
3085 | | - /* |
---|
3086 | | - * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes |
---|
3087 | | - * are transactional and if ordering is necessary we can determine that |
---|
3088 | | - * more accurately by the LSN field in the V3 inode core. Don't trust |
---|
3089 | | - * the inode versions we might be changing them here - use the |
---|
3090 | | - * superblock flag to determine whether we need to look at di_flushiter |
---|
3091 | | - * to skip replay when the on disk inode is newer than the log one |
---|
3092 | | - */ |
---|
3093 | | - if (!xfs_sb_version_hascrc(&mp->m_sb) && |
---|
3094 | | - ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { |
---|
3095 | | - /* |
---|
3096 | | - * Deal with the wrap case, DI_MAX_FLUSH is less |
---|
3097 | | - * than smaller numbers |
---|
3098 | | - */ |
---|
3099 | | - if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && |
---|
3100 | | - ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) { |
---|
3101 | | - /* do nothing */ |
---|
3102 | | - } else { |
---|
3103 | | - trace_xfs_log_recover_inode_skip(log, in_f); |
---|
3104 | | - error = 0; |
---|
3105 | | - goto out_release; |
---|
3106 | | - } |
---|
3107 | | - } |
---|
3108 | | - |
---|
3109 | | - /* Take the opportunity to reset the flush iteration count */ |
---|
3110 | | - ldip->di_flushiter = 0; |
---|
3111 | | - |
---|
3112 | | - if (unlikely(S_ISREG(ldip->di_mode))) { |
---|
3113 | | - if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) && |
---|
3114 | | - (ldip->di_format != XFS_DINODE_FMT_BTREE)) { |
---|
3115 | | - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", |
---|
3116 | | - XFS_ERRLEVEL_LOW, mp, ldip, |
---|
3117 | | - sizeof(*ldip)); |
---|
3118 | | - xfs_alert(mp, |
---|
3119 | | - "%s: Bad regular inode log record, rec ptr "PTR_FMT", " |
---|
3120 | | - "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", |
---|
3121 | | - __func__, item, dip, bp, in_f->ilf_ino); |
---|
3122 | | - error = -EFSCORRUPTED; |
---|
3123 | | - goto out_release; |
---|
3124 | | - } |
---|
3125 | | - } else if (unlikely(S_ISDIR(ldip->di_mode))) { |
---|
3126 | | - if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) && |
---|
3127 | | - (ldip->di_format != XFS_DINODE_FMT_BTREE) && |
---|
3128 | | - (ldip->di_format != XFS_DINODE_FMT_LOCAL)) { |
---|
3129 | | - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", |
---|
3130 | | - XFS_ERRLEVEL_LOW, mp, ldip, |
---|
3131 | | - sizeof(*ldip)); |
---|
3132 | | - xfs_alert(mp, |
---|
3133 | | - "%s: Bad dir inode log record, rec ptr "PTR_FMT", " |
---|
3134 | | - "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", |
---|
3135 | | - __func__, item, dip, bp, in_f->ilf_ino); |
---|
3136 | | - error = -EFSCORRUPTED; |
---|
3137 | | - goto out_release; |
---|
3138 | | - } |
---|
3139 | | - } |
---|
3140 | | - if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){ |
---|
3141 | | - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", |
---|
3142 | | - XFS_ERRLEVEL_LOW, mp, ldip, |
---|
3143 | | - sizeof(*ldip)); |
---|
3144 | | - xfs_alert(mp, |
---|
3145 | | - "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " |
---|
3146 | | - "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", |
---|
3147 | | - __func__, item, dip, bp, in_f->ilf_ino, |
---|
3148 | | - ldip->di_nextents + ldip->di_anextents, |
---|
3149 | | - ldip->di_nblocks); |
---|
3150 | | - error = -EFSCORRUPTED; |
---|
3151 | | - goto out_release; |
---|
3152 | | - } |
---|
3153 | | - if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) { |
---|
3154 | | - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", |
---|
3155 | | - XFS_ERRLEVEL_LOW, mp, ldip, |
---|
3156 | | - sizeof(*ldip)); |
---|
3157 | | - xfs_alert(mp, |
---|
3158 | | - "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " |
---|
3159 | | - "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__, |
---|
3160 | | - item, dip, bp, in_f->ilf_ino, ldip->di_forkoff); |
---|
3161 | | - error = -EFSCORRUPTED; |
---|
3162 | | - goto out_release; |
---|
3163 | | - } |
---|
3164 | | - isize = xfs_log_dinode_size(ldip->di_version); |
---|
3165 | | - if (unlikely(item->ri_buf[1].i_len > isize)) { |
---|
3166 | | - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", |
---|
3167 | | - XFS_ERRLEVEL_LOW, mp, ldip, |
---|
3168 | | - sizeof(*ldip)); |
---|
3169 | | - xfs_alert(mp, |
---|
3170 | | - "%s: Bad inode log record length %d, rec ptr "PTR_FMT, |
---|
3171 | | - __func__, item->ri_buf[1].i_len, item); |
---|
3172 | | - error = -EFSCORRUPTED; |
---|
3173 | | - goto out_release; |
---|
3174 | | - } |
---|
3175 | | - |
---|
3176 | | - /* recover the log dinode inode into the on disk inode */ |
---|
3177 | | - xfs_log_dinode_to_disk(ldip, dip); |
---|
3178 | | - |
---|
3179 | | - fields = in_f->ilf_fields; |
---|
3180 | | - if (fields & XFS_ILOG_DEV) |
---|
3181 | | - xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); |
---|
3182 | | - |
---|
3183 | | - if (in_f->ilf_size == 2) |
---|
3184 | | - goto out_owner_change; |
---|
3185 | | - len = item->ri_buf[2].i_len; |
---|
3186 | | - src = item->ri_buf[2].i_addr; |
---|
3187 | | - ASSERT(in_f->ilf_size <= 4); |
---|
3188 | | - ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); |
---|
3189 | | - ASSERT(!(fields & XFS_ILOG_DFORK) || |
---|
3190 | | - (len == in_f->ilf_dsize)); |
---|
3191 | | - |
---|
3192 | | - switch (fields & XFS_ILOG_DFORK) { |
---|
3193 | | - case XFS_ILOG_DDATA: |
---|
3194 | | - case XFS_ILOG_DEXT: |
---|
3195 | | - memcpy(XFS_DFORK_DPTR(dip), src, len); |
---|
3196 | | - break; |
---|
3197 | | - |
---|
3198 | | - case XFS_ILOG_DBROOT: |
---|
3199 | | - xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, |
---|
3200 | | - (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), |
---|
3201 | | - XFS_DFORK_DSIZE(dip, mp)); |
---|
3202 | | - break; |
---|
3203 | | - |
---|
3204 | | - default: |
---|
3205 | | - /* |
---|
3206 | | - * There are no data fork flags set. |
---|
3207 | | - */ |
---|
3208 | | - ASSERT((fields & XFS_ILOG_DFORK) == 0); |
---|
3209 | | - break; |
---|
3210 | | - } |
---|
3211 | | - |
---|
3212 | | - /* |
---|
3213 | | - * If we logged any attribute data, recover it. There may or |
---|
3214 | | - * may not have been any other non-core data logged in this |
---|
3215 | | - * transaction. |
---|
3216 | | - */ |
---|
3217 | | - if (in_f->ilf_fields & XFS_ILOG_AFORK) { |
---|
3218 | | - if (in_f->ilf_fields & XFS_ILOG_DFORK) { |
---|
3219 | | - attr_index = 3; |
---|
3220 | | - } else { |
---|
3221 | | - attr_index = 2; |
---|
3222 | | - } |
---|
3223 | | - len = item->ri_buf[attr_index].i_len; |
---|
3224 | | - src = item->ri_buf[attr_index].i_addr; |
---|
3225 | | - ASSERT(len == in_f->ilf_asize); |
---|
3226 | | - |
---|
3227 | | - switch (in_f->ilf_fields & XFS_ILOG_AFORK) { |
---|
3228 | | - case XFS_ILOG_ADATA: |
---|
3229 | | - case XFS_ILOG_AEXT: |
---|
3230 | | - dest = XFS_DFORK_APTR(dip); |
---|
3231 | | - ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); |
---|
3232 | | - memcpy(dest, src, len); |
---|
3233 | | - break; |
---|
3234 | | - |
---|
3235 | | - case XFS_ILOG_ABROOT: |
---|
3236 | | - dest = XFS_DFORK_APTR(dip); |
---|
3237 | | - xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, |
---|
3238 | | - len, (xfs_bmdr_block_t*)dest, |
---|
3239 | | - XFS_DFORK_ASIZE(dip, mp)); |
---|
3240 | | - break; |
---|
3241 | | - |
---|
3242 | | - default: |
---|
3243 | | - xfs_warn(log->l_mp, "%s: Invalid flag", __func__); |
---|
3244 | | - ASSERT(0); |
---|
3245 | | - error = -EIO; |
---|
3246 | | - goto out_release; |
---|
3247 | | - } |
---|
3248 | | - } |
---|
3249 | | - |
---|
3250 | | -out_owner_change: |
---|
3251 | | - /* Recover the swapext owner change unless inode has been deleted */ |
---|
3252 | | - if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) && |
---|
3253 | | - (dip->di_mode != 0)) |
---|
3254 | | - error = xfs_recover_inode_owner_change(mp, dip, in_f, |
---|
3255 | | - buffer_list); |
---|
3256 | | - /* re-generate the checksum. */ |
---|
3257 | | - xfs_dinode_calc_crc(log->l_mp, dip); |
---|
3258 | | - |
---|
3259 | | - ASSERT(bp->b_target->bt_mount == mp); |
---|
3260 | | - bp->b_iodone = xlog_recover_iodone; |
---|
3261 | | - xfs_buf_delwri_queue(bp, buffer_list); |
---|
3262 | | - |
---|
3263 | | -out_release: |
---|
3264 | | - xfs_buf_relse(bp); |
---|
3265 | | -error: |
---|
3266 | | - if (need_free) |
---|
3267 | | - kmem_free(in_f); |
---|
3268 | | - return error; |
---|
3269 | | -} |
---|
3270 | | - |
---|
3271 | | -/* |
---|
3272 | | - * Recover QUOTAOFF records. We simply make a note of it in the xlog |
---|
3273 | | - * structure, so that we know not to do any dquot item or dquot buffer recovery, |
---|
3274 | | - * of that type. |
---|
3275 | | - */ |
---|
3276 | | -STATIC int |
---|
3277 | | -xlog_recover_quotaoff_pass1( |
---|
3278 | | - struct xlog *log, |
---|
3279 | | - struct xlog_recover_item *item) |
---|
3280 | | -{ |
---|
3281 | | - xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; |
---|
3282 | | - ASSERT(qoff_f); |
---|
3283 | | - |
---|
3284 | | - /* |
---|
3285 | | - * The logitem format's flag tells us if this was user quotaoff, |
---|
3286 | | - * group/project quotaoff or both. |
---|
3287 | | - */ |
---|
3288 | | - if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) |
---|
3289 | | - log->l_quotaoffs_flag |= XFS_DQ_USER; |
---|
3290 | | - if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) |
---|
3291 | | - log->l_quotaoffs_flag |= XFS_DQ_PROJ; |
---|
3292 | | - if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) |
---|
3293 | | - log->l_quotaoffs_flag |= XFS_DQ_GROUP; |
---|
3294 | | - |
---|
3295 | | - return 0; |
---|
3296 | | -} |
---|
3297 | | - |
---|
3298 | | -/* |
---|
3299 | | - * Recover a dquot record |
---|
3300 | | - */ |
---|
3301 | | -STATIC int |
---|
3302 | | -xlog_recover_dquot_pass2( |
---|
3303 | | - struct xlog *log, |
---|
3304 | | - struct list_head *buffer_list, |
---|
3305 | | - struct xlog_recover_item *item, |
---|
3306 | | - xfs_lsn_t current_lsn) |
---|
3307 | | -{ |
---|
3308 | | - xfs_mount_t *mp = log->l_mp; |
---|
3309 | | - xfs_buf_t *bp; |
---|
3310 | | - struct xfs_disk_dquot *ddq, *recddq; |
---|
3311 | | - xfs_failaddr_t fa; |
---|
3312 | | - int error; |
---|
3313 | | - xfs_dq_logformat_t *dq_f; |
---|
3314 | | - uint type; |
---|
3315 | | - |
---|
3316 | | - |
---|
3317 | | - /* |
---|
3318 | | - * Filesystems are required to send in quota flags at mount time. |
---|
3319 | | - */ |
---|
3320 | | - if (mp->m_qflags == 0) |
---|
3321 | | - return 0; |
---|
3322 | | - |
---|
3323 | | - recddq = item->ri_buf[1].i_addr; |
---|
3324 | | - if (recddq == NULL) { |
---|
3325 | | - xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); |
---|
3326 | | - return -EIO; |
---|
3327 | | - } |
---|
3328 | | - if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { |
---|
3329 | | - xfs_alert(log->l_mp, "dquot too small (%d) in %s.", |
---|
3330 | | - item->ri_buf[1].i_len, __func__); |
---|
3331 | | - return -EIO; |
---|
3332 | | - } |
---|
3333 | | - |
---|
3334 | | - /* |
---|
3335 | | - * This type of quotas was turned off, so ignore this record. |
---|
3336 | | - */ |
---|
3337 | | - type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); |
---|
3338 | | - ASSERT(type); |
---|
3339 | | - if (log->l_quotaoffs_flag & type) |
---|
3340 | | - return 0; |
---|
3341 | | - |
---|
3342 | | - /* |
---|
3343 | | - * At this point we know that quota was _not_ turned off. |
---|
3344 | | - * Since the mount flags are not indicating to us otherwise, this |
---|
3345 | | - * must mean that quota is on, and the dquot needs to be replayed. |
---|
3346 | | - * Remember that we may not have fully recovered the superblock yet, |
---|
3347 | | - * so we can't do the usual trick of looking at the SB quota bits. |
---|
3348 | | - * |
---|
3349 | | - * The other possibility, of course, is that the quota subsystem was |
---|
3350 | | - * removed since the last mount - ENOSYS. |
---|
3351 | | - */ |
---|
3352 | | - dq_f = item->ri_buf[0].i_addr; |
---|
3353 | | - ASSERT(dq_f); |
---|
3354 | | - fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0); |
---|
3355 | | - if (fa) { |
---|
3356 | | - xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS", |
---|
3357 | | - dq_f->qlf_id, fa); |
---|
3358 | | - return -EIO; |
---|
3359 | | - } |
---|
3360 | | - ASSERT(dq_f->qlf_len == 1); |
---|
3361 | | - |
---|
3362 | | - /* |
---|
3363 | | - * At this point we are assuming that the dquots have been allocated |
---|
3364 | | - * and hence the buffer has valid dquots stamped in it. It should, |
---|
3365 | | - * therefore, pass verifier validation. If the dquot is bad, then the |
---|
3366 | | - * we'll return an error here, so we don't need to specifically check |
---|
3367 | | - * the dquot in the buffer after the verifier has run. |
---|
3368 | | - */ |
---|
3369 | | - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, |
---|
3370 | | - XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp, |
---|
3371 | | - &xfs_dquot_buf_ops); |
---|
3372 | | - if (error) |
---|
3373 | | - return error; |
---|
3374 | | - |
---|
3375 | | - ASSERT(bp); |
---|
3376 | | - ddq = xfs_buf_offset(bp, dq_f->qlf_boffset); |
---|
3377 | | - |
---|
3378 | | - /* |
---|
3379 | | - * If the dquot has an LSN in it, recover the dquot only if it's less |
---|
3380 | | - * than the lsn of the transaction we are replaying. |
---|
3381 | | - */ |
---|
3382 | | - if (xfs_sb_version_hascrc(&mp->m_sb)) { |
---|
3383 | | - struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq; |
---|
3384 | | - xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn); |
---|
3385 | | - |
---|
3386 | | - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { |
---|
3387 | | - goto out_release; |
---|
3388 | | - } |
---|
3389 | | - } |
---|
3390 | | - |
---|
3391 | | - memcpy(ddq, recddq, item->ri_buf[1].i_len); |
---|
3392 | | - if (xfs_sb_version_hascrc(&mp->m_sb)) { |
---|
3393 | | - xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), |
---|
3394 | | - XFS_DQUOT_CRC_OFF); |
---|
3395 | | - } |
---|
3396 | | - |
---|
3397 | | - ASSERT(dq_f->qlf_size == 2); |
---|
3398 | | - ASSERT(bp->b_target->bt_mount == mp); |
---|
3399 | | - bp->b_iodone = xlog_recover_iodone; |
---|
3400 | | - xfs_buf_delwri_queue(bp, buffer_list); |
---|
3401 | | - |
---|
3402 | | -out_release: |
---|
3403 | | - xfs_buf_relse(bp); |
---|
3404 | | - return 0; |
---|
3405 | | -} |
---|
3406 | | - |
---|
3407 | | -/* |
---|
3408 | | - * This routine is called to create an in-core extent free intent |
---|
3409 | | - * item from the efi format structure which was logged on disk. |
---|
3410 | | - * It allocates an in-core efi, copies the extents from the format |
---|
3411 | | - * structure into it, and adds the efi to the AIL with the given |
---|
3412 | | - * LSN. |
---|
3413 | | - */ |
---|
3414 | | -STATIC int |
---|
3415 | | -xlog_recover_efi_pass2( |
---|
3416 | | - struct xlog *log, |
---|
3417 | | - struct xlog_recover_item *item, |
---|
3418 | | - xfs_lsn_t lsn) |
---|
3419 | | -{ |
---|
3420 | | - int error; |
---|
3421 | | - struct xfs_mount *mp = log->l_mp; |
---|
3422 | | - struct xfs_efi_log_item *efip; |
---|
3423 | | - struct xfs_efi_log_format *efi_formatp; |
---|
3424 | | - |
---|
3425 | | - efi_formatp = item->ri_buf[0].i_addr; |
---|
3426 | | - |
---|
3427 | | - efip = xfs_efi_init(mp, efi_formatp->efi_nextents); |
---|
3428 | | - error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); |
---|
3429 | | - if (error) { |
---|
3430 | | - xfs_efi_item_free(efip); |
---|
3431 | | - return error; |
---|
3432 | | - } |
---|
3433 | | - atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); |
---|
3434 | | - |
---|
3435 | | - spin_lock(&log->l_ailp->ail_lock); |
---|
3436 | | - /* |
---|
3437 | | - * The EFI has two references. One for the EFD and one for EFI to ensure |
---|
3438 | | - * it makes it into the AIL. Insert the EFI into the AIL directly and |
---|
3439 | | - * drop the EFI reference. Note that xfs_trans_ail_update() drops the |
---|
3440 | | - * AIL lock. |
---|
3441 | | - */ |
---|
3442 | | - xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); |
---|
3443 | | - xfs_efi_release(efip); |
---|
3444 | | - return 0; |
---|
3445 | | -} |
---|
3446 | | - |
---|
3447 | | - |
---|
3448 | | -/* |
---|
3449 | | - * This routine is called when an EFD format structure is found in a committed |
---|
3450 | | - * transaction in the log. Its purpose is to cancel the corresponding EFI if it |
---|
3451 | | - * was still in the log. To do this it searches the AIL for the EFI with an id |
---|
3452 | | - * equal to that in the EFD format structure. If we find it we drop the EFD |
---|
3453 | | - * reference, which removes the EFI from the AIL and frees it. |
---|
3454 | | - */ |
---|
3455 | | -STATIC int |
---|
3456 | | -xlog_recover_efd_pass2( |
---|
3457 | | - struct xlog *log, |
---|
3458 | | - struct xlog_recover_item *item) |
---|
3459 | | -{ |
---|
3460 | | - xfs_efd_log_format_t *efd_formatp; |
---|
3461 | | - xfs_efi_log_item_t *efip = NULL; |
---|
3462 | | - xfs_log_item_t *lip; |
---|
3463 | | - uint64_t efi_id; |
---|
3464 | | - struct xfs_ail_cursor cur; |
---|
3465 | | - struct xfs_ail *ailp = log->l_ailp; |
---|
3466 | | - |
---|
3467 | | - efd_formatp = item->ri_buf[0].i_addr; |
---|
3468 | | - ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + |
---|
3469 | | - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || |
---|
3470 | | - (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + |
---|
3471 | | - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); |
---|
3472 | | - efi_id = efd_formatp->efd_efi_id; |
---|
3473 | | - |
---|
3474 | | - /* |
---|
3475 | | - * Search for the EFI with the id in the EFD format structure in the |
---|
3476 | | - * AIL. |
---|
3477 | | - */ |
---|
3478 | | - spin_lock(&ailp->ail_lock); |
---|
3479 | | - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
---|
3480 | | - while (lip != NULL) { |
---|
3481 | | - if (lip->li_type == XFS_LI_EFI) { |
---|
3482 | | - efip = (xfs_efi_log_item_t *)lip; |
---|
3483 | | - if (efip->efi_format.efi_id == efi_id) { |
---|
3484 | | - /* |
---|
3485 | | - * Drop the EFD reference to the EFI. This |
---|
3486 | | - * removes the EFI from the AIL and frees it. |
---|
3487 | | - */ |
---|
3488 | | - spin_unlock(&ailp->ail_lock); |
---|
3489 | | - xfs_efi_release(efip); |
---|
3490 | | - spin_lock(&ailp->ail_lock); |
---|
3491 | | - break; |
---|
3492 | | - } |
---|
3493 | | - } |
---|
3494 | | - lip = xfs_trans_ail_cursor_next(ailp, &cur); |
---|
3495 | | - } |
---|
3496 | | - |
---|
3497 | | - xfs_trans_ail_cursor_done(&cur); |
---|
3498 | | - spin_unlock(&ailp->ail_lock); |
---|
3499 | | - |
---|
3500 | | - return 0; |
---|
3501 | | -} |
---|
3502 | | - |
---|
3503 | | -/* |
---|
3504 | | - * This routine is called to create an in-core extent rmap update |
---|
3505 | | - * item from the rui format structure which was logged on disk. |
---|
3506 | | - * It allocates an in-core rui, copies the extents from the format |
---|
3507 | | - * structure into it, and adds the rui to the AIL with the given |
---|
3508 | | - * LSN. |
---|
3509 | | - */ |
---|
3510 | | -STATIC int |
---|
3511 | | -xlog_recover_rui_pass2( |
---|
3512 | | - struct xlog *log, |
---|
3513 | | - struct xlog_recover_item *item, |
---|
3514 | | - xfs_lsn_t lsn) |
---|
3515 | | -{ |
---|
3516 | | - int error; |
---|
3517 | | - struct xfs_mount *mp = log->l_mp; |
---|
3518 | | - struct xfs_rui_log_item *ruip; |
---|
3519 | | - struct xfs_rui_log_format *rui_formatp; |
---|
3520 | | - |
---|
3521 | | - rui_formatp = item->ri_buf[0].i_addr; |
---|
3522 | | - |
---|
3523 | | - ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); |
---|
3524 | | - error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format); |
---|
3525 | | - if (error) { |
---|
3526 | | - xfs_rui_item_free(ruip); |
---|
3527 | | - return error; |
---|
3528 | | - } |
---|
3529 | | - atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents); |
---|
3530 | | - |
---|
3531 | | - spin_lock(&log->l_ailp->ail_lock); |
---|
3532 | | - /* |
---|
3533 | | - * The RUI has two references. One for the RUD and one for RUI to ensure |
---|
3534 | | - * it makes it into the AIL. Insert the RUI into the AIL directly and |
---|
3535 | | - * drop the RUI reference. Note that xfs_trans_ail_update() drops the |
---|
3536 | | - * AIL lock. |
---|
3537 | | - */ |
---|
3538 | | - xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn); |
---|
3539 | | - xfs_rui_release(ruip); |
---|
3540 | | - return 0; |
---|
3541 | | -} |
---|
3542 | | - |
---|
3543 | | - |
---|
3544 | | -/* |
---|
3545 | | - * This routine is called when an RUD format structure is found in a committed |
---|
3546 | | - * transaction in the log. Its purpose is to cancel the corresponding RUI if it |
---|
3547 | | - * was still in the log. To do this it searches the AIL for the RUI with an id |
---|
3548 | | - * equal to that in the RUD format structure. If we find it we drop the RUD |
---|
3549 | | - * reference, which removes the RUI from the AIL and frees it. |
---|
3550 | | - */ |
---|
3551 | | -STATIC int |
---|
3552 | | -xlog_recover_rud_pass2( |
---|
3553 | | - struct xlog *log, |
---|
3554 | | - struct xlog_recover_item *item) |
---|
3555 | | -{ |
---|
3556 | | - struct xfs_rud_log_format *rud_formatp; |
---|
3557 | | - struct xfs_rui_log_item *ruip = NULL; |
---|
3558 | | - struct xfs_log_item *lip; |
---|
3559 | | - uint64_t rui_id; |
---|
3560 | | - struct xfs_ail_cursor cur; |
---|
3561 | | - struct xfs_ail *ailp = log->l_ailp; |
---|
3562 | | - |
---|
3563 | | - rud_formatp = item->ri_buf[0].i_addr; |
---|
3564 | | - ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format)); |
---|
3565 | | - rui_id = rud_formatp->rud_rui_id; |
---|
3566 | | - |
---|
3567 | | - /* |
---|
3568 | | - * Search for the RUI with the id in the RUD format structure in the |
---|
3569 | | - * AIL. |
---|
3570 | | - */ |
---|
3571 | | - spin_lock(&ailp->ail_lock); |
---|
3572 | | - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
---|
3573 | | - while (lip != NULL) { |
---|
3574 | | - if (lip->li_type == XFS_LI_RUI) { |
---|
3575 | | - ruip = (struct xfs_rui_log_item *)lip; |
---|
3576 | | - if (ruip->rui_format.rui_id == rui_id) { |
---|
3577 | | - /* |
---|
3578 | | - * Drop the RUD reference to the RUI. This |
---|
3579 | | - * removes the RUI from the AIL and frees it. |
---|
3580 | | - */ |
---|
3581 | | - spin_unlock(&ailp->ail_lock); |
---|
3582 | | - xfs_rui_release(ruip); |
---|
3583 | | - spin_lock(&ailp->ail_lock); |
---|
3584 | | - break; |
---|
3585 | | - } |
---|
3586 | | - } |
---|
3587 | | - lip = xfs_trans_ail_cursor_next(ailp, &cur); |
---|
3588 | | - } |
---|
3589 | | - |
---|
3590 | | - xfs_trans_ail_cursor_done(&cur); |
---|
3591 | | - spin_unlock(&ailp->ail_lock); |
---|
3592 | | - |
---|
3593 | | - return 0; |
---|
3594 | | -} |
---|
3595 | | - |
---|
3596 | | -/* |
---|
3597 | | - * Copy an CUI format buffer from the given buf, and into the destination |
---|
3598 | | - * CUI format structure. The CUI/CUD items were designed not to need any |
---|
3599 | | - * special alignment handling. |
---|
3600 | | - */ |
---|
3601 | | -static int |
---|
3602 | | -xfs_cui_copy_format( |
---|
3603 | | - struct xfs_log_iovec *buf, |
---|
3604 | | - struct xfs_cui_log_format *dst_cui_fmt) |
---|
3605 | | -{ |
---|
3606 | | - struct xfs_cui_log_format *src_cui_fmt; |
---|
3607 | | - uint len; |
---|
3608 | | - |
---|
3609 | | - src_cui_fmt = buf->i_addr; |
---|
3610 | | - len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents); |
---|
3611 | | - |
---|
3612 | | - if (buf->i_len == len) { |
---|
3613 | | - memcpy(dst_cui_fmt, src_cui_fmt, len); |
---|
3614 | | - return 0; |
---|
3615 | | - } |
---|
3616 | | - return -EFSCORRUPTED; |
---|
3617 | | -} |
---|
3618 | | - |
---|
3619 | | -/* |
---|
3620 | | - * This routine is called to create an in-core extent refcount update |
---|
3621 | | - * item from the cui format structure which was logged on disk. |
---|
3622 | | - * It allocates an in-core cui, copies the extents from the format |
---|
3623 | | - * structure into it, and adds the cui to the AIL with the given |
---|
3624 | | - * LSN. |
---|
3625 | | - */ |
---|
3626 | | -STATIC int |
---|
3627 | | -xlog_recover_cui_pass2( |
---|
3628 | | - struct xlog *log, |
---|
3629 | | - struct xlog_recover_item *item, |
---|
3630 | | - xfs_lsn_t lsn) |
---|
3631 | | -{ |
---|
3632 | | - int error; |
---|
3633 | | - struct xfs_mount *mp = log->l_mp; |
---|
3634 | | - struct xfs_cui_log_item *cuip; |
---|
3635 | | - struct xfs_cui_log_format *cui_formatp; |
---|
3636 | | - |
---|
3637 | | - cui_formatp = item->ri_buf[0].i_addr; |
---|
3638 | | - |
---|
3639 | | - cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); |
---|
3640 | | - error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format); |
---|
3641 | | - if (error) { |
---|
3642 | | - xfs_cui_item_free(cuip); |
---|
3643 | | - return error; |
---|
3644 | | - } |
---|
3645 | | - atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); |
---|
3646 | | - |
---|
3647 | | - spin_lock(&log->l_ailp->ail_lock); |
---|
3648 | | - /* |
---|
3649 | | - * The CUI has two references. One for the CUD and one for CUI to ensure |
---|
3650 | | - * it makes it into the AIL. Insert the CUI into the AIL directly and |
---|
3651 | | - * drop the CUI reference. Note that xfs_trans_ail_update() drops the |
---|
3652 | | - * AIL lock. |
---|
3653 | | - */ |
---|
3654 | | - xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn); |
---|
3655 | | - xfs_cui_release(cuip); |
---|
3656 | | - return 0; |
---|
3657 | | -} |
---|
3658 | | - |
---|
3659 | | - |
---|
3660 | | -/* |
---|
3661 | | - * This routine is called when a CUD format structure is found in a committed |
---|
3662 | | - * transaction in the log. Its purpose is to cancel the corresponding CUI if it |
---|
3663 | | - * was still in the log. To do this it searches the AIL for the CUI with an id |
---|
3664 | | - * equal to that in the CUD format structure. If we find it we drop the CUD |
---|
3665 | | - * reference, which removes the CUI from the AIL and frees it. |
---|
3666 | | - */ |
---|
3667 | | -STATIC int |
---|
3668 | | -xlog_recover_cud_pass2( |
---|
3669 | | - struct xlog *log, |
---|
3670 | | - struct xlog_recover_item *item) |
---|
3671 | | -{ |
---|
3672 | | - struct xfs_cud_log_format *cud_formatp; |
---|
3673 | | - struct xfs_cui_log_item *cuip = NULL; |
---|
3674 | | - struct xfs_log_item *lip; |
---|
3675 | | - uint64_t cui_id; |
---|
3676 | | - struct xfs_ail_cursor cur; |
---|
3677 | | - struct xfs_ail *ailp = log->l_ailp; |
---|
3678 | | - |
---|
3679 | | - cud_formatp = item->ri_buf[0].i_addr; |
---|
3680 | | - if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) |
---|
3681 | | - return -EFSCORRUPTED; |
---|
3682 | | - cui_id = cud_formatp->cud_cui_id; |
---|
3683 | | - |
---|
3684 | | - /* |
---|
3685 | | - * Search for the CUI with the id in the CUD format structure in the |
---|
3686 | | - * AIL. |
---|
3687 | | - */ |
---|
3688 | | - spin_lock(&ailp->ail_lock); |
---|
3689 | | - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
---|
3690 | | - while (lip != NULL) { |
---|
3691 | | - if (lip->li_type == XFS_LI_CUI) { |
---|
3692 | | - cuip = (struct xfs_cui_log_item *)lip; |
---|
3693 | | - if (cuip->cui_format.cui_id == cui_id) { |
---|
3694 | | - /* |
---|
3695 | | - * Drop the CUD reference to the CUI. This |
---|
3696 | | - * removes the CUI from the AIL and frees it. |
---|
3697 | | - */ |
---|
3698 | | - spin_unlock(&ailp->ail_lock); |
---|
3699 | | - xfs_cui_release(cuip); |
---|
3700 | | - spin_lock(&ailp->ail_lock); |
---|
3701 | | - break; |
---|
3702 | | - } |
---|
3703 | | - } |
---|
3704 | | - lip = xfs_trans_ail_cursor_next(ailp, &cur); |
---|
3705 | | - } |
---|
3706 | | - |
---|
3707 | | - xfs_trans_ail_cursor_done(&cur); |
---|
3708 | | - spin_unlock(&ailp->ail_lock); |
---|
3709 | | - |
---|
3710 | | - return 0; |
---|
3711 | | -} |
---|
3712 | | - |
---|
3713 | | -/* |
---|
3714 | | - * Copy a BUI format buffer from the given buf, and into the destination |
---|
3715 | | - * BUI format structure. The BUI/BUD items were designed not to need any |
---|
3716 | | - * special alignment handling. |
---|
3717 | | - */ |
---|
3718 | | -static int |
---|
3719 | | -xfs_bui_copy_format( |
---|
3720 | | - struct xfs_log_iovec *buf, |
---|
3721 | | - struct xfs_bui_log_format *dst_bui_fmt) |
---|
3722 | | -{ |
---|
3723 | | - struct xfs_bui_log_format *src_bui_fmt; |
---|
3724 | | - uint len; |
---|
3725 | | - |
---|
3726 | | - src_bui_fmt = buf->i_addr; |
---|
3727 | | - len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents); |
---|
3728 | | - |
---|
3729 | | - if (buf->i_len == len) { |
---|
3730 | | - memcpy(dst_bui_fmt, src_bui_fmt, len); |
---|
3731 | | - return 0; |
---|
3732 | | - } |
---|
3733 | | - return -EFSCORRUPTED; |
---|
3734 | | -} |
---|
3735 | | - |
---|
3736 | | -/* |
---|
3737 | | - * This routine is called to create an in-core extent bmap update |
---|
3738 | | - * item from the bui format structure which was logged on disk. |
---|
3739 | | - * It allocates an in-core bui, copies the extents from the format |
---|
3740 | | - * structure into it, and adds the bui to the AIL with the given |
---|
3741 | | - * LSN. |
---|
3742 | | - */ |
---|
3743 | | -STATIC int |
---|
3744 | | -xlog_recover_bui_pass2( |
---|
3745 | | - struct xlog *log, |
---|
3746 | | - struct xlog_recover_item *item, |
---|
3747 | | - xfs_lsn_t lsn) |
---|
3748 | | -{ |
---|
3749 | | - int error; |
---|
3750 | | - struct xfs_mount *mp = log->l_mp; |
---|
3751 | | - struct xfs_bui_log_item *buip; |
---|
3752 | | - struct xfs_bui_log_format *bui_formatp; |
---|
3753 | | - |
---|
3754 | | - bui_formatp = item->ri_buf[0].i_addr; |
---|
3755 | | - |
---|
3756 | | - if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) |
---|
3757 | | - return -EFSCORRUPTED; |
---|
3758 | | - buip = xfs_bui_init(mp); |
---|
3759 | | - error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); |
---|
3760 | | - if (error) { |
---|
3761 | | - xfs_bui_item_free(buip); |
---|
3762 | | - return error; |
---|
3763 | | - } |
---|
3764 | | - atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); |
---|
3765 | | - |
---|
3766 | | - spin_lock(&log->l_ailp->ail_lock); |
---|
3767 | | - /* |
---|
3768 | | - * The BUI has two references. One for the BUD and one for BUI to ensure |
---|
3769 | | - * it makes it into the AIL. Insert the BUI into the AIL directly and |
---|
3770 | | - * drop the BUI reference. Note that xfs_trans_ail_update() drops the |
---|
3771 | | - * AIL lock. |
---|
3772 | | - */ |
---|
3773 | | - xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn); |
---|
3774 | | - xfs_bui_release(buip); |
---|
3775 | | - return 0; |
---|
3776 | | -} |
---|
3777 | | - |
---|
3778 | | - |
---|
3779 | | -/* |
---|
3780 | | - * This routine is called when a BUD format structure is found in a committed |
---|
3781 | | - * transaction in the log. Its purpose is to cancel the corresponding BUI if it |
---|
3782 | | - * was still in the log. To do this it searches the AIL for the BUI with an id |
---|
3783 | | - * equal to that in the BUD format structure. If we find it we drop the BUD |
---|
3784 | | - * reference, which removes the BUI from the AIL and frees it. |
---|
3785 | | - */ |
---|
3786 | | -STATIC int |
---|
3787 | | -xlog_recover_bud_pass2( |
---|
3788 | | - struct xlog *log, |
---|
3789 | | - struct xlog_recover_item *item) |
---|
3790 | | -{ |
---|
3791 | | - struct xfs_bud_log_format *bud_formatp; |
---|
3792 | | - struct xfs_bui_log_item *buip = NULL; |
---|
3793 | | - struct xfs_log_item *lip; |
---|
3794 | | - uint64_t bui_id; |
---|
3795 | | - struct xfs_ail_cursor cur; |
---|
3796 | | - struct xfs_ail *ailp = log->l_ailp; |
---|
3797 | | - |
---|
3798 | | - bud_formatp = item->ri_buf[0].i_addr; |
---|
3799 | | - if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) |
---|
3800 | | - return -EFSCORRUPTED; |
---|
3801 | | - bui_id = bud_formatp->bud_bui_id; |
---|
3802 | | - |
---|
3803 | | - /* |
---|
3804 | | - * Search for the BUI with the id in the BUD format structure in the |
---|
3805 | | - * AIL. |
---|
3806 | | - */ |
---|
3807 | | - spin_lock(&ailp->ail_lock); |
---|
3808 | | - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
---|
3809 | | - while (lip != NULL) { |
---|
3810 | | - if (lip->li_type == XFS_LI_BUI) { |
---|
3811 | | - buip = (struct xfs_bui_log_item *)lip; |
---|
3812 | | - if (buip->bui_format.bui_id == bui_id) { |
---|
3813 | | - /* |
---|
3814 | | - * Drop the BUD reference to the BUI. This |
---|
3815 | | - * removes the BUI from the AIL and frees it. |
---|
3816 | | - */ |
---|
3817 | | - spin_unlock(&ailp->ail_lock); |
---|
3818 | | - xfs_bui_release(buip); |
---|
3819 | | - spin_lock(&ailp->ail_lock); |
---|
3820 | | - break; |
---|
3821 | | - } |
---|
3822 | | - } |
---|
3823 | | - lip = xfs_trans_ail_cursor_next(ailp, &cur); |
---|
3824 | | - } |
---|
3825 | | - |
---|
3826 | | - xfs_trans_ail_cursor_done(&cur); |
---|
3827 | | - spin_unlock(&ailp->ail_lock); |
---|
3828 | | - |
---|
3829 | | - return 0; |
---|
3830 | | -} |
---|
3831 | | - |
---|
3832 | | -/* |
---|
3833 | | - * This routine is called when an inode create format structure is found in a |
---|
3834 | | - * committed transaction in the log. Its purpose is to initialise the inodes |
---|
3835 | | - * being allocated on disk. This requires us to get inode cluster buffers that |
---|
3836 | | - * match the range to be initialised, stamped with inode templates and written |
---|
3837 | | - * by delayed write so that subsequent modifications will hit the cached buffer |
---|
3838 | | - * and only need writing out at the end of recovery. |
---|
3839 | | - */ |
---|
3840 | | -STATIC int |
---|
3841 | | -xlog_recover_do_icreate_pass2( |
---|
3842 | | - struct xlog *log, |
---|
3843 | | - struct list_head *buffer_list, |
---|
3844 | | - xlog_recover_item_t *item) |
---|
3845 | | -{ |
---|
3846 | | - struct xfs_mount *mp = log->l_mp; |
---|
3847 | | - struct xfs_icreate_log *icl; |
---|
3848 | | - xfs_agnumber_t agno; |
---|
3849 | | - xfs_agblock_t agbno; |
---|
3850 | | - unsigned int count; |
---|
3851 | | - unsigned int isize; |
---|
3852 | | - xfs_agblock_t length; |
---|
3853 | | - int blks_per_cluster; |
---|
3854 | | - int bb_per_cluster; |
---|
3855 | | - int cancel_count; |
---|
3856 | | - int nbufs; |
---|
3857 | | - int i; |
---|
3858 | | - |
---|
3859 | | - icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; |
---|
3860 | | - if (icl->icl_type != XFS_LI_ICREATE) { |
---|
3861 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); |
---|
3862 | | - return -EINVAL; |
---|
3863 | | - } |
---|
3864 | | - |
---|
3865 | | - if (icl->icl_size != 1) { |
---|
3866 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); |
---|
3867 | | - return -EINVAL; |
---|
3868 | | - } |
---|
3869 | | - |
---|
3870 | | - agno = be32_to_cpu(icl->icl_ag); |
---|
3871 | | - if (agno >= mp->m_sb.sb_agcount) { |
---|
3872 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); |
---|
3873 | | - return -EINVAL; |
---|
3874 | | - } |
---|
3875 | | - agbno = be32_to_cpu(icl->icl_agbno); |
---|
3876 | | - if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { |
---|
3877 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); |
---|
3878 | | - return -EINVAL; |
---|
3879 | | - } |
---|
3880 | | - isize = be32_to_cpu(icl->icl_isize); |
---|
3881 | | - if (isize != mp->m_sb.sb_inodesize) { |
---|
3882 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); |
---|
3883 | | - return -EINVAL; |
---|
3884 | | - } |
---|
3885 | | - count = be32_to_cpu(icl->icl_count); |
---|
3886 | | - if (!count) { |
---|
3887 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); |
---|
3888 | | - return -EINVAL; |
---|
3889 | | - } |
---|
3890 | | - length = be32_to_cpu(icl->icl_length); |
---|
3891 | | - if (!length || length >= mp->m_sb.sb_agblocks) { |
---|
3892 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); |
---|
3893 | | - return -EINVAL; |
---|
3894 | | - } |
---|
3895 | | - |
---|
3896 | | - /* |
---|
3897 | | - * The inode chunk is either full or sparse and we only support |
---|
3898 | | - * m_ialloc_min_blks sized sparse allocations at this time. |
---|
3899 | | - */ |
---|
3900 | | - if (length != mp->m_ialloc_blks && |
---|
3901 | | - length != mp->m_ialloc_min_blks) { |
---|
3902 | | - xfs_warn(log->l_mp, |
---|
3903 | | - "%s: unsupported chunk length", __FUNCTION__); |
---|
3904 | | - return -EINVAL; |
---|
3905 | | - } |
---|
3906 | | - |
---|
3907 | | - /* verify inode count is consistent with extent length */ |
---|
3908 | | - if ((count >> mp->m_sb.sb_inopblog) != length) { |
---|
3909 | | - xfs_warn(log->l_mp, |
---|
3910 | | - "%s: inconsistent inode count and chunk length", |
---|
3911 | | - __FUNCTION__); |
---|
3912 | | - return -EINVAL; |
---|
3913 | | - } |
---|
3914 | | - |
---|
3915 | | - /* |
---|
3916 | | - * The icreate transaction can cover multiple cluster buffers and these |
---|
3917 | | - * buffers could have been freed and reused. Check the individual |
---|
3918 | | - * buffers for cancellation so we don't overwrite anything written after |
---|
3919 | | - * a cancellation. |
---|
3920 | | - */ |
---|
3921 | | - blks_per_cluster = xfs_icluster_size_fsb(mp); |
---|
3922 | | - bb_per_cluster = XFS_FSB_TO_BB(mp, blks_per_cluster); |
---|
3923 | | - nbufs = length / blks_per_cluster; |
---|
3924 | | - for (i = 0, cancel_count = 0; i < nbufs; i++) { |
---|
3925 | | - xfs_daddr_t daddr; |
---|
3926 | | - |
---|
3927 | | - daddr = XFS_AGB_TO_DADDR(mp, agno, |
---|
3928 | | - agbno + i * blks_per_cluster); |
---|
3929 | | - if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0)) |
---|
3930 | | - cancel_count++; |
---|
3931 | | - } |
---|
3932 | | - |
---|
3933 | | - /* |
---|
3934 | | - * We currently only use icreate for a single allocation at a time. This |
---|
3935 | | - * means we should expect either all or none of the buffers to be |
---|
3936 | | - * cancelled. Be conservative and skip replay if at least one buffer is |
---|
3937 | | - * cancelled, but warn the user that something is awry if the buffers |
---|
3938 | | - * are not consistent. |
---|
3939 | | - * |
---|
3940 | | - * XXX: This must be refined to only skip cancelled clusters once we use |
---|
3941 | | - * icreate for multiple chunk allocations. |
---|
3942 | | - */ |
---|
3943 | | - ASSERT(!cancel_count || cancel_count == nbufs); |
---|
3944 | | - if (cancel_count) { |
---|
3945 | | - if (cancel_count != nbufs) |
---|
3946 | | - xfs_warn(mp, |
---|
3947 | | - "WARNING: partial inode chunk cancellation, skipped icreate."); |
---|
3948 | | - trace_xfs_log_recover_icreate_cancel(log, icl); |
---|
3949 | | - return 0; |
---|
3950 | | - } |
---|
3951 | | - |
---|
3952 | | - trace_xfs_log_recover_icreate_recover(log, icl); |
---|
3953 | | - return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, |
---|
3954 | | - length, be32_to_cpu(icl->icl_gen)); |
---|
3955 | | -} |
---|
3956 | | - |
---|
3957 | | -STATIC void |
---|
3958 | | -xlog_recover_buffer_ra_pass2( |
---|
3959 | | - struct xlog *log, |
---|
3960 | | - struct xlog_recover_item *item) |
---|
3961 | | -{ |
---|
3962 | | - struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr; |
---|
3963 | | - struct xfs_mount *mp = log->l_mp; |
---|
3964 | | - |
---|
3965 | | - if (xlog_peek_buffer_cancelled(log, buf_f->blf_blkno, |
---|
3966 | | - buf_f->blf_len, buf_f->blf_flags)) { |
---|
3967 | | - return; |
---|
3968 | | - } |
---|
3969 | | - |
---|
3970 | | - xfs_buf_readahead(mp->m_ddev_targp, buf_f->blf_blkno, |
---|
3971 | | - buf_f->blf_len, NULL); |
---|
3972 | | -} |
---|
3973 | | - |
---|
3974 | | -STATIC void |
---|
3975 | | -xlog_recover_inode_ra_pass2( |
---|
3976 | | - struct xlog *log, |
---|
3977 | | - struct xlog_recover_item *item) |
---|
3978 | | -{ |
---|
3979 | | - struct xfs_inode_log_format ilf_buf; |
---|
3980 | | - struct xfs_inode_log_format *ilfp; |
---|
3981 | | - struct xfs_mount *mp = log->l_mp; |
---|
3982 | | - int error; |
---|
3983 | | - |
---|
3984 | | - if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { |
---|
3985 | | - ilfp = item->ri_buf[0].i_addr; |
---|
3986 | | - } else { |
---|
3987 | | - ilfp = &ilf_buf; |
---|
3988 | | - memset(ilfp, 0, sizeof(*ilfp)); |
---|
3989 | | - error = xfs_inode_item_format_convert(&item->ri_buf[0], ilfp); |
---|
3990 | | - if (error) |
---|
3991 | | - return; |
---|
3992 | | - } |
---|
3993 | | - |
---|
3994 | | - if (xlog_peek_buffer_cancelled(log, ilfp->ilf_blkno, ilfp->ilf_len, 0)) |
---|
3995 | | - return; |
---|
3996 | | - |
---|
3997 | | - xfs_buf_readahead(mp->m_ddev_targp, ilfp->ilf_blkno, |
---|
3998 | | - ilfp->ilf_len, &xfs_inode_buf_ra_ops); |
---|
3999 | | -} |
---|
4000 | | - |
---|
4001 | | -STATIC void |
---|
4002 | | -xlog_recover_dquot_ra_pass2( |
---|
4003 | | - struct xlog *log, |
---|
4004 | | - struct xlog_recover_item *item) |
---|
4005 | | -{ |
---|
4006 | | - struct xfs_mount *mp = log->l_mp; |
---|
4007 | | - struct xfs_disk_dquot *recddq; |
---|
4008 | | - struct xfs_dq_logformat *dq_f; |
---|
4009 | | - uint type; |
---|
4010 | | - int len; |
---|
4011 | | - |
---|
4012 | | - |
---|
4013 | | - if (mp->m_qflags == 0) |
---|
4014 | | - return; |
---|
4015 | | - |
---|
4016 | | - recddq = item->ri_buf[1].i_addr; |
---|
4017 | | - if (recddq == NULL) |
---|
4018 | | - return; |
---|
4019 | | - if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) |
---|
4020 | | - return; |
---|
4021 | | - |
---|
4022 | | - type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); |
---|
4023 | | - ASSERT(type); |
---|
4024 | | - if (log->l_quotaoffs_flag & type) |
---|
4025 | | - return; |
---|
4026 | | - |
---|
4027 | | - dq_f = item->ri_buf[0].i_addr; |
---|
4028 | | - ASSERT(dq_f); |
---|
4029 | | - ASSERT(dq_f->qlf_len == 1); |
---|
4030 | | - |
---|
4031 | | - len = XFS_FSB_TO_BB(mp, dq_f->qlf_len); |
---|
4032 | | - if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0)) |
---|
4033 | | - return; |
---|
4034 | | - |
---|
4035 | | - xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len, |
---|
4036 | | - &xfs_dquot_buf_ra_ops); |
---|
4037 | | -} |
---|
4038 | | - |
---|
4039 | | -STATIC void |
---|
4040 | | -xlog_recover_ra_pass2( |
---|
4041 | | - struct xlog *log, |
---|
4042 | | - struct xlog_recover_item *item) |
---|
4043 | | -{ |
---|
4044 | | - switch (ITEM_TYPE(item)) { |
---|
4045 | | - case XFS_LI_BUF: |
---|
4046 | | - xlog_recover_buffer_ra_pass2(log, item); |
---|
4047 | | - break; |
---|
4048 | | - case XFS_LI_INODE: |
---|
4049 | | - xlog_recover_inode_ra_pass2(log, item); |
---|
4050 | | - break; |
---|
4051 | | - case XFS_LI_DQUOT: |
---|
4052 | | - xlog_recover_dquot_ra_pass2(log, item); |
---|
4053 | | - break; |
---|
4054 | | - case XFS_LI_EFI: |
---|
4055 | | - case XFS_LI_EFD: |
---|
4056 | | - case XFS_LI_QUOTAOFF: |
---|
4057 | | - case XFS_LI_RUI: |
---|
4058 | | - case XFS_LI_RUD: |
---|
4059 | | - case XFS_LI_CUI: |
---|
4060 | | - case XFS_LI_CUD: |
---|
4061 | | - case XFS_LI_BUI: |
---|
4062 | | - case XFS_LI_BUD: |
---|
4063 | | - default: |
---|
4064 | | - break; |
---|
4065 | | - } |
---|
4066 | | -} |
---|
4067 | | - |
---|
4068 | | -STATIC int |
---|
4069 | | -xlog_recover_commit_pass1( |
---|
4070 | | - struct xlog *log, |
---|
4071 | | - struct xlog_recover *trans, |
---|
4072 | | - struct xlog_recover_item *item) |
---|
4073 | | -{ |
---|
4074 | | - trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); |
---|
4075 | | - |
---|
4076 | | - switch (ITEM_TYPE(item)) { |
---|
4077 | | - case XFS_LI_BUF: |
---|
4078 | | - return xlog_recover_buffer_pass1(log, item); |
---|
4079 | | - case XFS_LI_QUOTAOFF: |
---|
4080 | | - return xlog_recover_quotaoff_pass1(log, item); |
---|
4081 | | - case XFS_LI_INODE: |
---|
4082 | | - case XFS_LI_EFI: |
---|
4083 | | - case XFS_LI_EFD: |
---|
4084 | | - case XFS_LI_DQUOT: |
---|
4085 | | - case XFS_LI_ICREATE: |
---|
4086 | | - case XFS_LI_RUI: |
---|
4087 | | - case XFS_LI_RUD: |
---|
4088 | | - case XFS_LI_CUI: |
---|
4089 | | - case XFS_LI_CUD: |
---|
4090 | | - case XFS_LI_BUI: |
---|
4091 | | - case XFS_LI_BUD: |
---|
4092 | | - /* nothing to do in pass 1 */ |
---|
4093 | | - return 0; |
---|
4094 | | - default: |
---|
4095 | | - xfs_warn(log->l_mp, "%s: invalid item type (%d)", |
---|
4096 | | - __func__, ITEM_TYPE(item)); |
---|
4097 | | - ASSERT(0); |
---|
4098 | | - return -EIO; |
---|
4099 | | - } |
---|
4100 | | -} |
---|
4101 | | - |
---|
4102 | | -STATIC int |
---|
4103 | | -xlog_recover_commit_pass2( |
---|
4104 | | - struct xlog *log, |
---|
4105 | | - struct xlog_recover *trans, |
---|
4106 | | - struct list_head *buffer_list, |
---|
4107 | | - struct xlog_recover_item *item) |
---|
4108 | | -{ |
---|
4109 | | - trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); |
---|
4110 | | - |
---|
4111 | | - switch (ITEM_TYPE(item)) { |
---|
4112 | | - case XFS_LI_BUF: |
---|
4113 | | - return xlog_recover_buffer_pass2(log, buffer_list, item, |
---|
4114 | | - trans->r_lsn); |
---|
4115 | | - case XFS_LI_INODE: |
---|
4116 | | - return xlog_recover_inode_pass2(log, buffer_list, item, |
---|
4117 | | - trans->r_lsn); |
---|
4118 | | - case XFS_LI_EFI: |
---|
4119 | | - return xlog_recover_efi_pass2(log, item, trans->r_lsn); |
---|
4120 | | - case XFS_LI_EFD: |
---|
4121 | | - return xlog_recover_efd_pass2(log, item); |
---|
4122 | | - case XFS_LI_RUI: |
---|
4123 | | - return xlog_recover_rui_pass2(log, item, trans->r_lsn); |
---|
4124 | | - case XFS_LI_RUD: |
---|
4125 | | - return xlog_recover_rud_pass2(log, item); |
---|
4126 | | - case XFS_LI_CUI: |
---|
4127 | | - return xlog_recover_cui_pass2(log, item, trans->r_lsn); |
---|
4128 | | - case XFS_LI_CUD: |
---|
4129 | | - return xlog_recover_cud_pass2(log, item); |
---|
4130 | | - case XFS_LI_BUI: |
---|
4131 | | - return xlog_recover_bui_pass2(log, item, trans->r_lsn); |
---|
4132 | | - case XFS_LI_BUD: |
---|
4133 | | - return xlog_recover_bud_pass2(log, item); |
---|
4134 | | - case XFS_LI_DQUOT: |
---|
4135 | | - return xlog_recover_dquot_pass2(log, buffer_list, item, |
---|
4136 | | - trans->r_lsn); |
---|
4137 | | - case XFS_LI_ICREATE: |
---|
4138 | | - return xlog_recover_do_icreate_pass2(log, buffer_list, item); |
---|
4139 | | - case XFS_LI_QUOTAOFF: |
---|
4140 | | - /* nothing to do in pass2 */ |
---|
4141 | | - return 0; |
---|
4142 | | - default: |
---|
4143 | | - xfs_warn(log->l_mp, "%s: invalid item type (%d)", |
---|
4144 | | - __func__, ITEM_TYPE(item)); |
---|
4145 | | - ASSERT(0); |
---|
4146 | | - return -EIO; |
---|
4147 | | - } |
---|
| 1918 | + if (!xlog_is_buffer_cancelled(log, blkno, len)) |
---|
| 1919 | + xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops); |
---|
4148 | 1920 | } |
---|
4149 | 1921 | |
---|
4150 | 1922 | STATIC int |
---|
.. | .. |
---|
4158 | 1930 | int error = 0; |
---|
4159 | 1931 | |
---|
4160 | 1932 | list_for_each_entry(item, item_list, ri_list) { |
---|
4161 | | - error = xlog_recover_commit_pass2(log, trans, |
---|
4162 | | - buffer_list, item); |
---|
| 1933 | + trace_xfs_log_recover_item_recover(log, trans, item, |
---|
| 1934 | + XLOG_RECOVER_PASS2); |
---|
| 1935 | + |
---|
| 1936 | + if (item->ri_ops->commit_pass2) |
---|
| 1937 | + error = item->ri_ops->commit_pass2(log, buffer_list, |
---|
| 1938 | + item, trans->r_lsn); |
---|
4163 | 1939 | if (error) |
---|
4164 | 1940 | return error; |
---|
4165 | 1941 | } |
---|
.. | .. |
---|
4196 | 1972 | return error; |
---|
4197 | 1973 | |
---|
4198 | 1974 | list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) { |
---|
| 1975 | + trace_xfs_log_recover_item_recover(log, trans, item, pass); |
---|
| 1976 | + |
---|
4199 | 1977 | switch (pass) { |
---|
4200 | 1978 | case XLOG_RECOVER_PASS1: |
---|
4201 | | - error = xlog_recover_commit_pass1(log, trans, item); |
---|
| 1979 | + if (item->ri_ops->commit_pass1) |
---|
| 1980 | + error = item->ri_ops->commit_pass1(log, item); |
---|
4202 | 1981 | break; |
---|
4203 | 1982 | case XLOG_RECOVER_PASS2: |
---|
4204 | | - xlog_recover_ra_pass2(log, item); |
---|
| 1983 | + if (item->ri_ops->ra_pass2) |
---|
| 1984 | + item->ri_ops->ra_pass2(log, item); |
---|
4205 | 1985 | list_move_tail(&item->ri_list, &ra_list); |
---|
4206 | 1986 | items_queued++; |
---|
4207 | 1987 | if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) { |
---|
.. | .. |
---|
4238 | 2018 | xlog_recover_add_item( |
---|
4239 | 2019 | struct list_head *head) |
---|
4240 | 2020 | { |
---|
4241 | | - xlog_recover_item_t *item; |
---|
| 2021 | + struct xlog_recover_item *item; |
---|
4242 | 2022 | |
---|
4243 | | - item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); |
---|
| 2023 | + item = kmem_zalloc(sizeof(struct xlog_recover_item), 0); |
---|
4244 | 2024 | INIT_LIST_HEAD(&item->ri_list); |
---|
4245 | 2025 | list_add_tail(&item->ri_list, head); |
---|
4246 | 2026 | } |
---|
.. | .. |
---|
4252 | 2032 | char *dp, |
---|
4253 | 2033 | int len) |
---|
4254 | 2034 | { |
---|
4255 | | - xlog_recover_item_t *item; |
---|
| 2035 | + struct xlog_recover_item *item; |
---|
4256 | 2036 | char *ptr, *old_ptr; |
---|
4257 | 2037 | int old_len; |
---|
4258 | 2038 | |
---|
.. | .. |
---|
4264 | 2044 | ASSERT(len <= sizeof(struct xfs_trans_header)); |
---|
4265 | 2045 | if (len > sizeof(struct xfs_trans_header)) { |
---|
4266 | 2046 | xfs_warn(log->l_mp, "%s: bad header length", __func__); |
---|
4267 | | - return -EIO; |
---|
| 2047 | + return -EFSCORRUPTED; |
---|
4268 | 2048 | } |
---|
4269 | 2049 | |
---|
4270 | 2050 | xlog_recover_add_item(&trans->r_itemq); |
---|
.. | .. |
---|
4275 | 2055 | } |
---|
4276 | 2056 | |
---|
4277 | 2057 | /* take the tail entry */ |
---|
4278 | | - item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); |
---|
| 2058 | + item = list_entry(trans->r_itemq.prev, struct xlog_recover_item, |
---|
| 2059 | + ri_list); |
---|
4279 | 2060 | |
---|
4280 | 2061 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; |
---|
4281 | 2062 | old_len = item->ri_buf[item->ri_cnt-1].i_len; |
---|
4282 | 2063 | |
---|
4283 | | - ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP); |
---|
| 2064 | + ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL); |
---|
| 2065 | + if (!ptr) |
---|
| 2066 | + return -ENOMEM; |
---|
4284 | 2067 | memcpy(&ptr[old_len], dp, len); |
---|
4285 | 2068 | item->ri_buf[item->ri_cnt-1].i_len += len; |
---|
4286 | 2069 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; |
---|
.. | .. |
---|
4309 | 2092 | int len) |
---|
4310 | 2093 | { |
---|
4311 | 2094 | struct xfs_inode_log_format *in_f; /* any will do */ |
---|
4312 | | - xlog_recover_item_t *item; |
---|
| 2095 | + struct xlog_recover_item *item; |
---|
4313 | 2096 | char *ptr; |
---|
4314 | 2097 | |
---|
4315 | 2098 | if (!len) |
---|
.. | .. |
---|
4320 | 2103 | xfs_warn(log->l_mp, "%s: bad header magic number", |
---|
4321 | 2104 | __func__); |
---|
4322 | 2105 | ASSERT(0); |
---|
4323 | | - return -EIO; |
---|
| 2106 | + return -EFSCORRUPTED; |
---|
4324 | 2107 | } |
---|
4325 | 2108 | |
---|
4326 | 2109 | if (len > sizeof(struct xfs_trans_header)) { |
---|
4327 | 2110 | xfs_warn(log->l_mp, "%s: bad header length", __func__); |
---|
4328 | 2111 | ASSERT(0); |
---|
4329 | | - return -EIO; |
---|
| 2112 | + return -EFSCORRUPTED; |
---|
4330 | 2113 | } |
---|
4331 | 2114 | |
---|
4332 | 2115 | /* |
---|
.. | .. |
---|
4340 | 2123 | return 0; |
---|
4341 | 2124 | } |
---|
4342 | 2125 | |
---|
4343 | | - ptr = kmem_alloc(len, KM_SLEEP); |
---|
| 2126 | + ptr = kmem_alloc(len, 0); |
---|
4344 | 2127 | memcpy(ptr, dp, len); |
---|
4345 | 2128 | in_f = (struct xfs_inode_log_format *)ptr; |
---|
4346 | 2129 | |
---|
4347 | 2130 | /* take the tail entry */ |
---|
4348 | | - item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); |
---|
| 2131 | + item = list_entry(trans->r_itemq.prev, struct xlog_recover_item, |
---|
| 2132 | + ri_list); |
---|
4349 | 2133 | if (item->ri_total != 0 && |
---|
4350 | 2134 | item->ri_total == item->ri_cnt) { |
---|
4351 | 2135 | /* tail item is in use, get a new one */ |
---|
4352 | 2136 | xlog_recover_add_item(&trans->r_itemq); |
---|
4353 | 2137 | item = list_entry(trans->r_itemq.prev, |
---|
4354 | | - xlog_recover_item_t, ri_list); |
---|
| 2138 | + struct xlog_recover_item, ri_list); |
---|
4355 | 2139 | } |
---|
4356 | 2140 | |
---|
4357 | 2141 | if (item->ri_total == 0) { /* first region to be added */ |
---|
.. | .. |
---|
4362 | 2146 | in_f->ilf_size); |
---|
4363 | 2147 | ASSERT(0); |
---|
4364 | 2148 | kmem_free(ptr); |
---|
4365 | | - return -EIO; |
---|
| 2149 | + return -EFSCORRUPTED; |
---|
4366 | 2150 | } |
---|
4367 | 2151 | |
---|
4368 | 2152 | item->ri_total = in_f->ilf_size; |
---|
4369 | 2153 | item->ri_buf = |
---|
4370 | 2154 | kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), |
---|
4371 | | - KM_SLEEP); |
---|
| 2155 | + 0); |
---|
4372 | 2156 | } |
---|
4373 | | - ASSERT(item->ri_total > item->ri_cnt); |
---|
| 2157 | + |
---|
| 2158 | + if (item->ri_total <= item->ri_cnt) { |
---|
| 2159 | + xfs_warn(log->l_mp, |
---|
| 2160 | + "log item region count (%d) overflowed size (%d)", |
---|
| 2161 | + item->ri_cnt, item->ri_total); |
---|
| 2162 | + ASSERT(0); |
---|
| 2163 | + kmem_free(ptr); |
---|
| 2164 | + return -EFSCORRUPTED; |
---|
| 2165 | + } |
---|
| 2166 | + |
---|
4374 | 2167 | /* Description region is ri_buf[0] */ |
---|
4375 | 2168 | item->ri_buf[item->ri_cnt].i_addr = ptr; |
---|
4376 | 2169 | item->ri_buf[item->ri_cnt].i_len = len; |
---|
.. | .. |
---|
4388 | 2181 | xlog_recover_free_trans( |
---|
4389 | 2182 | struct xlog_recover *trans) |
---|
4390 | 2183 | { |
---|
4391 | | - xlog_recover_item_t *item, *n; |
---|
| 2184 | + struct xlog_recover_item *item, *n; |
---|
4392 | 2185 | int i; |
---|
4393 | 2186 | |
---|
4394 | 2187 | hlist_del_init(&trans->r_list); |
---|
.. | .. |
---|
4457 | 2250 | default: |
---|
4458 | 2251 | xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); |
---|
4459 | 2252 | ASSERT(0); |
---|
4460 | | - error = -EIO; |
---|
| 2253 | + error = -EFSCORRUPTED; |
---|
4461 | 2254 | break; |
---|
4462 | 2255 | } |
---|
4463 | 2256 | if (error || freeit) |
---|
.. | .. |
---|
4502 | 2295 | * This is a new transaction so allocate a new recovery container to |
---|
4503 | 2296 | * hold the recovery ops that will follow. |
---|
4504 | 2297 | */ |
---|
4505 | | - trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP); |
---|
| 2298 | + trans = kmem_zalloc(sizeof(struct xlog_recover), 0); |
---|
4506 | 2299 | trans->r_log_tid = tid; |
---|
4507 | 2300 | trans->r_lsn = be64_to_cpu(rhead->h_lsn); |
---|
4508 | 2301 | INIT_LIST_HEAD(&trans->r_itemq); |
---|
.. | .. |
---|
4537 | 2330 | xfs_warn(log->l_mp, "%s: bad clientid 0x%x", |
---|
4538 | 2331 | __func__, ohead->oh_clientid); |
---|
4539 | 2332 | ASSERT(0); |
---|
4540 | | - return -EIO; |
---|
| 2333 | + return -EFSCORRUPTED; |
---|
4541 | 2334 | } |
---|
4542 | 2335 | |
---|
4543 | 2336 | /* |
---|
.. | .. |
---|
4547 | 2340 | if (dp + len > end) { |
---|
4548 | 2341 | xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len); |
---|
4549 | 2342 | WARN_ON(1); |
---|
4550 | | - return -EIO; |
---|
| 2343 | + return -EFSCORRUPTED; |
---|
4551 | 2344 | } |
---|
4552 | 2345 | |
---|
4553 | 2346 | trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead); |
---|
.. | .. |
---|
4640 | 2433 | return 0; |
---|
4641 | 2434 | } |
---|
4642 | 2435 | |
---|
4643 | | -/* Recover the EFI if necessary. */ |
---|
4644 | | -STATIC int |
---|
4645 | | -xlog_recover_process_efi( |
---|
4646 | | - struct xfs_mount *mp, |
---|
4647 | | - struct xfs_ail *ailp, |
---|
4648 | | - struct xfs_log_item *lip) |
---|
4649 | | -{ |
---|
4650 | | - struct xfs_efi_log_item *efip; |
---|
4651 | | - int error; |
---|
4652 | | - |
---|
4653 | | - /* |
---|
4654 | | - * Skip EFIs that we've already processed. |
---|
4655 | | - */ |
---|
4656 | | - efip = container_of(lip, struct xfs_efi_log_item, efi_item); |
---|
4657 | | - if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) |
---|
4658 | | - return 0; |
---|
4659 | | - |
---|
4660 | | - spin_unlock(&ailp->ail_lock); |
---|
4661 | | - error = xfs_efi_recover(mp, efip); |
---|
4662 | | - spin_lock(&ailp->ail_lock); |
---|
4663 | | - |
---|
4664 | | - return error; |
---|
4665 | | -} |
---|
4666 | | - |
---|
4667 | | -/* Release the EFI since we're cancelling everything. */ |
---|
4668 | | -STATIC void |
---|
4669 | | -xlog_recover_cancel_efi( |
---|
4670 | | - struct xfs_mount *mp, |
---|
4671 | | - struct xfs_ail *ailp, |
---|
4672 | | - struct xfs_log_item *lip) |
---|
4673 | | -{ |
---|
4674 | | - struct xfs_efi_log_item *efip; |
---|
4675 | | - |
---|
4676 | | - efip = container_of(lip, struct xfs_efi_log_item, efi_item); |
---|
4677 | | - |
---|
4678 | | - spin_unlock(&ailp->ail_lock); |
---|
4679 | | - xfs_efi_release(efip); |
---|
4680 | | - spin_lock(&ailp->ail_lock); |
---|
4681 | | -} |
---|
4682 | | - |
---|
4683 | | -/* Recover the RUI if necessary. */ |
---|
4684 | | -STATIC int |
---|
4685 | | -xlog_recover_process_rui( |
---|
4686 | | - struct xfs_mount *mp, |
---|
4687 | | - struct xfs_ail *ailp, |
---|
4688 | | - struct xfs_log_item *lip) |
---|
4689 | | -{ |
---|
4690 | | - struct xfs_rui_log_item *ruip; |
---|
4691 | | - int error; |
---|
4692 | | - |
---|
4693 | | - /* |
---|
4694 | | - * Skip RUIs that we've already processed. |
---|
4695 | | - */ |
---|
4696 | | - ruip = container_of(lip, struct xfs_rui_log_item, rui_item); |
---|
4697 | | - if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags)) |
---|
4698 | | - return 0; |
---|
4699 | | - |
---|
4700 | | - spin_unlock(&ailp->ail_lock); |
---|
4701 | | - error = xfs_rui_recover(mp, ruip); |
---|
4702 | | - spin_lock(&ailp->ail_lock); |
---|
4703 | | - |
---|
4704 | | - return error; |
---|
4705 | | -} |
---|
4706 | | - |
---|
4707 | | -/* Release the RUI since we're cancelling everything. */ |
---|
4708 | | -STATIC void |
---|
4709 | | -xlog_recover_cancel_rui( |
---|
4710 | | - struct xfs_mount *mp, |
---|
4711 | | - struct xfs_ail *ailp, |
---|
4712 | | - struct xfs_log_item *lip) |
---|
4713 | | -{ |
---|
4714 | | - struct xfs_rui_log_item *ruip; |
---|
4715 | | - |
---|
4716 | | - ruip = container_of(lip, struct xfs_rui_log_item, rui_item); |
---|
4717 | | - |
---|
4718 | | - spin_unlock(&ailp->ail_lock); |
---|
4719 | | - xfs_rui_release(ruip); |
---|
4720 | | - spin_lock(&ailp->ail_lock); |
---|
4721 | | -} |
---|
4722 | | - |
---|
4723 | | -/* Recover the CUI if necessary. */ |
---|
4724 | | -STATIC int |
---|
4725 | | -xlog_recover_process_cui( |
---|
4726 | | - struct xfs_trans *parent_tp, |
---|
4727 | | - struct xfs_ail *ailp, |
---|
4728 | | - struct xfs_log_item *lip) |
---|
4729 | | -{ |
---|
4730 | | - struct xfs_cui_log_item *cuip; |
---|
4731 | | - int error; |
---|
4732 | | - |
---|
4733 | | - /* |
---|
4734 | | - * Skip CUIs that we've already processed. |
---|
4735 | | - */ |
---|
4736 | | - cuip = container_of(lip, struct xfs_cui_log_item, cui_item); |
---|
4737 | | - if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)) |
---|
4738 | | - return 0; |
---|
4739 | | - |
---|
4740 | | - spin_unlock(&ailp->ail_lock); |
---|
4741 | | - error = xfs_cui_recover(parent_tp, cuip); |
---|
4742 | | - spin_lock(&ailp->ail_lock); |
---|
4743 | | - |
---|
4744 | | - return error; |
---|
4745 | | -} |
---|
4746 | | - |
---|
4747 | | -/* Release the CUI since we're cancelling everything. */ |
---|
4748 | | -STATIC void |
---|
4749 | | -xlog_recover_cancel_cui( |
---|
4750 | | - struct xfs_mount *mp, |
---|
4751 | | - struct xfs_ail *ailp, |
---|
4752 | | - struct xfs_log_item *lip) |
---|
4753 | | -{ |
---|
4754 | | - struct xfs_cui_log_item *cuip; |
---|
4755 | | - |
---|
4756 | | - cuip = container_of(lip, struct xfs_cui_log_item, cui_item); |
---|
4757 | | - |
---|
4758 | | - spin_unlock(&ailp->ail_lock); |
---|
4759 | | - xfs_cui_release(cuip); |
---|
4760 | | - spin_lock(&ailp->ail_lock); |
---|
4761 | | -} |
---|
4762 | | - |
---|
4763 | | -/* Recover the BUI if necessary. */ |
---|
4764 | | -STATIC int |
---|
4765 | | -xlog_recover_process_bui( |
---|
4766 | | - struct xfs_trans *parent_tp, |
---|
4767 | | - struct xfs_ail *ailp, |
---|
4768 | | - struct xfs_log_item *lip) |
---|
4769 | | -{ |
---|
4770 | | - struct xfs_bui_log_item *buip; |
---|
4771 | | - int error; |
---|
4772 | | - |
---|
4773 | | - /* |
---|
4774 | | - * Skip BUIs that we've already processed. |
---|
4775 | | - */ |
---|
4776 | | - buip = container_of(lip, struct xfs_bui_log_item, bui_item); |
---|
4777 | | - if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)) |
---|
4778 | | - return 0; |
---|
4779 | | - |
---|
4780 | | - spin_unlock(&ailp->ail_lock); |
---|
4781 | | - error = xfs_bui_recover(parent_tp, buip); |
---|
4782 | | - spin_lock(&ailp->ail_lock); |
---|
4783 | | - |
---|
4784 | | - return error; |
---|
4785 | | -} |
---|
4786 | | - |
---|
4787 | | -/* Release the BUI since we're cancelling everything. */ |
---|
4788 | | -STATIC void |
---|
4789 | | -xlog_recover_cancel_bui( |
---|
4790 | | - struct xfs_mount *mp, |
---|
4791 | | - struct xfs_ail *ailp, |
---|
4792 | | - struct xfs_log_item *lip) |
---|
4793 | | -{ |
---|
4794 | | - struct xfs_bui_log_item *buip; |
---|
4795 | | - |
---|
4796 | | - buip = container_of(lip, struct xfs_bui_log_item, bui_item); |
---|
4797 | | - |
---|
4798 | | - spin_unlock(&ailp->ail_lock); |
---|
4799 | | - xfs_bui_release(buip); |
---|
4800 | | - spin_lock(&ailp->ail_lock); |
---|
4801 | | -} |
---|
4802 | | - |
---|
4803 | | -/* Is this log item a deferred action intent? */ |
---|
4804 | | -static inline bool xlog_item_is_intent(struct xfs_log_item *lip) |
---|
4805 | | -{ |
---|
4806 | | - switch (lip->li_type) { |
---|
4807 | | - case XFS_LI_EFI: |
---|
4808 | | - case XFS_LI_RUI: |
---|
4809 | | - case XFS_LI_CUI: |
---|
4810 | | - case XFS_LI_BUI: |
---|
4811 | | - return true; |
---|
4812 | | - default: |
---|
4813 | | - return false; |
---|
4814 | | - } |
---|
4815 | | -} |
---|
4816 | | - |
---|
4817 | 2436 | /* Take all the collected deferred ops and finish them in order. */ |
---|
4818 | 2437 | static int |
---|
4819 | 2438 | xlog_finish_defer_ops( |
---|
4820 | | - struct xfs_trans *parent_tp) |
---|
| 2439 | + struct xfs_mount *mp, |
---|
| 2440 | + struct list_head *capture_list) |
---|
4821 | 2441 | { |
---|
4822 | | - struct xfs_mount *mp = parent_tp->t_mountp; |
---|
| 2442 | + struct xfs_defer_capture *dfc, *next; |
---|
4823 | 2443 | struct xfs_trans *tp; |
---|
4824 | | - int64_t freeblks; |
---|
4825 | | - uint resblks; |
---|
4826 | | - int error; |
---|
| 2444 | + struct xfs_inode *ip; |
---|
| 2445 | + int error = 0; |
---|
4827 | 2446 | |
---|
4828 | | - /* |
---|
4829 | | - * We're finishing the defer_ops that accumulated as a result of |
---|
4830 | | - * recovering unfinished intent items during log recovery. We |
---|
4831 | | - * reserve an itruncate transaction because it is the largest |
---|
4832 | | - * permanent transaction type. Since we're the only user of the fs |
---|
4833 | | - * right now, take 93% (15/16) of the available free blocks. Use |
---|
4834 | | - * weird math to avoid a 64-bit division. |
---|
4835 | | - */ |
---|
4836 | | - freeblks = percpu_counter_sum(&mp->m_fdblocks); |
---|
4837 | | - if (freeblks <= 0) |
---|
4838 | | - return -ENOSPC; |
---|
4839 | | - resblks = min_t(int64_t, UINT_MAX, freeblks); |
---|
4840 | | - resblks = (resblks * 15) >> 4; |
---|
4841 | | - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, |
---|
4842 | | - 0, XFS_TRANS_RESERVE, &tp); |
---|
4843 | | - if (error) |
---|
4844 | | - return error; |
---|
4845 | | - /* transfer all collected dfops to this transaction */ |
---|
4846 | | - xfs_defer_move(tp, parent_tp); |
---|
| 2447 | + list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { |
---|
| 2448 | + struct xfs_trans_res resv; |
---|
4847 | 2449 | |
---|
4848 | | - return xfs_trans_commit(tp); |
---|
| 2450 | + /* |
---|
| 2451 | + * Create a new transaction reservation from the captured |
---|
| 2452 | + * information. Set logcount to 1 to force the new transaction |
---|
| 2453 | + * to regrant every roll so that we can make forward progress |
---|
| 2454 | + * in recovery no matter how full the log might be. |
---|
| 2455 | + */ |
---|
| 2456 | + resv.tr_logres = dfc->dfc_logres; |
---|
| 2457 | + resv.tr_logcount = 1; |
---|
| 2458 | + resv.tr_logflags = XFS_TRANS_PERM_LOG_RES; |
---|
| 2459 | + |
---|
| 2460 | + error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres, |
---|
| 2461 | + dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp); |
---|
| 2462 | + if (error) { |
---|
| 2463 | + xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
---|
| 2464 | + return error; |
---|
| 2465 | + } |
---|
| 2466 | + |
---|
| 2467 | + /* |
---|
| 2468 | + * Transfer to this new transaction all the dfops we captured |
---|
| 2469 | + * from recovering a single intent item. |
---|
| 2470 | + */ |
---|
| 2471 | + list_del_init(&dfc->dfc_list); |
---|
| 2472 | + xfs_defer_ops_continue(dfc, tp, &ip); |
---|
| 2473 | + |
---|
| 2474 | + error = xfs_trans_commit(tp); |
---|
| 2475 | + if (ip) { |
---|
| 2476 | + xfs_iunlock(ip, XFS_ILOCK_EXCL); |
---|
| 2477 | + xfs_irele(ip); |
---|
| 2478 | + } |
---|
| 2479 | + if (error) |
---|
| 2480 | + return error; |
---|
| 2481 | + } |
---|
| 2482 | + |
---|
| 2483 | + ASSERT(list_empty(capture_list)); |
---|
| 2484 | + return 0; |
---|
4849 | 2485 | } |
---|
4850 | 2486 | |
---|
| 2487 | +/* Release all the captured defer ops and capture structures in this list. */ |
---|
| 2488 | +static void |
---|
| 2489 | +xlog_abort_defer_ops( |
---|
| 2490 | + struct xfs_mount *mp, |
---|
| 2491 | + struct list_head *capture_list) |
---|
| 2492 | +{ |
---|
| 2493 | + struct xfs_defer_capture *dfc; |
---|
| 2494 | + struct xfs_defer_capture *next; |
---|
| 2495 | + |
---|
| 2496 | + list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { |
---|
| 2497 | + list_del_init(&dfc->dfc_list); |
---|
| 2498 | + xfs_defer_ops_release(mp, dfc); |
---|
| 2499 | + } |
---|
| 2500 | +} |
---|
4851 | 2501 | /* |
---|
4852 | 2502 | * When this is called, all of the log intent items which did not have |
---|
4853 | 2503 | * corresponding log done items should be in the AIL. What we do now |
---|
.. | .. |
---|
4868 | 2518 | xlog_recover_process_intents( |
---|
4869 | 2519 | struct xlog *log) |
---|
4870 | 2520 | { |
---|
4871 | | - struct xfs_trans *parent_tp; |
---|
| 2521 | + LIST_HEAD(capture_list); |
---|
4872 | 2522 | struct xfs_ail_cursor cur; |
---|
4873 | 2523 | struct xfs_log_item *lip; |
---|
4874 | 2524 | struct xfs_ail *ailp; |
---|
4875 | | - int error; |
---|
| 2525 | + int error = 0; |
---|
4876 | 2526 | #if defined(DEBUG) || defined(XFS_WARN) |
---|
4877 | 2527 | xfs_lsn_t last_lsn; |
---|
4878 | 2528 | #endif |
---|
4879 | 2529 | |
---|
4880 | | - /* |
---|
4881 | | - * The intent recovery handlers commit transactions to complete recovery |
---|
4882 | | - * for individual intents, but any new deferred operations that are |
---|
4883 | | - * queued during that process are held off until the very end. The |
---|
4884 | | - * purpose of this transaction is to serve as a container for deferred |
---|
4885 | | - * operations. Each intent recovery handler must transfer dfops here |
---|
4886 | | - * before its local transaction commits, and we'll finish the entire |
---|
4887 | | - * list below. |
---|
4888 | | - */ |
---|
4889 | | - error = xfs_trans_alloc_empty(log->l_mp, &parent_tp); |
---|
4890 | | - if (error) |
---|
4891 | | - return error; |
---|
4892 | | - |
---|
4893 | 2530 | ailp = log->l_ailp; |
---|
4894 | 2531 | spin_lock(&ailp->ail_lock); |
---|
4895 | | - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
---|
4896 | 2532 | #if defined(DEBUG) || defined(XFS_WARN) |
---|
4897 | 2533 | last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); |
---|
4898 | 2534 | #endif |
---|
4899 | | - while (lip != NULL) { |
---|
| 2535 | + for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
---|
| 2536 | + lip != NULL; |
---|
| 2537 | + lip = xfs_trans_ail_cursor_next(ailp, &cur)) { |
---|
4900 | 2538 | /* |
---|
4901 | 2539 | * We're done when we see something other than an intent. |
---|
4902 | 2540 | * There should be no intents left in the AIL now. |
---|
.. | .. |
---|
4918 | 2556 | |
---|
4919 | 2557 | /* |
---|
4920 | 2558 | * NOTE: If your intent processing routine can create more |
---|
4921 | | - * deferred ops, you /must/ attach them to the dfops in this |
---|
4922 | | - * routine or else those subsequent intents will get |
---|
| 2559 | + * deferred ops, you /must/ attach them to the capture list in |
---|
| 2560 | + * the recover routine or else those subsequent intents will be |
---|
4923 | 2561 | * replayed in the wrong order! |
---|
4924 | 2562 | */ |
---|
4925 | | - switch (lip->li_type) { |
---|
4926 | | - case XFS_LI_EFI: |
---|
4927 | | - error = xlog_recover_process_efi(log->l_mp, ailp, lip); |
---|
4928 | | - break; |
---|
4929 | | - case XFS_LI_RUI: |
---|
4930 | | - error = xlog_recover_process_rui(log->l_mp, ailp, lip); |
---|
4931 | | - break; |
---|
4932 | | - case XFS_LI_CUI: |
---|
4933 | | - error = xlog_recover_process_cui(parent_tp, ailp, lip); |
---|
4934 | | - break; |
---|
4935 | | - case XFS_LI_BUI: |
---|
4936 | | - error = xlog_recover_process_bui(parent_tp, ailp, lip); |
---|
4937 | | - break; |
---|
4938 | | - } |
---|
| 2563 | + spin_unlock(&ailp->ail_lock); |
---|
| 2564 | + error = lip->li_ops->iop_recover(lip, &capture_list); |
---|
| 2565 | + spin_lock(&ailp->ail_lock); |
---|
4939 | 2566 | if (error) |
---|
4940 | | - goto out; |
---|
4941 | | - lip = xfs_trans_ail_cursor_next(ailp, &cur); |
---|
| 2567 | + break; |
---|
4942 | 2568 | } |
---|
4943 | | -out: |
---|
| 2569 | + |
---|
4944 | 2570 | xfs_trans_ail_cursor_done(&cur); |
---|
4945 | 2571 | spin_unlock(&ailp->ail_lock); |
---|
4946 | | - if (!error) |
---|
4947 | | - error = xlog_finish_defer_ops(parent_tp); |
---|
4948 | | - xfs_trans_cancel(parent_tp); |
---|
| 2572 | + if (error) |
---|
| 2573 | + goto err; |
---|
4949 | 2574 | |
---|
| 2575 | + error = xlog_finish_defer_ops(log->l_mp, &capture_list); |
---|
| 2576 | + if (error) |
---|
| 2577 | + goto err; |
---|
| 2578 | + |
---|
| 2579 | + return 0; |
---|
| 2580 | +err: |
---|
| 2581 | + xlog_abort_defer_ops(log->l_mp, &capture_list); |
---|
4950 | 2582 | return error; |
---|
4951 | 2583 | } |
---|
4952 | 2584 | |
---|
.. | .. |
---|
4954 | 2586 | * A cancel occurs when the mount has failed and we're bailing out. |
---|
4955 | 2587 | * Release all pending log intent items so they don't pin the AIL. |
---|
4956 | 2588 | */ |
---|
4957 | | -STATIC int |
---|
| 2589 | +STATIC void |
---|
4958 | 2590 | xlog_recover_cancel_intents( |
---|
4959 | 2591 | struct xlog *log) |
---|
4960 | 2592 | { |
---|
4961 | 2593 | struct xfs_log_item *lip; |
---|
4962 | | - int error = 0; |
---|
4963 | 2594 | struct xfs_ail_cursor cur; |
---|
4964 | 2595 | struct xfs_ail *ailp; |
---|
4965 | 2596 | |
---|
.. | .. |
---|
4979 | 2610 | break; |
---|
4980 | 2611 | } |
---|
4981 | 2612 | |
---|
4982 | | - switch (lip->li_type) { |
---|
4983 | | - case XFS_LI_EFI: |
---|
4984 | | - xlog_recover_cancel_efi(log->l_mp, ailp, lip); |
---|
4985 | | - break; |
---|
4986 | | - case XFS_LI_RUI: |
---|
4987 | | - xlog_recover_cancel_rui(log->l_mp, ailp, lip); |
---|
4988 | | - break; |
---|
4989 | | - case XFS_LI_CUI: |
---|
4990 | | - xlog_recover_cancel_cui(log->l_mp, ailp, lip); |
---|
4991 | | - break; |
---|
4992 | | - case XFS_LI_BUI: |
---|
4993 | | - xlog_recover_cancel_bui(log->l_mp, ailp, lip); |
---|
4994 | | - break; |
---|
4995 | | - } |
---|
4996 | | - |
---|
| 2613 | + spin_unlock(&ailp->ail_lock); |
---|
| 2614 | + lip->li_ops->iop_release(lip); |
---|
| 2615 | + spin_lock(&ailp->ail_lock); |
---|
4997 | 2616 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
---|
4998 | 2617 | } |
---|
4999 | 2618 | |
---|
5000 | 2619 | xfs_trans_ail_cursor_done(&cur); |
---|
5001 | 2620 | spin_unlock(&ailp->ail_lock); |
---|
5002 | | - return error; |
---|
5003 | 2621 | } |
---|
5004 | 2622 | |
---|
5005 | 2623 | /* |
---|
.. | .. |
---|
5026 | 2644 | if (error) |
---|
5027 | 2645 | goto out_abort; |
---|
5028 | 2646 | |
---|
5029 | | - agi = XFS_BUF_TO_AGI(agibp); |
---|
| 2647 | + agi = agibp->b_addr; |
---|
5030 | 2648 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
---|
5031 | 2649 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
---|
5032 | 2650 | (sizeof(xfs_agino_t) * bucket); |
---|
.. | .. |
---|
5066 | 2684 | /* |
---|
5067 | 2685 | * Get the on disk inode to find the next inode in the bucket. |
---|
5068 | 2686 | */ |
---|
5069 | | - error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0); |
---|
| 2687 | + error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0); |
---|
5070 | 2688 | if (error) |
---|
5071 | 2689 | goto fail_iput; |
---|
5072 | 2690 | |
---|
.. | .. |
---|
5103 | 2721 | } |
---|
5104 | 2722 | |
---|
5105 | 2723 | /* |
---|
5106 | | - * xlog_iunlink_recover |
---|
| 2724 | + * Recover AGI unlinked lists |
---|
5107 | 2725 | * |
---|
5108 | | - * This is called during recovery to process any inodes which |
---|
5109 | | - * we unlinked but not freed when the system crashed. These |
---|
5110 | | - * inodes will be on the lists in the AGI blocks. What we do |
---|
5111 | | - * here is scan all the AGIs and fully truncate and free any |
---|
5112 | | - * inodes found on the lists. Each inode is removed from the |
---|
5113 | | - * lists when it has been fully truncated and is freed. The |
---|
5114 | | - * freeing of the inode and its removal from the list must be |
---|
5115 | | - * atomic. |
---|
| 2726 | + * This is called during recovery to process any inodes which we unlinked but |
---|
| 2727 | + * not freed when the system crashed. These inodes will be on the lists in the |
---|
| 2728 | + * AGI blocks. What we do here is scan all the AGIs and fully truncate and free |
---|
| 2729 | + * any inodes found on the lists. Each inode is removed from the lists when it |
---|
| 2730 | + * has been fully truncated and is freed. The freeing of the inode and its |
---|
| 2731 | + * removal from the list must be atomic. |
---|
| 2732 | + * |
---|
| 2733 | + * If everything we touch in the agi processing loop is already in memory, this |
---|
| 2734 | + * loop can hold the cpu for a long time. It runs without lock contention, |
---|
| 2736 | + * memory allocation contention, the need to wait for IO, etc, and so will run |
---|
| 2736 | + * until we either run out of inodes to process, run low on memory or we run out |
---|
| 2737 | + * of log space. |
---|
| 2738 | + * |
---|
| 2739 | + * This behaviour is bad for latency on single CPU and non-preemptible kernels, |
---|
| 2740 | + * and can prevent other filesystem work (such as CIL pushes) from running. This |
---|
| 2741 | + * can lead to deadlocks if the recovery process runs out of log reservation |
---|
| 2742 | + * space. Hence we need to yield the CPU when there is other kernel work |
---|
| 2743 | + * scheduled on this CPU to ensure other scheduled work can run without undue |
---|
| 2744 | + * latency. |
---|
5116 | 2745 | */ |
---|
5117 | 2746 | STATIC void |
---|
5118 | 2747 | xlog_recover_process_iunlinks( |
---|
.. | .. |
---|
5151 | 2780 | * buffer reference though, so that it stays pinned in memory |
---|
5152 | 2781 | * while we need the buffer. |
---|
5153 | 2782 | */ |
---|
5154 | | - agi = XFS_BUF_TO_AGI(agibp); |
---|
| 2783 | + agi = agibp->b_addr; |
---|
5155 | 2784 | xfs_buf_unlock(agibp); |
---|
5156 | 2785 | |
---|
5157 | 2786 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { |
---|
.. | .. |
---|
5159 | 2788 | while (agino != NULLAGINO) { |
---|
5160 | 2789 | agino = xlog_recover_process_one_iunlink(mp, |
---|
5161 | 2790 | agno, agino, bucket); |
---|
| 2791 | + cond_resched(); |
---|
5162 | 2792 | } |
---|
5163 | 2793 | } |
---|
5164 | 2794 | xfs_buf_rele(agibp); |
---|
5165 | 2795 | } |
---|
5166 | 2796 | } |
---|
5167 | 2797 | |
---|
5168 | | -STATIC int |
---|
| 2798 | +STATIC void |
---|
5169 | 2799 | xlog_unpack_data( |
---|
5170 | 2800 | struct xlog_rec_header *rhead, |
---|
5171 | 2801 | char *dp, |
---|
.. | .. |
---|
5188 | 2818 | dp += BBSIZE; |
---|
5189 | 2819 | } |
---|
5190 | 2820 | } |
---|
5191 | | - |
---|
5192 | | - return 0; |
---|
5193 | 2821 | } |
---|
5194 | 2822 | |
---|
5195 | 2823 | /* |
---|
.. | .. |
---|
5204 | 2832 | int pass, |
---|
5205 | 2833 | struct list_head *buffer_list) |
---|
5206 | 2834 | { |
---|
5207 | | - int error; |
---|
5208 | 2835 | __le32 old_crc = rhead->h_crc; |
---|
5209 | 2836 | __le32 crc; |
---|
5210 | | - |
---|
5211 | 2837 | |
---|
5212 | 2838 | crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); |
---|
5213 | 2839 | |
---|
.. | .. |
---|
5243 | 2869 | * If the filesystem is CRC enabled, this mismatch becomes a |
---|
5244 | 2870 | * fatal log corruption failure. |
---|
5245 | 2871 | */ |
---|
5246 | | - if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) |
---|
| 2872 | + if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) { |
---|
| 2873 | + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); |
---|
5247 | 2874 | return -EFSCORRUPTED; |
---|
| 2875 | + } |
---|
5248 | 2876 | } |
---|
5249 | 2877 | |
---|
5250 | | - error = xlog_unpack_data(rhead, dp, log); |
---|
5251 | | - if (error) |
---|
5252 | | - return error; |
---|
| 2878 | + xlog_unpack_data(rhead, dp, log); |
---|
5253 | 2879 | |
---|
5254 | 2880 | return xlog_recover_process_data(log, rhash, rhead, dp, pass, |
---|
5255 | 2881 | buffer_list); |
---|
.. | .. |
---|
5259 | 2885 | xlog_valid_rec_header( |
---|
5260 | 2886 | struct xlog *log, |
---|
5261 | 2887 | struct xlog_rec_header *rhead, |
---|
5262 | | - xfs_daddr_t blkno) |
---|
| 2888 | + xfs_daddr_t blkno, |
---|
| 2889 | + int bufsize) |
---|
5263 | 2890 | { |
---|
5264 | 2891 | int hlen; |
---|
5265 | 2892 | |
---|
5266 | | - if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { |
---|
5267 | | - XFS_ERROR_REPORT("xlog_valid_rec_header(1)", |
---|
5268 | | - XFS_ERRLEVEL_LOW, log->l_mp); |
---|
| 2893 | + if (XFS_IS_CORRUPT(log->l_mp, |
---|
| 2894 | + rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) |
---|
5269 | 2895 | return -EFSCORRUPTED; |
---|
5270 | | - } |
---|
5271 | | - if (unlikely( |
---|
5272 | | - (!rhead->h_version || |
---|
5273 | | - (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { |
---|
| 2896 | + if (XFS_IS_CORRUPT(log->l_mp, |
---|
| 2897 | + (!rhead->h_version || |
---|
| 2898 | + (be32_to_cpu(rhead->h_version) & |
---|
| 2899 | + (~XLOG_VERSION_OKBITS))))) { |
---|
5274 | 2900 | xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", |
---|
5275 | 2901 | __func__, be32_to_cpu(rhead->h_version)); |
---|
5276 | | - return -EIO; |
---|
| 2902 | + return -EFSCORRUPTED; |
---|
5277 | 2903 | } |
---|
5278 | 2904 | |
---|
5279 | | - /* LR body must have data or it wouldn't have been written */ |
---|
| 2905 | + /* |
---|
| 2906 | + * LR body must have data (or it wouldn't have been written) |
---|
| 2907 | + * and h_len must not be greater than LR buffer size. |
---|
| 2908 | + */ |
---|
5280 | 2909 | hlen = be32_to_cpu(rhead->h_len); |
---|
5281 | | - if (unlikely( hlen <= 0 || hlen > INT_MAX )) { |
---|
5282 | | - XFS_ERROR_REPORT("xlog_valid_rec_header(2)", |
---|
5283 | | - XFS_ERRLEVEL_LOW, log->l_mp); |
---|
| 2910 | + if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize)) |
---|
5284 | 2911 | return -EFSCORRUPTED; |
---|
5285 | | - } |
---|
5286 | | - if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { |
---|
5287 | | - XFS_ERROR_REPORT("xlog_valid_rec_header(3)", |
---|
5288 | | - XFS_ERRLEVEL_LOW, log->l_mp); |
---|
| 2912 | + |
---|
| 2913 | + if (XFS_IS_CORRUPT(log->l_mp, |
---|
| 2914 | + blkno > log->l_logBBsize || blkno > INT_MAX)) |
---|
5289 | 2915 | return -EFSCORRUPTED; |
---|
5290 | | - } |
---|
5291 | 2916 | return 0; |
---|
5292 | 2917 | } |
---|
5293 | 2918 | |
---|
.. | .. |
---|
5311 | 2936 | xfs_daddr_t blk_no, rblk_no; |
---|
5312 | 2937 | xfs_daddr_t rhead_blk; |
---|
5313 | 2938 | char *offset; |
---|
5314 | | - xfs_buf_t *hbp, *dbp; |
---|
| 2939 | + char *hbp, *dbp; |
---|
5315 | 2940 | int error = 0, h_size, h_len; |
---|
5316 | 2941 | int error2 = 0; |
---|
5317 | 2942 | int bblks, split_bblks; |
---|
.. | .. |
---|
5336 | 2961 | * iclog header and extract the header size from it. Get a |
---|
5337 | 2962 | * new hbp that is the correct size. |
---|
5338 | 2963 | */ |
---|
5339 | | - hbp = xlog_get_bp(log, 1); |
---|
| 2964 | + hbp = xlog_alloc_buffer(log, 1); |
---|
5340 | 2965 | if (!hbp) |
---|
5341 | 2966 | return -ENOMEM; |
---|
5342 | 2967 | |
---|
.. | .. |
---|
5345 | 2970 | goto bread_err1; |
---|
5346 | 2971 | |
---|
5347 | 2972 | rhead = (xlog_rec_header_t *)offset; |
---|
5348 | | - error = xlog_valid_rec_header(log, rhead, tail_blk); |
---|
5349 | | - if (error) |
---|
5350 | | - goto bread_err1; |
---|
5351 | 2973 | |
---|
5352 | 2974 | /* |
---|
5353 | 2975 | * xfsprogs has a bug where record length is based on lsunit but |
---|
.. | .. |
---|
5362 | 2984 | */ |
---|
5363 | 2985 | h_size = be32_to_cpu(rhead->h_size); |
---|
5364 | 2986 | h_len = be32_to_cpu(rhead->h_len); |
---|
5365 | | - if (h_len > h_size) { |
---|
5366 | | - if (h_len <= log->l_mp->m_logbsize && |
---|
5367 | | - be32_to_cpu(rhead->h_num_logops) == 1) { |
---|
5368 | | - xfs_warn(log->l_mp, |
---|
| 2987 | + if (h_len > h_size && h_len <= log->l_mp->m_logbsize && |
---|
| 2988 | + rhead->h_num_logops == cpu_to_be32(1)) { |
---|
| 2989 | + xfs_warn(log->l_mp, |
---|
5369 | 2990 | "invalid iclog size (%d bytes), using lsunit (%d bytes)", |
---|
5370 | | - h_size, log->l_mp->m_logbsize); |
---|
5371 | | - h_size = log->l_mp->m_logbsize; |
---|
5372 | | - } else |
---|
5373 | | - return -EFSCORRUPTED; |
---|
| 2991 | + h_size, log->l_mp->m_logbsize); |
---|
| 2992 | + h_size = log->l_mp->m_logbsize; |
---|
5374 | 2993 | } |
---|
5375 | 2994 | |
---|
5376 | | - if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && |
---|
5377 | | - (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
---|
5378 | | - hblks = h_size / XLOG_HEADER_CYCLE_SIZE; |
---|
5379 | | - if (h_size % XLOG_HEADER_CYCLE_SIZE) |
---|
5380 | | - hblks++; |
---|
5381 | | - xlog_put_bp(hbp); |
---|
5382 | | - hbp = xlog_get_bp(log, hblks); |
---|
5383 | | - } else { |
---|
5384 | | - hblks = 1; |
---|
| 2995 | + error = xlog_valid_rec_header(log, rhead, tail_blk, h_size); |
---|
| 2996 | + if (error) |
---|
| 2997 | + goto bread_err1; |
---|
| 2998 | + |
---|
| 2999 | + hblks = xlog_logrec_hblks(log, rhead); |
---|
| 3000 | + if (hblks != 1) { |
---|
| 3001 | + kmem_free(hbp); |
---|
| 3002 | + hbp = xlog_alloc_buffer(log, hblks); |
---|
5385 | 3003 | } |
---|
5386 | 3004 | } else { |
---|
5387 | 3005 | ASSERT(log->l_sectBBsize == 1); |
---|
5388 | 3006 | hblks = 1; |
---|
5389 | | - hbp = xlog_get_bp(log, 1); |
---|
| 3007 | + hbp = xlog_alloc_buffer(log, 1); |
---|
5390 | 3008 | h_size = XLOG_BIG_RECORD_BSIZE; |
---|
5391 | 3009 | } |
---|
5392 | 3010 | |
---|
5393 | 3011 | if (!hbp) |
---|
5394 | 3012 | return -ENOMEM; |
---|
5395 | | - dbp = xlog_get_bp(log, BTOBB(h_size)); |
---|
| 3013 | + dbp = xlog_alloc_buffer(log, BTOBB(h_size)); |
---|
5396 | 3014 | if (!dbp) { |
---|
5397 | | - xlog_put_bp(hbp); |
---|
| 3015 | + kmem_free(hbp); |
---|
5398 | 3016 | return -ENOMEM; |
---|
5399 | 3017 | } |
---|
5400 | 3018 | |
---|
.. | .. |
---|
5409 | 3027 | /* |
---|
5410 | 3028 | * Check for header wrapping around physical end-of-log |
---|
5411 | 3029 | */ |
---|
5412 | | - offset = hbp->b_addr; |
---|
| 3030 | + offset = hbp; |
---|
5413 | 3031 | split_hblks = 0; |
---|
5414 | 3032 | wrapped_hblks = 0; |
---|
5415 | 3033 | if (blk_no + hblks <= log->l_logBBsize) { |
---|
.. | .. |
---|
5445 | 3063 | * - order is important. |
---|
5446 | 3064 | */ |
---|
5447 | 3065 | wrapped_hblks = hblks - split_hblks; |
---|
5448 | | - error = xlog_bread_offset(log, 0, |
---|
5449 | | - wrapped_hblks, hbp, |
---|
| 3066 | + error = xlog_bread_noalign(log, 0, |
---|
| 3067 | + wrapped_hblks, |
---|
5450 | 3068 | offset + BBTOB(split_hblks)); |
---|
5451 | 3069 | if (error) |
---|
5452 | 3070 | goto bread_err2; |
---|
5453 | 3071 | } |
---|
5454 | 3072 | rhead = (xlog_rec_header_t *)offset; |
---|
5455 | 3073 | error = xlog_valid_rec_header(log, rhead, |
---|
5456 | | - split_hblks ? blk_no : 0); |
---|
| 3074 | + split_hblks ? blk_no : 0, h_size); |
---|
5457 | 3075 | if (error) |
---|
5458 | 3076 | goto bread_err2; |
---|
5459 | 3077 | |
---|
.. | .. |
---|
5477 | 3095 | } else { |
---|
5478 | 3096 | /* This log record is split across the |
---|
5479 | 3097 | * physical end of log */ |
---|
5480 | | - offset = dbp->b_addr; |
---|
| 3098 | + offset = dbp; |
---|
5481 | 3099 | split_bblks = 0; |
---|
5482 | 3100 | if (blk_no != log->l_logBBsize) { |
---|
5483 | 3101 | /* some data is before the physical |
---|
.. | .. |
---|
5506 | 3124 | * _first_, then the log start (LR header end) |
---|
5507 | 3125 | * - order is important. |
---|
5508 | 3126 | */ |
---|
5509 | | - error = xlog_bread_offset(log, 0, |
---|
5510 | | - bblks - split_bblks, dbp, |
---|
| 3127 | + error = xlog_bread_noalign(log, 0, |
---|
| 3128 | + bblks - split_bblks, |
---|
5511 | 3129 | offset + BBTOB(split_bblks)); |
---|
5512 | 3130 | if (error) |
---|
5513 | 3131 | goto bread_err2; |
---|
.. | .. |
---|
5534 | 3152 | goto bread_err2; |
---|
5535 | 3153 | |
---|
5536 | 3154 | rhead = (xlog_rec_header_t *)offset; |
---|
5537 | | - error = xlog_valid_rec_header(log, rhead, blk_no); |
---|
| 3155 | + error = xlog_valid_rec_header(log, rhead, blk_no, h_size); |
---|
5538 | 3156 | if (error) |
---|
5539 | 3157 | goto bread_err2; |
---|
5540 | 3158 | |
---|
.. | .. |
---|
5555 | 3173 | } |
---|
5556 | 3174 | |
---|
5557 | 3175 | bread_err2: |
---|
5558 | | - xlog_put_bp(dbp); |
---|
| 3176 | + kmem_free(dbp); |
---|
5559 | 3177 | bread_err1: |
---|
5560 | | - xlog_put_bp(hbp); |
---|
| 3178 | + kmem_free(hbp); |
---|
5561 | 3179 | |
---|
5562 | 3180 | /* |
---|
5563 | 3181 | * Submit buffers that have been added from the last record processed, |
---|
.. | .. |
---|
5614 | 3232 | */ |
---|
5615 | 3233 | log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * |
---|
5616 | 3234 | sizeof(struct list_head), |
---|
5617 | | - KM_SLEEP); |
---|
| 3235 | + 0); |
---|
5618 | 3236 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
---|
5619 | 3237 | INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); |
---|
5620 | 3238 | |
---|
.. | .. |
---|
5651 | 3269 | */ |
---|
5652 | 3270 | STATIC int |
---|
5653 | 3271 | xlog_do_recover( |
---|
5654 | | - struct xlog *log, |
---|
5655 | | - xfs_daddr_t head_blk, |
---|
5656 | | - xfs_daddr_t tail_blk) |
---|
| 3272 | + struct xlog *log, |
---|
| 3273 | + xfs_daddr_t head_blk, |
---|
| 3274 | + xfs_daddr_t tail_blk) |
---|
5657 | 3275 | { |
---|
5658 | | - struct xfs_mount *mp = log->l_mp; |
---|
5659 | | - int error; |
---|
5660 | | - xfs_buf_t *bp; |
---|
5661 | | - xfs_sb_t *sbp; |
---|
| 3276 | + struct xfs_mount *mp = log->l_mp; |
---|
| 3277 | + struct xfs_buf *bp = mp->m_sb_bp; |
---|
| 3278 | + struct xfs_sb *sbp = &mp->m_sb; |
---|
| 3279 | + int error; |
---|
5662 | 3280 | |
---|
5663 | 3281 | trace_xfs_log_recover(log, head_blk, tail_blk); |
---|
5664 | 3282 | |
---|
.. | .. |
---|
5672 | 3290 | /* |
---|
5673 | 3291 | * If IO errors happened during recovery, bail out. |
---|
5674 | 3292 | */ |
---|
5675 | | - if (XFS_FORCED_SHUTDOWN(mp)) { |
---|
| 3293 | + if (XFS_FORCED_SHUTDOWN(mp)) |
---|
5676 | 3294 | return -EIO; |
---|
5677 | | - } |
---|
5678 | 3295 | |
---|
5679 | 3296 | /* |
---|
5680 | 3297 | * We now update the tail_lsn since much of the recovery has completed |
---|
.. | .. |
---|
5688 | 3305 | xlog_assign_tail_lsn(mp); |
---|
5689 | 3306 | |
---|
5690 | 3307 | /* |
---|
5691 | | - * Now that we've finished replaying all buffer and inode |
---|
5692 | | - * updates, re-read in the superblock and reverify it. |
---|
| 3308 | + * Now that we've finished replaying all buffer and inode updates, |
---|
| 3309 | + * re-read the superblock and reverify it. |
---|
5693 | 3310 | */ |
---|
5694 | | - bp = xfs_getsb(mp, 0); |
---|
5695 | | - bp->b_flags &= ~(XBF_DONE | XBF_ASYNC); |
---|
5696 | | - ASSERT(!(bp->b_flags & XBF_WRITE)); |
---|
5697 | | - bp->b_flags |= XBF_READ; |
---|
5698 | | - bp->b_ops = &xfs_sb_buf_ops; |
---|
5699 | | - |
---|
5700 | | - error = xfs_buf_submit(bp); |
---|
| 3311 | + xfs_buf_lock(bp); |
---|
| 3312 | + xfs_buf_hold(bp); |
---|
| 3313 | + error = _xfs_buf_read(bp, XBF_READ); |
---|
5701 | 3314 | if (error) { |
---|
5702 | 3315 | if (!XFS_FORCED_SHUTDOWN(mp)) { |
---|
5703 | | - xfs_buf_ioerror_alert(bp, __func__); |
---|
| 3316 | + xfs_buf_ioerror_alert(bp, __this_address); |
---|
5704 | 3317 | ASSERT(0); |
---|
5705 | 3318 | } |
---|
5706 | 3319 | xfs_buf_relse(bp); |
---|
.. | .. |
---|
5708 | 3321 | } |
---|
5709 | 3322 | |
---|
5710 | 3323 | /* Convert superblock from on-disk format */ |
---|
5711 | | - sbp = &mp->m_sb; |
---|
5712 | | - xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); |
---|
| 3324 | + xfs_sb_from_disk(sbp, bp->b_addr); |
---|
5713 | 3325 | xfs_buf_relse(bp); |
---|
5714 | 3326 | |
---|
5715 | 3327 | /* re-initialise in-core superblock and geometry structures */ |
---|
.. | .. |
---|
5838 | 3450 | int error; |
---|
5839 | 3451 | error = xlog_recover_process_intents(log); |
---|
5840 | 3452 | if (error) { |
---|
| 3453 | + /* |
---|
| 3454 | + * Cancel all the unprocessed intent items now so that |
---|
| 3455 | + * we don't leave them pinned in the AIL. This can |
---|
| 3456 | + * cause the AIL to livelock on the pinned item if |
---|
| 3457 | + * anyone tries to push the AIL (inode reclaim does |
---|
| 3458 | + * this) before we get around to xfs_log_mount_cancel. |
---|
| 3459 | + */ |
---|
| 3460 | + xlog_recover_cancel_intents(log); |
---|
| 3461 | + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); |
---|
5841 | 3462 | xfs_alert(log->l_mp, "Failed to recover intents"); |
---|
5842 | 3463 | return error; |
---|
5843 | 3464 | } |
---|
.. | .. |
---|
5864 | 3485 | return 0; |
---|
5865 | 3486 | } |
---|
5866 | 3487 | |
---|
5867 | | -int |
---|
| 3488 | +void |
---|
5868 | 3489 | xlog_recover_cancel( |
---|
5869 | 3490 | struct xlog *log) |
---|
5870 | 3491 | { |
---|
5871 | | - int error = 0; |
---|
5872 | | - |
---|
5873 | 3492 | if (log->l_flags & XLOG_RECOVERY_NEEDED) |
---|
5874 | | - error = xlog_recover_cancel_intents(log); |
---|
5875 | | - |
---|
5876 | | - return error; |
---|
| 3493 | + xlog_recover_cancel_intents(log); |
---|
5877 | 3494 | } |
---|
5878 | 3495 | |
---|
5879 | 3496 | #if defined(DEBUG) |
---|
.. | .. |
---|
5886 | 3503 | struct xlog *log) |
---|
5887 | 3504 | { |
---|
5888 | 3505 | xfs_mount_t *mp; |
---|
5889 | | - xfs_agf_t *agfp; |
---|
5890 | 3506 | xfs_buf_t *agfbp; |
---|
5891 | 3507 | xfs_buf_t *agibp; |
---|
5892 | 3508 | xfs_agnumber_t agno; |
---|
.. | .. |
---|
5906 | 3522 | xfs_alert(mp, "%s agf read failed agno %d error %d", |
---|
5907 | 3523 | __func__, agno, error); |
---|
5908 | 3524 | } else { |
---|
5909 | | - agfp = XFS_BUF_TO_AGF(agfbp); |
---|
| 3525 | + struct xfs_agf *agfp = agfbp->b_addr; |
---|
| 3526 | + |
---|
5910 | 3527 | freeblks += be32_to_cpu(agfp->agf_freeblks) + |
---|
5911 | 3528 | be32_to_cpu(agfp->agf_flcount); |
---|
5912 | 3529 | xfs_buf_relse(agfbp); |
---|
.. | .. |
---|
5917 | 3534 | xfs_alert(mp, "%s agi read failed agno %d error %d", |
---|
5918 | 3535 | __func__, agno, error); |
---|
5919 | 3536 | } else { |
---|
5920 | | - struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); |
---|
| 3537 | + struct xfs_agi *agi = agibp->b_addr; |
---|
5921 | 3538 | |
---|
5922 | 3539 | itotal += be32_to_cpu(agi->agi_count); |
---|
5923 | 3540 | ifree += be32_to_cpu(agi->agi_freecount); |
---|