| .. | .. |
|---|
| 13 | 13 | #include "xfs_sb.h" |
|---|
| 14 | 14 | #include "xfs_mount.h" |
|---|
| 15 | 15 | #include "xfs_defer.h" |
|---|
| 16 | | -#include "xfs_da_format.h" |
|---|
| 17 | | -#include "xfs_da_btree.h" |
|---|
| 18 | 16 | #include "xfs_inode.h" |
|---|
| 19 | 17 | #include "xfs_trans.h" |
|---|
| 20 | 18 | #include "xfs_log.h" |
|---|
| 21 | 19 | #include "xfs_log_priv.h" |
|---|
| 22 | 20 | #include "xfs_log_recover.h" |
|---|
| 23 | | -#include "xfs_inode_item.h" |
|---|
| 24 | | -#include "xfs_extfree_item.h" |
|---|
| 25 | 21 | #include "xfs_trans_priv.h" |
|---|
| 26 | 22 | #include "xfs_alloc.h" |
|---|
| 27 | 23 | #include "xfs_ialloc.h" |
|---|
| 28 | | -#include "xfs_quota.h" |
|---|
| 29 | | -#include "xfs_cksum.h" |
|---|
| 30 | 24 | #include "xfs_trace.h" |
|---|
| 31 | 25 | #include "xfs_icache.h" |
|---|
| 32 | | -#include "xfs_bmap_btree.h" |
|---|
| 33 | 26 | #include "xfs_error.h" |
|---|
| 34 | | -#include "xfs_dir2.h" |
|---|
| 35 | | -#include "xfs_rmap_item.h" |
|---|
| 36 | 27 | #include "xfs_buf_item.h" |
|---|
| 37 | | -#include "xfs_refcount_item.h" |
|---|
| 38 | | -#include "xfs_bmap_item.h" |
|---|
| 39 | 28 | |
|---|
| 40 | 29 | #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) |
|---|
| 41 | 30 | |
|---|
| .. | .. |
|---|
| 59 | 48 | struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *); |
|---|
| 60 | 49 | |
|---|
| 61 | 50 | /* |
|---|
| 62 | | - * This structure is used during recovery to record the buf log items which |
|---|
| 63 | | - * have been canceled and should not be replayed. |
|---|
| 64 | | - */ |
|---|
| 65 | | -struct xfs_buf_cancel { |
|---|
| 66 | | - xfs_daddr_t bc_blkno; |
|---|
| 67 | | - uint bc_len; |
|---|
| 68 | | - int bc_refcount; |
|---|
| 69 | | - struct list_head bc_list; |
|---|
| 70 | | -}; |
|---|
| 71 | | - |
|---|
| 72 | | -/* |
|---|
| 73 | 51 | * Sector aligned buffer routines for buffer create/read/write/access |
|---|
| 74 | 52 | */ |
|---|
| 75 | 53 | |
|---|
| .. | .. |
|---|
| 79 | 57 | * are valid, false otherwise. |
|---|
| 80 | 58 | */ |
|---|
| 81 | 59 | static inline bool |
|---|
| 82 | | -xlog_verify_bp( |
|---|
| 60 | +xlog_verify_bno( |
|---|
| 83 | 61 | struct xlog *log, |
|---|
| 84 | 62 | xfs_daddr_t blk_no, |
|---|
| 85 | 63 | int bbcount) |
|---|
| .. | .. |
|---|
| 92 | 70 | } |
|---|
| 93 | 71 | |
|---|
| 94 | 72 | /* |
|---|
| 95 | | - * Allocate a buffer to hold log data. The buffer needs to be able |
|---|
| 96 | | - * to map to a range of nbblks basic blocks at any valid (basic |
|---|
| 97 | | - * block) offset within the log. |
|---|
| 73 | + * Allocate a buffer to hold log data. The buffer needs to be able to map to |
|---|
| 74 | + * a range of nbblks basic blocks at any valid offset within the log. |
|---|
| 98 | 75 | */ |
|---|
| 99 | | -STATIC xfs_buf_t * |
|---|
| 100 | | -xlog_get_bp( |
|---|
| 76 | +static char * |
|---|
| 77 | +xlog_alloc_buffer( |
|---|
| 101 | 78 | struct xlog *log, |
|---|
| 102 | 79 | int nbblks) |
|---|
| 103 | 80 | { |
|---|
| 104 | | - struct xfs_buf *bp; |
|---|
| 81 | + int align_mask = xfs_buftarg_dma_alignment(log->l_targ); |
|---|
| 105 | 82 | |
|---|
| 106 | 83 | /* |
|---|
| 107 | 84 | * Pass log block 0 since we don't have an addr yet, buffer will be |
|---|
| 108 | 85 | * verified on read. |
|---|
| 109 | 86 | */ |
|---|
| 110 | | - if (!xlog_verify_bp(log, 0, nbblks)) { |
|---|
| 87 | + if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, 0, nbblks))) { |
|---|
| 111 | 88 | xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", |
|---|
| 112 | 89 | nbblks); |
|---|
| 113 | | - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
|---|
| 114 | 90 | return NULL; |
|---|
| 115 | 91 | } |
|---|
| 116 | 92 | |
|---|
| 117 | 93 | /* |
|---|
| 118 | | - * We do log I/O in units of log sectors (a power-of-2 |
|---|
| 119 | | - * multiple of the basic block size), so we round up the |
|---|
| 120 | | - * requested size to accommodate the basic blocks required |
|---|
| 121 | | - * for complete log sectors. |
|---|
| 94 | + * We do log I/O in units of log sectors (a power-of-2 multiple of the |
|---|
| 95 | + * basic block size), so we round up the requested size to accommodate |
|---|
| 96 | + * the basic blocks required for complete log sectors. |
|---|
| 122 | 97 | * |
|---|
| 123 | | - * In addition, the buffer may be used for a non-sector- |
|---|
| 124 | | - * aligned block offset, in which case an I/O of the |
|---|
| 125 | | - * requested size could extend beyond the end of the |
|---|
| 126 | | - * buffer. If the requested size is only 1 basic block it |
|---|
| 127 | | - * will never straddle a sector boundary, so this won't be |
|---|
| 128 | | - * an issue. Nor will this be a problem if the log I/O is |
|---|
| 129 | | - * done in basic blocks (sector size 1). But otherwise we |
|---|
| 130 | | - * extend the buffer by one extra log sector to ensure |
|---|
| 131 | | - * there's space to accommodate this possibility. |
|---|
| 98 | + * In addition, the buffer may be used for a non-sector-aligned block |
|---|
| 99 | + * offset, in which case an I/O of the requested size could extend |
|---|
| 100 | + * beyond the end of the buffer. If the requested size is only 1 basic |
|---|
| 101 | + * block it will never straddle a sector boundary, so this won't be an |
|---|
| 102 | + * issue. Nor will this be a problem if the log I/O is done in basic |
|---|
| 103 | + * blocks (sector size 1). But otherwise we extend the buffer by one |
|---|
| 104 | + * extra log sector to ensure there's space to accommodate this |
|---|
| 105 | + * possibility. |
|---|
| 132 | 106 | */ |
|---|
| 133 | 107 | if (nbblks > 1 && log->l_sectBBsize > 1) |
|---|
| 134 | 108 | nbblks += log->l_sectBBsize; |
|---|
| 135 | 109 | nbblks = round_up(nbblks, log->l_sectBBsize); |
|---|
| 136 | | - |
|---|
| 137 | | - bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); |
|---|
| 138 | | - if (bp) |
|---|
| 139 | | - xfs_buf_unlock(bp); |
|---|
| 140 | | - return bp; |
|---|
| 141 | | -} |
|---|
| 142 | | - |
|---|
| 143 | | -STATIC void |
|---|
| 144 | | -xlog_put_bp( |
|---|
| 145 | | - xfs_buf_t *bp) |
|---|
| 146 | | -{ |
|---|
| 147 | | - xfs_buf_free(bp); |
|---|
| 110 | + return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO); |
|---|
| 148 | 111 | } |
|---|
| 149 | 112 | |
|---|
| 150 | 113 | /* |
|---|
| 151 | 114 | * Return the byte offset of the start of the given block number's data |
|---|
| 152 | 115 | * in a log buffer. The buffer covers a log sector-aligned region. |
|---|
| 153 | 116 | */ |
|---|
| 154 | | -STATIC char * |
|---|
| 117 | +static inline unsigned int |
|---|
| 155 | 118 | xlog_align( |
|---|
| 156 | 119 | struct xlog *log, |
|---|
| 157 | | - xfs_daddr_t blk_no, |
|---|
| 158 | | - int nbblks, |
|---|
| 159 | | - struct xfs_buf *bp) |
|---|
| 120 | + xfs_daddr_t blk_no) |
|---|
| 160 | 121 | { |
|---|
| 161 | | - xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); |
|---|
| 162 | | - |
|---|
| 163 | | - ASSERT(offset + nbblks <= bp->b_length); |
|---|
| 164 | | - return bp->b_addr + BBTOB(offset); |
|---|
| 122 | + return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1)); |
|---|
| 165 | 123 | } |
|---|
| 166 | 124 | |
|---|
| 167 | | - |
|---|
| 168 | | -/* |
|---|
| 169 | | - * nbblks should be uint, but oh well. Just want to catch that 32-bit length. |
|---|
| 170 | | - */ |
|---|
| 171 | | -STATIC int |
|---|
| 172 | | -xlog_bread_noalign( |
|---|
| 173 | | - struct xlog *log, |
|---|
| 174 | | - xfs_daddr_t blk_no, |
|---|
| 175 | | - int nbblks, |
|---|
| 176 | | - struct xfs_buf *bp) |
|---|
| 125 | +static int |
|---|
| 126 | +xlog_do_io( |
|---|
| 127 | + struct xlog *log, |
|---|
| 128 | + xfs_daddr_t blk_no, |
|---|
| 129 | + unsigned int nbblks, |
|---|
| 130 | + char *data, |
|---|
| 131 | + unsigned int op) |
|---|
| 177 | 132 | { |
|---|
| 178 | | - int error; |
|---|
| 133 | + int error; |
|---|
| 179 | 134 | |
|---|
| 180 | | - if (!xlog_verify_bp(log, blk_no, nbblks)) { |
|---|
| 135 | + if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, blk_no, nbblks))) { |
|---|
| 181 | 136 | xfs_warn(log->l_mp, |
|---|
| 182 | 137 | "Invalid log block/length (0x%llx, 0x%x) for buffer", |
|---|
| 183 | 138 | blk_no, nbblks); |
|---|
| 184 | | - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
|---|
| 185 | 139 | return -EFSCORRUPTED; |
|---|
| 186 | 140 | } |
|---|
| 187 | 141 | |
|---|
| 188 | 142 | blk_no = round_down(blk_no, log->l_sectBBsize); |
|---|
| 189 | 143 | nbblks = round_up(nbblks, log->l_sectBBsize); |
|---|
| 190 | | - |
|---|
| 191 | 144 | ASSERT(nbblks > 0); |
|---|
| 192 | | - ASSERT(nbblks <= bp->b_length); |
|---|
| 193 | 145 | |
|---|
| 194 | | - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
|---|
| 195 | | - bp->b_flags |= XBF_READ; |
|---|
| 196 | | - bp->b_io_length = nbblks; |
|---|
| 197 | | - bp->b_error = 0; |
|---|
| 198 | | - |
|---|
| 199 | | - error = xfs_buf_submit(bp); |
|---|
| 200 | | - if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) |
|---|
| 201 | | - xfs_buf_ioerror_alert(bp, __func__); |
|---|
| 146 | + error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, |
|---|
| 147 | + BBTOB(nbblks), data, op); |
|---|
| 148 | + if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) { |
|---|
| 149 | + xfs_alert(log->l_mp, |
|---|
| 150 | + "log recovery %s I/O error at daddr 0x%llx len %d error %d", |
|---|
| 151 | + op == REQ_OP_WRITE ? "write" : "read", |
|---|
| 152 | + blk_no, nbblks, error); |
|---|
| 153 | + } |
|---|
| 202 | 154 | return error; |
|---|
| 155 | +} |
|---|
| 156 | + |
|---|
| 157 | +STATIC int |
|---|
| 158 | +xlog_bread_noalign( |
|---|
| 159 | + struct xlog *log, |
|---|
| 160 | + xfs_daddr_t blk_no, |
|---|
| 161 | + int nbblks, |
|---|
| 162 | + char *data) |
|---|
| 163 | +{ |
|---|
| 164 | + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); |
|---|
| 203 | 165 | } |
|---|
| 204 | 166 | |
|---|
| 205 | 167 | STATIC int |
|---|
| .. | .. |
|---|
| 207 | 169 | struct xlog *log, |
|---|
| 208 | 170 | xfs_daddr_t blk_no, |
|---|
| 209 | 171 | int nbblks, |
|---|
| 210 | | - struct xfs_buf *bp, |
|---|
| 172 | + char *data, |
|---|
| 211 | 173 | char **offset) |
|---|
| 212 | 174 | { |
|---|
| 213 | 175 | int error; |
|---|
| 214 | 176 | |
|---|
| 215 | | - error = xlog_bread_noalign(log, blk_no, nbblks, bp); |
|---|
| 216 | | - if (error) |
|---|
| 217 | | - return error; |
|---|
| 218 | | - |
|---|
| 219 | | - *offset = xlog_align(log, blk_no, nbblks, bp); |
|---|
| 220 | | - return 0; |
|---|
| 177 | + error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); |
|---|
| 178 | + if (!error) |
|---|
| 179 | + *offset = data + xlog_align(log, blk_no); |
|---|
| 180 | + return error; |
|---|
| 221 | 181 | } |
|---|
| 222 | 182 | |
|---|
| 223 | | -/* |
|---|
| 224 | | - * Read at an offset into the buffer. Returns with the buffer in its original |
|---|
| 225 | | - * state regardless of the result of the read. |
|---|
| 226 | | - */ |
|---|
| 227 | | -STATIC int |
|---|
| 228 | | -xlog_bread_offset( |
|---|
| 229 | | - struct xlog *log, |
|---|
| 230 | | - xfs_daddr_t blk_no, /* block to read from */ |
|---|
| 231 | | - int nbblks, /* blocks to read */ |
|---|
| 232 | | - struct xfs_buf *bp, |
|---|
| 233 | | - char *offset) |
|---|
| 234 | | -{ |
|---|
| 235 | | - char *orig_offset = bp->b_addr; |
|---|
| 236 | | - int orig_len = BBTOB(bp->b_length); |
|---|
| 237 | | - int error, error2; |
|---|
| 238 | | - |
|---|
| 239 | | - error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); |
|---|
| 240 | | - if (error) |
|---|
| 241 | | - return error; |
|---|
| 242 | | - |
|---|
| 243 | | - error = xlog_bread_noalign(log, blk_no, nbblks, bp); |
|---|
| 244 | | - |
|---|
| 245 | | - /* must reset buffer pointer even on error */ |
|---|
| 246 | | - error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); |
|---|
| 247 | | - if (error) |
|---|
| 248 | | - return error; |
|---|
| 249 | | - return error2; |
|---|
| 250 | | -} |
|---|
| 251 | | - |
|---|
| 252 | | -/* |
|---|
| 253 | | - * Write out the buffer at the given block for the given number of blocks. |
|---|
| 254 | | - * The buffer is kept locked across the write and is returned locked. |
|---|
| 255 | | - * This can only be used for synchronous log writes. |
|---|
| 256 | | - */ |
|---|
| 257 | 183 | STATIC int |
|---|
| 258 | 184 | xlog_bwrite( |
|---|
| 259 | 185 | struct xlog *log, |
|---|
| 260 | 186 | xfs_daddr_t blk_no, |
|---|
| 261 | 187 | int nbblks, |
|---|
| 262 | | - struct xfs_buf *bp) |
|---|
| 188 | + char *data) |
|---|
| 263 | 189 | { |
|---|
| 264 | | - int error; |
|---|
| 265 | | - |
|---|
| 266 | | - if (!xlog_verify_bp(log, blk_no, nbblks)) { |
|---|
| 267 | | - xfs_warn(log->l_mp, |
|---|
| 268 | | - "Invalid log block/length (0x%llx, 0x%x) for buffer", |
|---|
| 269 | | - blk_no, nbblks); |
|---|
| 270 | | - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); |
|---|
| 271 | | - return -EFSCORRUPTED; |
|---|
| 272 | | - } |
|---|
| 273 | | - |
|---|
| 274 | | - blk_no = round_down(blk_no, log->l_sectBBsize); |
|---|
| 275 | | - nbblks = round_up(nbblks, log->l_sectBBsize); |
|---|
| 276 | | - |
|---|
| 277 | | - ASSERT(nbblks > 0); |
|---|
| 278 | | - ASSERT(nbblks <= bp->b_length); |
|---|
| 279 | | - |
|---|
| 280 | | - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); |
|---|
| 281 | | - xfs_buf_hold(bp); |
|---|
| 282 | | - xfs_buf_lock(bp); |
|---|
| 283 | | - bp->b_io_length = nbblks; |
|---|
| 284 | | - bp->b_error = 0; |
|---|
| 285 | | - |
|---|
| 286 | | - error = xfs_bwrite(bp); |
|---|
| 287 | | - if (error) |
|---|
| 288 | | - xfs_buf_ioerror_alert(bp, __func__); |
|---|
| 289 | | - xfs_buf_relse(bp); |
|---|
| 290 | | - return error; |
|---|
| 190 | + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE); |
|---|
| 291 | 191 | } |
|---|
| 292 | 192 | |
|---|
| 293 | 193 | #ifdef DEBUG |
|---|
| .. | .. |
|---|
| 323 | 223 | * (XLOG_FMT_UNKNOWN). This stops us from trying to recover |
|---|
| 324 | 224 | * a dirty log created in IRIX. |
|---|
| 325 | 225 | */ |
|---|
| 326 | | - if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) { |
|---|
| 226 | + if (XFS_IS_CORRUPT(mp, head->h_fmt != cpu_to_be32(XLOG_FMT))) { |
|---|
| 327 | 227 | xfs_warn(mp, |
|---|
| 328 | 228 | "dirty log written in incompatible format - can't recover"); |
|---|
| 329 | 229 | xlog_header_check_dump(mp, head); |
|---|
| 330 | | - XFS_ERROR_REPORT("xlog_header_check_recover(1)", |
|---|
| 331 | | - XFS_ERRLEVEL_HIGH, mp); |
|---|
| 332 | 230 | return -EFSCORRUPTED; |
|---|
| 333 | | - } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
|---|
| 231 | + } |
|---|
| 232 | + if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid, |
|---|
| 233 | + &head->h_fs_uuid))) { |
|---|
| 334 | 234 | xfs_warn(mp, |
|---|
| 335 | 235 | "dirty log entry has mismatched uuid - can't recover"); |
|---|
| 336 | 236 | xlog_header_check_dump(mp, head); |
|---|
| 337 | | - XFS_ERROR_REPORT("xlog_header_check_recover(2)", |
|---|
| 338 | | - XFS_ERRLEVEL_HIGH, mp); |
|---|
| 339 | 237 | return -EFSCORRUPTED; |
|---|
| 340 | 238 | } |
|---|
| 341 | 239 | return 0; |
|---|
| .. | .. |
|---|
| 358 | 256 | * by IRIX and continue. |
|---|
| 359 | 257 | */ |
|---|
| 360 | 258 | xfs_warn(mp, "null uuid in log - IRIX style log"); |
|---|
| 361 | | - } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { |
|---|
| 259 | + } else if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid, |
|---|
| 260 | + &head->h_fs_uuid))) { |
|---|
| 362 | 261 | xfs_warn(mp, "log has mismatched uuid - can't recover"); |
|---|
| 363 | 262 | xlog_header_check_dump(mp, head); |
|---|
| 364 | | - XFS_ERROR_REPORT("xlog_header_check_mount", |
|---|
| 365 | | - XFS_ERRLEVEL_HIGH, mp); |
|---|
| 366 | 263 | return -EFSCORRUPTED; |
|---|
| 367 | 264 | } |
|---|
| 368 | 265 | return 0; |
|---|
| 369 | | -} |
|---|
| 370 | | - |
|---|
| 371 | | -STATIC void |
|---|
| 372 | | -xlog_recover_iodone( |
|---|
| 373 | | - struct xfs_buf *bp) |
|---|
| 374 | | -{ |
|---|
| 375 | | - if (bp->b_error) { |
|---|
| 376 | | - /* |
|---|
| 377 | | - * We're not going to bother about retrying |
|---|
| 378 | | - * this during recovery. One strike! |
|---|
| 379 | | - */ |
|---|
| 380 | | - if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { |
|---|
| 381 | | - xfs_buf_ioerror_alert(bp, __func__); |
|---|
| 382 | | - xfs_force_shutdown(bp->b_target->bt_mount, |
|---|
| 383 | | - SHUTDOWN_META_IO_ERROR); |
|---|
| 384 | | - } |
|---|
| 385 | | - } |
|---|
| 386 | | - |
|---|
| 387 | | - /* |
|---|
| 388 | | - * On v5 supers, a bli could be attached to update the metadata LSN. |
|---|
| 389 | | - * Clean it up. |
|---|
| 390 | | - */ |
|---|
| 391 | | - if (bp->b_log_item) |
|---|
| 392 | | - xfs_buf_item_relse(bp); |
|---|
| 393 | | - ASSERT(bp->b_log_item == NULL); |
|---|
| 394 | | - |
|---|
| 395 | | - bp->b_iodone = NULL; |
|---|
| 396 | | - xfs_buf_ioend(bp); |
|---|
| 397 | 266 | } |
|---|
| 398 | 267 | |
|---|
| 399 | 268 | /* |
|---|
| .. | .. |
|---|
| 405 | 274 | STATIC int |
|---|
| 406 | 275 | xlog_find_cycle_start( |
|---|
| 407 | 276 | struct xlog *log, |
|---|
| 408 | | - struct xfs_buf *bp, |
|---|
| 277 | + char *buffer, |
|---|
| 409 | 278 | xfs_daddr_t first_blk, |
|---|
| 410 | 279 | xfs_daddr_t *last_blk, |
|---|
| 411 | 280 | uint cycle) |
|---|
| .. | .. |
|---|
| 419 | 288 | end_blk = *last_blk; |
|---|
| 420 | 289 | mid_blk = BLK_AVG(first_blk, end_blk); |
|---|
| 421 | 290 | while (mid_blk != first_blk && mid_blk != end_blk) { |
|---|
| 422 | | - error = xlog_bread(log, mid_blk, 1, bp, &offset); |
|---|
| 291 | + error = xlog_bread(log, mid_blk, 1, buffer, &offset); |
|---|
| 423 | 292 | if (error) |
|---|
| 424 | 293 | return error; |
|---|
| 425 | 294 | mid_cycle = xlog_get_cycle(offset); |
|---|
| .. | .. |
|---|
| 455 | 324 | { |
|---|
| 456 | 325 | xfs_daddr_t i, j; |
|---|
| 457 | 326 | uint cycle; |
|---|
| 458 | | - xfs_buf_t *bp; |
|---|
| 327 | + char *buffer; |
|---|
| 459 | 328 | xfs_daddr_t bufblks; |
|---|
| 460 | 329 | char *buf = NULL; |
|---|
| 461 | 330 | int error = 0; |
|---|
| .. | .. |
|---|
| 469 | 338 | bufblks = 1 << ffs(nbblks); |
|---|
| 470 | 339 | while (bufblks > log->l_logBBsize) |
|---|
| 471 | 340 | bufblks >>= 1; |
|---|
| 472 | | - while (!(bp = xlog_get_bp(log, bufblks))) { |
|---|
| 341 | + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { |
|---|
| 473 | 342 | bufblks >>= 1; |
|---|
| 474 | 343 | if (bufblks < log->l_sectBBsize) |
|---|
| 475 | 344 | return -ENOMEM; |
|---|
| .. | .. |
|---|
| 480 | 349 | |
|---|
| 481 | 350 | bcount = min(bufblks, (start_blk + nbblks - i)); |
|---|
| 482 | 351 | |
|---|
| 483 | | - error = xlog_bread(log, i, bcount, bp, &buf); |
|---|
| 352 | + error = xlog_bread(log, i, bcount, buffer, &buf); |
|---|
| 484 | 353 | if (error) |
|---|
| 485 | 354 | goto out; |
|---|
| 486 | 355 | |
|---|
| .. | .. |
|---|
| 498 | 367 | *new_blk = -1; |
|---|
| 499 | 368 | |
|---|
| 500 | 369 | out: |
|---|
| 501 | | - xlog_put_bp(bp); |
|---|
| 370 | + kmem_free(buffer); |
|---|
| 502 | 371 | return error; |
|---|
| 372 | +} |
|---|
| 373 | + |
|---|
| 374 | +static inline int |
|---|
| 375 | +xlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh) |
|---|
| 376 | +{ |
|---|
| 377 | + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
|---|
| 378 | + int h_size = be32_to_cpu(rh->h_size); |
|---|
| 379 | + |
|---|
| 380 | + if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) && |
|---|
| 381 | + h_size > XLOG_HEADER_CYCLE_SIZE) |
|---|
| 382 | + return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE); |
|---|
| 383 | + } |
|---|
| 384 | + return 1; |
|---|
| 503 | 385 | } |
|---|
| 504 | 386 | |
|---|
| 505 | 387 | /* |
|---|
| .. | .. |
|---|
| 522 | 404 | int extra_bblks) |
|---|
| 523 | 405 | { |
|---|
| 524 | 406 | xfs_daddr_t i; |
|---|
| 525 | | - xfs_buf_t *bp; |
|---|
| 407 | + char *buffer; |
|---|
| 526 | 408 | char *offset = NULL; |
|---|
| 527 | 409 | xlog_rec_header_t *head = NULL; |
|---|
| 528 | 410 | int error = 0; |
|---|
| .. | .. |
|---|
| 532 | 414 | |
|---|
| 533 | 415 | ASSERT(start_blk != 0 || *last_blk != start_blk); |
|---|
| 534 | 416 | |
|---|
| 535 | | - if (!(bp = xlog_get_bp(log, num_blks))) { |
|---|
| 536 | | - if (!(bp = xlog_get_bp(log, 1))) |
|---|
| 417 | + buffer = xlog_alloc_buffer(log, num_blks); |
|---|
| 418 | + if (!buffer) { |
|---|
| 419 | + buffer = xlog_alloc_buffer(log, 1); |
|---|
| 420 | + if (!buffer) |
|---|
| 537 | 421 | return -ENOMEM; |
|---|
| 538 | 422 | smallmem = 1; |
|---|
| 539 | 423 | } else { |
|---|
| 540 | | - error = xlog_bread(log, start_blk, num_blks, bp, &offset); |
|---|
| 424 | + error = xlog_bread(log, start_blk, num_blks, buffer, &offset); |
|---|
| 541 | 425 | if (error) |
|---|
| 542 | 426 | goto out; |
|---|
| 543 | 427 | offset += ((num_blks - 1) << BBSHIFT); |
|---|
| .. | .. |
|---|
| 549 | 433 | xfs_warn(log->l_mp, |
|---|
| 550 | 434 | "Log inconsistent (didn't find previous header)"); |
|---|
| 551 | 435 | ASSERT(0); |
|---|
| 552 | | - error = -EIO; |
|---|
| 436 | + error = -EFSCORRUPTED; |
|---|
| 553 | 437 | goto out; |
|---|
| 554 | 438 | } |
|---|
| 555 | 439 | |
|---|
| 556 | 440 | if (smallmem) { |
|---|
| 557 | | - error = xlog_bread(log, i, 1, bp, &offset); |
|---|
| 441 | + error = xlog_bread(log, i, 1, buffer, &offset); |
|---|
| 558 | 442 | if (error) |
|---|
| 559 | 443 | goto out; |
|---|
| 560 | 444 | } |
|---|
| .. | .. |
|---|
| 592 | 476 | * reset last_blk. Only when last_blk points in the middle of a log |
|---|
| 593 | 477 | * record do we update last_blk. |
|---|
| 594 | 478 | */ |
|---|
| 595 | | - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
|---|
| 596 | | - uint h_size = be32_to_cpu(head->h_size); |
|---|
| 597 | | - |
|---|
| 598 | | - xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE; |
|---|
| 599 | | - if (h_size % XLOG_HEADER_CYCLE_SIZE) |
|---|
| 600 | | - xhdrs++; |
|---|
| 601 | | - } else { |
|---|
| 602 | | - xhdrs = 1; |
|---|
| 603 | | - } |
|---|
| 479 | + xhdrs = xlog_logrec_hblks(log, head); |
|---|
| 604 | 480 | |
|---|
| 605 | 481 | if (*last_blk - i + extra_bblks != |
|---|
| 606 | 482 | BTOBB(be32_to_cpu(head->h_len)) + xhdrs) |
|---|
| 607 | 483 | *last_blk = i; |
|---|
| 608 | 484 | |
|---|
| 609 | 485 | out: |
|---|
| 610 | | - xlog_put_bp(bp); |
|---|
| 486 | + kmem_free(buffer); |
|---|
| 611 | 487 | return error; |
|---|
| 612 | 488 | } |
|---|
| 613 | 489 | |
|---|
| .. | .. |
|---|
| 629 | 505 | struct xlog *log, |
|---|
| 630 | 506 | xfs_daddr_t *return_head_blk) |
|---|
| 631 | 507 | { |
|---|
| 632 | | - xfs_buf_t *bp; |
|---|
| 508 | + char *buffer; |
|---|
| 633 | 509 | char *offset; |
|---|
| 634 | 510 | xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; |
|---|
| 635 | 511 | int num_scan_bblks; |
|---|
| .. | .. |
|---|
| 659 | 535 | } |
|---|
| 660 | 536 | |
|---|
| 661 | 537 | first_blk = 0; /* get cycle # of 1st block */ |
|---|
| 662 | | - bp = xlog_get_bp(log, 1); |
|---|
| 663 | | - if (!bp) |
|---|
| 538 | + buffer = xlog_alloc_buffer(log, 1); |
|---|
| 539 | + if (!buffer) |
|---|
| 664 | 540 | return -ENOMEM; |
|---|
| 665 | 541 | |
|---|
| 666 | | - error = xlog_bread(log, 0, 1, bp, &offset); |
|---|
| 542 | + error = xlog_bread(log, 0, 1, buffer, &offset); |
|---|
| 667 | 543 | if (error) |
|---|
| 668 | | - goto bp_err; |
|---|
| 544 | + goto out_free_buffer; |
|---|
| 669 | 545 | |
|---|
| 670 | 546 | first_half_cycle = xlog_get_cycle(offset); |
|---|
| 671 | 547 | |
|---|
| 672 | 548 | last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ |
|---|
| 673 | | - error = xlog_bread(log, last_blk, 1, bp, &offset); |
|---|
| 549 | + error = xlog_bread(log, last_blk, 1, buffer, &offset); |
|---|
| 674 | 550 | if (error) |
|---|
| 675 | | - goto bp_err; |
|---|
| 551 | + goto out_free_buffer; |
|---|
| 676 | 552 | |
|---|
| 677 | 553 | last_half_cycle = xlog_get_cycle(offset); |
|---|
| 678 | 554 | ASSERT(last_half_cycle != 0); |
|---|
| .. | .. |
|---|
| 740 | 616 | * ^ we want to locate this spot |
|---|
| 741 | 617 | */ |
|---|
| 742 | 618 | stop_on_cycle = last_half_cycle; |
|---|
| 743 | | - if ((error = xlog_find_cycle_start(log, bp, first_blk, |
|---|
| 744 | | - &head_blk, last_half_cycle))) |
|---|
| 745 | | - goto bp_err; |
|---|
| 619 | + error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk, |
|---|
| 620 | + last_half_cycle); |
|---|
| 621 | + if (error) |
|---|
| 622 | + goto out_free_buffer; |
|---|
| 746 | 623 | } |
|---|
| 747 | 624 | |
|---|
| 748 | 625 | /* |
|---|
| .. | .. |
|---|
| 762 | 639 | if ((error = xlog_find_verify_cycle(log, |
|---|
| 763 | 640 | start_blk, num_scan_bblks, |
|---|
| 764 | 641 | stop_on_cycle, &new_blk))) |
|---|
| 765 | | - goto bp_err; |
|---|
| 642 | + goto out_free_buffer; |
|---|
| 766 | 643 | if (new_blk != -1) |
|---|
| 767 | 644 | head_blk = new_blk; |
|---|
| 768 | 645 | } else { /* need to read 2 parts of log */ |
|---|
| .. | .. |
|---|
| 799 | 676 | if ((error = xlog_find_verify_cycle(log, start_blk, |
|---|
| 800 | 677 | num_scan_bblks - (int)head_blk, |
|---|
| 801 | 678 | (stop_on_cycle - 1), &new_blk))) |
|---|
| 802 | | - goto bp_err; |
|---|
| 679 | + goto out_free_buffer; |
|---|
| 803 | 680 | if (new_blk != -1) { |
|---|
| 804 | 681 | head_blk = new_blk; |
|---|
| 805 | 682 | goto validate_head; |
|---|
| .. | .. |
|---|
| 815 | 692 | if ((error = xlog_find_verify_cycle(log, |
|---|
| 816 | 693 | start_blk, (int)head_blk, |
|---|
| 817 | 694 | stop_on_cycle, &new_blk))) |
|---|
| 818 | | - goto bp_err; |
|---|
| 695 | + goto out_free_buffer; |
|---|
| 819 | 696 | if (new_blk != -1) |
|---|
| 820 | 697 | head_blk = new_blk; |
|---|
| 821 | 698 | } |
|---|
| .. | .. |
|---|
| 834 | 711 | if (error == 1) |
|---|
| 835 | 712 | error = -EIO; |
|---|
| 836 | 713 | if (error) |
|---|
| 837 | | - goto bp_err; |
|---|
| 714 | + goto out_free_buffer; |
|---|
| 838 | 715 | } else { |
|---|
| 839 | 716 | start_blk = 0; |
|---|
| 840 | 717 | ASSERT(head_blk <= INT_MAX); |
|---|
| 841 | 718 | error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); |
|---|
| 842 | 719 | if (error < 0) |
|---|
| 843 | | - goto bp_err; |
|---|
| 720 | + goto out_free_buffer; |
|---|
| 844 | 721 | if (error == 1) { |
|---|
| 845 | 722 | /* We hit the beginning of the log during our search */ |
|---|
| 846 | 723 | start_blk = log_bbnum - (num_scan_bblks - head_blk); |
|---|
| .. | .. |
|---|
| 853 | 730 | if (error == 1) |
|---|
| 854 | 731 | error = -EIO; |
|---|
| 855 | 732 | if (error) |
|---|
| 856 | | - goto bp_err; |
|---|
| 733 | + goto out_free_buffer; |
|---|
| 857 | 734 | if (new_blk != log_bbnum) |
|---|
| 858 | 735 | head_blk = new_blk; |
|---|
| 859 | 736 | } else if (error) |
|---|
| 860 | | - goto bp_err; |
|---|
| 737 | + goto out_free_buffer; |
|---|
| 861 | 738 | } |
|---|
| 862 | 739 | |
|---|
| 863 | | - xlog_put_bp(bp); |
|---|
| 740 | + kmem_free(buffer); |
|---|
| 864 | 741 | if (head_blk == log_bbnum) |
|---|
| 865 | 742 | *return_head_blk = 0; |
|---|
| 866 | 743 | else |
|---|
| .. | .. |
|---|
| 873 | 750 | */ |
|---|
| 874 | 751 | return 0; |
|---|
| 875 | 752 | |
|---|
| 876 | | - bp_err: |
|---|
| 877 | | - xlog_put_bp(bp); |
|---|
| 878 | | - |
|---|
| 753 | +out_free_buffer: |
|---|
| 754 | + kmem_free(buffer); |
|---|
| 879 | 755 | if (error) |
|---|
| 880 | 756 | xfs_warn(log->l_mp, "failed to find log head"); |
|---|
| 881 | 757 | return error; |
|---|
| .. | .. |
|---|
| 895 | 771 | xfs_daddr_t head_blk, |
|---|
| 896 | 772 | xfs_daddr_t tail_blk, |
|---|
| 897 | 773 | int count, |
|---|
| 898 | | - struct xfs_buf *bp, |
|---|
| 774 | + char *buffer, |
|---|
| 899 | 775 | xfs_daddr_t *rblk, |
|---|
| 900 | 776 | struct xlog_rec_header **rhead, |
|---|
| 901 | 777 | bool *wrapped) |
|---|
| .. | .. |
|---|
| 914 | 790 | */ |
|---|
| 915 | 791 | end_blk = head_blk > tail_blk ? tail_blk : 0; |
|---|
| 916 | 792 | for (i = (int) head_blk - 1; i >= end_blk; i--) { |
|---|
| 917 | | - error = xlog_bread(log, i, 1, bp, &offset); |
|---|
| 793 | + error = xlog_bread(log, i, 1, buffer, &offset); |
|---|
| 918 | 794 | if (error) |
|---|
| 919 | 795 | goto out_error; |
|---|
| 920 | 796 | |
|---|
| .. | .. |
|---|
| 933 | 809 | */ |
|---|
| 934 | 810 | if (tail_blk >= head_blk && found != count) { |
|---|
| 935 | 811 | for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) { |
|---|
| 936 | | - error = xlog_bread(log, i, 1, bp, &offset); |
|---|
| 812 | + error = xlog_bread(log, i, 1, buffer, &offset); |
|---|
| 937 | 813 | if (error) |
|---|
| 938 | 814 | goto out_error; |
|---|
| 939 | 815 | |
|---|
| .. | .. |
|---|
| 969 | 845 | xfs_daddr_t head_blk, |
|---|
| 970 | 846 | xfs_daddr_t tail_blk, |
|---|
| 971 | 847 | int count, |
|---|
| 972 | | - struct xfs_buf *bp, |
|---|
| 848 | + char *buffer, |
|---|
| 973 | 849 | xfs_daddr_t *rblk, |
|---|
| 974 | 850 | struct xlog_rec_header **rhead, |
|---|
| 975 | 851 | bool *wrapped) |
|---|
| .. | .. |
|---|
| 988 | 864 | */ |
|---|
| 989 | 865 | end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1; |
|---|
| 990 | 866 | for (i = (int) tail_blk; i <= end_blk; i++) { |
|---|
| 991 | | - error = xlog_bread(log, i, 1, bp, &offset); |
|---|
| 867 | + error = xlog_bread(log, i, 1, buffer, &offset); |
|---|
| 992 | 868 | if (error) |
|---|
| 993 | 869 | goto out_error; |
|---|
| 994 | 870 | |
|---|
| .. | .. |
|---|
| 1006 | 882 | */ |
|---|
| 1007 | 883 | if (tail_blk > head_blk && found != count) { |
|---|
| 1008 | 884 | for (i = 0; i < (int) head_blk; i++) { |
|---|
| 1009 | | - error = xlog_bread(log, i, 1, bp, &offset); |
|---|
| 885 | + error = xlog_bread(log, i, 1, buffer, &offset); |
|---|
| 1010 | 886 | if (error) |
|---|
| 1011 | 887 | goto out_error; |
|---|
| 1012 | 888 | |
|---|
| .. | .. |
|---|
| 1069 | 945 | int hsize) |
|---|
| 1070 | 946 | { |
|---|
| 1071 | 947 | struct xlog_rec_header *thead; |
|---|
| 1072 | | - struct xfs_buf *bp; |
|---|
| 948 | + char *buffer; |
|---|
| 1073 | 949 | xfs_daddr_t first_bad; |
|---|
| 1074 | 950 | int error = 0; |
|---|
| 1075 | 951 | bool wrapped; |
|---|
| 1076 | 952 | xfs_daddr_t tmp_tail; |
|---|
| 1077 | 953 | xfs_daddr_t orig_tail = *tail_blk; |
|---|
| 1078 | 954 | |
|---|
| 1079 | | - bp = xlog_get_bp(log, 1); |
|---|
| 1080 | | - if (!bp) |
|---|
| 955 | + buffer = xlog_alloc_buffer(log, 1); |
|---|
| 956 | + if (!buffer) |
|---|
| 1081 | 957 | return -ENOMEM; |
|---|
| 1082 | 958 | |
|---|
| 1083 | 959 | /* |
|---|
| 1084 | 960 | * Make sure the tail points to a record (returns positive count on |
|---|
| 1085 | 961 | * success). |
|---|
| 1086 | 962 | */ |
|---|
| 1087 | | - error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp, |
|---|
| 963 | + error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer, |
|---|
| 1088 | 964 | &tmp_tail, &thead, &wrapped); |
|---|
| 1089 | 965 | if (error < 0) |
|---|
| 1090 | 966 | goto out; |
|---|
| .. | .. |
|---|
| 1113 | 989 | break; |
|---|
| 1114 | 990 | |
|---|
| 1115 | 991 | /* skip to the next record; returns positive count on success */ |
|---|
| 1116 | | - error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp, |
|---|
| 1117 | | - &tmp_tail, &thead, &wrapped); |
|---|
| 992 | + error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, |
|---|
| 993 | + buffer, &tmp_tail, &thead, &wrapped); |
|---|
| 1118 | 994 | if (error < 0) |
|---|
| 1119 | 995 | goto out; |
|---|
| 1120 | 996 | |
|---|
| .. | .. |
|---|
| 1129 | 1005 | "Tail block (0x%llx) overwrite detected. Updated to 0x%llx", |
|---|
| 1130 | 1006 | orig_tail, *tail_blk); |
|---|
| 1131 | 1007 | out: |
|---|
| 1132 | | - xlog_put_bp(bp); |
|---|
| 1008 | + kmem_free(buffer); |
|---|
| 1133 | 1009 | return error; |
|---|
| 1134 | 1010 | } |
|---|
| 1135 | 1011 | |
|---|
| .. | .. |
|---|
| 1151 | 1027 | struct xlog *log, |
|---|
| 1152 | 1028 | xfs_daddr_t *head_blk, /* in/out: unverified head */ |
|---|
| 1153 | 1029 | xfs_daddr_t *tail_blk, /* out: tail block */ |
|---|
| 1154 | | - struct xfs_buf *bp, |
|---|
| 1030 | + char *buffer, |
|---|
| 1155 | 1031 | xfs_daddr_t *rhead_blk, /* start blk of last record */ |
|---|
| 1156 | 1032 | struct xlog_rec_header **rhead, /* ptr to last record */ |
|---|
| 1157 | 1033 | bool *wrapped) /* last rec. wraps phys. log */ |
|---|
| 1158 | 1034 | { |
|---|
| 1159 | 1035 | struct xlog_rec_header *tmp_rhead; |
|---|
| 1160 | | - struct xfs_buf *tmp_bp; |
|---|
| 1036 | + char *tmp_buffer; |
|---|
| 1161 | 1037 | xfs_daddr_t first_bad; |
|---|
| 1162 | 1038 | xfs_daddr_t tmp_rhead_blk; |
|---|
| 1163 | 1039 | int found; |
|---|
| .. | .. |
|---|
| 1168 | 1044 | * Check the head of the log for torn writes. Search backwards from the |
|---|
| 1169 | 1045 | * head until we hit the tail or the maximum number of log record I/Os |
|---|
| 1170 | 1046 | * that could have been in flight at one time. Use a temporary buffer so |
|---|
| 1171 | | - * we don't trash the rhead/bp pointers from the caller. |
|---|
| 1047 | + * we don't trash the rhead/buffer pointers from the caller. |
|---|
| 1172 | 1048 | */ |
|---|
| 1173 | | - tmp_bp = xlog_get_bp(log, 1); |
|---|
| 1174 | | - if (!tmp_bp) |
|---|
| 1049 | + tmp_buffer = xlog_alloc_buffer(log, 1); |
|---|
| 1050 | + if (!tmp_buffer) |
|---|
| 1175 | 1051 | return -ENOMEM; |
|---|
| 1176 | 1052 | error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, |
|---|
| 1177 | | - XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk, |
|---|
| 1178 | | - &tmp_rhead, &tmp_wrapped); |
|---|
| 1179 | | - xlog_put_bp(tmp_bp); |
|---|
| 1053 | + XLOG_MAX_ICLOGS, tmp_buffer, |
|---|
| 1054 | + &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped); |
|---|
| 1055 | + kmem_free(tmp_buffer); |
|---|
| 1180 | 1056 | if (error < 0) |
|---|
| 1181 | 1057 | return error; |
|---|
| 1182 | 1058 | |
|---|
| .. | .. |
|---|
| 1203 | 1079 | * |
|---|
| 1204 | 1080 | * Note that xlog_find_tail() clears the blocks at the new head |
|---|
| 1205 | 1081 | * (i.e., the records with invalid CRC) if the cycle number |
|---|
| 1206 | | - * matches the the current cycle. |
|---|
| 1082 | + * matches the current cycle. |
|---|
| 1207 | 1083 | */ |
|---|
| 1208 | | - found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp, |
|---|
| 1209 | | - rhead_blk, rhead, wrapped); |
|---|
| 1084 | + found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, |
|---|
| 1085 | + buffer, rhead_blk, rhead, wrapped); |
|---|
| 1210 | 1086 | if (found < 0) |
|---|
| 1211 | 1087 | return found; |
|---|
| 1212 | 1088 | if (found == 0) /* XXX: right thing to do here? */ |
|---|
| .. | .. |
|---|
| 1266 | 1142 | xfs_daddr_t *tail_blk, |
|---|
| 1267 | 1143 | struct xlog_rec_header *rhead, |
|---|
| 1268 | 1144 | xfs_daddr_t rhead_blk, |
|---|
| 1269 | | - struct xfs_buf *bp, |
|---|
| 1145 | + char *buffer, |
|---|
| 1270 | 1146 | bool *clean) |
|---|
| 1271 | 1147 | { |
|---|
| 1272 | 1148 | struct xlog_op_header *op_head; |
|---|
| .. | .. |
|---|
| 1287 | 1163 | * below. We won't want to clear the unmount record if there is one, so |
|---|
| 1288 | 1164 | * we pass the lsn of the unmount record rather than the block after it. |
|---|
| 1289 | 1165 | */ |
|---|
| 1290 | | - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { |
|---|
| 1291 | | - int h_size = be32_to_cpu(rhead->h_size); |
|---|
| 1292 | | - int h_version = be32_to_cpu(rhead->h_version); |
|---|
| 1293 | | - |
|---|
| 1294 | | - if ((h_version & XLOG_VERSION_2) && |
|---|
| 1295 | | - (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
|---|
| 1296 | | - hblks = h_size / XLOG_HEADER_CYCLE_SIZE; |
|---|
| 1297 | | - if (h_size % XLOG_HEADER_CYCLE_SIZE) |
|---|
| 1298 | | - hblks++; |
|---|
| 1299 | | - } else { |
|---|
| 1300 | | - hblks = 1; |
|---|
| 1301 | | - } |
|---|
| 1302 | | - } else { |
|---|
| 1303 | | - hblks = 1; |
|---|
| 1304 | | - } |
|---|
| 1305 | | - |
|---|
| 1166 | + hblks = xlog_logrec_hblks(log, rhead); |
|---|
| 1306 | 1167 | after_umount_blk = xlog_wrap_logbno(log, |
|---|
| 1307 | 1168 | rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len))); |
|---|
| 1308 | 1169 | |
|---|
| 1309 | 1170 | if (*head_blk == after_umount_blk && |
|---|
| 1310 | 1171 | be32_to_cpu(rhead->h_num_logops) == 1) { |
|---|
| 1311 | 1172 | umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks); |
|---|
| 1312 | | - error = xlog_bread(log, umount_data_blk, 1, bp, &offset); |
|---|
| 1173 | + error = xlog_bread(log, umount_data_blk, 1, buffer, &offset); |
|---|
| 1313 | 1174 | if (error) |
|---|
| 1314 | 1175 | return error; |
|---|
| 1315 | 1176 | |
|---|
| .. | .. |
|---|
| 1388 | 1249 | { |
|---|
| 1389 | 1250 | xlog_rec_header_t *rhead; |
|---|
| 1390 | 1251 | char *offset = NULL; |
|---|
| 1391 | | - xfs_buf_t *bp; |
|---|
| 1252 | + char *buffer; |
|---|
| 1392 | 1253 | int error; |
|---|
| 1393 | 1254 | xfs_daddr_t rhead_blk; |
|---|
| 1394 | 1255 | xfs_lsn_t tail_lsn; |
|---|
| .. | .. |
|---|
| 1402 | 1263 | return error; |
|---|
| 1403 | 1264 | ASSERT(*head_blk < INT_MAX); |
|---|
| 1404 | 1265 | |
|---|
| 1405 | | - bp = xlog_get_bp(log, 1); |
|---|
| 1406 | | - if (!bp) |
|---|
| 1266 | + buffer = xlog_alloc_buffer(log, 1); |
|---|
| 1267 | + if (!buffer) |
|---|
| 1407 | 1268 | return -ENOMEM; |
|---|
| 1408 | 1269 | if (*head_blk == 0) { /* special case */ |
|---|
| 1409 | | - error = xlog_bread(log, 0, 1, bp, &offset); |
|---|
| 1270 | + error = xlog_bread(log, 0, 1, buffer, &offset); |
|---|
| 1410 | 1271 | if (error) |
|---|
| 1411 | 1272 | goto done; |
|---|
| 1412 | 1273 | |
|---|
| .. | .. |
|---|
| 1422 | 1283 | * block. This wraps all the way back around to the head so something is |
|---|
| 1423 | 1284 | * seriously wrong if we can't find it. |
|---|
| 1424 | 1285 | */ |
|---|
| 1425 | | - error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, |
|---|
| 1286 | + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, |
|---|
| 1426 | 1287 | &rhead_blk, &rhead, &wrapped); |
|---|
| 1427 | 1288 | if (error < 0) |
|---|
| 1428 | | - return error; |
|---|
| 1289 | + goto done; |
|---|
| 1429 | 1290 | if (!error) { |
|---|
| 1430 | 1291 | xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); |
|---|
| 1431 | | - return -EIO; |
|---|
| 1292 | + error = -EFSCORRUPTED; |
|---|
| 1293 | + goto done; |
|---|
| 1432 | 1294 | } |
|---|
| 1433 | 1295 | *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); |
|---|
| 1434 | 1296 | |
|---|
| .. | .. |
|---|
| 1443 | 1305 | * state to determine whether recovery is necessary. |
|---|
| 1444 | 1306 | */ |
|---|
| 1445 | 1307 | error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, |
|---|
| 1446 | | - rhead_blk, bp, &clean); |
|---|
| 1308 | + rhead_blk, buffer, &clean); |
|---|
| 1447 | 1309 | if (error) |
|---|
| 1448 | 1310 | goto done; |
|---|
| 1449 | 1311 | |
|---|
| .. | .. |
|---|
| 1460 | 1322 | if (!clean) { |
|---|
| 1461 | 1323 | xfs_daddr_t orig_head = *head_blk; |
|---|
| 1462 | 1324 | |
|---|
| 1463 | | - error = xlog_verify_head(log, head_blk, tail_blk, bp, |
|---|
| 1325 | + error = xlog_verify_head(log, head_blk, tail_blk, buffer, |
|---|
| 1464 | 1326 | &rhead_blk, &rhead, &wrapped); |
|---|
| 1465 | 1327 | if (error) |
|---|
| 1466 | 1328 | goto done; |
|---|
| .. | .. |
|---|
| 1471 | 1333 | wrapped); |
|---|
| 1472 | 1334 | tail_lsn = atomic64_read(&log->l_tail_lsn); |
|---|
| 1473 | 1335 | error = xlog_check_unmount_rec(log, head_blk, tail_blk, |
|---|
| 1474 | | - rhead, rhead_blk, bp, |
|---|
| 1336 | + rhead, rhead_blk, buffer, |
|---|
| 1475 | 1337 | &clean); |
|---|
| 1476 | 1338 | if (error) |
|---|
| 1477 | 1339 | goto done; |
|---|
| .. | .. |
|---|
| 1505 | 1367 | * But... if the -device- itself is readonly, just skip this. |
|---|
| 1506 | 1368 | * We can't recover this device anyway, so it won't matter. |
|---|
| 1507 | 1369 | */ |
|---|
| 1508 | | - if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) |
|---|
| 1370 | + if (!xfs_readonly_buftarg(log->l_targ)) |
|---|
| 1509 | 1371 | error = xlog_clear_stale_blocks(log, tail_lsn); |
|---|
| 1510 | 1372 | |
|---|
| 1511 | 1373 | done: |
|---|
| 1512 | | - xlog_put_bp(bp); |
|---|
| 1374 | + kmem_free(buffer); |
|---|
| 1513 | 1375 | |
|---|
| 1514 | 1376 | if (error) |
|---|
| 1515 | 1377 | xfs_warn(log->l_mp, "failed to locate log tail"); |
|---|
| .. | .. |
|---|
| 1537 | 1399 | struct xlog *log, |
|---|
| 1538 | 1400 | xfs_daddr_t *blk_no) |
|---|
| 1539 | 1401 | { |
|---|
| 1540 | | - xfs_buf_t *bp; |
|---|
| 1402 | + char *buffer; |
|---|
| 1541 | 1403 | char *offset; |
|---|
| 1542 | 1404 | uint first_cycle, last_cycle; |
|---|
| 1543 | 1405 | xfs_daddr_t new_blk, last_blk, start_blk; |
|---|
| .. | .. |
|---|
| 1547 | 1409 | *blk_no = 0; |
|---|
| 1548 | 1410 | |
|---|
| 1549 | 1411 | /* check totally zeroed log */ |
|---|
| 1550 | | - bp = xlog_get_bp(log, 1); |
|---|
| 1551 | | - if (!bp) |
|---|
| 1412 | + buffer = xlog_alloc_buffer(log, 1); |
|---|
| 1413 | + if (!buffer) |
|---|
| 1552 | 1414 | return -ENOMEM; |
|---|
| 1553 | | - error = xlog_bread(log, 0, 1, bp, &offset); |
|---|
| 1415 | + error = xlog_bread(log, 0, 1, buffer, &offset); |
|---|
| 1554 | 1416 | if (error) |
|---|
| 1555 | | - goto bp_err; |
|---|
| 1417 | + goto out_free_buffer; |
|---|
| 1556 | 1418 | |
|---|
| 1557 | 1419 | first_cycle = xlog_get_cycle(offset); |
|---|
| 1558 | 1420 | if (first_cycle == 0) { /* completely zeroed log */ |
|---|
| 1559 | 1421 | *blk_no = 0; |
|---|
| 1560 | | - xlog_put_bp(bp); |
|---|
| 1422 | + kmem_free(buffer); |
|---|
| 1561 | 1423 | return 1; |
|---|
| 1562 | 1424 | } |
|---|
| 1563 | 1425 | |
|---|
| 1564 | 1426 | /* check partially zeroed log */ |
|---|
| 1565 | | - error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); |
|---|
| 1427 | + error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset); |
|---|
| 1566 | 1428 | if (error) |
|---|
| 1567 | | - goto bp_err; |
|---|
| 1429 | + goto out_free_buffer; |
|---|
| 1568 | 1430 | |
|---|
| 1569 | 1431 | last_cycle = xlog_get_cycle(offset); |
|---|
| 1570 | 1432 | if (last_cycle != 0) { /* log completely written to */ |
|---|
| 1571 | | - xlog_put_bp(bp); |
|---|
| 1433 | + kmem_free(buffer); |
|---|
| 1572 | 1434 | return 0; |
|---|
| 1573 | 1435 | } |
|---|
| 1574 | 1436 | |
|---|
| 1575 | 1437 | /* we have a partially zeroed log */ |
|---|
| 1576 | 1438 | last_blk = log_bbnum-1; |
|---|
| 1577 | | - if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) |
|---|
| 1578 | | - goto bp_err; |
|---|
| 1439 | + error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0); |
|---|
| 1440 | + if (error) |
|---|
| 1441 | + goto out_free_buffer; |
|---|
| 1579 | 1442 | |
|---|
| 1580 | 1443 | /* |
|---|
| 1581 | 1444 | * Validate the answer. Because there is no way to guarantee that |
|---|
| .. | .. |
|---|
| 1598 | 1461 | */ |
|---|
| 1599 | 1462 | if ((error = xlog_find_verify_cycle(log, start_blk, |
|---|
| 1600 | 1463 | (int)num_scan_bblks, 0, &new_blk))) |
|---|
| 1601 | | - goto bp_err; |
|---|
| 1464 | + goto out_free_buffer; |
|---|
| 1602 | 1465 | if (new_blk != -1) |
|---|
| 1603 | 1466 | last_blk = new_blk; |
|---|
| 1604 | 1467 | |
|---|
| .. | .. |
|---|
| 1610 | 1473 | if (error == 1) |
|---|
| 1611 | 1474 | error = -EIO; |
|---|
| 1612 | 1475 | if (error) |
|---|
| 1613 | | - goto bp_err; |
|---|
| 1476 | + goto out_free_buffer; |
|---|
| 1614 | 1477 | |
|---|
| 1615 | 1478 | *blk_no = last_blk; |
|---|
| 1616 | | -bp_err: |
|---|
| 1617 | | - xlog_put_bp(bp); |
|---|
| 1479 | +out_free_buffer: |
|---|
| 1480 | + kmem_free(buffer); |
|---|
| 1618 | 1481 | if (error) |
|---|
| 1619 | 1482 | return error; |
|---|
| 1620 | 1483 | return 1; |
|---|
| .. | .. |
|---|
| 1657 | 1520 | int tail_block) |
|---|
| 1658 | 1521 | { |
|---|
| 1659 | 1522 | char *offset; |
|---|
| 1660 | | - xfs_buf_t *bp; |
|---|
| 1523 | + char *buffer; |
|---|
| 1661 | 1524 | int balign, ealign; |
|---|
| 1662 | 1525 | int sectbb = log->l_sectBBsize; |
|---|
| 1663 | 1526 | int end_block = start_block + blocks; |
|---|
| .. | .. |
|---|
| 1674 | 1537 | bufblks = 1 << ffs(blocks); |
|---|
| 1675 | 1538 | while (bufblks > log->l_logBBsize) |
|---|
| 1676 | 1539 | bufblks >>= 1; |
|---|
| 1677 | | - while (!(bp = xlog_get_bp(log, bufblks))) { |
|---|
| 1540 | + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { |
|---|
| 1678 | 1541 | bufblks >>= 1; |
|---|
| 1679 | 1542 | if (bufblks < sectbb) |
|---|
| 1680 | 1543 | return -ENOMEM; |
|---|
| .. | .. |
|---|
| 1686 | 1549 | */ |
|---|
| 1687 | 1550 | balign = round_down(start_block, sectbb); |
|---|
| 1688 | 1551 | if (balign != start_block) { |
|---|
| 1689 | | - error = xlog_bread_noalign(log, start_block, 1, bp); |
|---|
| 1552 | + error = xlog_bread_noalign(log, start_block, 1, buffer); |
|---|
| 1690 | 1553 | if (error) |
|---|
| 1691 | | - goto out_put_bp; |
|---|
| 1554 | + goto out_free_buffer; |
|---|
| 1692 | 1555 | |
|---|
| 1693 | 1556 | j = start_block - balign; |
|---|
| 1694 | 1557 | } |
|---|
| .. | .. |
|---|
| 1705 | 1568 | */ |
|---|
| 1706 | 1569 | ealign = round_down(end_block, sectbb); |
|---|
| 1707 | 1570 | if (j == 0 && (start_block + endcount > ealign)) { |
|---|
| 1708 | | - offset = bp->b_addr + BBTOB(ealign - start_block); |
|---|
| 1709 | | - error = xlog_bread_offset(log, ealign, sectbb, |
|---|
| 1710 | | - bp, offset); |
|---|
| 1571 | + error = xlog_bread_noalign(log, ealign, sectbb, |
|---|
| 1572 | + buffer + BBTOB(ealign - start_block)); |
|---|
| 1711 | 1573 | if (error) |
|---|
| 1712 | 1574 | break; |
|---|
| 1713 | 1575 | |
|---|
| 1714 | 1576 | } |
|---|
| 1715 | 1577 | |
|---|
| 1716 | | - offset = xlog_align(log, start_block, endcount, bp); |
|---|
| 1578 | + offset = buffer + xlog_align(log, start_block); |
|---|
| 1717 | 1579 | for (; j < endcount; j++) { |
|---|
| 1718 | 1580 | xlog_add_record(log, offset, cycle, i+j, |
|---|
| 1719 | 1581 | tail_cycle, tail_block); |
|---|
| 1720 | 1582 | offset += BBSIZE; |
|---|
| 1721 | 1583 | } |
|---|
| 1722 | | - error = xlog_bwrite(log, start_block, endcount, bp); |
|---|
| 1584 | + error = xlog_bwrite(log, start_block, endcount, buffer); |
|---|
| 1723 | 1585 | if (error) |
|---|
| 1724 | 1586 | break; |
|---|
| 1725 | 1587 | start_block += endcount; |
|---|
| 1726 | 1588 | j = 0; |
|---|
| 1727 | 1589 | } |
|---|
| 1728 | 1590 | |
|---|
| 1729 | | - out_put_bp: |
|---|
| 1730 | | - xlog_put_bp(bp); |
|---|
| 1591 | +out_free_buffer: |
|---|
| 1592 | + kmem_free(buffer); |
|---|
| 1731 | 1593 | return error; |
|---|
| 1732 | 1594 | } |
|---|
| 1733 | 1595 | |
|---|
| .. | .. |
|---|
| 1777 | 1639 | * the distance from the beginning of the log to the |
|---|
| 1778 | 1640 | * tail. |
|---|
| 1779 | 1641 | */ |
|---|
| 1780 | | - if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) { |
|---|
| 1781 | | - XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)", |
|---|
| 1782 | | - XFS_ERRLEVEL_LOW, log->l_mp); |
|---|
| 1642 | + if (XFS_IS_CORRUPT(log->l_mp, |
|---|
| 1643 | + head_block < tail_block || |
|---|
| 1644 | + head_block >= log->l_logBBsize)) |
|---|
| 1783 | 1645 | return -EFSCORRUPTED; |
|---|
| 1784 | | - } |
|---|
| 1785 | 1646 | tail_distance = tail_block + (log->l_logBBsize - head_block); |
|---|
| 1786 | 1647 | } else { |
|---|
| 1787 | 1648 | /* |
|---|
| .. | .. |
|---|
| 1789 | 1650 | * so the distance from the head to the tail is just |
|---|
| 1790 | 1651 | * the tail block minus the head block. |
|---|
| 1791 | 1652 | */ |
|---|
| 1792 | | - if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){ |
|---|
| 1793 | | - XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)", |
|---|
| 1794 | | - XFS_ERRLEVEL_LOW, log->l_mp); |
|---|
| 1653 | + if (XFS_IS_CORRUPT(log->l_mp, |
|---|
| 1654 | + head_block >= tail_block || |
|---|
| 1655 | + head_cycle != tail_cycle + 1)) |
|---|
| 1795 | 1656 | return -EFSCORRUPTED; |
|---|
| 1796 | | - } |
|---|
| 1797 | 1657 | tail_distance = tail_block - head_block; |
|---|
| 1798 | 1658 | } |
|---|
| 1799 | 1659 | |
|---|
| .. | .. |
|---|
| 1863 | 1723 | return 0; |
|---|
| 1864 | 1724 | } |
|---|
| 1865 | 1725 | |
|---|
| 1726 | +/* |
|---|
| 1727 | + * Release the recovered intent item in the AIL that matches the given intent |
|---|
| 1728 | + * type and intent id. |
|---|
| 1729 | + */ |
|---|
| 1730 | +void |
|---|
| 1731 | +xlog_recover_release_intent( |
|---|
| 1732 | + struct xlog *log, |
|---|
| 1733 | + unsigned short intent_type, |
|---|
| 1734 | + uint64_t intent_id) |
|---|
| 1735 | +{ |
|---|
| 1736 | + struct xfs_ail_cursor cur; |
|---|
| 1737 | + struct xfs_log_item *lip; |
|---|
| 1738 | + struct xfs_ail *ailp = log->l_ailp; |
|---|
| 1739 | + |
|---|
| 1740 | + spin_lock(&ailp->ail_lock); |
|---|
| 1741 | + for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL; |
|---|
| 1742 | + lip = xfs_trans_ail_cursor_next(ailp, &cur)) { |
|---|
| 1743 | + if (lip->li_type != intent_type) |
|---|
| 1744 | + continue; |
|---|
| 1745 | + if (!lip->li_ops->iop_match(lip, intent_id)) |
|---|
| 1746 | + continue; |
|---|
| 1747 | + |
|---|
| 1748 | + spin_unlock(&ailp->ail_lock); |
|---|
| 1749 | + lip->li_ops->iop_release(lip); |
|---|
| 1750 | + spin_lock(&ailp->ail_lock); |
|---|
| 1751 | + break; |
|---|
| 1752 | + } |
|---|
| 1753 | + |
|---|
| 1754 | + xfs_trans_ail_cursor_done(&cur); |
|---|
| 1755 | + spin_unlock(&ailp->ail_lock); |
|---|
| 1756 | +} |
|---|
| 1757 | + |
|---|
| 1866 | 1758 | /****************************************************************************** |
|---|
| 1867 | 1759 | * |
|---|
| 1868 | 1760 | * Log recover routines |
|---|
| 1869 | 1761 | * |
|---|
| 1870 | 1762 | ****************************************************************************** |
|---|
| 1871 | 1763 | */ |
|---|
| 1764 | +static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = { |
|---|
| 1765 | + &xlog_buf_item_ops, |
|---|
| 1766 | + &xlog_inode_item_ops, |
|---|
| 1767 | + &xlog_dquot_item_ops, |
|---|
| 1768 | + &xlog_quotaoff_item_ops, |
|---|
| 1769 | + &xlog_icreate_item_ops, |
|---|
| 1770 | + &xlog_efi_item_ops, |
|---|
| 1771 | + &xlog_efd_item_ops, |
|---|
| 1772 | + &xlog_rui_item_ops, |
|---|
| 1773 | + &xlog_rud_item_ops, |
|---|
| 1774 | + &xlog_cui_item_ops, |
|---|
| 1775 | + &xlog_cud_item_ops, |
|---|
| 1776 | + &xlog_bui_item_ops, |
|---|
| 1777 | + &xlog_bud_item_ops, |
|---|
| 1778 | +}; |
|---|
| 1779 | + |
|---|
| 1780 | +static const struct xlog_recover_item_ops * |
|---|
| 1781 | +xlog_find_item_ops( |
|---|
| 1782 | + struct xlog_recover_item *item) |
|---|
| 1783 | +{ |
|---|
| 1784 | + unsigned int i; |
|---|
| 1785 | + |
|---|
| 1786 | + for (i = 0; i < ARRAY_SIZE(xlog_recover_item_ops); i++) |
|---|
| 1787 | + if (ITEM_TYPE(item) == xlog_recover_item_ops[i]->item_type) |
|---|
| 1788 | + return xlog_recover_item_ops[i]; |
|---|
| 1789 | + |
|---|
| 1790 | + return NULL; |
|---|
| 1791 | +} |
|---|
| 1872 | 1792 | |
|---|
| 1873 | 1793 | /* |
|---|
| 1874 | 1794 | * Sort the log items in the transaction. |
|---|
| .. | .. |
|---|
| 1925 | 1845 | struct xlog_recover *trans, |
|---|
| 1926 | 1846 | int pass) |
|---|
| 1927 | 1847 | { |
|---|
| 1928 | | - xlog_recover_item_t *item, *n; |
|---|
| 1848 | + struct xlog_recover_item *item, *n; |
|---|
| 1929 | 1849 | int error = 0; |
|---|
| 1930 | 1850 | LIST_HEAD(sort_list); |
|---|
| 1931 | 1851 | LIST_HEAD(cancel_list); |
|---|
| 1932 | 1852 | LIST_HEAD(buffer_list); |
|---|
| 1933 | 1853 | LIST_HEAD(inode_buffer_list); |
|---|
| 1934 | | - LIST_HEAD(inode_list); |
|---|
| 1854 | + LIST_HEAD(item_list); |
|---|
| 1935 | 1855 | |
|---|
| 1936 | 1856 | list_splice_init(&trans->r_itemq, &sort_list); |
|---|
| 1937 | 1857 | list_for_each_entry_safe(item, n, &sort_list, ri_list) { |
|---|
| 1938 | | - xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
|---|
| 1858 | + enum xlog_recover_reorder fate = XLOG_REORDER_ITEM_LIST; |
|---|
| 1939 | 1859 | |
|---|
| 1940 | | - switch (ITEM_TYPE(item)) { |
|---|
| 1941 | | - case XFS_LI_ICREATE: |
|---|
| 1942 | | - list_move_tail(&item->ri_list, &buffer_list); |
|---|
| 1943 | | - break; |
|---|
| 1944 | | - case XFS_LI_BUF: |
|---|
| 1945 | | - if (buf_f->blf_flags & XFS_BLF_CANCEL) { |
|---|
| 1946 | | - trace_xfs_log_recover_item_reorder_head(log, |
|---|
| 1947 | | - trans, item, pass); |
|---|
| 1948 | | - list_move(&item->ri_list, &cancel_list); |
|---|
| 1949 | | - break; |
|---|
| 1950 | | - } |
|---|
| 1951 | | - if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
|---|
| 1952 | | - list_move(&item->ri_list, &inode_buffer_list); |
|---|
| 1953 | | - break; |
|---|
| 1954 | | - } |
|---|
| 1955 | | - list_move_tail(&item->ri_list, &buffer_list); |
|---|
| 1956 | | - break; |
|---|
| 1957 | | - case XFS_LI_INODE: |
|---|
| 1958 | | - case XFS_LI_DQUOT: |
|---|
| 1959 | | - case XFS_LI_QUOTAOFF: |
|---|
| 1960 | | - case XFS_LI_EFD: |
|---|
| 1961 | | - case XFS_LI_EFI: |
|---|
| 1962 | | - case XFS_LI_RUI: |
|---|
| 1963 | | - case XFS_LI_RUD: |
|---|
| 1964 | | - case XFS_LI_CUI: |
|---|
| 1965 | | - case XFS_LI_CUD: |
|---|
| 1966 | | - case XFS_LI_BUI: |
|---|
| 1967 | | - case XFS_LI_BUD: |
|---|
| 1968 | | - trace_xfs_log_recover_item_reorder_tail(log, |
|---|
| 1969 | | - trans, item, pass); |
|---|
| 1970 | | - list_move_tail(&item->ri_list, &inode_list); |
|---|
| 1971 | | - break; |
|---|
| 1972 | | - default: |
|---|
| 1860 | + item->ri_ops = xlog_find_item_ops(item); |
|---|
| 1861 | + if (!item->ri_ops) { |
|---|
| 1973 | 1862 | xfs_warn(log->l_mp, |
|---|
| 1974 | | - "%s: unrecognized type of log operation", |
|---|
| 1975 | | - __func__); |
|---|
| 1863 | + "%s: unrecognized type of log operation (%d)", |
|---|
| 1864 | + __func__, ITEM_TYPE(item)); |
|---|
| 1976 | 1865 | ASSERT(0); |
|---|
| 1977 | 1866 | /* |
|---|
| 1978 | 1867 | * return the remaining items back to the transaction |
|---|
| .. | .. |
|---|
| 1980 | 1869 | */ |
|---|
| 1981 | 1870 | if (!list_empty(&sort_list)) |
|---|
| 1982 | 1871 | list_splice_init(&sort_list, &trans->r_itemq); |
|---|
| 1983 | | - error = -EIO; |
|---|
| 1984 | | - goto out; |
|---|
| 1872 | + error = -EFSCORRUPTED; |
|---|
| 1873 | + break; |
|---|
| 1874 | + } |
|---|
| 1875 | + |
|---|
| 1876 | + if (item->ri_ops->reorder) |
|---|
| 1877 | + fate = item->ri_ops->reorder(item); |
|---|
| 1878 | + |
|---|
| 1879 | + switch (fate) { |
|---|
| 1880 | + case XLOG_REORDER_BUFFER_LIST: |
|---|
| 1881 | + list_move_tail(&item->ri_list, &buffer_list); |
|---|
| 1882 | + break; |
|---|
| 1883 | + case XLOG_REORDER_CANCEL_LIST: |
|---|
| 1884 | + trace_xfs_log_recover_item_reorder_head(log, |
|---|
| 1885 | + trans, item, pass); |
|---|
| 1886 | + list_move(&item->ri_list, &cancel_list); |
|---|
| 1887 | + break; |
|---|
| 1888 | + case XLOG_REORDER_INODE_BUFFER_LIST: |
|---|
| 1889 | + list_move(&item->ri_list, &inode_buffer_list); |
|---|
| 1890 | + break; |
|---|
| 1891 | + case XLOG_REORDER_ITEM_LIST: |
|---|
| 1892 | + trace_xfs_log_recover_item_reorder_tail(log, |
|---|
| 1893 | + trans, item, pass); |
|---|
| 1894 | + list_move_tail(&item->ri_list, &item_list); |
|---|
| 1895 | + break; |
|---|
| 1985 | 1896 | } |
|---|
| 1986 | 1897 | } |
|---|
| 1987 | | -out: |
|---|
| 1898 | + |
|---|
| 1988 | 1899 | ASSERT(list_empty(&sort_list)); |
|---|
| 1989 | 1900 | if (!list_empty(&buffer_list)) |
|---|
| 1990 | 1901 | list_splice(&buffer_list, &trans->r_itemq); |
|---|
| 1991 | | - if (!list_empty(&inode_list)) |
|---|
| 1992 | | - list_splice_tail(&inode_list, &trans->r_itemq); |
|---|
| 1902 | + if (!list_empty(&item_list)) |
|---|
| 1903 | + list_splice_tail(&item_list, &trans->r_itemq); |
|---|
| 1993 | 1904 | if (!list_empty(&inode_buffer_list)) |
|---|
| 1994 | 1905 | list_splice_tail(&inode_buffer_list, &trans->r_itemq); |
|---|
| 1995 | 1906 | if (!list_empty(&cancel_list)) |
|---|
| .. | .. |
|---|
| 1997 | 1908 | return error; |
|---|
| 1998 | 1909 | } |
|---|
| 1999 | 1910 | |
|---|
| 2000 | | -/* |
|---|
| 2001 | | - * Build up the table of buf cancel records so that we don't replay |
|---|
| 2002 | | - * cancelled data in the second pass. For buffer records that are |
|---|
| 2003 | | - * not cancel records, there is nothing to do here so we just return. |
|---|
| 2004 | | - * |
|---|
| 2005 | | - * If we get a cancel record which is already in the table, this indicates |
|---|
| 2006 | | - * that the buffer was cancelled multiple times. In order to ensure |
|---|
| 2007 | | - * that during pass 2 we keep the record in the table until we reach its |
|---|
| 2008 | | - * last occurrence in the log, we keep a reference count in the cancel |
|---|
| 2009 | | - * record in the table to tell us how many times we expect to see this |
|---|
| 2010 | | - * record during the second pass. |
|---|
| 2011 | | - */ |
|---|
| 2012 | | -STATIC int |
|---|
| 2013 | | -xlog_recover_buffer_pass1( |
|---|
| 2014 | | - struct xlog *log, |
|---|
| 2015 | | - struct xlog_recover_item *item) |
|---|
| 2016 | | -{ |
|---|
| 2017 | | - xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
|---|
| 2018 | | - struct list_head *bucket; |
|---|
| 2019 | | - struct xfs_buf_cancel *bcp; |
|---|
| 2020 | | - |
|---|
| 2021 | | - /* |
|---|
| 2022 | | - * If this isn't a cancel buffer item, then just return. |
|---|
| 2023 | | - */ |
|---|
| 2024 | | - if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { |
|---|
| 2025 | | - trace_xfs_log_recover_buf_not_cancel(log, buf_f); |
|---|
| 2026 | | - return 0; |
|---|
| 2027 | | - } |
|---|
| 2028 | | - |
|---|
| 2029 | | - /* |
|---|
| 2030 | | - * Insert an xfs_buf_cancel record into the hash table of them. |
|---|
| 2031 | | - * If there is already an identical record, bump its reference count. |
|---|
| 2032 | | - */ |
|---|
| 2033 | | - bucket = XLOG_BUF_CANCEL_BUCKET(log, buf_f->blf_blkno); |
|---|
| 2034 | | - list_for_each_entry(bcp, bucket, bc_list) { |
|---|
| 2035 | | - if (bcp->bc_blkno == buf_f->blf_blkno && |
|---|
| 2036 | | - bcp->bc_len == buf_f->blf_len) { |
|---|
| 2037 | | - bcp->bc_refcount++; |
|---|
| 2038 | | - trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f); |
|---|
| 2039 | | - return 0; |
|---|
| 2040 | | - } |
|---|
| 2041 | | - } |
|---|
| 2042 | | - |
|---|
| 2043 | | - bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP); |
|---|
| 2044 | | - bcp->bc_blkno = buf_f->blf_blkno; |
|---|
| 2045 | | - bcp->bc_len = buf_f->blf_len; |
|---|
| 2046 | | - bcp->bc_refcount = 1; |
|---|
| 2047 | | - list_add_tail(&bcp->bc_list, bucket); |
|---|
| 2048 | | - |
|---|
| 2049 | | - trace_xfs_log_recover_buf_cancel_add(log, buf_f); |
|---|
| 2050 | | - return 0; |
|---|
| 2051 | | -} |
|---|
| 2052 | | - |
|---|
| 2053 | | -/* |
|---|
| 2054 | | - * Check to see whether the buffer being recovered has a corresponding |
|---|
| 2055 | | - * entry in the buffer cancel record table. If it is, return the cancel |
|---|
| 2056 | | - * buffer structure to the caller. |
|---|
| 2057 | | - */ |
|---|
| 2058 | | -STATIC struct xfs_buf_cancel * |
|---|
| 2059 | | -xlog_peek_buffer_cancelled( |
|---|
| 1911 | +void |
|---|
| 1912 | +xlog_buf_readahead( |
|---|
| 2060 | 1913 | struct xlog *log, |
|---|
| 2061 | 1914 | xfs_daddr_t blkno, |
|---|
| 2062 | 1915 | uint len, |
|---|
| 2063 | | - unsigned short flags) |
|---|
| 1916 | + const struct xfs_buf_ops *ops) |
|---|
| 2064 | 1917 | { |
|---|
| 2065 | | - struct list_head *bucket; |
|---|
| 2066 | | - struct xfs_buf_cancel *bcp; |
|---|
| 2067 | | - |
|---|
| 2068 | | - if (!log->l_buf_cancel_table) { |
|---|
| 2069 | | - /* empty table means no cancelled buffers in the log */ |
|---|
| 2070 | | - ASSERT(!(flags & XFS_BLF_CANCEL)); |
|---|
| 2071 | | - return NULL; |
|---|
| 2072 | | - } |
|---|
| 2073 | | - |
|---|
| 2074 | | - bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); |
|---|
| 2075 | | - list_for_each_entry(bcp, bucket, bc_list) { |
|---|
| 2076 | | - if (bcp->bc_blkno == blkno && bcp->bc_len == len) |
|---|
| 2077 | | - return bcp; |
|---|
| 2078 | | - } |
|---|
| 2079 | | - |
|---|
| 2080 | | - /* |
|---|
| 2081 | | - * We didn't find a corresponding entry in the table, so return 0 so |
|---|
| 2082 | | - * that the buffer is NOT cancelled. |
|---|
| 2083 | | - */ |
|---|
| 2084 | | - ASSERT(!(flags & XFS_BLF_CANCEL)); |
|---|
| 2085 | | - return NULL; |
|---|
| 2086 | | -} |
|---|
| 2087 | | - |
|---|
| 2088 | | -/* |
|---|
| 2089 | | - * If the buffer is being cancelled then return 1 so that it will be cancelled, |
|---|
| 2090 | | - * otherwise return 0. If the buffer is actually a buffer cancel item |
|---|
| 2091 | | - * (XFS_BLF_CANCEL is set), then decrement the refcount on the entry in the |
|---|
| 2092 | | - * table and remove it from the table if this is the last reference. |
|---|
| 2093 | | - * |
|---|
| 2094 | | - * We remove the cancel record from the table when we encounter its last |
|---|
| 2095 | | - * occurrence in the log so that if the same buffer is re-used again after its |
|---|
| 2096 | | - * last cancellation we actually replay the changes made at that point. |
|---|
| 2097 | | - */ |
|---|
| 2098 | | -STATIC int |
|---|
| 2099 | | -xlog_check_buffer_cancelled( |
|---|
| 2100 | | - struct xlog *log, |
|---|
| 2101 | | - xfs_daddr_t blkno, |
|---|
| 2102 | | - uint len, |
|---|
| 2103 | | - unsigned short flags) |
|---|
| 2104 | | -{ |
|---|
| 2105 | | - struct xfs_buf_cancel *bcp; |
|---|
| 2106 | | - |
|---|
| 2107 | | - bcp = xlog_peek_buffer_cancelled(log, blkno, len, flags); |
|---|
| 2108 | | - if (!bcp) |
|---|
| 2109 | | - return 0; |
|---|
| 2110 | | - |
|---|
| 2111 | | - /* |
|---|
| 2112 | | - * We've go a match, so return 1 so that the recovery of this buffer |
|---|
| 2113 | | - * is cancelled. If this buffer is actually a buffer cancel log |
|---|
| 2114 | | - * item, then decrement the refcount on the one in the table and |
|---|
| 2115 | | - * remove it if this is the last reference. |
|---|
| 2116 | | - */ |
|---|
| 2117 | | - if (flags & XFS_BLF_CANCEL) { |
|---|
| 2118 | | - if (--bcp->bc_refcount == 0) { |
|---|
| 2119 | | - list_del(&bcp->bc_list); |
|---|
| 2120 | | - kmem_free(bcp); |
|---|
| 2121 | | - } |
|---|
| 2122 | | - } |
|---|
| 2123 | | - return 1; |
|---|
| 2124 | | -} |
|---|
| 2125 | | - |
|---|
| 2126 | | -/* |
|---|
| 2127 | | - * Perform recovery for a buffer full of inodes. In these buffers, the only |
|---|
| 2128 | | - * data which should be recovered is that which corresponds to the |
|---|
| 2129 | | - * di_next_unlinked pointers in the on disk inode structures. The rest of the |
|---|
| 2130 | | - * data for the inodes is always logged through the inodes themselves rather |
|---|
| 2131 | | - * than the inode buffer and is recovered in xlog_recover_inode_pass2(). |
|---|
| 2132 | | - * |
|---|
| 2133 | | - * The only time when buffers full of inodes are fully recovered is when the |
|---|
| 2134 | | - * buffer is full of newly allocated inodes. In this case the buffer will |
|---|
| 2135 | | - * not be marked as an inode buffer and so will be sent to |
|---|
| 2136 | | - * xlog_recover_do_reg_buffer() below during recovery. |
|---|
| 2137 | | - */ |
|---|
| 2138 | | -STATIC int |
|---|
| 2139 | | -xlog_recover_do_inode_buffer( |
|---|
| 2140 | | - struct xfs_mount *mp, |
|---|
| 2141 | | - xlog_recover_item_t *item, |
|---|
| 2142 | | - struct xfs_buf *bp, |
|---|
| 2143 | | - xfs_buf_log_format_t *buf_f) |
|---|
| 2144 | | -{ |
|---|
| 2145 | | - int i; |
|---|
| 2146 | | - int item_index = 0; |
|---|
| 2147 | | - int bit = 0; |
|---|
| 2148 | | - int nbits = 0; |
|---|
| 2149 | | - int reg_buf_offset = 0; |
|---|
| 2150 | | - int reg_buf_bytes = 0; |
|---|
| 2151 | | - int next_unlinked_offset; |
|---|
| 2152 | | - int inodes_per_buf; |
|---|
| 2153 | | - xfs_agino_t *logged_nextp; |
|---|
| 2154 | | - xfs_agino_t *buffer_nextp; |
|---|
| 2155 | | - |
|---|
| 2156 | | - trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); |
|---|
| 2157 | | - |
|---|
| 2158 | | - /* |
|---|
| 2159 | | - * Post recovery validation only works properly on CRC enabled |
|---|
| 2160 | | - * filesystems. |
|---|
| 2161 | | - */ |
|---|
| 2162 | | - if (xfs_sb_version_hascrc(&mp->m_sb)) |
|---|
| 2163 | | - bp->b_ops = &xfs_inode_buf_ops; |
|---|
| 2164 | | - |
|---|
| 2165 | | - inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; |
|---|
| 2166 | | - for (i = 0; i < inodes_per_buf; i++) { |
|---|
| 2167 | | - next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + |
|---|
| 2168 | | - offsetof(xfs_dinode_t, di_next_unlinked); |
|---|
| 2169 | | - |
|---|
| 2170 | | - while (next_unlinked_offset >= |
|---|
| 2171 | | - (reg_buf_offset + reg_buf_bytes)) { |
|---|
| 2172 | | - /* |
|---|
| 2173 | | - * The next di_next_unlinked field is beyond |
|---|
| 2174 | | - * the current logged region. Find the next |
|---|
| 2175 | | - * logged region that contains or is beyond |
|---|
| 2176 | | - * the current di_next_unlinked field. |
|---|
| 2177 | | - */ |
|---|
| 2178 | | - bit += nbits; |
|---|
| 2179 | | - bit = xfs_next_bit(buf_f->blf_data_map, |
|---|
| 2180 | | - buf_f->blf_map_size, bit); |
|---|
| 2181 | | - |
|---|
| 2182 | | - /* |
|---|
| 2183 | | - * If there are no more logged regions in the |
|---|
| 2184 | | - * buffer, then we're done. |
|---|
| 2185 | | - */ |
|---|
| 2186 | | - if (bit == -1) |
|---|
| 2187 | | - return 0; |
|---|
| 2188 | | - |
|---|
| 2189 | | - nbits = xfs_contig_bits(buf_f->blf_data_map, |
|---|
| 2190 | | - buf_f->blf_map_size, bit); |
|---|
| 2191 | | - ASSERT(nbits > 0); |
|---|
| 2192 | | - reg_buf_offset = bit << XFS_BLF_SHIFT; |
|---|
| 2193 | | - reg_buf_bytes = nbits << XFS_BLF_SHIFT; |
|---|
| 2194 | | - item_index++; |
|---|
| 2195 | | - } |
|---|
| 2196 | | - |
|---|
| 2197 | | - /* |
|---|
| 2198 | | - * If the current logged region starts after the current |
|---|
| 2199 | | - * di_next_unlinked field, then move on to the next |
|---|
| 2200 | | - * di_next_unlinked field. |
|---|
| 2201 | | - */ |
|---|
| 2202 | | - if (next_unlinked_offset < reg_buf_offset) |
|---|
| 2203 | | - continue; |
|---|
| 2204 | | - |
|---|
| 2205 | | - ASSERT(item->ri_buf[item_index].i_addr != NULL); |
|---|
| 2206 | | - ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); |
|---|
| 2207 | | - ASSERT((reg_buf_offset + reg_buf_bytes) <= |
|---|
| 2208 | | - BBTOB(bp->b_io_length)); |
|---|
| 2209 | | - |
|---|
| 2210 | | - /* |
|---|
| 2211 | | - * The current logged region contains a copy of the |
|---|
| 2212 | | - * current di_next_unlinked field. Extract its value |
|---|
| 2213 | | - * and copy it to the buffer copy. |
|---|
| 2214 | | - */ |
|---|
| 2215 | | - logged_nextp = item->ri_buf[item_index].i_addr + |
|---|
| 2216 | | - next_unlinked_offset - reg_buf_offset; |
|---|
| 2217 | | - if (unlikely(*logged_nextp == 0)) { |
|---|
| 2218 | | - xfs_alert(mp, |
|---|
| 2219 | | - "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). " |
|---|
| 2220 | | - "Trying to replay bad (0) inode di_next_unlinked field.", |
|---|
| 2221 | | - item, bp); |
|---|
| 2222 | | - XFS_ERROR_REPORT("xlog_recover_do_inode_buf", |
|---|
| 2223 | | - XFS_ERRLEVEL_LOW, mp); |
|---|
| 2224 | | - return -EFSCORRUPTED; |
|---|
| 2225 | | - } |
|---|
| 2226 | | - |
|---|
| 2227 | | - buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset); |
|---|
| 2228 | | - *buffer_nextp = *logged_nextp; |
|---|
| 2229 | | - |
|---|
| 2230 | | - /* |
|---|
| 2231 | | - * If necessary, recalculate the CRC in the on-disk inode. We |
|---|
| 2232 | | - * have to leave the inode in a consistent state for whoever |
|---|
| 2233 | | - * reads it next.... |
|---|
| 2234 | | - */ |
|---|
| 2235 | | - xfs_dinode_calc_crc(mp, |
|---|
| 2236 | | - xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); |
|---|
| 2237 | | - |
|---|
| 2238 | | - } |
|---|
| 2239 | | - |
|---|
| 2240 | | - return 0; |
|---|
| 2241 | | -} |
|---|
| 2242 | | - |
|---|
| 2243 | | -/* |
|---|
| 2244 | | - * V5 filesystems know the age of the buffer on disk being recovered. We can |
|---|
| 2245 | | - * have newer objects on disk than we are replaying, and so for these cases we |
|---|
| 2246 | | - * don't want to replay the current change as that will make the buffer contents |
|---|
| 2247 | | - * temporarily invalid on disk. |
|---|
| 2248 | | - * |
|---|
| 2249 | | - * The magic number might not match the buffer type we are going to recover |
|---|
| 2250 | | - * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence |
|---|
| 2251 | | - * extract the LSN of the existing object in the buffer based on its current |
|---|
| 2252 | | - * magic number. If we don't recognise the magic number in the buffer, then |
|---|
| 2253 | | - * return a LSN of -1 so that the caller knows it was an unrecognised block and |
|---|
| 2254 | | - * so can recover the buffer. |
|---|
| 2255 | | - * |
|---|
| 2256 | | - * Note: we cannot rely solely on magic number matches to determine that the |
|---|
| 2257 | | - * buffer has a valid LSN - we also need to verify that it belongs to this |
|---|
| 2258 | | - * filesystem, so we need to extract the object's LSN and compare it to that |
|---|
| 2259 | | - * which we read from the superblock. If the UUIDs don't match, then we've got a |
|---|
| 2260 | | - * stale metadata block from an old filesystem instance that we need to recover |
|---|
| 2261 | | - * over the top of. |
|---|
| 2262 | | - */ |
|---|
| 2263 | | -static xfs_lsn_t |
|---|
| 2264 | | -xlog_recover_get_buf_lsn( |
|---|
| 2265 | | - struct xfs_mount *mp, |
|---|
| 2266 | | - struct xfs_buf *bp) |
|---|
| 2267 | | -{ |
|---|
| 2268 | | - uint32_t magic32; |
|---|
| 2269 | | - uint16_t magic16; |
|---|
| 2270 | | - uint16_t magicda; |
|---|
| 2271 | | - void *blk = bp->b_addr; |
|---|
| 2272 | | - uuid_t *uuid; |
|---|
| 2273 | | - xfs_lsn_t lsn = -1; |
|---|
| 2274 | | - |
|---|
| 2275 | | - /* v4 filesystems always recover immediately */ |
|---|
| 2276 | | - if (!xfs_sb_version_hascrc(&mp->m_sb)) |
|---|
| 2277 | | - goto recover_immediately; |
|---|
| 2278 | | - |
|---|
| 2279 | | - magic32 = be32_to_cpu(*(__be32 *)blk); |
|---|
| 2280 | | - switch (magic32) { |
|---|
| 2281 | | - case XFS_ABTB_CRC_MAGIC: |
|---|
| 2282 | | - case XFS_ABTC_CRC_MAGIC: |
|---|
| 2283 | | - case XFS_ABTB_MAGIC: |
|---|
| 2284 | | - case XFS_ABTC_MAGIC: |
|---|
| 2285 | | - case XFS_RMAP_CRC_MAGIC: |
|---|
| 2286 | | - case XFS_REFC_CRC_MAGIC: |
|---|
| 2287 | | - case XFS_IBT_CRC_MAGIC: |
|---|
| 2288 | | - case XFS_IBT_MAGIC: { |
|---|
| 2289 | | - struct xfs_btree_block *btb = blk; |
|---|
| 2290 | | - |
|---|
| 2291 | | - lsn = be64_to_cpu(btb->bb_u.s.bb_lsn); |
|---|
| 2292 | | - uuid = &btb->bb_u.s.bb_uuid; |
|---|
| 2293 | | - break; |
|---|
| 2294 | | - } |
|---|
| 2295 | | - case XFS_BMAP_CRC_MAGIC: |
|---|
| 2296 | | - case XFS_BMAP_MAGIC: { |
|---|
| 2297 | | - struct xfs_btree_block *btb = blk; |
|---|
| 2298 | | - |
|---|
| 2299 | | - lsn = be64_to_cpu(btb->bb_u.l.bb_lsn); |
|---|
| 2300 | | - uuid = &btb->bb_u.l.bb_uuid; |
|---|
| 2301 | | - break; |
|---|
| 2302 | | - } |
|---|
| 2303 | | - case XFS_AGF_MAGIC: |
|---|
| 2304 | | - lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn); |
|---|
| 2305 | | - uuid = &((struct xfs_agf *)blk)->agf_uuid; |
|---|
| 2306 | | - break; |
|---|
| 2307 | | - case XFS_AGFL_MAGIC: |
|---|
| 2308 | | - lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn); |
|---|
| 2309 | | - uuid = &((struct xfs_agfl *)blk)->agfl_uuid; |
|---|
| 2310 | | - break; |
|---|
| 2311 | | - case XFS_AGI_MAGIC: |
|---|
| 2312 | | - lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn); |
|---|
| 2313 | | - uuid = &((struct xfs_agi *)blk)->agi_uuid; |
|---|
| 2314 | | - break; |
|---|
| 2315 | | - case XFS_SYMLINK_MAGIC: |
|---|
| 2316 | | - lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn); |
|---|
| 2317 | | - uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid; |
|---|
| 2318 | | - break; |
|---|
| 2319 | | - case XFS_DIR3_BLOCK_MAGIC: |
|---|
| 2320 | | - case XFS_DIR3_DATA_MAGIC: |
|---|
| 2321 | | - case XFS_DIR3_FREE_MAGIC: |
|---|
| 2322 | | - lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn); |
|---|
| 2323 | | - uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; |
|---|
| 2324 | | - break; |
|---|
| 2325 | | - case XFS_ATTR3_RMT_MAGIC: |
|---|
| 2326 | | - /* |
|---|
| 2327 | | - * Remote attr blocks are written synchronously, rather than |
|---|
| 2328 | | - * being logged. That means they do not contain a valid LSN |
|---|
| 2329 | | - * (i.e. transactionally ordered) in them, and hence any time we |
|---|
| 2330 | | - * see a buffer to replay over the top of a remote attribute |
|---|
| 2331 | | - * block we should simply do so. |
|---|
| 2332 | | - */ |
|---|
| 2333 | | - goto recover_immediately; |
|---|
| 2334 | | - case XFS_SB_MAGIC: |
|---|
| 2335 | | - /* |
|---|
| 2336 | | - * superblock uuids are magic. We may or may not have a |
|---|
| 2337 | | - * sb_meta_uuid on disk, but it will be set in the in-core |
|---|
| 2338 | | - * superblock. We set the uuid pointer for verification |
|---|
| 2339 | | - * according to the superblock feature mask to ensure we check |
|---|
| 2340 | | - * the relevant UUID in the superblock. |
|---|
| 2341 | | - */ |
|---|
| 2342 | | - lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); |
|---|
| 2343 | | - if (xfs_sb_version_hasmetauuid(&mp->m_sb)) |
|---|
| 2344 | | - uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid; |
|---|
| 2345 | | - else |
|---|
| 2346 | | - uuid = &((struct xfs_dsb *)blk)->sb_uuid; |
|---|
| 2347 | | - break; |
|---|
| 2348 | | - default: |
|---|
| 2349 | | - break; |
|---|
| 2350 | | - } |
|---|
| 2351 | | - |
|---|
| 2352 | | - if (lsn != (xfs_lsn_t)-1) { |
|---|
| 2353 | | - if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid)) |
|---|
| 2354 | | - goto recover_immediately; |
|---|
| 2355 | | - return lsn; |
|---|
| 2356 | | - } |
|---|
| 2357 | | - |
|---|
| 2358 | | - magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic); |
|---|
| 2359 | | - switch (magicda) { |
|---|
| 2360 | | - case XFS_DIR3_LEAF1_MAGIC: |
|---|
| 2361 | | - case XFS_DIR3_LEAFN_MAGIC: |
|---|
| 2362 | | - case XFS_DA3_NODE_MAGIC: |
|---|
| 2363 | | - lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); |
|---|
| 2364 | | - uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; |
|---|
| 2365 | | - break; |
|---|
| 2366 | | - default: |
|---|
| 2367 | | - break; |
|---|
| 2368 | | - } |
|---|
| 2369 | | - |
|---|
| 2370 | | - if (lsn != (xfs_lsn_t)-1) { |
|---|
| 2371 | | - if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) |
|---|
| 2372 | | - goto recover_immediately; |
|---|
| 2373 | | - return lsn; |
|---|
| 2374 | | - } |
|---|
| 2375 | | - |
|---|
| 2376 | | - /* |
|---|
| 2377 | | - * We do individual object checks on dquot and inode buffers as they |
|---|
| 2378 | | - * have their own individual LSN records. Also, we could have a stale |
|---|
| 2379 | | - * buffer here, so we have to at least recognise these buffer types. |
|---|
| 2380 | | - * |
|---|
| 2381 | | - * A noted complexity here is inode unlinked list processing - it logs |
|---|
| 2382 | | - * the inode directly in the buffer, but we don't know which inodes have |
|---|
| 2383 | | - * been modified, and there is no global buffer LSN. Hence we need to |
|---|
| 2384 | | - * recover all inode buffer types immediately. This problem will be |
|---|
| 2385 | | - * fixed by logical logging of the unlinked list modifications. |
|---|
| 2386 | | - */ |
|---|
| 2387 | | - magic16 = be16_to_cpu(*(__be16 *)blk); |
|---|
| 2388 | | - switch (magic16) { |
|---|
| 2389 | | - case XFS_DQUOT_MAGIC: |
|---|
| 2390 | | - case XFS_DINODE_MAGIC: |
|---|
| 2391 | | - goto recover_immediately; |
|---|
| 2392 | | - default: |
|---|
| 2393 | | - break; |
|---|
| 2394 | | - } |
|---|
| 2395 | | - |
|---|
| 2396 | | - /* unknown buffer contents, recover immediately */ |
|---|
| 2397 | | - |
|---|
| 2398 | | -recover_immediately: |
|---|
| 2399 | | - return (xfs_lsn_t)-1; |
|---|
| 2400 | | - |
|---|
| 2401 | | -} |
|---|
| 2402 | | - |
|---|
| 2403 | | -/* |
|---|
| 2404 | | - * Validate the recovered buffer is of the correct type and attach the |
|---|
| 2405 | | - * appropriate buffer operations to them for writeback. Magic numbers are in a |
|---|
| 2406 | | - * few places: |
|---|
| 2407 | | - * the first 16 bits of the buffer (inode buffer, dquot buffer), |
|---|
| 2408 | | - * the first 32 bits of the buffer (most blocks), |
|---|
| 2409 | | - * inside a struct xfs_da_blkinfo at the start of the buffer. |
|---|
| 2410 | | - */ |
|---|
| 2411 | | -static void |
|---|
| 2412 | | -xlog_recover_validate_buf_type( |
|---|
| 2413 | | - struct xfs_mount *mp, |
|---|
| 2414 | | - struct xfs_buf *bp, |
|---|
| 2415 | | - xfs_buf_log_format_t *buf_f, |
|---|
| 2416 | | - xfs_lsn_t current_lsn) |
|---|
| 2417 | | -{ |
|---|
| 2418 | | - struct xfs_da_blkinfo *info = bp->b_addr; |
|---|
| 2419 | | - uint32_t magic32; |
|---|
| 2420 | | - uint16_t magic16; |
|---|
| 2421 | | - uint16_t magicda; |
|---|
| 2422 | | - char *warnmsg = NULL; |
|---|
| 2423 | | - |
|---|
| 2424 | | - /* |
|---|
| 2425 | | - * We can only do post recovery validation on items on CRC enabled |
|---|
| 2426 | | - * filesystems as we need to know when the buffer was written to be able |
|---|
| 2427 | | - * to determine if we should have replayed the item. If we replay old |
|---|
| 2428 | | - * metadata over a newer buffer, then it will enter a temporarily |
|---|
| 2429 | | - * inconsistent state resulting in verification failures. Hence for now |
|---|
| 2430 | | - * just avoid the verification stage for non-crc filesystems |
|---|
| 2431 | | - */ |
|---|
| 2432 | | - if (!xfs_sb_version_hascrc(&mp->m_sb)) |
|---|
| 2433 | | - return; |
|---|
| 2434 | | - |
|---|
| 2435 | | - magic32 = be32_to_cpu(*(__be32 *)bp->b_addr); |
|---|
| 2436 | | - magic16 = be16_to_cpu(*(__be16*)bp->b_addr); |
|---|
| 2437 | | - magicda = be16_to_cpu(info->magic); |
|---|
| 2438 | | - switch (xfs_blft_from_flags(buf_f)) { |
|---|
| 2439 | | - case XFS_BLFT_BTREE_BUF: |
|---|
| 2440 | | - switch (magic32) { |
|---|
| 2441 | | - case XFS_ABTB_CRC_MAGIC: |
|---|
| 2442 | | - case XFS_ABTC_CRC_MAGIC: |
|---|
| 2443 | | - case XFS_ABTB_MAGIC: |
|---|
| 2444 | | - case XFS_ABTC_MAGIC: |
|---|
| 2445 | | - bp->b_ops = &xfs_allocbt_buf_ops; |
|---|
| 2446 | | - break; |
|---|
| 2447 | | - case XFS_IBT_CRC_MAGIC: |
|---|
| 2448 | | - case XFS_FIBT_CRC_MAGIC: |
|---|
| 2449 | | - case XFS_IBT_MAGIC: |
|---|
| 2450 | | - case XFS_FIBT_MAGIC: |
|---|
| 2451 | | - bp->b_ops = &xfs_inobt_buf_ops; |
|---|
| 2452 | | - break; |
|---|
| 2453 | | - case XFS_BMAP_CRC_MAGIC: |
|---|
| 2454 | | - case XFS_BMAP_MAGIC: |
|---|
| 2455 | | - bp->b_ops = &xfs_bmbt_buf_ops; |
|---|
| 2456 | | - break; |
|---|
| 2457 | | - case XFS_RMAP_CRC_MAGIC: |
|---|
| 2458 | | - bp->b_ops = &xfs_rmapbt_buf_ops; |
|---|
| 2459 | | - break; |
|---|
| 2460 | | - case XFS_REFC_CRC_MAGIC: |
|---|
| 2461 | | - bp->b_ops = &xfs_refcountbt_buf_ops; |
|---|
| 2462 | | - break; |
|---|
| 2463 | | - default: |
|---|
| 2464 | | - warnmsg = "Bad btree block magic!"; |
|---|
| 2465 | | - break; |
|---|
| 2466 | | - } |
|---|
| 2467 | | - break; |
|---|
| 2468 | | - case XFS_BLFT_AGF_BUF: |
|---|
| 2469 | | - if (magic32 != XFS_AGF_MAGIC) { |
|---|
| 2470 | | - warnmsg = "Bad AGF block magic!"; |
|---|
| 2471 | | - break; |
|---|
| 2472 | | - } |
|---|
| 2473 | | - bp->b_ops = &xfs_agf_buf_ops; |
|---|
| 2474 | | - break; |
|---|
| 2475 | | - case XFS_BLFT_AGFL_BUF: |
|---|
| 2476 | | - if (magic32 != XFS_AGFL_MAGIC) { |
|---|
| 2477 | | - warnmsg = "Bad AGFL block magic!"; |
|---|
| 2478 | | - break; |
|---|
| 2479 | | - } |
|---|
| 2480 | | - bp->b_ops = &xfs_agfl_buf_ops; |
|---|
| 2481 | | - break; |
|---|
| 2482 | | - case XFS_BLFT_AGI_BUF: |
|---|
| 2483 | | - if (magic32 != XFS_AGI_MAGIC) { |
|---|
| 2484 | | - warnmsg = "Bad AGI block magic!"; |
|---|
| 2485 | | - break; |
|---|
| 2486 | | - } |
|---|
| 2487 | | - bp->b_ops = &xfs_agi_buf_ops; |
|---|
| 2488 | | - break; |
|---|
| 2489 | | - case XFS_BLFT_UDQUOT_BUF: |
|---|
| 2490 | | - case XFS_BLFT_PDQUOT_BUF: |
|---|
| 2491 | | - case XFS_BLFT_GDQUOT_BUF: |
|---|
| 2492 | | -#ifdef CONFIG_XFS_QUOTA |
|---|
| 2493 | | - if (magic16 != XFS_DQUOT_MAGIC) { |
|---|
| 2494 | | - warnmsg = "Bad DQUOT block magic!"; |
|---|
| 2495 | | - break; |
|---|
| 2496 | | - } |
|---|
| 2497 | | - bp->b_ops = &xfs_dquot_buf_ops; |
|---|
| 2498 | | -#else |
|---|
| 2499 | | - xfs_alert(mp, |
|---|
| 2500 | | - "Trying to recover dquots without QUOTA support built in!"); |
|---|
| 2501 | | - ASSERT(0); |
|---|
| 2502 | | -#endif |
|---|
| 2503 | | - break; |
|---|
| 2504 | | - case XFS_BLFT_DINO_BUF: |
|---|
| 2505 | | - if (magic16 != XFS_DINODE_MAGIC) { |
|---|
| 2506 | | - warnmsg = "Bad INODE block magic!"; |
|---|
| 2507 | | - break; |
|---|
| 2508 | | - } |
|---|
| 2509 | | - bp->b_ops = &xfs_inode_buf_ops; |
|---|
| 2510 | | - break; |
|---|
| 2511 | | - case XFS_BLFT_SYMLINK_BUF: |
|---|
| 2512 | | - if (magic32 != XFS_SYMLINK_MAGIC) { |
|---|
| 2513 | | - warnmsg = "Bad symlink block magic!"; |
|---|
| 2514 | | - break; |
|---|
| 2515 | | - } |
|---|
| 2516 | | - bp->b_ops = &xfs_symlink_buf_ops; |
|---|
| 2517 | | - break; |
|---|
| 2518 | | - case XFS_BLFT_DIR_BLOCK_BUF: |
|---|
| 2519 | | - if (magic32 != XFS_DIR2_BLOCK_MAGIC && |
|---|
| 2520 | | - magic32 != XFS_DIR3_BLOCK_MAGIC) { |
|---|
| 2521 | | - warnmsg = "Bad dir block magic!"; |
|---|
| 2522 | | - break; |
|---|
| 2523 | | - } |
|---|
| 2524 | | - bp->b_ops = &xfs_dir3_block_buf_ops; |
|---|
| 2525 | | - break; |
|---|
| 2526 | | - case XFS_BLFT_DIR_DATA_BUF: |
|---|
| 2527 | | - if (magic32 != XFS_DIR2_DATA_MAGIC && |
|---|
| 2528 | | - magic32 != XFS_DIR3_DATA_MAGIC) { |
|---|
| 2529 | | - warnmsg = "Bad dir data magic!"; |
|---|
| 2530 | | - break; |
|---|
| 2531 | | - } |
|---|
| 2532 | | - bp->b_ops = &xfs_dir3_data_buf_ops; |
|---|
| 2533 | | - break; |
|---|
| 2534 | | - case XFS_BLFT_DIR_FREE_BUF: |
|---|
| 2535 | | - if (magic32 != XFS_DIR2_FREE_MAGIC && |
|---|
| 2536 | | - magic32 != XFS_DIR3_FREE_MAGIC) { |
|---|
| 2537 | | - warnmsg = "Bad dir3 free magic!"; |
|---|
| 2538 | | - break; |
|---|
| 2539 | | - } |
|---|
| 2540 | | - bp->b_ops = &xfs_dir3_free_buf_ops; |
|---|
| 2541 | | - break; |
|---|
| 2542 | | - case XFS_BLFT_DIR_LEAF1_BUF: |
|---|
| 2543 | | - if (magicda != XFS_DIR2_LEAF1_MAGIC && |
|---|
| 2544 | | - magicda != XFS_DIR3_LEAF1_MAGIC) { |
|---|
| 2545 | | - warnmsg = "Bad dir leaf1 magic!"; |
|---|
| 2546 | | - break; |
|---|
| 2547 | | - } |
|---|
| 2548 | | - bp->b_ops = &xfs_dir3_leaf1_buf_ops; |
|---|
| 2549 | | - break; |
|---|
| 2550 | | - case XFS_BLFT_DIR_LEAFN_BUF: |
|---|
| 2551 | | - if (magicda != XFS_DIR2_LEAFN_MAGIC && |
|---|
| 2552 | | - magicda != XFS_DIR3_LEAFN_MAGIC) { |
|---|
| 2553 | | - warnmsg = "Bad dir leafn magic!"; |
|---|
| 2554 | | - break; |
|---|
| 2555 | | - } |
|---|
| 2556 | | - bp->b_ops = &xfs_dir3_leafn_buf_ops; |
|---|
| 2557 | | - break; |
|---|
| 2558 | | - case XFS_BLFT_DA_NODE_BUF: |
|---|
| 2559 | | - if (magicda != XFS_DA_NODE_MAGIC && |
|---|
| 2560 | | - magicda != XFS_DA3_NODE_MAGIC) { |
|---|
| 2561 | | - warnmsg = "Bad da node magic!"; |
|---|
| 2562 | | - break; |
|---|
| 2563 | | - } |
|---|
| 2564 | | - bp->b_ops = &xfs_da3_node_buf_ops; |
|---|
| 2565 | | - break; |
|---|
| 2566 | | - case XFS_BLFT_ATTR_LEAF_BUF: |
|---|
| 2567 | | - if (magicda != XFS_ATTR_LEAF_MAGIC && |
|---|
| 2568 | | - magicda != XFS_ATTR3_LEAF_MAGIC) { |
|---|
| 2569 | | - warnmsg = "Bad attr leaf magic!"; |
|---|
| 2570 | | - break; |
|---|
| 2571 | | - } |
|---|
| 2572 | | - bp->b_ops = &xfs_attr3_leaf_buf_ops; |
|---|
| 2573 | | - break; |
|---|
| 2574 | | - case XFS_BLFT_ATTR_RMT_BUF: |
|---|
| 2575 | | - if (magic32 != XFS_ATTR3_RMT_MAGIC) { |
|---|
| 2576 | | - warnmsg = "Bad attr remote magic!"; |
|---|
| 2577 | | - break; |
|---|
| 2578 | | - } |
|---|
| 2579 | | - bp->b_ops = &xfs_attr3_rmt_buf_ops; |
|---|
| 2580 | | - break; |
|---|
| 2581 | | - case XFS_BLFT_SB_BUF: |
|---|
| 2582 | | - if (magic32 != XFS_SB_MAGIC) { |
|---|
| 2583 | | - warnmsg = "Bad SB block magic!"; |
|---|
| 2584 | | - break; |
|---|
| 2585 | | - } |
|---|
| 2586 | | - bp->b_ops = &xfs_sb_buf_ops; |
|---|
| 2587 | | - break; |
|---|
| 2588 | | -#ifdef CONFIG_XFS_RT |
|---|
| 2589 | | - case XFS_BLFT_RTBITMAP_BUF: |
|---|
| 2590 | | - case XFS_BLFT_RTSUMMARY_BUF: |
|---|
| 2591 | | - /* no magic numbers for verification of RT buffers */ |
|---|
| 2592 | | - bp->b_ops = &xfs_rtbuf_ops; |
|---|
| 2593 | | - break; |
|---|
| 2594 | | -#endif /* CONFIG_XFS_RT */ |
|---|
| 2595 | | - default: |
|---|
| 2596 | | - xfs_warn(mp, "Unknown buffer type %d!", |
|---|
| 2597 | | - xfs_blft_from_flags(buf_f)); |
|---|
| 2598 | | - break; |
|---|
| 2599 | | - } |
|---|
| 2600 | | - |
|---|
| 2601 | | - /* |
|---|
| 2602 | | - * Nothing else to do in the case of a NULL current LSN as this means |
|---|
| 2603 | | - * the buffer is more recent than the change in the log and will be |
|---|
| 2604 | | - * skipped. |
|---|
| 2605 | | - */ |
|---|
| 2606 | | - if (current_lsn == NULLCOMMITLSN) |
|---|
| 2607 | | - return; |
|---|
| 2608 | | - |
|---|
| 2609 | | - if (warnmsg) { |
|---|
| 2610 | | - xfs_warn(mp, warnmsg); |
|---|
| 2611 | | - ASSERT(0); |
|---|
| 2612 | | - } |
|---|
| 2613 | | - |
|---|
| 2614 | | - /* |
|---|
| 2615 | | - * We must update the metadata LSN of the buffer as it is written out to |
|---|
| 2616 | | - * ensure that older transactions never replay over this one and corrupt |
|---|
| 2617 | | - * the buffer. This can occur if log recovery is interrupted at some |
|---|
| 2618 | | - * point after the current transaction completes, at which point a |
|---|
| 2619 | | - * subsequent mount starts recovery from the beginning. |
|---|
| 2620 | | - * |
|---|
| 2621 | | - * Write verifiers update the metadata LSN from log items attached to |
|---|
| 2622 | | - * the buffer. Therefore, initialize a bli purely to carry the LSN to |
|---|
| 2623 | | - * the verifier. We'll clean it up in our ->iodone() callback. |
|---|
| 2624 | | - */ |
|---|
| 2625 | | - if (bp->b_ops) { |
|---|
| 2626 | | - struct xfs_buf_log_item *bip; |
|---|
| 2627 | | - |
|---|
| 2628 | | - ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone); |
|---|
| 2629 | | - bp->b_iodone = xlog_recover_iodone; |
|---|
| 2630 | | - xfs_buf_item_init(bp, mp); |
|---|
| 2631 | | - bip = bp->b_log_item; |
|---|
| 2632 | | - bip->bli_item.li_lsn = current_lsn; |
|---|
| 2633 | | - } |
|---|
| 2634 | | -} |
|---|
| 2635 | | - |
|---|
| 2636 | | -/* |
|---|
| 2637 | | - * Perform a 'normal' buffer recovery. Each logged region of the |
|---|
| 2638 | | - * buffer should be copied over the corresponding region in the |
|---|
| 2639 | | - * given buffer. The bitmap in the buf log format structure indicates |
|---|
| 2640 | | - * where to place the logged data. |
|---|
| 2641 | | - */ |
|---|
| 2642 | | -STATIC void |
|---|
| 2643 | | -xlog_recover_do_reg_buffer( |
|---|
| 2644 | | - struct xfs_mount *mp, |
|---|
| 2645 | | - xlog_recover_item_t *item, |
|---|
| 2646 | | - struct xfs_buf *bp, |
|---|
| 2647 | | - xfs_buf_log_format_t *buf_f, |
|---|
| 2648 | | - xfs_lsn_t current_lsn) |
|---|
| 2649 | | -{ |
|---|
| 2650 | | - int i; |
|---|
| 2651 | | - int bit; |
|---|
| 2652 | | - int nbits; |
|---|
| 2653 | | - xfs_failaddr_t fa; |
|---|
| 2654 | | - |
|---|
| 2655 | | - trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); |
|---|
| 2656 | | - |
|---|
| 2657 | | - bit = 0; |
|---|
| 2658 | | - i = 1; /* 0 is the buf format structure */ |
|---|
| 2659 | | - while (1) { |
|---|
| 2660 | | - bit = xfs_next_bit(buf_f->blf_data_map, |
|---|
| 2661 | | - buf_f->blf_map_size, bit); |
|---|
| 2662 | | - if (bit == -1) |
|---|
| 2663 | | - break; |
|---|
| 2664 | | - nbits = xfs_contig_bits(buf_f->blf_data_map, |
|---|
| 2665 | | - buf_f->blf_map_size, bit); |
|---|
| 2666 | | - ASSERT(nbits > 0); |
|---|
| 2667 | | - ASSERT(item->ri_buf[i].i_addr != NULL); |
|---|
| 2668 | | - ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); |
|---|
| 2669 | | - ASSERT(BBTOB(bp->b_io_length) >= |
|---|
| 2670 | | - ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); |
|---|
| 2671 | | - |
|---|
| 2672 | | - /* |
|---|
| 2673 | | - * The dirty regions logged in the buffer, even though |
|---|
| 2674 | | - * contiguous, may span multiple chunks. This is because the |
|---|
| 2675 | | - * dirty region may span a physical page boundary in a buffer |
|---|
| 2676 | | - * and hence be split into two separate vectors for writing into |
|---|
| 2677 | | - * the log. Hence we need to trim nbits back to the length of |
|---|
| 2678 | | - * the current region being copied out of the log. |
|---|
| 2679 | | - */ |
|---|
| 2680 | | - if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT)) |
|---|
| 2681 | | - nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT; |
|---|
| 2682 | | - |
|---|
| 2683 | | - /* |
|---|
| 2684 | | - * Do a sanity check if this is a dquot buffer. Just checking |
|---|
| 2685 | | - * the first dquot in the buffer should do. XXXThis is |
|---|
| 2686 | | - * probably a good thing to do for other buf types also. |
|---|
| 2687 | | - */ |
|---|
| 2688 | | - fa = NULL; |
|---|
| 2689 | | - if (buf_f->blf_flags & |
|---|
| 2690 | | - (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
|---|
| 2691 | | - if (item->ri_buf[i].i_addr == NULL) { |
|---|
| 2692 | | - xfs_alert(mp, |
|---|
| 2693 | | - "XFS: NULL dquot in %s.", __func__); |
|---|
| 2694 | | - goto next; |
|---|
| 2695 | | - } |
|---|
| 2696 | | - if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { |
|---|
| 2697 | | - xfs_alert(mp, |
|---|
| 2698 | | - "XFS: dquot too small (%d) in %s.", |
|---|
| 2699 | | - item->ri_buf[i].i_len, __func__); |
|---|
| 2700 | | - goto next; |
|---|
| 2701 | | - } |
|---|
| 2702 | | - fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr, |
|---|
| 2703 | | - -1, 0); |
|---|
| 2704 | | - if (fa) { |
|---|
| 2705 | | - xfs_alert(mp, |
|---|
| 2706 | | - "dquot corrupt at %pS trying to replay into block 0x%llx", |
|---|
| 2707 | | - fa, bp->b_bn); |
|---|
| 2708 | | - goto next; |
|---|
| 2709 | | - } |
|---|
| 2710 | | - } |
|---|
| 2711 | | - |
|---|
| 2712 | | - memcpy(xfs_buf_offset(bp, |
|---|
| 2713 | | - (uint)bit << XFS_BLF_SHIFT), /* dest */ |
|---|
| 2714 | | - item->ri_buf[i].i_addr, /* source */ |
|---|
| 2715 | | - nbits<<XFS_BLF_SHIFT); /* length */ |
|---|
| 2716 | | - next: |
|---|
| 2717 | | - i++; |
|---|
| 2718 | | - bit += nbits; |
|---|
| 2719 | | - } |
|---|
| 2720 | | - |
|---|
| 2721 | | - /* Shouldn't be any more regions */ |
|---|
| 2722 | | - ASSERT(i == item->ri_total); |
|---|
| 2723 | | - |
|---|
| 2724 | | - xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn); |
|---|
| 2725 | | -} |
|---|
| 2726 | | - |
|---|
| 2727 | | -/* |
|---|
| 2728 | | - * Perform a dquot buffer recovery. |
|---|
| 2729 | | - * Simple algorithm: if we have found a QUOTAOFF log item of the same type |
|---|
| 2730 | | - * (ie. USR or GRP), then just toss this buffer away; don't recover it. |
|---|
| 2731 | | - * Else, treat it as a regular buffer and do recovery. |
|---|
| 2732 | | - * |
|---|
| 2733 | | - * Return false if the buffer was tossed and true if we recovered the buffer to |
|---|
| 2734 | | - * indicate to the caller if the buffer needs writing. |
|---|
| 2735 | | - */ |
|---|
| 2736 | | -STATIC bool |
|---|
| 2737 | | -xlog_recover_do_dquot_buffer( |
|---|
| 2738 | | - struct xfs_mount *mp, |
|---|
| 2739 | | - struct xlog *log, |
|---|
| 2740 | | - struct xlog_recover_item *item, |
|---|
| 2741 | | - struct xfs_buf *bp, |
|---|
| 2742 | | - struct xfs_buf_log_format *buf_f) |
|---|
| 2743 | | -{ |
|---|
| 2744 | | - uint type; |
|---|
| 2745 | | - |
|---|
| 2746 | | - trace_xfs_log_recover_buf_dquot_buf(log, buf_f); |
|---|
| 2747 | | - |
|---|
| 2748 | | - /* |
|---|
| 2749 | | - * Filesystems are required to send in quota flags at mount time. |
|---|
| 2750 | | - */ |
|---|
| 2751 | | - if (!mp->m_qflags) |
|---|
| 2752 | | - return false; |
|---|
| 2753 | | - |
|---|
| 2754 | | - type = 0; |
|---|
| 2755 | | - if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) |
|---|
| 2756 | | - type |= XFS_DQ_USER; |
|---|
| 2757 | | - if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF) |
|---|
| 2758 | | - type |= XFS_DQ_PROJ; |
|---|
| 2759 | | - if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF) |
|---|
| 2760 | | - type |= XFS_DQ_GROUP; |
|---|
| 2761 | | - /* |
|---|
| 2762 | | - * This type of quotas was turned off, so ignore this buffer |
|---|
| 2763 | | - */ |
|---|
| 2764 | | - if (log->l_quotaoffs_flag & type) |
|---|
| 2765 | | - return false; |
|---|
| 2766 | | - |
|---|
| 2767 | | - xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN); |
|---|
| 2768 | | - return true; |
|---|
| 2769 | | -} |
|---|
| 2770 | | - |
|---|
| 2771 | | -/* |
|---|
| 2772 | | - * This routine replays a modification made to a buffer at runtime. |
|---|
| 2773 | | - * There are actually two types of buffer, regular and inode, which |
|---|
| 2774 | | - * are handled differently. Inode buffers are handled differently |
|---|
| 2775 | | - * in that we only recover a specific set of data from them, namely |
|---|
| 2776 | | - * the inode di_next_unlinked fields. This is because all other inode |
|---|
| 2777 | | - * data is actually logged via inode records and any data we replay |
|---|
| 2778 | | - * here which overlaps that may be stale. |
|---|
| 2779 | | - * |
|---|
| 2780 | | - * When meta-data buffers are freed at run time we log a buffer item |
|---|
| 2781 | | - * with the XFS_BLF_CANCEL bit set to indicate that previous copies |
|---|
| 2782 | | - * of the buffer in the log should not be replayed at recovery time. |
|---|
| 2783 | | - * This is so that if the blocks covered by the buffer are reused for |
|---|
| 2784 | | - * file data before we crash we don't end up replaying old, freed |
|---|
| 2785 | | - * meta-data into a user's file. |
|---|
| 2786 | | - * |
|---|
| 2787 | | - * To handle the cancellation of buffer log items, we make two passes |
|---|
| 2788 | | - * over the log during recovery. During the first we build a table of |
|---|
| 2789 | | - * those buffers which have been cancelled, and during the second we |
|---|
| 2790 | | - * only replay those buffers which do not have corresponding cancel |
|---|
| 2791 | | - * records in the table. See xlog_recover_buffer_pass[1,2] above |
|---|
| 2792 | | - * for more details on the implementation of the table of cancel records. |
|---|
| 2793 | | - */ |
|---|
| 2794 | | -STATIC int |
|---|
| 2795 | | -xlog_recover_buffer_pass2( |
|---|
| 2796 | | - struct xlog *log, |
|---|
| 2797 | | - struct list_head *buffer_list, |
|---|
| 2798 | | - struct xlog_recover_item *item, |
|---|
| 2799 | | - xfs_lsn_t current_lsn) |
|---|
| 2800 | | -{ |
|---|
| 2801 | | - xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
|---|
| 2802 | | - xfs_mount_t *mp = log->l_mp; |
|---|
| 2803 | | - xfs_buf_t *bp; |
|---|
| 2804 | | - int error; |
|---|
| 2805 | | - uint buf_flags; |
|---|
| 2806 | | - xfs_lsn_t lsn; |
|---|
| 2807 | | - |
|---|
| 2808 | | - /* |
|---|
| 2809 | | - * In this pass we only want to recover all the buffers which have |
|---|
| 2810 | | - * not been cancelled and are not cancellation buffers themselves. |
|---|
| 2811 | | - */ |
|---|
| 2812 | | - if (xlog_check_buffer_cancelled(log, buf_f->blf_blkno, |
|---|
| 2813 | | - buf_f->blf_len, buf_f->blf_flags)) { |
|---|
| 2814 | | - trace_xfs_log_recover_buf_cancel(log, buf_f); |
|---|
| 2815 | | - return 0; |
|---|
| 2816 | | - } |
|---|
| 2817 | | - |
|---|
| 2818 | | - trace_xfs_log_recover_buf_recover(log, buf_f); |
|---|
| 2819 | | - |
|---|
| 2820 | | - buf_flags = 0; |
|---|
| 2821 | | - if (buf_f->blf_flags & XFS_BLF_INODE_BUF) |
|---|
| 2822 | | - buf_flags |= XBF_UNMAPPED; |
|---|
| 2823 | | - |
|---|
| 2824 | | - bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
|---|
| 2825 | | - buf_flags, NULL); |
|---|
| 2826 | | - if (!bp) |
|---|
| 2827 | | - return -ENOMEM; |
|---|
| 2828 | | - error = bp->b_error; |
|---|
| 2829 | | - if (error) { |
|---|
| 2830 | | - xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); |
|---|
| 2831 | | - goto out_release; |
|---|
| 2832 | | - } |
|---|
| 2833 | | - |
|---|
| 2834 | | - /* |
|---|
| 2835 | | - * Recover the buffer only if we get an LSN from it and it's less than |
|---|
| 2836 | | - * the lsn of the transaction we are replaying. |
|---|
| 2837 | | - * |
|---|
| 2838 | | - * Note that we have to be extremely careful of readahead here. |
|---|
| 2839 | | - * Readahead does not attach verfiers to the buffers so if we don't |
|---|
| 2840 | | - * actually do any replay after readahead because of the LSN we found |
|---|
| 2841 | | - * in the buffer if more recent than that current transaction then we |
|---|
| 2842 | | - * need to attach the verifier directly. Failure to do so can lead to |
|---|
| 2843 | | - * future recovery actions (e.g. EFI and unlinked list recovery) can |
|---|
| 2844 | | - * operate on the buffers and they won't get the verifier attached. This |
|---|
| 2845 | | - * can lead to blocks on disk having the correct content but a stale |
|---|
| 2846 | | - * CRC. |
|---|
| 2847 | | - * |
|---|
| 2848 | | - * It is safe to assume these clean buffers are currently up to date. |
|---|
| 2849 | | - * If the buffer is dirtied by a later transaction being replayed, then |
|---|
| 2850 | | - * the verifier will be reset to match whatever recover turns that |
|---|
| 2851 | | - * buffer into. |
|---|
| 2852 | | - */ |
|---|
| 2853 | | - lsn = xlog_recover_get_buf_lsn(mp, bp); |
|---|
| 2854 | | - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { |
|---|
| 2855 | | - trace_xfs_log_recover_buf_skip(log, buf_f); |
|---|
| 2856 | | - xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); |
|---|
| 2857 | | - goto out_release; |
|---|
| 2858 | | - } |
|---|
| 2859 | | - |
|---|
| 2860 | | - if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
|---|
| 2861 | | - error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
|---|
| 2862 | | - if (error) |
|---|
| 2863 | | - goto out_release; |
|---|
| 2864 | | - } else if (buf_f->blf_flags & |
|---|
| 2865 | | - (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { |
|---|
| 2866 | | - bool dirty; |
|---|
| 2867 | | - |
|---|
| 2868 | | - dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); |
|---|
| 2869 | | - if (!dirty) |
|---|
| 2870 | | - goto out_release; |
|---|
| 2871 | | - } else { |
|---|
| 2872 | | - xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn); |
|---|
| 2873 | | - } |
|---|
| 2874 | | - |
|---|
| 2875 | | - /* |
|---|
| 2876 | | - * Perform delayed write on the buffer. Asynchronous writes will be |
|---|
| 2877 | | - * slower when taking into account all the buffers to be flushed. |
|---|
| 2878 | | - * |
|---|
| 2879 | | - * Also make sure that only inode buffers with good sizes stay in |
|---|
| 2880 | | - * the buffer cache. The kernel moves inodes in buffers of 1 block |
|---|
| 2881 | | - * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode |
|---|
| 2882 | | - * buffers in the log can be a different size if the log was generated |
|---|
| 2883 | | - * by an older kernel using unclustered inode buffers or a newer kernel |
|---|
| 2884 | | - * running with a different inode cluster size. Regardless, if the |
|---|
| 2885 | | - * the inode buffer size isn't max(blocksize, mp->m_inode_cluster_size) |
|---|
| 2886 | | - * for *our* value of mp->m_inode_cluster_size, then we need to keep |
|---|
| 2887 | | - * the buffer out of the buffer cache so that the buffer won't |
|---|
| 2888 | | - * overlap with future reads of those inodes. |
|---|
| 2889 | | - */ |
|---|
| 2890 | | - if (XFS_DINODE_MAGIC == |
|---|
| 2891 | | - be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && |
|---|
| 2892 | | - (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize, |
|---|
| 2893 | | - (uint32_t)log->l_mp->m_inode_cluster_size))) { |
|---|
| 2894 | | - xfs_buf_stale(bp); |
|---|
| 2895 | | - error = xfs_bwrite(bp); |
|---|
| 2896 | | - } else { |
|---|
| 2897 | | - ASSERT(bp->b_target->bt_mount == mp); |
|---|
| 2898 | | - bp->b_iodone = xlog_recover_iodone; |
|---|
| 2899 | | - xfs_buf_delwri_queue(bp, buffer_list); |
|---|
| 2900 | | - } |
|---|
| 2901 | | - |
|---|
| 2902 | | -out_release: |
|---|
| 2903 | | - xfs_buf_relse(bp); |
|---|
| 2904 | | - return error; |
|---|
| 2905 | | -} |
|---|
| 2906 | | - |
|---|
| 2907 | | -/* |
|---|
| 2908 | | - * Inode fork owner changes |
|---|
| 2909 | | - * |
|---|
| 2910 | | - * If we have been told that we have to reparent the inode fork, it's because an |
|---|
| 2911 | | - * extent swap operation on a CRC enabled filesystem has been done and we are |
|---|
| 2912 | | - * replaying it. We need to walk the BMBT of the appropriate fork and change the |
|---|
| 2913 | | - * owners of it. |
|---|
| 2914 | | - * |
|---|
| 2915 | | - * The complexity here is that we don't have an inode context to work with, so |
|---|
| 2916 | | - * after we've replayed the inode we need to instantiate one. This is where the |
|---|
| 2917 | | - * fun begins. |
|---|
| 2918 | | - * |
|---|
| 2919 | | - * We are in the middle of log recovery, so we can't run transactions. That |
|---|
| 2920 | | - * means we cannot use cache coherent inode instantiation via xfs_iget(), as |
|---|
| 2921 | | - * that will result in the corresponding iput() running the inode through |
|---|
| 2922 | | - * xfs_inactive(). If we've just replayed an inode core that changes the link |
|---|
| 2923 | | - * count to zero (i.e. it's been unlinked), then xfs_inactive() will run |
|---|
| 2924 | | - * transactions (bad!). |
|---|
| 2925 | | - * |
|---|
| 2926 | | - * So, to avoid this, we instantiate an inode directly from the inode core we've |
|---|
| 2927 | | - * just recovered. We have the buffer still locked, and all we really need to |
|---|
| 2928 | | - * instantiate is the inode core and the forks being modified. We can do this |
|---|
| 2929 | | - * manually, then run the inode btree owner change, and then tear down the |
|---|
| 2930 | | - * xfs_inode without having to run any transactions at all. |
|---|
| 2931 | | - * |
|---|
| 2932 | | - * Also, because we don't have a transaction context available here but need to |
|---|
| 2933 | | - * gather all the buffers we modify for writeback so we pass the buffer_list |
|---|
| 2934 | | - * instead for the operation to use. |
|---|
| 2935 | | - */ |
|---|
| 2936 | | - |
|---|
| 2937 | | -STATIC int |
|---|
| 2938 | | -xfs_recover_inode_owner_change( |
|---|
| 2939 | | - struct xfs_mount *mp, |
|---|
| 2940 | | - struct xfs_dinode *dip, |
|---|
| 2941 | | - struct xfs_inode_log_format *in_f, |
|---|
| 2942 | | - struct list_head *buffer_list) |
|---|
| 2943 | | -{ |
|---|
| 2944 | | - struct xfs_inode *ip; |
|---|
| 2945 | | - int error; |
|---|
| 2946 | | - |
|---|
| 2947 | | - ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)); |
|---|
| 2948 | | - |
|---|
| 2949 | | - ip = xfs_inode_alloc(mp, in_f->ilf_ino); |
|---|
| 2950 | | - if (!ip) |
|---|
| 2951 | | - return -ENOMEM; |
|---|
| 2952 | | - |
|---|
| 2953 | | - /* instantiate the inode */ |
|---|
| 2954 | | - xfs_inode_from_disk(ip, dip); |
|---|
| 2955 | | - ASSERT(ip->i_d.di_version >= 3); |
|---|
| 2956 | | - |
|---|
| 2957 | | - error = xfs_iformat_fork(ip, dip); |
|---|
| 2958 | | - if (error) |
|---|
| 2959 | | - goto out_free_ip; |
|---|
| 2960 | | - |
|---|
| 2961 | | - if (!xfs_inode_verify_forks(ip)) { |
|---|
| 2962 | | - error = -EFSCORRUPTED; |
|---|
| 2963 | | - goto out_free_ip; |
|---|
| 2964 | | - } |
|---|
| 2965 | | - |
|---|
| 2966 | | - if (in_f->ilf_fields & XFS_ILOG_DOWNER) { |
|---|
| 2967 | | - ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); |
|---|
| 2968 | | - error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK, |
|---|
| 2969 | | - ip->i_ino, buffer_list); |
|---|
| 2970 | | - if (error) |
|---|
| 2971 | | - goto out_free_ip; |
|---|
| 2972 | | - } |
|---|
| 2973 | | - |
|---|
| 2974 | | - if (in_f->ilf_fields & XFS_ILOG_AOWNER) { |
|---|
| 2975 | | - ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT); |
|---|
| 2976 | | - error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK, |
|---|
| 2977 | | - ip->i_ino, buffer_list); |
|---|
| 2978 | | - if (error) |
|---|
| 2979 | | - goto out_free_ip; |
|---|
| 2980 | | - } |
|---|
| 2981 | | - |
|---|
| 2982 | | -out_free_ip: |
|---|
| 2983 | | - xfs_inode_free(ip); |
|---|
| 2984 | | - return error; |
|---|
| 2985 | | -} |
|---|
| 2986 | | - |
|---|
| 2987 | | -STATIC int |
|---|
| 2988 | | -xlog_recover_inode_pass2( |
|---|
| 2989 | | - struct xlog *log, |
|---|
| 2990 | | - struct list_head *buffer_list, |
|---|
| 2991 | | - struct xlog_recover_item *item, |
|---|
| 2992 | | - xfs_lsn_t current_lsn) |
|---|
| 2993 | | -{ |
|---|
| 2994 | | - struct xfs_inode_log_format *in_f; |
|---|
| 2995 | | - xfs_mount_t *mp = log->l_mp; |
|---|
| 2996 | | - xfs_buf_t *bp; |
|---|
| 2997 | | - xfs_dinode_t *dip; |
|---|
| 2998 | | - int len; |
|---|
| 2999 | | - char *src; |
|---|
| 3000 | | - char *dest; |
|---|
| 3001 | | - int error; |
|---|
| 3002 | | - int attr_index; |
|---|
| 3003 | | - uint fields; |
|---|
| 3004 | | - struct xfs_log_dinode *ldip; |
|---|
| 3005 | | - uint isize; |
|---|
| 3006 | | - int need_free = 0; |
|---|
| 3007 | | - |
|---|
| 3008 | | - if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { |
|---|
| 3009 | | - in_f = item->ri_buf[0].i_addr; |
|---|
| 3010 | | - } else { |
|---|
| 3011 | | - in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP); |
|---|
| 3012 | | - need_free = 1; |
|---|
| 3013 | | - error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f); |
|---|
| 3014 | | - if (error) |
|---|
| 3015 | | - goto error; |
|---|
| 3016 | | - } |
|---|
| 3017 | | - |
|---|
| 3018 | | - /* |
|---|
| 3019 | | - * Inode buffers can be freed, look out for it, |
|---|
| 3020 | | - * and do not replay the inode. |
|---|
| 3021 | | - */ |
|---|
| 3022 | | - if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, |
|---|
| 3023 | | - in_f->ilf_len, 0)) { |
|---|
| 3024 | | - error = 0; |
|---|
| 3025 | | - trace_xfs_log_recover_inode_cancel(log, in_f); |
|---|
| 3026 | | - goto error; |
|---|
| 3027 | | - } |
|---|
| 3028 | | - trace_xfs_log_recover_inode_recover(log, in_f); |
|---|
| 3029 | | - |
|---|
| 3030 | | - bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, |
|---|
| 3031 | | - &xfs_inode_buf_ops); |
|---|
| 3032 | | - if (!bp) { |
|---|
| 3033 | | - error = -ENOMEM; |
|---|
| 3034 | | - goto error; |
|---|
| 3035 | | - } |
|---|
| 3036 | | - error = bp->b_error; |
|---|
| 3037 | | - if (error) { |
|---|
| 3038 | | - xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); |
|---|
| 3039 | | - goto out_release; |
|---|
| 3040 | | - } |
|---|
| 3041 | | - ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); |
|---|
| 3042 | | - dip = xfs_buf_offset(bp, in_f->ilf_boffset); |
|---|
| 3043 | | - |
|---|
| 3044 | | - /* |
|---|
| 3045 | | - * Make sure the place we're flushing out to really looks |
|---|
| 3046 | | - * like an inode! |
|---|
| 3047 | | - */ |
|---|
| 3048 | | - if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { |
|---|
| 3049 | | - xfs_alert(mp, |
|---|
| 3050 | | - "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld", |
|---|
| 3051 | | - __func__, dip, bp, in_f->ilf_ino); |
|---|
| 3052 | | - XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", |
|---|
| 3053 | | - XFS_ERRLEVEL_LOW, mp); |
|---|
| 3054 | | - error = -EFSCORRUPTED; |
|---|
| 3055 | | - goto out_release; |
|---|
| 3056 | | - } |
|---|
| 3057 | | - ldip = item->ri_buf[1].i_addr; |
|---|
| 3058 | | - if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) { |
|---|
| 3059 | | - xfs_alert(mp, |
|---|
| 3060 | | - "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld", |
|---|
| 3061 | | - __func__, item, in_f->ilf_ino); |
|---|
| 3062 | | - XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", |
|---|
| 3063 | | - XFS_ERRLEVEL_LOW, mp); |
|---|
| 3064 | | - error = -EFSCORRUPTED; |
|---|
| 3065 | | - goto out_release; |
|---|
| 3066 | | - } |
|---|
| 3067 | | - |
|---|
| 3068 | | - /* |
|---|
| 3069 | | - * If the inode has an LSN in it, recover the inode only if it's less |
|---|
| 3070 | | - * than the lsn of the transaction we are replaying. Note: we still |
|---|
| 3071 | | - * need to replay an owner change even though the inode is more recent |
|---|
| 3072 | | - * than the transaction as there is no guarantee that all the btree |
|---|
| 3073 | | - * blocks are more recent than this transaction, too. |
|---|
| 3074 | | - */ |
|---|
| 3075 | | - if (dip->di_version >= 3) { |
|---|
| 3076 | | - xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); |
|---|
| 3077 | | - |
|---|
| 3078 | | - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { |
|---|
| 3079 | | - trace_xfs_log_recover_inode_skip(log, in_f); |
|---|
| 3080 | | - error = 0; |
|---|
| 3081 | | - goto out_owner_change; |
|---|
| 3082 | | - } |
|---|
| 3083 | | - } |
|---|
| 3084 | | - |
|---|
| 3085 | | - /* |
|---|
| 3086 | | - * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes |
|---|
| 3087 | | - * are transactional and if ordering is necessary we can determine that |
|---|
| 3088 | | - * more accurately by the LSN field in the V3 inode core. Don't trust |
|---|
| 3089 | | - * the inode versions we might be changing them here - use the |
|---|
| 3090 | | - * superblock flag to determine whether we need to look at di_flushiter |
|---|
| 3091 | | - * to skip replay when the on disk inode is newer than the log one |
|---|
| 3092 | | - */ |
|---|
| 3093 | | - if (!xfs_sb_version_hascrc(&mp->m_sb) && |
|---|
| 3094 | | - ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { |
|---|
| 3095 | | - /* |
|---|
| 3096 | | - * Deal with the wrap case, DI_MAX_FLUSH is less |
|---|
| 3097 | | - * than smaller numbers |
|---|
| 3098 | | - */ |
|---|
| 3099 | | - if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && |
|---|
| 3100 | | - ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) { |
|---|
| 3101 | | - /* do nothing */ |
|---|
| 3102 | | - } else { |
|---|
| 3103 | | - trace_xfs_log_recover_inode_skip(log, in_f); |
|---|
| 3104 | | - error = 0; |
|---|
| 3105 | | - goto out_release; |
|---|
| 3106 | | - } |
|---|
| 3107 | | - } |
|---|
| 3108 | | - |
|---|
| 3109 | | - /* Take the opportunity to reset the flush iteration count */ |
|---|
| 3110 | | - ldip->di_flushiter = 0; |
|---|
| 3111 | | - |
|---|
| 3112 | | - if (unlikely(S_ISREG(ldip->di_mode))) { |
|---|
| 3113 | | - if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) && |
|---|
| 3114 | | - (ldip->di_format != XFS_DINODE_FMT_BTREE)) { |
|---|
| 3115 | | - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", |
|---|
| 3116 | | - XFS_ERRLEVEL_LOW, mp, ldip, |
|---|
| 3117 | | - sizeof(*ldip)); |
|---|
| 3118 | | - xfs_alert(mp, |
|---|
| 3119 | | - "%s: Bad regular inode log record, rec ptr "PTR_FMT", " |
|---|
| 3120 | | - "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", |
|---|
| 3121 | | - __func__, item, dip, bp, in_f->ilf_ino); |
|---|
| 3122 | | - error = -EFSCORRUPTED; |
|---|
| 3123 | | - goto out_release; |
|---|
| 3124 | | - } |
|---|
| 3125 | | - } else if (unlikely(S_ISDIR(ldip->di_mode))) { |
|---|
| 3126 | | - if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) && |
|---|
| 3127 | | - (ldip->di_format != XFS_DINODE_FMT_BTREE) && |
|---|
| 3128 | | - (ldip->di_format != XFS_DINODE_FMT_LOCAL)) { |
|---|
| 3129 | | - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", |
|---|
| 3130 | | - XFS_ERRLEVEL_LOW, mp, ldip, |
|---|
| 3131 | | - sizeof(*ldip)); |
|---|
| 3132 | | - xfs_alert(mp, |
|---|
| 3133 | | - "%s: Bad dir inode log record, rec ptr "PTR_FMT", " |
|---|
| 3134 | | - "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld", |
|---|
| 3135 | | - __func__, item, dip, bp, in_f->ilf_ino); |
|---|
| 3136 | | - error = -EFSCORRUPTED; |
|---|
| 3137 | | - goto out_release; |
|---|
| 3138 | | - } |
|---|
| 3139 | | - } |
|---|
| 3140 | | - if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){ |
|---|
| 3141 | | - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", |
|---|
| 3142 | | - XFS_ERRLEVEL_LOW, mp, ldip, |
|---|
| 3143 | | - sizeof(*ldip)); |
|---|
| 3144 | | - xfs_alert(mp, |
|---|
| 3145 | | - "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " |
|---|
| 3146 | | - "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld", |
|---|
| 3147 | | - __func__, item, dip, bp, in_f->ilf_ino, |
|---|
| 3148 | | - ldip->di_nextents + ldip->di_anextents, |
|---|
| 3149 | | - ldip->di_nblocks); |
|---|
| 3150 | | - error = -EFSCORRUPTED; |
|---|
| 3151 | | - goto out_release; |
|---|
| 3152 | | - } |
|---|
| 3153 | | - if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) { |
|---|
| 3154 | | - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", |
|---|
| 3155 | | - XFS_ERRLEVEL_LOW, mp, ldip, |
|---|
| 3156 | | - sizeof(*ldip)); |
|---|
| 3157 | | - xfs_alert(mp, |
|---|
| 3158 | | - "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", " |
|---|
| 3159 | | - "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__, |
|---|
| 3160 | | - item, dip, bp, in_f->ilf_ino, ldip->di_forkoff); |
|---|
| 3161 | | - error = -EFSCORRUPTED; |
|---|
| 3162 | | - goto out_release; |
|---|
| 3163 | | - } |
|---|
| 3164 | | - isize = xfs_log_dinode_size(ldip->di_version); |
|---|
| 3165 | | - if (unlikely(item->ri_buf[1].i_len > isize)) { |
|---|
| 3166 | | - XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", |
|---|
| 3167 | | - XFS_ERRLEVEL_LOW, mp, ldip, |
|---|
| 3168 | | - sizeof(*ldip)); |
|---|
| 3169 | | - xfs_alert(mp, |
|---|
| 3170 | | - "%s: Bad inode log record length %d, rec ptr "PTR_FMT, |
|---|
| 3171 | | - __func__, item->ri_buf[1].i_len, item); |
|---|
| 3172 | | - error = -EFSCORRUPTED; |
|---|
| 3173 | | - goto out_release; |
|---|
| 3174 | | - } |
|---|
| 3175 | | - |
|---|
| 3176 | | - /* recover the log dinode inode into the on disk inode */ |
|---|
| 3177 | | - xfs_log_dinode_to_disk(ldip, dip); |
|---|
| 3178 | | - |
|---|
| 3179 | | - fields = in_f->ilf_fields; |
|---|
| 3180 | | - if (fields & XFS_ILOG_DEV) |
|---|
| 3181 | | - xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); |
|---|
| 3182 | | - |
|---|
| 3183 | | - if (in_f->ilf_size == 2) |
|---|
| 3184 | | - goto out_owner_change; |
|---|
| 3185 | | - len = item->ri_buf[2].i_len; |
|---|
| 3186 | | - src = item->ri_buf[2].i_addr; |
|---|
| 3187 | | - ASSERT(in_f->ilf_size <= 4); |
|---|
| 3188 | | - ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); |
|---|
| 3189 | | - ASSERT(!(fields & XFS_ILOG_DFORK) || |
|---|
| 3190 | | - (len == in_f->ilf_dsize)); |
|---|
| 3191 | | - |
|---|
| 3192 | | - switch (fields & XFS_ILOG_DFORK) { |
|---|
| 3193 | | - case XFS_ILOG_DDATA: |
|---|
| 3194 | | - case XFS_ILOG_DEXT: |
|---|
| 3195 | | - memcpy(XFS_DFORK_DPTR(dip), src, len); |
|---|
| 3196 | | - break; |
|---|
| 3197 | | - |
|---|
| 3198 | | - case XFS_ILOG_DBROOT: |
|---|
| 3199 | | - xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, |
|---|
| 3200 | | - (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), |
|---|
| 3201 | | - XFS_DFORK_DSIZE(dip, mp)); |
|---|
| 3202 | | - break; |
|---|
| 3203 | | - |
|---|
| 3204 | | - default: |
|---|
| 3205 | | - /* |
|---|
| 3206 | | - * There are no data fork flags set. |
|---|
| 3207 | | - */ |
|---|
| 3208 | | - ASSERT((fields & XFS_ILOG_DFORK) == 0); |
|---|
| 3209 | | - break; |
|---|
| 3210 | | - } |
|---|
| 3211 | | - |
|---|
| 3212 | | - /* |
|---|
| 3213 | | - * If we logged any attribute data, recover it. There may or |
|---|
| 3214 | | - * may not have been any other non-core data logged in this |
|---|
| 3215 | | - * transaction. |
|---|
| 3216 | | - */ |
|---|
| 3217 | | - if (in_f->ilf_fields & XFS_ILOG_AFORK) { |
|---|
| 3218 | | - if (in_f->ilf_fields & XFS_ILOG_DFORK) { |
|---|
| 3219 | | - attr_index = 3; |
|---|
| 3220 | | - } else { |
|---|
| 3221 | | - attr_index = 2; |
|---|
| 3222 | | - } |
|---|
| 3223 | | - len = item->ri_buf[attr_index].i_len; |
|---|
| 3224 | | - src = item->ri_buf[attr_index].i_addr; |
|---|
| 3225 | | - ASSERT(len == in_f->ilf_asize); |
|---|
| 3226 | | - |
|---|
| 3227 | | - switch (in_f->ilf_fields & XFS_ILOG_AFORK) { |
|---|
| 3228 | | - case XFS_ILOG_ADATA: |
|---|
| 3229 | | - case XFS_ILOG_AEXT: |
|---|
| 3230 | | - dest = XFS_DFORK_APTR(dip); |
|---|
| 3231 | | - ASSERT(len <= XFS_DFORK_ASIZE(dip, mp)); |
|---|
| 3232 | | - memcpy(dest, src, len); |
|---|
| 3233 | | - break; |
|---|
| 3234 | | - |
|---|
| 3235 | | - case XFS_ILOG_ABROOT: |
|---|
| 3236 | | - dest = XFS_DFORK_APTR(dip); |
|---|
| 3237 | | - xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, |
|---|
| 3238 | | - len, (xfs_bmdr_block_t*)dest, |
|---|
| 3239 | | - XFS_DFORK_ASIZE(dip, mp)); |
|---|
| 3240 | | - break; |
|---|
| 3241 | | - |
|---|
| 3242 | | - default: |
|---|
| 3243 | | - xfs_warn(log->l_mp, "%s: Invalid flag", __func__); |
|---|
| 3244 | | - ASSERT(0); |
|---|
| 3245 | | - error = -EIO; |
|---|
| 3246 | | - goto out_release; |
|---|
| 3247 | | - } |
|---|
| 3248 | | - } |
|---|
| 3249 | | - |
|---|
| 3250 | | -out_owner_change: |
|---|
| 3251 | | - /* Recover the swapext owner change unless inode has been deleted */ |
|---|
| 3252 | | - if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) && |
|---|
| 3253 | | - (dip->di_mode != 0)) |
|---|
| 3254 | | - error = xfs_recover_inode_owner_change(mp, dip, in_f, |
|---|
| 3255 | | - buffer_list); |
|---|
| 3256 | | - /* re-generate the checksum. */ |
|---|
| 3257 | | - xfs_dinode_calc_crc(log->l_mp, dip); |
|---|
| 3258 | | - |
|---|
| 3259 | | - ASSERT(bp->b_target->bt_mount == mp); |
|---|
| 3260 | | - bp->b_iodone = xlog_recover_iodone; |
|---|
| 3261 | | - xfs_buf_delwri_queue(bp, buffer_list); |
|---|
| 3262 | | - |
|---|
| 3263 | | -out_release: |
|---|
| 3264 | | - xfs_buf_relse(bp); |
|---|
| 3265 | | -error: |
|---|
| 3266 | | - if (need_free) |
|---|
| 3267 | | - kmem_free(in_f); |
|---|
| 3268 | | - return error; |
|---|
| 3269 | | -} |
|---|
| 3270 | | - |
|---|
| 3271 | | -/* |
|---|
| 3272 | | - * Recover QUOTAOFF records. We simply make a note of it in the xlog |
|---|
| 3273 | | - * structure, so that we know not to do any dquot item or dquot buffer recovery, |
|---|
| 3274 | | - * of that type. |
|---|
| 3275 | | - */ |
|---|
| 3276 | | -STATIC int |
|---|
| 3277 | | -xlog_recover_quotaoff_pass1( |
|---|
| 3278 | | - struct xlog *log, |
|---|
| 3279 | | - struct xlog_recover_item *item) |
|---|
| 3280 | | -{ |
|---|
| 3281 | | - xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr; |
|---|
| 3282 | | - ASSERT(qoff_f); |
|---|
| 3283 | | - |
|---|
| 3284 | | - /* |
|---|
| 3285 | | - * The logitem format's flag tells us if this was user quotaoff, |
|---|
| 3286 | | - * group/project quotaoff or both. |
|---|
| 3287 | | - */ |
|---|
| 3288 | | - if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) |
|---|
| 3289 | | - log->l_quotaoffs_flag |= XFS_DQ_USER; |
|---|
| 3290 | | - if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) |
|---|
| 3291 | | - log->l_quotaoffs_flag |= XFS_DQ_PROJ; |
|---|
| 3292 | | - if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) |
|---|
| 3293 | | - log->l_quotaoffs_flag |= XFS_DQ_GROUP; |
|---|
| 3294 | | - |
|---|
| 3295 | | - return 0; |
|---|
| 3296 | | -} |
|---|
| 3297 | | - |
|---|
| 3298 | | -/* |
|---|
| 3299 | | - * Recover a dquot record |
|---|
| 3300 | | - */ |
|---|
| 3301 | | -STATIC int |
|---|
| 3302 | | -xlog_recover_dquot_pass2( |
|---|
| 3303 | | - struct xlog *log, |
|---|
| 3304 | | - struct list_head *buffer_list, |
|---|
| 3305 | | - struct xlog_recover_item *item, |
|---|
| 3306 | | - xfs_lsn_t current_lsn) |
|---|
| 3307 | | -{ |
|---|
| 3308 | | - xfs_mount_t *mp = log->l_mp; |
|---|
| 3309 | | - xfs_buf_t *bp; |
|---|
| 3310 | | - struct xfs_disk_dquot *ddq, *recddq; |
|---|
| 3311 | | - xfs_failaddr_t fa; |
|---|
| 3312 | | - int error; |
|---|
| 3313 | | - xfs_dq_logformat_t *dq_f; |
|---|
| 3314 | | - uint type; |
|---|
| 3315 | | - |
|---|
| 3316 | | - |
|---|
| 3317 | | - /* |
|---|
| 3318 | | - * Filesystems are required to send in quota flags at mount time. |
|---|
| 3319 | | - */ |
|---|
| 3320 | | - if (mp->m_qflags == 0) |
|---|
| 3321 | | - return 0; |
|---|
| 3322 | | - |
|---|
| 3323 | | - recddq = item->ri_buf[1].i_addr; |
|---|
| 3324 | | - if (recddq == NULL) { |
|---|
| 3325 | | - xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); |
|---|
| 3326 | | - return -EIO; |
|---|
| 3327 | | - } |
|---|
| 3328 | | - if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { |
|---|
| 3329 | | - xfs_alert(log->l_mp, "dquot too small (%d) in %s.", |
|---|
| 3330 | | - item->ri_buf[1].i_len, __func__); |
|---|
| 3331 | | - return -EIO; |
|---|
| 3332 | | - } |
|---|
| 3333 | | - |
|---|
| 3334 | | - /* |
|---|
| 3335 | | - * This type of quotas was turned off, so ignore this record. |
|---|
| 3336 | | - */ |
|---|
| 3337 | | - type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); |
|---|
| 3338 | | - ASSERT(type); |
|---|
| 3339 | | - if (log->l_quotaoffs_flag & type) |
|---|
| 3340 | | - return 0; |
|---|
| 3341 | | - |
|---|
| 3342 | | - /* |
|---|
| 3343 | | - * At this point we know that quota was _not_ turned off. |
|---|
| 3344 | | - * Since the mount flags are not indicating to us otherwise, this |
|---|
| 3345 | | - * must mean that quota is on, and the dquot needs to be replayed. |
|---|
| 3346 | | - * Remember that we may not have fully recovered the superblock yet, |
|---|
| 3347 | | - * so we can't do the usual trick of looking at the SB quota bits. |
|---|
| 3348 | | - * |
|---|
| 3349 | | - * The other possibility, of course, is that the quota subsystem was |
|---|
| 3350 | | - * removed since the last mount - ENOSYS. |
|---|
| 3351 | | - */ |
|---|
| 3352 | | - dq_f = item->ri_buf[0].i_addr; |
|---|
| 3353 | | - ASSERT(dq_f); |
|---|
| 3354 | | - fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0); |
|---|
| 3355 | | - if (fa) { |
|---|
| 3356 | | - xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS", |
|---|
| 3357 | | - dq_f->qlf_id, fa); |
|---|
| 3358 | | - return -EIO; |
|---|
| 3359 | | - } |
|---|
| 3360 | | - ASSERT(dq_f->qlf_len == 1); |
|---|
| 3361 | | - |
|---|
| 3362 | | - /* |
|---|
| 3363 | | - * At this point we are assuming that the dquots have been allocated |
|---|
| 3364 | | - * and hence the buffer has valid dquots stamped in it. It should, |
|---|
| 3365 | | - * therefore, pass verifier validation. If the dquot is bad, then the |
|---|
| 3366 | | - * we'll return an error here, so we don't need to specifically check |
|---|
| 3367 | | - * the dquot in the buffer after the verifier has run. |
|---|
| 3368 | | - */ |
|---|
| 3369 | | - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, |
|---|
| 3370 | | - XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp, |
|---|
| 3371 | | - &xfs_dquot_buf_ops); |
|---|
| 3372 | | - if (error) |
|---|
| 3373 | | - return error; |
|---|
| 3374 | | - |
|---|
| 3375 | | - ASSERT(bp); |
|---|
| 3376 | | - ddq = xfs_buf_offset(bp, dq_f->qlf_boffset); |
|---|
| 3377 | | - |
|---|
| 3378 | | - /* |
|---|
| 3379 | | - * If the dquot has an LSN in it, recover the dquot only if it's less |
|---|
| 3380 | | - * than the lsn of the transaction we are replaying. |
|---|
| 3381 | | - */ |
|---|
| 3382 | | - if (xfs_sb_version_hascrc(&mp->m_sb)) { |
|---|
| 3383 | | - struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq; |
|---|
| 3384 | | - xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn); |
|---|
| 3385 | | - |
|---|
| 3386 | | - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { |
|---|
| 3387 | | - goto out_release; |
|---|
| 3388 | | - } |
|---|
| 3389 | | - } |
|---|
| 3390 | | - |
|---|
| 3391 | | - memcpy(ddq, recddq, item->ri_buf[1].i_len); |
|---|
| 3392 | | - if (xfs_sb_version_hascrc(&mp->m_sb)) { |
|---|
| 3393 | | - xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), |
|---|
| 3394 | | - XFS_DQUOT_CRC_OFF); |
|---|
| 3395 | | - } |
|---|
| 3396 | | - |
|---|
| 3397 | | - ASSERT(dq_f->qlf_size == 2); |
|---|
| 3398 | | - ASSERT(bp->b_target->bt_mount == mp); |
|---|
| 3399 | | - bp->b_iodone = xlog_recover_iodone; |
|---|
| 3400 | | - xfs_buf_delwri_queue(bp, buffer_list); |
|---|
| 3401 | | - |
|---|
| 3402 | | -out_release: |
|---|
| 3403 | | - xfs_buf_relse(bp); |
|---|
| 3404 | | - return 0; |
|---|
| 3405 | | -} |
|---|
| 3406 | | - |
|---|
| 3407 | | -/* |
|---|
| 3408 | | - * This routine is called to create an in-core extent free intent |
|---|
| 3409 | | - * item from the efi format structure which was logged on disk. |
|---|
| 3410 | | - * It allocates an in-core efi, copies the extents from the format |
|---|
| 3411 | | - * structure into it, and adds the efi to the AIL with the given |
|---|
| 3412 | | - * LSN. |
|---|
| 3413 | | - */ |
|---|
| 3414 | | -STATIC int |
|---|
| 3415 | | -xlog_recover_efi_pass2( |
|---|
| 3416 | | - struct xlog *log, |
|---|
| 3417 | | - struct xlog_recover_item *item, |
|---|
| 3418 | | - xfs_lsn_t lsn) |
|---|
| 3419 | | -{ |
|---|
| 3420 | | - int error; |
|---|
| 3421 | | - struct xfs_mount *mp = log->l_mp; |
|---|
| 3422 | | - struct xfs_efi_log_item *efip; |
|---|
| 3423 | | - struct xfs_efi_log_format *efi_formatp; |
|---|
| 3424 | | - |
|---|
| 3425 | | - efi_formatp = item->ri_buf[0].i_addr; |
|---|
| 3426 | | - |
|---|
| 3427 | | - efip = xfs_efi_init(mp, efi_formatp->efi_nextents); |
|---|
| 3428 | | - error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); |
|---|
| 3429 | | - if (error) { |
|---|
| 3430 | | - xfs_efi_item_free(efip); |
|---|
| 3431 | | - return error; |
|---|
| 3432 | | - } |
|---|
| 3433 | | - atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); |
|---|
| 3434 | | - |
|---|
| 3435 | | - spin_lock(&log->l_ailp->ail_lock); |
|---|
| 3436 | | - /* |
|---|
| 3437 | | - * The EFI has two references. One for the EFD and one for EFI to ensure |
|---|
| 3438 | | - * it makes it into the AIL. Insert the EFI into the AIL directly and |
|---|
| 3439 | | - * drop the EFI reference. Note that xfs_trans_ail_update() drops the |
|---|
| 3440 | | - * AIL lock. |
|---|
| 3441 | | - */ |
|---|
| 3442 | | - xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn); |
|---|
| 3443 | | - xfs_efi_release(efip); |
|---|
| 3444 | | - return 0; |
|---|
| 3445 | | -} |
|---|
| 3446 | | - |
|---|
| 3447 | | - |
|---|
| 3448 | | -/* |
|---|
| 3449 | | - * This routine is called when an EFD format structure is found in a committed |
|---|
| 3450 | | - * transaction in the log. Its purpose is to cancel the corresponding EFI if it |
|---|
| 3451 | | - * was still in the log. To do this it searches the AIL for the EFI with an id |
|---|
| 3452 | | - * equal to that in the EFD format structure. If we find it we drop the EFD |
|---|
| 3453 | | - * reference, which removes the EFI from the AIL and frees it. |
|---|
| 3454 | | - */ |
|---|
| 3455 | | -STATIC int |
|---|
| 3456 | | -xlog_recover_efd_pass2( |
|---|
| 3457 | | - struct xlog *log, |
|---|
| 3458 | | - struct xlog_recover_item *item) |
|---|
| 3459 | | -{ |
|---|
| 3460 | | - xfs_efd_log_format_t *efd_formatp; |
|---|
| 3461 | | - xfs_efi_log_item_t *efip = NULL; |
|---|
| 3462 | | - xfs_log_item_t *lip; |
|---|
| 3463 | | - uint64_t efi_id; |
|---|
| 3464 | | - struct xfs_ail_cursor cur; |
|---|
| 3465 | | - struct xfs_ail *ailp = log->l_ailp; |
|---|
| 3466 | | - |
|---|
| 3467 | | - efd_formatp = item->ri_buf[0].i_addr; |
|---|
| 3468 | | - ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + |
|---|
| 3469 | | - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || |
|---|
| 3470 | | - (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + |
|---|
| 3471 | | - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); |
|---|
| 3472 | | - efi_id = efd_formatp->efd_efi_id; |
|---|
| 3473 | | - |
|---|
| 3474 | | - /* |
|---|
| 3475 | | - * Search for the EFI with the id in the EFD format structure in the |
|---|
| 3476 | | - * AIL. |
|---|
| 3477 | | - */ |
|---|
| 3478 | | - spin_lock(&ailp->ail_lock); |
|---|
| 3479 | | - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
|---|
| 3480 | | - while (lip != NULL) { |
|---|
| 3481 | | - if (lip->li_type == XFS_LI_EFI) { |
|---|
| 3482 | | - efip = (xfs_efi_log_item_t *)lip; |
|---|
| 3483 | | - if (efip->efi_format.efi_id == efi_id) { |
|---|
| 3484 | | - /* |
|---|
| 3485 | | - * Drop the EFD reference to the EFI. This |
|---|
| 3486 | | - * removes the EFI from the AIL and frees it. |
|---|
| 3487 | | - */ |
|---|
| 3488 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 3489 | | - xfs_efi_release(efip); |
|---|
| 3490 | | - spin_lock(&ailp->ail_lock); |
|---|
| 3491 | | - break; |
|---|
| 3492 | | - } |
|---|
| 3493 | | - } |
|---|
| 3494 | | - lip = xfs_trans_ail_cursor_next(ailp, &cur); |
|---|
| 3495 | | - } |
|---|
| 3496 | | - |
|---|
| 3497 | | - xfs_trans_ail_cursor_done(&cur); |
|---|
| 3498 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 3499 | | - |
|---|
| 3500 | | - return 0; |
|---|
| 3501 | | -} |
|---|
| 3502 | | - |
|---|
| 3503 | | -/* |
|---|
| 3504 | | - * This routine is called to create an in-core extent rmap update |
|---|
| 3505 | | - * item from the rui format structure which was logged on disk. |
|---|
| 3506 | | - * It allocates an in-core rui, copies the extents from the format |
|---|
| 3507 | | - * structure into it, and adds the rui to the AIL with the given |
|---|
| 3508 | | - * LSN. |
|---|
| 3509 | | - */ |
|---|
| 3510 | | -STATIC int |
|---|
| 3511 | | -xlog_recover_rui_pass2( |
|---|
| 3512 | | - struct xlog *log, |
|---|
| 3513 | | - struct xlog_recover_item *item, |
|---|
| 3514 | | - xfs_lsn_t lsn) |
|---|
| 3515 | | -{ |
|---|
| 3516 | | - int error; |
|---|
| 3517 | | - struct xfs_mount *mp = log->l_mp; |
|---|
| 3518 | | - struct xfs_rui_log_item *ruip; |
|---|
| 3519 | | - struct xfs_rui_log_format *rui_formatp; |
|---|
| 3520 | | - |
|---|
| 3521 | | - rui_formatp = item->ri_buf[0].i_addr; |
|---|
| 3522 | | - |
|---|
| 3523 | | - ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); |
|---|
| 3524 | | - error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format); |
|---|
| 3525 | | - if (error) { |
|---|
| 3526 | | - xfs_rui_item_free(ruip); |
|---|
| 3527 | | - return error; |
|---|
| 3528 | | - } |
|---|
| 3529 | | - atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents); |
|---|
| 3530 | | - |
|---|
| 3531 | | - spin_lock(&log->l_ailp->ail_lock); |
|---|
| 3532 | | - /* |
|---|
| 3533 | | - * The RUI has two references. One for the RUD and one for RUI to ensure |
|---|
| 3534 | | - * it makes it into the AIL. Insert the RUI into the AIL directly and |
|---|
| 3535 | | - * drop the RUI reference. Note that xfs_trans_ail_update() drops the |
|---|
| 3536 | | - * AIL lock. |
|---|
| 3537 | | - */ |
|---|
| 3538 | | - xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn); |
|---|
| 3539 | | - xfs_rui_release(ruip); |
|---|
| 3540 | | - return 0; |
|---|
| 3541 | | -} |
|---|
| 3542 | | - |
|---|
| 3543 | | - |
|---|
| 3544 | | -/* |
|---|
| 3545 | | - * This routine is called when an RUD format structure is found in a committed |
|---|
| 3546 | | - * transaction in the log. Its purpose is to cancel the corresponding RUI if it |
|---|
| 3547 | | - * was still in the log. To do this it searches the AIL for the RUI with an id |
|---|
| 3548 | | - * equal to that in the RUD format structure. If we find it we drop the RUD |
|---|
| 3549 | | - * reference, which removes the RUI from the AIL and frees it. |
|---|
| 3550 | | - */ |
|---|
| 3551 | | -STATIC int |
|---|
| 3552 | | -xlog_recover_rud_pass2( |
|---|
| 3553 | | - struct xlog *log, |
|---|
| 3554 | | - struct xlog_recover_item *item) |
|---|
| 3555 | | -{ |
|---|
| 3556 | | - struct xfs_rud_log_format *rud_formatp; |
|---|
| 3557 | | - struct xfs_rui_log_item *ruip = NULL; |
|---|
| 3558 | | - struct xfs_log_item *lip; |
|---|
| 3559 | | - uint64_t rui_id; |
|---|
| 3560 | | - struct xfs_ail_cursor cur; |
|---|
| 3561 | | - struct xfs_ail *ailp = log->l_ailp; |
|---|
| 3562 | | - |
|---|
| 3563 | | - rud_formatp = item->ri_buf[0].i_addr; |
|---|
| 3564 | | - ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format)); |
|---|
| 3565 | | - rui_id = rud_formatp->rud_rui_id; |
|---|
| 3566 | | - |
|---|
| 3567 | | - /* |
|---|
| 3568 | | - * Search for the RUI with the id in the RUD format structure in the |
|---|
| 3569 | | - * AIL. |
|---|
| 3570 | | - */ |
|---|
| 3571 | | - spin_lock(&ailp->ail_lock); |
|---|
| 3572 | | - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
|---|
| 3573 | | - while (lip != NULL) { |
|---|
| 3574 | | - if (lip->li_type == XFS_LI_RUI) { |
|---|
| 3575 | | - ruip = (struct xfs_rui_log_item *)lip; |
|---|
| 3576 | | - if (ruip->rui_format.rui_id == rui_id) { |
|---|
| 3577 | | - /* |
|---|
| 3578 | | - * Drop the RUD reference to the RUI. This |
|---|
| 3579 | | - * removes the RUI from the AIL and frees it. |
|---|
| 3580 | | - */ |
|---|
| 3581 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 3582 | | - xfs_rui_release(ruip); |
|---|
| 3583 | | - spin_lock(&ailp->ail_lock); |
|---|
| 3584 | | - break; |
|---|
| 3585 | | - } |
|---|
| 3586 | | - } |
|---|
| 3587 | | - lip = xfs_trans_ail_cursor_next(ailp, &cur); |
|---|
| 3588 | | - } |
|---|
| 3589 | | - |
|---|
| 3590 | | - xfs_trans_ail_cursor_done(&cur); |
|---|
| 3591 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 3592 | | - |
|---|
| 3593 | | - return 0; |
|---|
| 3594 | | -} |
|---|
| 3595 | | - |
|---|
| 3596 | | -/* |
|---|
| 3597 | | - * Copy a CUI format buffer from the given buf, and into the destination |
|---|
| 3598 | | - * CUI format structure. The CUI/CUD items were designed not to need any |
|---|
| 3599 | | - * special alignment handling. |
|---|
| 3600 | | - */ |
|---|
| 3601 | | -static int |
|---|
| 3602 | | -xfs_cui_copy_format( |
|---|
| 3603 | | - struct xfs_log_iovec *buf, |
|---|
| 3604 | | - struct xfs_cui_log_format *dst_cui_fmt) |
|---|
| 3605 | | -{ |
|---|
| 3606 | | - struct xfs_cui_log_format *src_cui_fmt; |
|---|
| 3607 | | - uint len; |
|---|
| 3608 | | - |
|---|
| 3609 | | - src_cui_fmt = buf->i_addr; |
|---|
| 3610 | | - len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents); |
|---|
| 3611 | | - |
|---|
| 3612 | | - if (buf->i_len == len) { |
|---|
| 3613 | | - memcpy(dst_cui_fmt, src_cui_fmt, len); |
|---|
| 3614 | | - return 0; |
|---|
| 3615 | | - } |
|---|
| 3616 | | - return -EFSCORRUPTED; |
|---|
| 3617 | | -} |
|---|
| 3618 | | - |
|---|
| 3619 | | -/* |
|---|
| 3620 | | - * This routine is called to create an in-core extent refcount update |
|---|
| 3621 | | - * item from the cui format structure which was logged on disk. |
|---|
| 3622 | | - * It allocates an in-core cui, copies the extents from the format |
|---|
| 3623 | | - * structure into it, and adds the cui to the AIL with the given |
|---|
| 3624 | | - * LSN. |
|---|
| 3625 | | - */ |
|---|
| 3626 | | -STATIC int |
|---|
| 3627 | | -xlog_recover_cui_pass2( |
|---|
| 3628 | | - struct xlog *log, |
|---|
| 3629 | | - struct xlog_recover_item *item, |
|---|
| 3630 | | - xfs_lsn_t lsn) |
|---|
| 3631 | | -{ |
|---|
| 3632 | | - int error; |
|---|
| 3633 | | - struct xfs_mount *mp = log->l_mp; |
|---|
| 3634 | | - struct xfs_cui_log_item *cuip; |
|---|
| 3635 | | - struct xfs_cui_log_format *cui_formatp; |
|---|
| 3636 | | - |
|---|
| 3637 | | - cui_formatp = item->ri_buf[0].i_addr; |
|---|
| 3638 | | - |
|---|
| 3639 | | - cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); |
|---|
| 3640 | | - error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format); |
|---|
| 3641 | | - if (error) { |
|---|
| 3642 | | - xfs_cui_item_free(cuip); |
|---|
| 3643 | | - return error; |
|---|
| 3644 | | - } |
|---|
| 3645 | | - atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); |
|---|
| 3646 | | - |
|---|
| 3647 | | - spin_lock(&log->l_ailp->ail_lock); |
|---|
| 3648 | | - /* |
|---|
| 3649 | | - * The CUI has two references. One for the CUD and one for CUI to ensure |
|---|
| 3650 | | - * it makes it into the AIL. Insert the CUI into the AIL directly and |
|---|
| 3651 | | - * drop the CUI reference. Note that xfs_trans_ail_update() drops the |
|---|
| 3652 | | - * AIL lock. |
|---|
| 3653 | | - */ |
|---|
| 3654 | | - xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn); |
|---|
| 3655 | | - xfs_cui_release(cuip); |
|---|
| 3656 | | - return 0; |
|---|
| 3657 | | -} |
|---|
| 3658 | | - |
|---|
| 3659 | | - |
|---|
| 3660 | | -/* |
|---|
| 3661 | | - * This routine is called when a CUD format structure is found in a committed |
|---|
| 3662 | | - * transaction in the log. Its purpose is to cancel the corresponding CUI if it |
|---|
| 3663 | | - * was still in the log. To do this it searches the AIL for the CUI with an id |
|---|
| 3664 | | - * equal to that in the CUD format structure. If we find it we drop the CUD |
|---|
| 3665 | | - * reference, which removes the CUI from the AIL and frees it. |
|---|
| 3666 | | - */ |
|---|
| 3667 | | -STATIC int |
|---|
| 3668 | | -xlog_recover_cud_pass2( |
|---|
| 3669 | | - struct xlog *log, |
|---|
| 3670 | | - struct xlog_recover_item *item) |
|---|
| 3671 | | -{ |
|---|
| 3672 | | - struct xfs_cud_log_format *cud_formatp; |
|---|
| 3673 | | - struct xfs_cui_log_item *cuip = NULL; |
|---|
| 3674 | | - struct xfs_log_item *lip; |
|---|
| 3675 | | - uint64_t cui_id; |
|---|
| 3676 | | - struct xfs_ail_cursor cur; |
|---|
| 3677 | | - struct xfs_ail *ailp = log->l_ailp; |
|---|
| 3678 | | - |
|---|
| 3679 | | - cud_formatp = item->ri_buf[0].i_addr; |
|---|
| 3680 | | - if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) |
|---|
| 3681 | | - return -EFSCORRUPTED; |
|---|
| 3682 | | - cui_id = cud_formatp->cud_cui_id; |
|---|
| 3683 | | - |
|---|
| 3684 | | - /* |
|---|
| 3685 | | - * Search for the CUI with the id in the CUD format structure in the |
|---|
| 3686 | | - * AIL. |
|---|
| 3687 | | - */ |
|---|
| 3688 | | - spin_lock(&ailp->ail_lock); |
|---|
| 3689 | | - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
|---|
| 3690 | | - while (lip != NULL) { |
|---|
| 3691 | | - if (lip->li_type == XFS_LI_CUI) { |
|---|
| 3692 | | - cuip = (struct xfs_cui_log_item *)lip; |
|---|
| 3693 | | - if (cuip->cui_format.cui_id == cui_id) { |
|---|
| 3694 | | - /* |
|---|
| 3695 | | - * Drop the CUD reference to the CUI. This |
|---|
| 3696 | | - * removes the CUI from the AIL and frees it. |
|---|
| 3697 | | - */ |
|---|
| 3698 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 3699 | | - xfs_cui_release(cuip); |
|---|
| 3700 | | - spin_lock(&ailp->ail_lock); |
|---|
| 3701 | | - break; |
|---|
| 3702 | | - } |
|---|
| 3703 | | - } |
|---|
| 3704 | | - lip = xfs_trans_ail_cursor_next(ailp, &cur); |
|---|
| 3705 | | - } |
|---|
| 3706 | | - |
|---|
| 3707 | | - xfs_trans_ail_cursor_done(&cur); |
|---|
| 3708 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 3709 | | - |
|---|
| 3710 | | - return 0; |
|---|
| 3711 | | -} |
|---|
| 3712 | | - |
|---|
| 3713 | | -/* |
|---|
| 3714 | | - * Copy a BUI format buffer from the given buf, and into the destination |
|---|
| 3715 | | - * BUI format structure. The BUI/BUD items were designed not to need any |
|---|
| 3716 | | - * special alignment handling. |
|---|
| 3717 | | - */ |
|---|
| 3718 | | -static int |
|---|
| 3719 | | -xfs_bui_copy_format( |
|---|
| 3720 | | - struct xfs_log_iovec *buf, |
|---|
| 3721 | | - struct xfs_bui_log_format *dst_bui_fmt) |
|---|
| 3722 | | -{ |
|---|
| 3723 | | - struct xfs_bui_log_format *src_bui_fmt; |
|---|
| 3724 | | - uint len; |
|---|
| 3725 | | - |
|---|
| 3726 | | - src_bui_fmt = buf->i_addr; |
|---|
| 3727 | | - len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents); |
|---|
| 3728 | | - |
|---|
| 3729 | | - if (buf->i_len == len) { |
|---|
| 3730 | | - memcpy(dst_bui_fmt, src_bui_fmt, len); |
|---|
| 3731 | | - return 0; |
|---|
| 3732 | | - } |
|---|
| 3733 | | - return -EFSCORRUPTED; |
|---|
| 3734 | | -} |
|---|
| 3735 | | - |
|---|
| 3736 | | -/* |
|---|
| 3737 | | - * This routine is called to create an in-core extent bmap update |
|---|
| 3738 | | - * item from the bui format structure which was logged on disk. |
|---|
| 3739 | | - * It allocates an in-core bui, copies the extents from the format |
|---|
| 3740 | | - * structure into it, and adds the bui to the AIL with the given |
|---|
| 3741 | | - * LSN. |
|---|
| 3742 | | - */ |
|---|
| 3743 | | -STATIC int |
|---|
| 3744 | | -xlog_recover_bui_pass2( |
|---|
| 3745 | | - struct xlog *log, |
|---|
| 3746 | | - struct xlog_recover_item *item, |
|---|
| 3747 | | - xfs_lsn_t lsn) |
|---|
| 3748 | | -{ |
|---|
| 3749 | | - int error; |
|---|
| 3750 | | - struct xfs_mount *mp = log->l_mp; |
|---|
| 3751 | | - struct xfs_bui_log_item *buip; |
|---|
| 3752 | | - struct xfs_bui_log_format *bui_formatp; |
|---|
| 3753 | | - |
|---|
| 3754 | | - bui_formatp = item->ri_buf[0].i_addr; |
|---|
| 3755 | | - |
|---|
| 3756 | | - if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) |
|---|
| 3757 | | - return -EFSCORRUPTED; |
|---|
| 3758 | | - buip = xfs_bui_init(mp); |
|---|
| 3759 | | - error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); |
|---|
| 3760 | | - if (error) { |
|---|
| 3761 | | - xfs_bui_item_free(buip); |
|---|
| 3762 | | - return error; |
|---|
| 3763 | | - } |
|---|
| 3764 | | - atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); |
|---|
| 3765 | | - |
|---|
| 3766 | | - spin_lock(&log->l_ailp->ail_lock); |
|---|
| 3767 | | - /* |
|---|
| 3768 | | - * The BUI has two references. One for the BUD and one for BUI to ensure |
|---|
| 3769 | | - * it makes it into the AIL. Insert the BUI into the AIL directly and |
|---|
| 3770 | | - * drop the BUI reference. Note that xfs_trans_ail_update() drops the |
|---|
| 3771 | | - * AIL lock. |
|---|
| 3772 | | - */ |
|---|
| 3773 | | - xfs_trans_ail_update(log->l_ailp, &buip->bui_item, lsn); |
|---|
| 3774 | | - xfs_bui_release(buip); |
|---|
| 3775 | | - return 0; |
|---|
| 3776 | | -} |
|---|
| 3777 | | - |
|---|
| 3778 | | - |
|---|
| 3779 | | -/* |
|---|
| 3780 | | - * This routine is called when a BUD format structure is found in a committed |
|---|
| 3781 | | - * transaction in the log. Its purpose is to cancel the corresponding BUI if it |
|---|
| 3782 | | - * was still in the log. To do this it searches the AIL for the BUI with an id |
|---|
| 3783 | | - * equal to that in the BUD format structure. If we find it we drop the BUD |
|---|
| 3784 | | - * reference, which removes the BUI from the AIL and frees it. |
|---|
| 3785 | | - */ |
|---|
| 3786 | | -STATIC int |
|---|
| 3787 | | -xlog_recover_bud_pass2( |
|---|
| 3788 | | - struct xlog *log, |
|---|
| 3789 | | - struct xlog_recover_item *item) |
|---|
| 3790 | | -{ |
|---|
| 3791 | | - struct xfs_bud_log_format *bud_formatp; |
|---|
| 3792 | | - struct xfs_bui_log_item *buip = NULL; |
|---|
| 3793 | | - struct xfs_log_item *lip; |
|---|
| 3794 | | - uint64_t bui_id; |
|---|
| 3795 | | - struct xfs_ail_cursor cur; |
|---|
| 3796 | | - struct xfs_ail *ailp = log->l_ailp; |
|---|
| 3797 | | - |
|---|
| 3798 | | - bud_formatp = item->ri_buf[0].i_addr; |
|---|
| 3799 | | - if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) |
|---|
| 3800 | | - return -EFSCORRUPTED; |
|---|
| 3801 | | - bui_id = bud_formatp->bud_bui_id; |
|---|
| 3802 | | - |
|---|
| 3803 | | - /* |
|---|
| 3804 | | - * Search for the BUI with the id in the BUD format structure in the |
|---|
| 3805 | | - * AIL. |
|---|
| 3806 | | - */ |
|---|
| 3807 | | - spin_lock(&ailp->ail_lock); |
|---|
| 3808 | | - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
|---|
| 3809 | | - while (lip != NULL) { |
|---|
| 3810 | | - if (lip->li_type == XFS_LI_BUI) { |
|---|
| 3811 | | - buip = (struct xfs_bui_log_item *)lip; |
|---|
| 3812 | | - if (buip->bui_format.bui_id == bui_id) { |
|---|
| 3813 | | - /* |
|---|
| 3814 | | - * Drop the BUD reference to the BUI. This |
|---|
| 3815 | | - * removes the BUI from the AIL and frees it. |
|---|
| 3816 | | - */ |
|---|
| 3817 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 3818 | | - xfs_bui_release(buip); |
|---|
| 3819 | | - spin_lock(&ailp->ail_lock); |
|---|
| 3820 | | - break; |
|---|
| 3821 | | - } |
|---|
| 3822 | | - } |
|---|
| 3823 | | - lip = xfs_trans_ail_cursor_next(ailp, &cur); |
|---|
| 3824 | | - } |
|---|
| 3825 | | - |
|---|
| 3826 | | - xfs_trans_ail_cursor_done(&cur); |
|---|
| 3827 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 3828 | | - |
|---|
| 3829 | | - return 0; |
|---|
| 3830 | | -} |
|---|
| 3831 | | - |
|---|
| 3832 | | -/* |
|---|
| 3833 | | - * This routine is called when an inode create format structure is found in a |
|---|
| 3834 | | - * committed transaction in the log. Its purpose is to initialise the inodes |
|---|
| 3835 | | - * being allocated on disk. This requires us to get inode cluster buffers that |
|---|
| 3836 | | - * match the range to be initialised, stamped with inode templates and written |
|---|
| 3837 | | - * by delayed write so that subsequent modifications will hit the cached buffer |
|---|
| 3838 | | - * and only need writing out at the end of recovery. |
|---|
| 3839 | | - */ |
|---|
| 3840 | | -STATIC int |
|---|
| 3841 | | -xlog_recover_do_icreate_pass2( |
|---|
| 3842 | | - struct xlog *log, |
|---|
| 3843 | | - struct list_head *buffer_list, |
|---|
| 3844 | | - xlog_recover_item_t *item) |
|---|
| 3845 | | -{ |
|---|
| 3846 | | - struct xfs_mount *mp = log->l_mp; |
|---|
| 3847 | | - struct xfs_icreate_log *icl; |
|---|
| 3848 | | - xfs_agnumber_t agno; |
|---|
| 3849 | | - xfs_agblock_t agbno; |
|---|
| 3850 | | - unsigned int count; |
|---|
| 3851 | | - unsigned int isize; |
|---|
| 3852 | | - xfs_agblock_t length; |
|---|
| 3853 | | - int blks_per_cluster; |
|---|
| 3854 | | - int bb_per_cluster; |
|---|
| 3855 | | - int cancel_count; |
|---|
| 3856 | | - int nbufs; |
|---|
| 3857 | | - int i; |
|---|
| 3858 | | - |
|---|
| 3859 | | - icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; |
|---|
| 3860 | | - if (icl->icl_type != XFS_LI_ICREATE) { |
|---|
| 3861 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); |
|---|
| 3862 | | - return -EINVAL; |
|---|
| 3863 | | - } |
|---|
| 3864 | | - |
|---|
| 3865 | | - if (icl->icl_size != 1) { |
|---|
| 3866 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); |
|---|
| 3867 | | - return -EINVAL; |
|---|
| 3868 | | - } |
|---|
| 3869 | | - |
|---|
| 3870 | | - agno = be32_to_cpu(icl->icl_ag); |
|---|
| 3871 | | - if (agno >= mp->m_sb.sb_agcount) { |
|---|
| 3872 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); |
|---|
| 3873 | | - return -EINVAL; |
|---|
| 3874 | | - } |
|---|
| 3875 | | - agbno = be32_to_cpu(icl->icl_agbno); |
|---|
| 3876 | | - if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { |
|---|
| 3877 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); |
|---|
| 3878 | | - return -EINVAL; |
|---|
| 3879 | | - } |
|---|
| 3880 | | - isize = be32_to_cpu(icl->icl_isize); |
|---|
| 3881 | | - if (isize != mp->m_sb.sb_inodesize) { |
|---|
| 3882 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); |
|---|
| 3883 | | - return -EINVAL; |
|---|
| 3884 | | - } |
|---|
| 3885 | | - count = be32_to_cpu(icl->icl_count); |
|---|
| 3886 | | - if (!count) { |
|---|
| 3887 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); |
|---|
| 3888 | | - return -EINVAL; |
|---|
| 3889 | | - } |
|---|
| 3890 | | - length = be32_to_cpu(icl->icl_length); |
|---|
| 3891 | | - if (!length || length >= mp->m_sb.sb_agblocks) { |
|---|
| 3892 | | - xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); |
|---|
| 3893 | | - return -EINVAL; |
|---|
| 3894 | | - } |
|---|
| 3895 | | - |
|---|
| 3896 | | - /* |
|---|
| 3897 | | - * The inode chunk is either full or sparse and we only support |
|---|
| 3898 | | - * m_ialloc_min_blks sized sparse allocations at this time. |
|---|
| 3899 | | - */ |
|---|
| 3900 | | - if (length != mp->m_ialloc_blks && |
|---|
| 3901 | | - length != mp->m_ialloc_min_blks) { |
|---|
| 3902 | | - xfs_warn(log->l_mp, |
|---|
| 3903 | | - "%s: unsupported chunk length", __FUNCTION__); |
|---|
| 3904 | | - return -EINVAL; |
|---|
| 3905 | | - } |
|---|
| 3906 | | - |
|---|
| 3907 | | - /* verify inode count is consistent with extent length */ |
|---|
| 3908 | | - if ((count >> mp->m_sb.sb_inopblog) != length) { |
|---|
| 3909 | | - xfs_warn(log->l_mp, |
|---|
| 3910 | | - "%s: inconsistent inode count and chunk length", |
|---|
| 3911 | | - __FUNCTION__); |
|---|
| 3912 | | - return -EINVAL; |
|---|
| 3913 | | - } |
|---|
| 3914 | | - |
|---|
| 3915 | | - /* |
|---|
| 3916 | | - * The icreate transaction can cover multiple cluster buffers and these |
|---|
| 3917 | | - * buffers could have been freed and reused. Check the individual |
|---|
| 3918 | | - * buffers for cancellation so we don't overwrite anything written after |
|---|
| 3919 | | - * a cancellation. |
|---|
| 3920 | | - */ |
|---|
| 3921 | | - blks_per_cluster = xfs_icluster_size_fsb(mp); |
|---|
| 3922 | | - bb_per_cluster = XFS_FSB_TO_BB(mp, blks_per_cluster); |
|---|
| 3923 | | - nbufs = length / blks_per_cluster; |
|---|
| 3924 | | - for (i = 0, cancel_count = 0; i < nbufs; i++) { |
|---|
| 3925 | | - xfs_daddr_t daddr; |
|---|
| 3926 | | - |
|---|
| 3927 | | - daddr = XFS_AGB_TO_DADDR(mp, agno, |
|---|
| 3928 | | - agbno + i * blks_per_cluster); |
|---|
| 3929 | | - if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0)) |
|---|
| 3930 | | - cancel_count++; |
|---|
| 3931 | | - } |
|---|
| 3932 | | - |
|---|
| 3933 | | - /* |
|---|
| 3934 | | - * We currently only use icreate for a single allocation at a time. This |
|---|
| 3935 | | - * means we should expect either all or none of the buffers to be |
|---|
| 3936 | | - * cancelled. Be conservative and skip replay if at least one buffer is |
|---|
| 3937 | | - * cancelled, but warn the user that something is awry if the buffers |
|---|
| 3938 | | - * are not consistent. |
|---|
| 3939 | | - * |
|---|
| 3940 | | - * XXX: This must be refined to only skip cancelled clusters once we use |
|---|
| 3941 | | - * icreate for multiple chunk allocations. |
|---|
| 3942 | | - */ |
|---|
| 3943 | | - ASSERT(!cancel_count || cancel_count == nbufs); |
|---|
| 3944 | | - if (cancel_count) { |
|---|
| 3945 | | - if (cancel_count != nbufs) |
|---|
| 3946 | | - xfs_warn(mp, |
|---|
| 3947 | | - "WARNING: partial inode chunk cancellation, skipped icreate."); |
|---|
| 3948 | | - trace_xfs_log_recover_icreate_cancel(log, icl); |
|---|
| 3949 | | - return 0; |
|---|
| 3950 | | - } |
|---|
| 3951 | | - |
|---|
| 3952 | | - trace_xfs_log_recover_icreate_recover(log, icl); |
|---|
| 3953 | | - return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, |
|---|
| 3954 | | - length, be32_to_cpu(icl->icl_gen)); |
|---|
| 3955 | | -} |
|---|
| 3956 | | - |
|---|
| 3957 | | -STATIC void |
|---|
| 3958 | | -xlog_recover_buffer_ra_pass2( |
|---|
| 3959 | | - struct xlog *log, |
|---|
| 3960 | | - struct xlog_recover_item *item) |
|---|
| 3961 | | -{ |
|---|
| 3962 | | - struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr; |
|---|
| 3963 | | - struct xfs_mount *mp = log->l_mp; |
|---|
| 3964 | | - |
|---|
| 3965 | | - if (xlog_peek_buffer_cancelled(log, buf_f->blf_blkno, |
|---|
| 3966 | | - buf_f->blf_len, buf_f->blf_flags)) { |
|---|
| 3967 | | - return; |
|---|
| 3968 | | - } |
|---|
| 3969 | | - |
|---|
| 3970 | | - xfs_buf_readahead(mp->m_ddev_targp, buf_f->blf_blkno, |
|---|
| 3971 | | - buf_f->blf_len, NULL); |
|---|
| 3972 | | -} |
|---|
| 3973 | | - |
|---|
| 3974 | | -STATIC void |
|---|
| 3975 | | -xlog_recover_inode_ra_pass2( |
|---|
| 3976 | | - struct xlog *log, |
|---|
| 3977 | | - struct xlog_recover_item *item) |
|---|
| 3978 | | -{ |
|---|
| 3979 | | - struct xfs_inode_log_format ilf_buf; |
|---|
| 3980 | | - struct xfs_inode_log_format *ilfp; |
|---|
| 3981 | | - struct xfs_mount *mp = log->l_mp; |
|---|
| 3982 | | - int error; |
|---|
| 3983 | | - |
|---|
| 3984 | | - if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { |
|---|
| 3985 | | - ilfp = item->ri_buf[0].i_addr; |
|---|
| 3986 | | - } else { |
|---|
| 3987 | | - ilfp = &ilf_buf; |
|---|
| 3988 | | - memset(ilfp, 0, sizeof(*ilfp)); |
|---|
| 3989 | | - error = xfs_inode_item_format_convert(&item->ri_buf[0], ilfp); |
|---|
| 3990 | | - if (error) |
|---|
| 3991 | | - return; |
|---|
| 3992 | | - } |
|---|
| 3993 | | - |
|---|
| 3994 | | - if (xlog_peek_buffer_cancelled(log, ilfp->ilf_blkno, ilfp->ilf_len, 0)) |
|---|
| 3995 | | - return; |
|---|
| 3996 | | - |
|---|
| 3997 | | - xfs_buf_readahead(mp->m_ddev_targp, ilfp->ilf_blkno, |
|---|
| 3998 | | - ilfp->ilf_len, &xfs_inode_buf_ra_ops); |
|---|
| 3999 | | -} |
|---|
| 4000 | | - |
|---|
| 4001 | | -STATIC void |
|---|
| 4002 | | -xlog_recover_dquot_ra_pass2( |
|---|
| 4003 | | - struct xlog *log, |
|---|
| 4004 | | - struct xlog_recover_item *item) |
|---|
| 4005 | | -{ |
|---|
| 4006 | | - struct xfs_mount *mp = log->l_mp; |
|---|
| 4007 | | - struct xfs_disk_dquot *recddq; |
|---|
| 4008 | | - struct xfs_dq_logformat *dq_f; |
|---|
| 4009 | | - uint type; |
|---|
| 4010 | | - int len; |
|---|
| 4011 | | - |
|---|
| 4012 | | - |
|---|
| 4013 | | - if (mp->m_qflags == 0) |
|---|
| 4014 | | - return; |
|---|
| 4015 | | - |
|---|
| 4016 | | - recddq = item->ri_buf[1].i_addr; |
|---|
| 4017 | | - if (recddq == NULL) |
|---|
| 4018 | | - return; |
|---|
| 4019 | | - if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) |
|---|
| 4020 | | - return; |
|---|
| 4021 | | - |
|---|
| 4022 | | - type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); |
|---|
| 4023 | | - ASSERT(type); |
|---|
| 4024 | | - if (log->l_quotaoffs_flag & type) |
|---|
| 4025 | | - return; |
|---|
| 4026 | | - |
|---|
| 4027 | | - dq_f = item->ri_buf[0].i_addr; |
|---|
| 4028 | | - ASSERT(dq_f); |
|---|
| 4029 | | - ASSERT(dq_f->qlf_len == 1); |
|---|
| 4030 | | - |
|---|
| 4031 | | - len = XFS_FSB_TO_BB(mp, dq_f->qlf_len); |
|---|
| 4032 | | - if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0)) |
|---|
| 4033 | | - return; |
|---|
| 4034 | | - |
|---|
| 4035 | | - xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len, |
|---|
| 4036 | | - &xfs_dquot_buf_ra_ops); |
|---|
| 4037 | | -} |
|---|
| 4038 | | - |
|---|
| 4039 | | -STATIC void |
|---|
| 4040 | | -xlog_recover_ra_pass2( |
|---|
| 4041 | | - struct xlog *log, |
|---|
| 4042 | | - struct xlog_recover_item *item) |
|---|
| 4043 | | -{ |
|---|
| 4044 | | - switch (ITEM_TYPE(item)) { |
|---|
| 4045 | | - case XFS_LI_BUF: |
|---|
| 4046 | | - xlog_recover_buffer_ra_pass2(log, item); |
|---|
| 4047 | | - break; |
|---|
| 4048 | | - case XFS_LI_INODE: |
|---|
| 4049 | | - xlog_recover_inode_ra_pass2(log, item); |
|---|
| 4050 | | - break; |
|---|
| 4051 | | - case XFS_LI_DQUOT: |
|---|
| 4052 | | - xlog_recover_dquot_ra_pass2(log, item); |
|---|
| 4053 | | - break; |
|---|
| 4054 | | - case XFS_LI_EFI: |
|---|
| 4055 | | - case XFS_LI_EFD: |
|---|
| 4056 | | - case XFS_LI_QUOTAOFF: |
|---|
| 4057 | | - case XFS_LI_RUI: |
|---|
| 4058 | | - case XFS_LI_RUD: |
|---|
| 4059 | | - case XFS_LI_CUI: |
|---|
| 4060 | | - case XFS_LI_CUD: |
|---|
| 4061 | | - case XFS_LI_BUI: |
|---|
| 4062 | | - case XFS_LI_BUD: |
|---|
| 4063 | | - default: |
|---|
| 4064 | | - break; |
|---|
| 4065 | | - } |
|---|
| 4066 | | -} |
|---|
| 4067 | | - |
|---|
| 4068 | | -STATIC int |
|---|
| 4069 | | -xlog_recover_commit_pass1( |
|---|
| 4070 | | - struct xlog *log, |
|---|
| 4071 | | - struct xlog_recover *trans, |
|---|
| 4072 | | - struct xlog_recover_item *item) |
|---|
| 4073 | | -{ |
|---|
| 4074 | | - trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); |
|---|
| 4075 | | - |
|---|
| 4076 | | - switch (ITEM_TYPE(item)) { |
|---|
| 4077 | | - case XFS_LI_BUF: |
|---|
| 4078 | | - return xlog_recover_buffer_pass1(log, item); |
|---|
| 4079 | | - case XFS_LI_QUOTAOFF: |
|---|
| 4080 | | - return xlog_recover_quotaoff_pass1(log, item); |
|---|
| 4081 | | - case XFS_LI_INODE: |
|---|
| 4082 | | - case XFS_LI_EFI: |
|---|
| 4083 | | - case XFS_LI_EFD: |
|---|
| 4084 | | - case XFS_LI_DQUOT: |
|---|
| 4085 | | - case XFS_LI_ICREATE: |
|---|
| 4086 | | - case XFS_LI_RUI: |
|---|
| 4087 | | - case XFS_LI_RUD: |
|---|
| 4088 | | - case XFS_LI_CUI: |
|---|
| 4089 | | - case XFS_LI_CUD: |
|---|
| 4090 | | - case XFS_LI_BUI: |
|---|
| 4091 | | - case XFS_LI_BUD: |
|---|
| 4092 | | - /* nothing to do in pass 1 */ |
|---|
| 4093 | | - return 0; |
|---|
| 4094 | | - default: |
|---|
| 4095 | | - xfs_warn(log->l_mp, "%s: invalid item type (%d)", |
|---|
| 4096 | | - __func__, ITEM_TYPE(item)); |
|---|
| 4097 | | - ASSERT(0); |
|---|
| 4098 | | - return -EIO; |
|---|
| 4099 | | - } |
|---|
| 4100 | | -} |
|---|
| 4101 | | - |
|---|
| 4102 | | -STATIC int |
|---|
| 4103 | | -xlog_recover_commit_pass2( |
|---|
| 4104 | | - struct xlog *log, |
|---|
| 4105 | | - struct xlog_recover *trans, |
|---|
| 4106 | | - struct list_head *buffer_list, |
|---|
| 4107 | | - struct xlog_recover_item *item) |
|---|
| 4108 | | -{ |
|---|
| 4109 | | - trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); |
|---|
| 4110 | | - |
|---|
| 4111 | | - switch (ITEM_TYPE(item)) { |
|---|
| 4112 | | - case XFS_LI_BUF: |
|---|
| 4113 | | - return xlog_recover_buffer_pass2(log, buffer_list, item, |
|---|
| 4114 | | - trans->r_lsn); |
|---|
| 4115 | | - case XFS_LI_INODE: |
|---|
| 4116 | | - return xlog_recover_inode_pass2(log, buffer_list, item, |
|---|
| 4117 | | - trans->r_lsn); |
|---|
| 4118 | | - case XFS_LI_EFI: |
|---|
| 4119 | | - return xlog_recover_efi_pass2(log, item, trans->r_lsn); |
|---|
| 4120 | | - case XFS_LI_EFD: |
|---|
| 4121 | | - return xlog_recover_efd_pass2(log, item); |
|---|
| 4122 | | - case XFS_LI_RUI: |
|---|
| 4123 | | - return xlog_recover_rui_pass2(log, item, trans->r_lsn); |
|---|
| 4124 | | - case XFS_LI_RUD: |
|---|
| 4125 | | - return xlog_recover_rud_pass2(log, item); |
|---|
| 4126 | | - case XFS_LI_CUI: |
|---|
| 4127 | | - return xlog_recover_cui_pass2(log, item, trans->r_lsn); |
|---|
| 4128 | | - case XFS_LI_CUD: |
|---|
| 4129 | | - return xlog_recover_cud_pass2(log, item); |
|---|
| 4130 | | - case XFS_LI_BUI: |
|---|
| 4131 | | - return xlog_recover_bui_pass2(log, item, trans->r_lsn); |
|---|
| 4132 | | - case XFS_LI_BUD: |
|---|
| 4133 | | - return xlog_recover_bud_pass2(log, item); |
|---|
| 4134 | | - case XFS_LI_DQUOT: |
|---|
| 4135 | | - return xlog_recover_dquot_pass2(log, buffer_list, item, |
|---|
| 4136 | | - trans->r_lsn); |
|---|
| 4137 | | - case XFS_LI_ICREATE: |
|---|
| 4138 | | - return xlog_recover_do_icreate_pass2(log, buffer_list, item); |
|---|
| 4139 | | - case XFS_LI_QUOTAOFF: |
|---|
| 4140 | | - /* nothing to do in pass2 */ |
|---|
| 4141 | | - return 0; |
|---|
| 4142 | | - default: |
|---|
| 4143 | | - xfs_warn(log->l_mp, "%s: invalid item type (%d)", |
|---|
| 4144 | | - __func__, ITEM_TYPE(item)); |
|---|
| 4145 | | - ASSERT(0); |
|---|
| 4146 | | - return -EIO; |
|---|
| 4147 | | - } |
|---|
| 1918 | + if (!xlog_is_buffer_cancelled(log, blkno, len)) |
|---|
| 1919 | + xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops); |
|---|
| 4148 | 1920 | } |
|---|
| 4149 | 1921 | |
|---|
| 4150 | 1922 | STATIC int |
|---|
| .. | .. |
|---|
| 4158 | 1930 | int error = 0; |
|---|
| 4159 | 1931 | |
|---|
| 4160 | 1932 | list_for_each_entry(item, item_list, ri_list) { |
|---|
| 4161 | | - error = xlog_recover_commit_pass2(log, trans, |
|---|
| 4162 | | - buffer_list, item); |
|---|
| 1933 | + trace_xfs_log_recover_item_recover(log, trans, item, |
|---|
| 1934 | + XLOG_RECOVER_PASS2); |
|---|
| 1935 | + |
|---|
| 1936 | + if (item->ri_ops->commit_pass2) |
|---|
| 1937 | + error = item->ri_ops->commit_pass2(log, buffer_list, |
|---|
| 1938 | + item, trans->r_lsn); |
|---|
| 4163 | 1939 | if (error) |
|---|
| 4164 | 1940 | return error; |
|---|
| 4165 | 1941 | } |
|---|
| .. | .. |
|---|
| 4196 | 1972 | return error; |
|---|
| 4197 | 1973 | |
|---|
| 4198 | 1974 | list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) { |
|---|
| 1975 | + trace_xfs_log_recover_item_recover(log, trans, item, pass); |
|---|
| 1976 | + |
|---|
| 4199 | 1977 | switch (pass) { |
|---|
| 4200 | 1978 | case XLOG_RECOVER_PASS1: |
|---|
| 4201 | | - error = xlog_recover_commit_pass1(log, trans, item); |
|---|
| 1979 | + if (item->ri_ops->commit_pass1) |
|---|
| 1980 | + error = item->ri_ops->commit_pass1(log, item); |
|---|
| 4202 | 1981 | break; |
|---|
| 4203 | 1982 | case XLOG_RECOVER_PASS2: |
|---|
| 4204 | | - xlog_recover_ra_pass2(log, item); |
|---|
| 1983 | + if (item->ri_ops->ra_pass2) |
|---|
| 1984 | + item->ri_ops->ra_pass2(log, item); |
|---|
| 4205 | 1985 | list_move_tail(&item->ri_list, &ra_list); |
|---|
| 4206 | 1986 | items_queued++; |
|---|
| 4207 | 1987 | if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) { |
|---|
| .. | .. |
|---|
| 4238 | 2018 | xlog_recover_add_item( |
|---|
| 4239 | 2019 | struct list_head *head) |
|---|
| 4240 | 2020 | { |
|---|
| 4241 | | - xlog_recover_item_t *item; |
|---|
| 2021 | + struct xlog_recover_item *item; |
|---|
| 4242 | 2022 | |
|---|
| 4243 | | - item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP); |
|---|
| 2023 | + item = kmem_zalloc(sizeof(struct xlog_recover_item), 0); |
|---|
| 4244 | 2024 | INIT_LIST_HEAD(&item->ri_list); |
|---|
| 4245 | 2025 | list_add_tail(&item->ri_list, head); |
|---|
| 4246 | 2026 | } |
|---|
| .. | .. |
|---|
| 4252 | 2032 | char *dp, |
|---|
| 4253 | 2033 | int len) |
|---|
| 4254 | 2034 | { |
|---|
| 4255 | | - xlog_recover_item_t *item; |
|---|
| 2035 | + struct xlog_recover_item *item; |
|---|
| 4256 | 2036 | char *ptr, *old_ptr; |
|---|
| 4257 | 2037 | int old_len; |
|---|
| 4258 | 2038 | |
|---|
| .. | .. |
|---|
| 4264 | 2044 | ASSERT(len <= sizeof(struct xfs_trans_header)); |
|---|
| 4265 | 2045 | if (len > sizeof(struct xfs_trans_header)) { |
|---|
| 4266 | 2046 | xfs_warn(log->l_mp, "%s: bad header length", __func__); |
|---|
| 4267 | | - return -EIO; |
|---|
| 2047 | + return -EFSCORRUPTED; |
|---|
| 4268 | 2048 | } |
|---|
| 4269 | 2049 | |
|---|
| 4270 | 2050 | xlog_recover_add_item(&trans->r_itemq); |
|---|
| .. | .. |
|---|
| 4275 | 2055 | } |
|---|
| 4276 | 2056 | |
|---|
| 4277 | 2057 | /* take the tail entry */ |
|---|
| 4278 | | - item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); |
|---|
| 2058 | + item = list_entry(trans->r_itemq.prev, struct xlog_recover_item, |
|---|
| 2059 | + ri_list); |
|---|
| 4279 | 2060 | |
|---|
| 4280 | 2061 | old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; |
|---|
| 4281 | 2062 | old_len = item->ri_buf[item->ri_cnt-1].i_len; |
|---|
| 4282 | 2063 | |
|---|
| 4283 | | - ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP); |
|---|
| 2064 | + ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL); |
|---|
| 2065 | + if (!ptr) |
|---|
| 2066 | + return -ENOMEM; |
|---|
| 4284 | 2067 | memcpy(&ptr[old_len], dp, len); |
|---|
| 4285 | 2068 | item->ri_buf[item->ri_cnt-1].i_len += len; |
|---|
| 4286 | 2069 | item->ri_buf[item->ri_cnt-1].i_addr = ptr; |
|---|
| .. | .. |
|---|
| 4309 | 2092 | int len) |
|---|
| 4310 | 2093 | { |
|---|
| 4311 | 2094 | struct xfs_inode_log_format *in_f; /* any will do */ |
|---|
| 4312 | | - xlog_recover_item_t *item; |
|---|
| 2095 | + struct xlog_recover_item *item; |
|---|
| 4313 | 2096 | char *ptr; |
|---|
| 4314 | 2097 | |
|---|
| 4315 | 2098 | if (!len) |
|---|
| .. | .. |
|---|
| 4320 | 2103 | xfs_warn(log->l_mp, "%s: bad header magic number", |
|---|
| 4321 | 2104 | __func__); |
|---|
| 4322 | 2105 | ASSERT(0); |
|---|
| 4323 | | - return -EIO; |
|---|
| 2106 | + return -EFSCORRUPTED; |
|---|
| 4324 | 2107 | } |
|---|
| 4325 | 2108 | |
|---|
| 4326 | 2109 | if (len > sizeof(struct xfs_trans_header)) { |
|---|
| 4327 | 2110 | xfs_warn(log->l_mp, "%s: bad header length", __func__); |
|---|
| 4328 | 2111 | ASSERT(0); |
|---|
| 4329 | | - return -EIO; |
|---|
| 2112 | + return -EFSCORRUPTED; |
|---|
| 4330 | 2113 | } |
|---|
| 4331 | 2114 | |
|---|
| 4332 | 2115 | /* |
|---|
| .. | .. |
|---|
| 4340 | 2123 | return 0; |
|---|
| 4341 | 2124 | } |
|---|
| 4342 | 2125 | |
|---|
| 4343 | | - ptr = kmem_alloc(len, KM_SLEEP); |
|---|
| 2126 | + ptr = kmem_alloc(len, 0); |
|---|
| 4344 | 2127 | memcpy(ptr, dp, len); |
|---|
| 4345 | 2128 | in_f = (struct xfs_inode_log_format *)ptr; |
|---|
| 4346 | 2129 | |
|---|
| 4347 | 2130 | /* take the tail entry */ |
|---|
| 4348 | | - item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list); |
|---|
| 2131 | + item = list_entry(trans->r_itemq.prev, struct xlog_recover_item, |
|---|
| 2132 | + ri_list); |
|---|
| 4349 | 2133 | if (item->ri_total != 0 && |
|---|
| 4350 | 2134 | item->ri_total == item->ri_cnt) { |
|---|
| 4351 | 2135 | /* tail item is in use, get a new one */ |
|---|
| 4352 | 2136 | xlog_recover_add_item(&trans->r_itemq); |
|---|
| 4353 | 2137 | item = list_entry(trans->r_itemq.prev, |
|---|
| 4354 | | - xlog_recover_item_t, ri_list); |
|---|
| 2138 | + struct xlog_recover_item, ri_list); |
|---|
| 4355 | 2139 | } |
|---|
| 4356 | 2140 | |
|---|
| 4357 | 2141 | if (item->ri_total == 0) { /* first region to be added */ |
|---|
| .. | .. |
|---|
| 4362 | 2146 | in_f->ilf_size); |
|---|
| 4363 | 2147 | ASSERT(0); |
|---|
| 4364 | 2148 | kmem_free(ptr); |
|---|
| 4365 | | - return -EIO; |
|---|
| 2149 | + return -EFSCORRUPTED; |
|---|
| 4366 | 2150 | } |
|---|
| 4367 | 2151 | |
|---|
| 4368 | 2152 | item->ri_total = in_f->ilf_size; |
|---|
| 4369 | 2153 | item->ri_buf = |
|---|
| 4370 | 2154 | kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), |
|---|
| 4371 | | - KM_SLEEP); |
|---|
| 2155 | + 0); |
|---|
| 4372 | 2156 | } |
|---|
| 4373 | | - ASSERT(item->ri_total > item->ri_cnt); |
|---|
| 2157 | + |
|---|
| 2158 | + if (item->ri_total <= item->ri_cnt) { |
|---|
| 2159 | + xfs_warn(log->l_mp, |
|---|
| 2160 | + "log item region count (%d) overflowed size (%d)", |
|---|
| 2161 | + item->ri_cnt, item->ri_total); |
|---|
| 2162 | + ASSERT(0); |
|---|
| 2163 | + kmem_free(ptr); |
|---|
| 2164 | + return -EFSCORRUPTED; |
|---|
| 2165 | + } |
|---|
| 2166 | + |
|---|
| 4374 | 2167 | /* Description region is ri_buf[0] */ |
|---|
| 4375 | 2168 | item->ri_buf[item->ri_cnt].i_addr = ptr; |
|---|
| 4376 | 2169 | item->ri_buf[item->ri_cnt].i_len = len; |
|---|
| .. | .. |
|---|
| 4388 | 2181 | xlog_recover_free_trans( |
|---|
| 4389 | 2182 | struct xlog_recover *trans) |
|---|
| 4390 | 2183 | { |
|---|
| 4391 | | - xlog_recover_item_t *item, *n; |
|---|
| 2184 | + struct xlog_recover_item *item, *n; |
|---|
| 4392 | 2185 | int i; |
|---|
| 4393 | 2186 | |
|---|
| 4394 | 2187 | hlist_del_init(&trans->r_list); |
|---|
| .. | .. |
|---|
| 4457 | 2250 | default: |
|---|
| 4458 | 2251 | xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); |
|---|
| 4459 | 2252 | ASSERT(0); |
|---|
| 4460 | | - error = -EIO; |
|---|
| 2253 | + error = -EFSCORRUPTED; |
|---|
| 4461 | 2254 | break; |
|---|
| 4462 | 2255 | } |
|---|
| 4463 | 2256 | if (error || freeit) |
|---|
| .. | .. |
|---|
| 4502 | 2295 | * This is a new transaction so allocate a new recovery container to |
|---|
| 4503 | 2296 | * hold the recovery ops that will follow. |
|---|
| 4504 | 2297 | */ |
|---|
| 4505 | | - trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP); |
|---|
| 2298 | + trans = kmem_zalloc(sizeof(struct xlog_recover), 0); |
|---|
| 4506 | 2299 | trans->r_log_tid = tid; |
|---|
| 4507 | 2300 | trans->r_lsn = be64_to_cpu(rhead->h_lsn); |
|---|
| 4508 | 2301 | INIT_LIST_HEAD(&trans->r_itemq); |
|---|
| .. | .. |
|---|
| 4537 | 2330 | xfs_warn(log->l_mp, "%s: bad clientid 0x%x", |
|---|
| 4538 | 2331 | __func__, ohead->oh_clientid); |
|---|
| 4539 | 2332 | ASSERT(0); |
|---|
| 4540 | | - return -EIO; |
|---|
| 2333 | + return -EFSCORRUPTED; |
|---|
| 4541 | 2334 | } |
|---|
| 4542 | 2335 | |
|---|
| 4543 | 2336 | /* |
|---|
| .. | .. |
|---|
| 4547 | 2340 | if (dp + len > end) { |
|---|
| 4548 | 2341 | xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len); |
|---|
| 4549 | 2342 | WARN_ON(1); |
|---|
| 4550 | | - return -EIO; |
|---|
| 2343 | + return -EFSCORRUPTED; |
|---|
| 4551 | 2344 | } |
|---|
| 4552 | 2345 | |
|---|
| 4553 | 2346 | trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead); |
|---|
| .. | .. |
|---|
| 4640 | 2433 | return 0; |
|---|
| 4641 | 2434 | } |
|---|
| 4642 | 2435 | |
|---|
| 4643 | | -/* Recover the EFI if necessary. */ |
|---|
| 4644 | | -STATIC int |
|---|
| 4645 | | -xlog_recover_process_efi( |
|---|
| 4646 | | - struct xfs_mount *mp, |
|---|
| 4647 | | - struct xfs_ail *ailp, |
|---|
| 4648 | | - struct xfs_log_item *lip) |
|---|
| 4649 | | -{ |
|---|
| 4650 | | - struct xfs_efi_log_item *efip; |
|---|
| 4651 | | - int error; |
|---|
| 4652 | | - |
|---|
| 4653 | | - /* |
|---|
| 4654 | | - * Skip EFIs that we've already processed. |
|---|
| 4655 | | - */ |
|---|
| 4656 | | - efip = container_of(lip, struct xfs_efi_log_item, efi_item); |
|---|
| 4657 | | - if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) |
|---|
| 4658 | | - return 0; |
|---|
| 4659 | | - |
|---|
| 4660 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 4661 | | - error = xfs_efi_recover(mp, efip); |
|---|
| 4662 | | - spin_lock(&ailp->ail_lock); |
|---|
| 4663 | | - |
|---|
| 4664 | | - return error; |
|---|
| 4665 | | -} |
|---|
| 4666 | | - |
|---|
| 4667 | | -/* Release the EFI since we're cancelling everything. */ |
|---|
| 4668 | | -STATIC void |
|---|
| 4669 | | -xlog_recover_cancel_efi( |
|---|
| 4670 | | - struct xfs_mount *mp, |
|---|
| 4671 | | - struct xfs_ail *ailp, |
|---|
| 4672 | | - struct xfs_log_item *lip) |
|---|
| 4673 | | -{ |
|---|
| 4674 | | - struct xfs_efi_log_item *efip; |
|---|
| 4675 | | - |
|---|
| 4676 | | - efip = container_of(lip, struct xfs_efi_log_item, efi_item); |
|---|
| 4677 | | - |
|---|
| 4678 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 4679 | | - xfs_efi_release(efip); |
|---|
| 4680 | | - spin_lock(&ailp->ail_lock); |
|---|
| 4681 | | -} |
|---|
| 4682 | | - |
|---|
| 4683 | | -/* Recover the RUI if necessary. */ |
|---|
| 4684 | | -STATIC int |
|---|
| 4685 | | -xlog_recover_process_rui( |
|---|
| 4686 | | - struct xfs_mount *mp, |
|---|
| 4687 | | - struct xfs_ail *ailp, |
|---|
| 4688 | | - struct xfs_log_item *lip) |
|---|
| 4689 | | -{ |
|---|
| 4690 | | - struct xfs_rui_log_item *ruip; |
|---|
| 4691 | | - int error; |
|---|
| 4692 | | - |
|---|
| 4693 | | - /* |
|---|
| 4694 | | - * Skip RUIs that we've already processed. |
|---|
| 4695 | | - */ |
|---|
| 4696 | | - ruip = container_of(lip, struct xfs_rui_log_item, rui_item); |
|---|
| 4697 | | - if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags)) |
|---|
| 4698 | | - return 0; |
|---|
| 4699 | | - |
|---|
| 4700 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 4701 | | - error = xfs_rui_recover(mp, ruip); |
|---|
| 4702 | | - spin_lock(&ailp->ail_lock); |
|---|
| 4703 | | - |
|---|
| 4704 | | - return error; |
|---|
| 4705 | | -} |
|---|
| 4706 | | - |
|---|
| 4707 | | -/* Release the RUI since we're cancelling everything. */ |
|---|
| 4708 | | -STATIC void |
|---|
| 4709 | | -xlog_recover_cancel_rui( |
|---|
| 4710 | | - struct xfs_mount *mp, |
|---|
| 4711 | | - struct xfs_ail *ailp, |
|---|
| 4712 | | - struct xfs_log_item *lip) |
|---|
| 4713 | | -{ |
|---|
| 4714 | | - struct xfs_rui_log_item *ruip; |
|---|
| 4715 | | - |
|---|
| 4716 | | - ruip = container_of(lip, struct xfs_rui_log_item, rui_item); |
|---|
| 4717 | | - |
|---|
| 4718 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 4719 | | - xfs_rui_release(ruip); |
|---|
| 4720 | | - spin_lock(&ailp->ail_lock); |
|---|
| 4721 | | -} |
|---|
| 4722 | | - |
|---|
| 4723 | | -/* Recover the CUI if necessary. */ |
|---|
| 4724 | | -STATIC int |
|---|
| 4725 | | -xlog_recover_process_cui( |
|---|
| 4726 | | - struct xfs_trans *parent_tp, |
|---|
| 4727 | | - struct xfs_ail *ailp, |
|---|
| 4728 | | - struct xfs_log_item *lip) |
|---|
| 4729 | | -{ |
|---|
| 4730 | | - struct xfs_cui_log_item *cuip; |
|---|
| 4731 | | - int error; |
|---|
| 4732 | | - |
|---|
| 4733 | | - /* |
|---|
| 4734 | | - * Skip CUIs that we've already processed. |
|---|
| 4735 | | - */ |
|---|
| 4736 | | - cuip = container_of(lip, struct xfs_cui_log_item, cui_item); |
|---|
| 4737 | | - if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)) |
|---|
| 4738 | | - return 0; |
|---|
| 4739 | | - |
|---|
| 4740 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 4741 | | - error = xfs_cui_recover(parent_tp, cuip); |
|---|
| 4742 | | - spin_lock(&ailp->ail_lock); |
|---|
| 4743 | | - |
|---|
| 4744 | | - return error; |
|---|
| 4745 | | -} |
|---|
| 4746 | | - |
|---|
| 4747 | | -/* Release the CUI since we're cancelling everything. */ |
|---|
| 4748 | | -STATIC void |
|---|
| 4749 | | -xlog_recover_cancel_cui( |
|---|
| 4750 | | - struct xfs_mount *mp, |
|---|
| 4751 | | - struct xfs_ail *ailp, |
|---|
| 4752 | | - struct xfs_log_item *lip) |
|---|
| 4753 | | -{ |
|---|
| 4754 | | - struct xfs_cui_log_item *cuip; |
|---|
| 4755 | | - |
|---|
| 4756 | | - cuip = container_of(lip, struct xfs_cui_log_item, cui_item); |
|---|
| 4757 | | - |
|---|
| 4758 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 4759 | | - xfs_cui_release(cuip); |
|---|
| 4760 | | - spin_lock(&ailp->ail_lock); |
|---|
| 4761 | | -} |
|---|
| 4762 | | - |
|---|
| 4763 | | -/* Recover the BUI if necessary. */ |
|---|
| 4764 | | -STATIC int |
|---|
| 4765 | | -xlog_recover_process_bui( |
|---|
| 4766 | | - struct xfs_trans *parent_tp, |
|---|
| 4767 | | - struct xfs_ail *ailp, |
|---|
| 4768 | | - struct xfs_log_item *lip) |
|---|
| 4769 | | -{ |
|---|
| 4770 | | - struct xfs_bui_log_item *buip; |
|---|
| 4771 | | - int error; |
|---|
| 4772 | | - |
|---|
| 4773 | | - /* |
|---|
| 4774 | | - * Skip BUIs that we've already processed. |
|---|
| 4775 | | - */ |
|---|
| 4776 | | - buip = container_of(lip, struct xfs_bui_log_item, bui_item); |
|---|
| 4777 | | - if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)) |
|---|
| 4778 | | - return 0; |
|---|
| 4779 | | - |
|---|
| 4780 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 4781 | | - error = xfs_bui_recover(parent_tp, buip); |
|---|
| 4782 | | - spin_lock(&ailp->ail_lock); |
|---|
| 4783 | | - |
|---|
| 4784 | | - return error; |
|---|
| 4785 | | -} |
|---|
| 4786 | | - |
|---|
| 4787 | | -/* Release the BUI since we're cancelling everything. */ |
|---|
| 4788 | | -STATIC void |
|---|
| 4789 | | -xlog_recover_cancel_bui( |
|---|
| 4790 | | - struct xfs_mount *mp, |
|---|
| 4791 | | - struct xfs_ail *ailp, |
|---|
| 4792 | | - struct xfs_log_item *lip) |
|---|
| 4793 | | -{ |
|---|
| 4794 | | - struct xfs_bui_log_item *buip; |
|---|
| 4795 | | - |
|---|
| 4796 | | - buip = container_of(lip, struct xfs_bui_log_item, bui_item); |
|---|
| 4797 | | - |
|---|
| 4798 | | - spin_unlock(&ailp->ail_lock); |
|---|
| 4799 | | - xfs_bui_release(buip); |
|---|
| 4800 | | - spin_lock(&ailp->ail_lock); |
|---|
| 4801 | | -} |
|---|
| 4802 | | - |
|---|
| 4803 | | -/* Is this log item a deferred action intent? */ |
|---|
| 4804 | | -static inline bool xlog_item_is_intent(struct xfs_log_item *lip) |
|---|
| 4805 | | -{ |
|---|
| 4806 | | - switch (lip->li_type) { |
|---|
| 4807 | | - case XFS_LI_EFI: |
|---|
| 4808 | | - case XFS_LI_RUI: |
|---|
| 4809 | | - case XFS_LI_CUI: |
|---|
| 4810 | | - case XFS_LI_BUI: |
|---|
| 4811 | | - return true; |
|---|
| 4812 | | - default: |
|---|
| 4813 | | - return false; |
|---|
| 4814 | | - } |
|---|
| 4815 | | -} |
|---|
| 4816 | | - |
|---|
| 4817 | 2436 | /* Take all the collected deferred ops and finish them in order. */ |
|---|
| 4818 | 2437 | static int |
|---|
| 4819 | 2438 | xlog_finish_defer_ops( |
|---|
| 4820 | | - struct xfs_trans *parent_tp) |
|---|
| 2439 | + struct xfs_mount *mp, |
|---|
| 2440 | + struct list_head *capture_list) |
|---|
| 4821 | 2441 | { |
|---|
| 4822 | | - struct xfs_mount *mp = parent_tp->t_mountp; |
|---|
| 2442 | + struct xfs_defer_capture *dfc, *next; |
|---|
| 4823 | 2443 | struct xfs_trans *tp; |
|---|
| 4824 | | - int64_t freeblks; |
|---|
| 4825 | | - uint resblks; |
|---|
| 4826 | | - int error; |
|---|
| 2444 | + struct xfs_inode *ip; |
|---|
| 2445 | + int error = 0; |
|---|
| 4827 | 2446 | |
|---|
| 4828 | | - /* |
|---|
| 4829 | | - * We're finishing the defer_ops that accumulated as a result of |
|---|
| 4830 | | - * recovering unfinished intent items during log recovery. We |
|---|
| 4831 | | - * reserve an itruncate transaction because it is the largest |
|---|
| 4832 | | - * permanent transaction type. Since we're the only user of the fs |
|---|
| 4833 | | - * right now, take 93% (15/16) of the available free blocks. Use |
|---|
| 4834 | | - * weird math to avoid a 64-bit division. |
|---|
| 4835 | | - */ |
|---|
| 4836 | | - freeblks = percpu_counter_sum(&mp->m_fdblocks); |
|---|
| 4837 | | - if (freeblks <= 0) |
|---|
| 4838 | | - return -ENOSPC; |
|---|
| 4839 | | - resblks = min_t(int64_t, UINT_MAX, freeblks); |
|---|
| 4840 | | - resblks = (resblks * 15) >> 4; |
|---|
| 4841 | | - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, |
|---|
| 4842 | | - 0, XFS_TRANS_RESERVE, &tp); |
|---|
| 4843 | | - if (error) |
|---|
| 4844 | | - return error; |
|---|
| 4845 | | - /* transfer all collected dfops to this transaction */ |
|---|
| 4846 | | - xfs_defer_move(tp, parent_tp); |
|---|
| 2447 | + list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { |
|---|
| 2448 | + struct xfs_trans_res resv; |
|---|
| 4847 | 2449 | |
|---|
| 4848 | | - return xfs_trans_commit(tp); |
|---|
| 2450 | + /* |
|---|
| 2451 | + * Create a new transaction reservation from the captured |
|---|
| 2452 | + * information. Set logcount to 1 to force the new transaction |
|---|
| 2453 | + * to regrant every roll so that we can make forward progress |
|---|
| 2454 | + * in recovery no matter how full the log might be. |
|---|
| 2455 | + */ |
|---|
| 2456 | + resv.tr_logres = dfc->dfc_logres; |
|---|
| 2457 | + resv.tr_logcount = 1; |
|---|
| 2458 | + resv.tr_logflags = XFS_TRANS_PERM_LOG_RES; |
|---|
| 2459 | + |
|---|
| 2460 | + error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres, |
|---|
| 2461 | + dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp); |
|---|
| 2462 | + if (error) { |
|---|
| 2463 | + xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
|---|
| 2464 | + return error; |
|---|
| 2465 | + } |
|---|
| 2466 | + |
|---|
| 2467 | + /* |
|---|
| 2468 | + * Transfer to this new transaction all the dfops we captured |
|---|
| 2469 | + * from recovering a single intent item. |
|---|
| 2470 | + */ |
|---|
| 2471 | + list_del_init(&dfc->dfc_list); |
|---|
| 2472 | + xfs_defer_ops_continue(dfc, tp, &ip); |
|---|
| 2473 | + |
|---|
| 2474 | + error = xfs_trans_commit(tp); |
|---|
| 2475 | + if (ip) { |
|---|
| 2476 | + xfs_iunlock(ip, XFS_ILOCK_EXCL); |
|---|
| 2477 | + xfs_irele(ip); |
|---|
| 2478 | + } |
|---|
| 2479 | + if (error) |
|---|
| 2480 | + return error; |
|---|
| 2481 | + } |
|---|
| 2482 | + |
|---|
| 2483 | + ASSERT(list_empty(capture_list)); |
|---|
| 2484 | + return 0; |
|---|
| 4849 | 2485 | } |
|---|
| 4850 | 2486 | |
|---|
| 2487 | +/* Release all the captured defer ops and capture structures in this list. */ |
|---|
| 2488 | +static void |
|---|
| 2489 | +xlog_abort_defer_ops( |
|---|
| 2490 | + struct xfs_mount *mp, |
|---|
| 2491 | + struct list_head *capture_list) |
|---|
| 2492 | +{ |
|---|
| 2493 | + struct xfs_defer_capture *dfc; |
|---|
| 2494 | + struct xfs_defer_capture *next; |
|---|
| 2495 | + |
|---|
| 2496 | + list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { |
|---|
| 2497 | + list_del_init(&dfc->dfc_list); |
|---|
| 2498 | + xfs_defer_ops_release(mp, dfc); |
|---|
| 2499 | + } |
|---|
| 2500 | +} |
|---|
| 4851 | 2501 | /* |
|---|
| 4852 | 2502 | * When this is called, all of the log intent items which did not have |
|---|
| 4853 | 2503 | * corresponding log done items should be in the AIL. What we do now |
|---|
| .. | .. |
|---|
| 4868 | 2518 | xlog_recover_process_intents( |
|---|
| 4869 | 2519 | struct xlog *log) |
|---|
| 4870 | 2520 | { |
|---|
| 4871 | | - struct xfs_trans *parent_tp; |
|---|
| 2521 | + LIST_HEAD(capture_list); |
|---|
| 4872 | 2522 | struct xfs_ail_cursor cur; |
|---|
| 4873 | 2523 | struct xfs_log_item *lip; |
|---|
| 4874 | 2524 | struct xfs_ail *ailp; |
|---|
| 4875 | | - int error; |
|---|
| 2525 | + int error = 0; |
|---|
| 4876 | 2526 | #if defined(DEBUG) || defined(XFS_WARN) |
|---|
| 4877 | 2527 | xfs_lsn_t last_lsn; |
|---|
| 4878 | 2528 | #endif |
|---|
| 4879 | 2529 | |
|---|
| 4880 | | - /* |
|---|
| 4881 | | - * The intent recovery handlers commit transactions to complete recovery |
|---|
| 4882 | | - * for individual intents, but any new deferred operations that are |
|---|
| 4883 | | - * queued during that process are held off until the very end. The |
|---|
| 4884 | | - * purpose of this transaction is to serve as a container for deferred |
|---|
| 4885 | | - * operations. Each intent recovery handler must transfer dfops here |
|---|
| 4886 | | - * before its local transaction commits, and we'll finish the entire |
|---|
| 4887 | | - * list below. |
|---|
| 4888 | | - */ |
|---|
| 4889 | | - error = xfs_trans_alloc_empty(log->l_mp, &parent_tp); |
|---|
| 4890 | | - if (error) |
|---|
| 4891 | | - return error; |
|---|
| 4892 | | - |
|---|
| 4893 | 2530 | ailp = log->l_ailp; |
|---|
| 4894 | 2531 | spin_lock(&ailp->ail_lock); |
|---|
| 4895 | | - lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
|---|
| 4896 | 2532 | #if defined(DEBUG) || defined(XFS_WARN) |
|---|
| 4897 | 2533 | last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); |
|---|
| 4898 | 2534 | #endif |
|---|
| 4899 | | - while (lip != NULL) { |
|---|
| 2535 | + for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); |
|---|
| 2536 | + lip != NULL; |
|---|
| 2537 | + lip = xfs_trans_ail_cursor_next(ailp, &cur)) { |
|---|
| 4900 | 2538 | /* |
|---|
| 4901 | 2539 | * We're done when we see something other than an intent. |
|---|
| 4902 | 2540 | * There should be no intents left in the AIL now. |
|---|
| .. | .. |
|---|
| 4918 | 2556 | |
|---|
| 4919 | 2557 | /* |
|---|
| 4920 | 2558 | * NOTE: If your intent processing routine can create more |
|---|
| 4921 | | - * deferred ops, you /must/ attach them to the dfops in this |
|---|
| 4922 | | - * routine or else those subsequent intents will get |
|---|
| 2559 | + * deferred ops, you /must/ attach them to the capture list in |
|---|
| 2560 | + * the recover routine or else those subsequent intents will be |
|---|
| 4923 | 2561 | * replayed in the wrong order! |
|---|
| 4924 | 2562 | */ |
|---|
| 4925 | | - switch (lip->li_type) { |
|---|
| 4926 | | - case XFS_LI_EFI: |
|---|
| 4927 | | - error = xlog_recover_process_efi(log->l_mp, ailp, lip); |
|---|
| 4928 | | - break; |
|---|
| 4929 | | - case XFS_LI_RUI: |
|---|
| 4930 | | - error = xlog_recover_process_rui(log->l_mp, ailp, lip); |
|---|
| 4931 | | - break; |
|---|
| 4932 | | - case XFS_LI_CUI: |
|---|
| 4933 | | - error = xlog_recover_process_cui(parent_tp, ailp, lip); |
|---|
| 4934 | | - break; |
|---|
| 4935 | | - case XFS_LI_BUI: |
|---|
| 4936 | | - error = xlog_recover_process_bui(parent_tp, ailp, lip); |
|---|
| 4937 | | - break; |
|---|
| 4938 | | - } |
|---|
| 2563 | + spin_unlock(&ailp->ail_lock); |
|---|
| 2564 | + error = lip->li_ops->iop_recover(lip, &capture_list); |
|---|
| 2565 | + spin_lock(&ailp->ail_lock); |
|---|
| 4939 | 2566 | if (error) |
|---|
| 4940 | | - goto out; |
|---|
| 4941 | | - lip = xfs_trans_ail_cursor_next(ailp, &cur); |
|---|
| 2567 | + break; |
|---|
| 4942 | 2568 | } |
|---|
| 4943 | | -out: |
|---|
| 2569 | + |
|---|
| 4944 | 2570 | xfs_trans_ail_cursor_done(&cur); |
|---|
| 4945 | 2571 | spin_unlock(&ailp->ail_lock); |
|---|
| 4946 | | - if (!error) |
|---|
| 4947 | | - error = xlog_finish_defer_ops(parent_tp); |
|---|
| 4948 | | - xfs_trans_cancel(parent_tp); |
|---|
| 2572 | + if (error) |
|---|
| 2573 | + goto err; |
|---|
| 4949 | 2574 | |
|---|
| 2575 | + error = xlog_finish_defer_ops(log->l_mp, &capture_list); |
|---|
| 2576 | + if (error) |
|---|
| 2577 | + goto err; |
|---|
| 2578 | + |
|---|
| 2579 | + return 0; |
|---|
| 2580 | +err: |
|---|
| 2581 | + xlog_abort_defer_ops(log->l_mp, &capture_list); |
|---|
| 4950 | 2582 | return error; |
|---|
| 4951 | 2583 | } |
|---|
| 4952 | 2584 | |
|---|
| .. | .. |
|---|
| 4954 | 2586 | * A cancel occurs when the mount has failed and we're bailing out. |
|---|
| 4955 | 2587 | * Release all pending log intent items so they don't pin the AIL. |
|---|
| 4956 | 2588 | */ |
|---|
| 4957 | | -STATIC int |
|---|
| 2589 | +STATIC void |
|---|
| 4958 | 2590 | xlog_recover_cancel_intents( |
|---|
| 4959 | 2591 | struct xlog *log) |
|---|
| 4960 | 2592 | { |
|---|
| 4961 | 2593 | struct xfs_log_item *lip; |
|---|
| 4962 | | - int error = 0; |
|---|
| 4963 | 2594 | struct xfs_ail_cursor cur; |
|---|
| 4964 | 2595 | struct xfs_ail *ailp; |
|---|
| 4965 | 2596 | |
|---|
| .. | .. |
|---|
| 4979 | 2610 | break; |
|---|
| 4980 | 2611 | } |
|---|
| 4981 | 2612 | |
|---|
| 4982 | | - switch (lip->li_type) { |
|---|
| 4983 | | - case XFS_LI_EFI: |
|---|
| 4984 | | - xlog_recover_cancel_efi(log->l_mp, ailp, lip); |
|---|
| 4985 | | - break; |
|---|
| 4986 | | - case XFS_LI_RUI: |
|---|
| 4987 | | - xlog_recover_cancel_rui(log->l_mp, ailp, lip); |
|---|
| 4988 | | - break; |
|---|
| 4989 | | - case XFS_LI_CUI: |
|---|
| 4990 | | - xlog_recover_cancel_cui(log->l_mp, ailp, lip); |
|---|
| 4991 | | - break; |
|---|
| 4992 | | - case XFS_LI_BUI: |
|---|
| 4993 | | - xlog_recover_cancel_bui(log->l_mp, ailp, lip); |
|---|
| 4994 | | - break; |
|---|
| 4995 | | - } |
|---|
| 4996 | | - |
|---|
| 2613 | + spin_unlock(&ailp->ail_lock); |
|---|
| 2614 | + lip->li_ops->iop_release(lip); |
|---|
| 2615 | + spin_lock(&ailp->ail_lock); |
|---|
| 4997 | 2616 | lip = xfs_trans_ail_cursor_next(ailp, &cur); |
|---|
| 4998 | 2617 | } |
|---|
| 4999 | 2618 | |
|---|
| 5000 | 2619 | xfs_trans_ail_cursor_done(&cur); |
|---|
| 5001 | 2620 | spin_unlock(&ailp->ail_lock); |
|---|
| 5002 | | - return error; |
|---|
| 5003 | 2621 | } |
|---|
| 5004 | 2622 | |
|---|
| 5005 | 2623 | /* |
|---|
| .. | .. |
|---|
| 5026 | 2644 | if (error) |
|---|
| 5027 | 2645 | goto out_abort; |
|---|
| 5028 | 2646 | |
|---|
| 5029 | | - agi = XFS_BUF_TO_AGI(agibp); |
|---|
| 2647 | + agi = agibp->b_addr; |
|---|
| 5030 | 2648 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
|---|
| 5031 | 2649 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
|---|
| 5032 | 2650 | (sizeof(xfs_agino_t) * bucket); |
|---|
| .. | .. |
|---|
| 5066 | 2684 | /* |
|---|
| 5067 | 2685 | * Get the on disk inode to find the next inode in the bucket. |
|---|
| 5068 | 2686 | */ |
|---|
| 5069 | | - error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0); |
|---|
| 2687 | + error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0); |
|---|
| 5070 | 2688 | if (error) |
|---|
| 5071 | 2689 | goto fail_iput; |
|---|
| 5072 | 2690 | |
|---|
| .. | .. |
|---|
| 5103 | 2721 | } |
|---|
| 5104 | 2722 | |
|---|
| 5105 | 2723 | /* |
|---|
| 5106 | | - * xlog_iunlink_recover |
|---|
| 2724 | + * Recover AGI unlinked lists |
|---|
| 5107 | 2725 | * |
|---|
| 5108 | | - * This is called during recovery to process any inodes which |
|---|
| 5109 | | - * we unlinked but not freed when the system crashed. These |
|---|
| 5110 | | - * inodes will be on the lists in the AGI blocks. What we do |
|---|
| 5111 | | - * here is scan all the AGIs and fully truncate and free any |
|---|
| 5112 | | - * inodes found on the lists. Each inode is removed from the |
|---|
| 5113 | | - * lists when it has been fully truncated and is freed. The |
|---|
| 5114 | | - * freeing of the inode and its removal from the list must be |
|---|
| 5115 | | - * atomic. |
|---|
| 2726 | + * This is called during recovery to process any inodes which we unlinked but |
|---|
| 2727 | + * not freed when the system crashed. These inodes will be on the lists in the |
|---|
| 2728 | + * AGI blocks. What we do here is scan all the AGIs and fully truncate and free |
|---|
| 2729 | + * any inodes found on the lists. Each inode is removed from the lists when it |
|---|
| 2730 | + * has been fully truncated and is freed. The freeing of the inode and its |
|---|
| 2731 | + * removal from the list must be atomic. |
|---|
| 2732 | + * |
|---|
| 2733 | + * If everything we touch in the agi processing loop is already in memory, this |
|---|
| 2734 | + * loop can hold the cpu for a long time. It runs without lock contention, |
|---|
| 2735 | + * memory allocation contention, the need to wait for IO, etc, and so will run |
|---|
| 2736 | + * until we either run out of inodes to process, run low on memory or we run out |
|---|
| 2737 | + * of log space. |
|---|
| 2738 | + * |
|---|
| 2739 | + * This behaviour is bad for latency on single CPU and non-preemptible kernels, |
|---|
| 2740 | + * and can prevent other filesystem work (such as CIL pushes) from running. This |
|---|
| 2741 | + * can lead to deadlocks if the recovery process runs out of log reservation |
|---|
| 2742 | + * space. Hence we need to yield the CPU when there is other kernel work |
|---|
| 2743 | + * scheduled on this CPU to ensure other scheduled work can run without undue |
|---|
| 2744 | + * latency. |
|---|
| 5116 | 2745 | */ |
|---|
| 5117 | 2746 | STATIC void |
|---|
| 5118 | 2747 | xlog_recover_process_iunlinks( |
|---|
| .. | .. |
|---|
| 5151 | 2780 | * buffer reference though, so that it stays pinned in memory |
|---|
| 5152 | 2781 | * while we need the buffer. |
|---|
| 5153 | 2782 | */ |
|---|
| 5154 | | - agi = XFS_BUF_TO_AGI(agibp); |
|---|
| 2783 | + agi = agibp->b_addr; |
|---|
| 5155 | 2784 | xfs_buf_unlock(agibp); |
|---|
| 5156 | 2785 | |
|---|
| 5157 | 2786 | for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { |
|---|
| .. | .. |
|---|
| 5159 | 2788 | while (agino != NULLAGINO) { |
|---|
| 5160 | 2789 | agino = xlog_recover_process_one_iunlink(mp, |
|---|
| 5161 | 2790 | agno, agino, bucket); |
|---|
| 2791 | + cond_resched(); |
|---|
| 5162 | 2792 | } |
|---|
| 5163 | 2793 | } |
|---|
| 5164 | 2794 | xfs_buf_rele(agibp); |
|---|
| 5165 | 2795 | } |
|---|
| 5166 | 2796 | } |
|---|
| 5167 | 2797 | |
|---|
| 5168 | | -STATIC int |
|---|
| 2798 | +STATIC void |
|---|
| 5169 | 2799 | xlog_unpack_data( |
|---|
| 5170 | 2800 | struct xlog_rec_header *rhead, |
|---|
| 5171 | 2801 | char *dp, |
|---|
| .. | .. |
|---|
| 5188 | 2818 | dp += BBSIZE; |
|---|
| 5189 | 2819 | } |
|---|
| 5190 | 2820 | } |
|---|
| 5191 | | - |
|---|
| 5192 | | - return 0; |
|---|
| 5193 | 2821 | } |
|---|
| 5194 | 2822 | |
|---|
| 5195 | 2823 | /* |
|---|
| .. | .. |
|---|
| 5204 | 2832 | int pass, |
|---|
| 5205 | 2833 | struct list_head *buffer_list) |
|---|
| 5206 | 2834 | { |
|---|
| 5207 | | - int error; |
|---|
| 5208 | 2835 | __le32 old_crc = rhead->h_crc; |
|---|
| 5209 | 2836 | __le32 crc; |
|---|
| 5210 | | - |
|---|
| 5211 | 2837 | |
|---|
| 5212 | 2838 | crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); |
|---|
| 5213 | 2839 | |
|---|
| .. | .. |
|---|
| 5243 | 2869 | * If the filesystem is CRC enabled, this mismatch becomes a |
|---|
| 5244 | 2870 | * fatal log corruption failure. |
|---|
| 5245 | 2871 | */ |
|---|
| 5246 | | - if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) |
|---|
| 2872 | + if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) { |
|---|
| 2873 | + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); |
|---|
| 5247 | 2874 | return -EFSCORRUPTED; |
|---|
| 2875 | + } |
|---|
| 5248 | 2876 | } |
|---|
| 5249 | 2877 | |
|---|
| 5250 | | - error = xlog_unpack_data(rhead, dp, log); |
|---|
| 5251 | | - if (error) |
|---|
| 5252 | | - return error; |
|---|
| 2878 | + xlog_unpack_data(rhead, dp, log); |
|---|
| 5253 | 2879 | |
|---|
| 5254 | 2880 | return xlog_recover_process_data(log, rhash, rhead, dp, pass, |
|---|
| 5255 | 2881 | buffer_list); |
|---|
| .. | .. |
|---|
| 5259 | 2885 | xlog_valid_rec_header( |
|---|
| 5260 | 2886 | struct xlog *log, |
|---|
| 5261 | 2887 | struct xlog_rec_header *rhead, |
|---|
| 5262 | | - xfs_daddr_t blkno) |
|---|
| 2888 | + xfs_daddr_t blkno, |
|---|
| 2889 | + int bufsize) |
|---|
| 5263 | 2890 | { |
|---|
| 5264 | 2891 | int hlen; |
|---|
| 5265 | 2892 | |
|---|
| 5266 | | - if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { |
|---|
| 5267 | | - XFS_ERROR_REPORT("xlog_valid_rec_header(1)", |
|---|
| 5268 | | - XFS_ERRLEVEL_LOW, log->l_mp); |
|---|
| 2893 | + if (XFS_IS_CORRUPT(log->l_mp, |
|---|
| 2894 | + rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) |
|---|
| 5269 | 2895 | return -EFSCORRUPTED; |
|---|
| 5270 | | - } |
|---|
| 5271 | | - if (unlikely( |
|---|
| 5272 | | - (!rhead->h_version || |
|---|
| 5273 | | - (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { |
|---|
| 2896 | + if (XFS_IS_CORRUPT(log->l_mp, |
|---|
| 2897 | + (!rhead->h_version || |
|---|
| 2898 | + (be32_to_cpu(rhead->h_version) & |
|---|
| 2899 | + (~XLOG_VERSION_OKBITS))))) { |
|---|
| 5274 | 2900 | xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", |
|---|
| 5275 | 2901 | __func__, be32_to_cpu(rhead->h_version)); |
|---|
| 5276 | | - return -EIO; |
|---|
| 2902 | + return -EFSCORRUPTED; |
|---|
| 5277 | 2903 | } |
|---|
| 5278 | 2904 | |
|---|
| 5279 | | - /* LR body must have data or it wouldn't have been written */ |
|---|
| 2905 | + /* |
|---|
| 2906 | + * LR body must have data (or it wouldn't have been written) |
|---|
| 2907 | + * and h_len must not be greater than LR buffer size. |
|---|
| 2908 | + */ |
|---|
| 5280 | 2909 | hlen = be32_to_cpu(rhead->h_len); |
|---|
| 5281 | | - if (unlikely( hlen <= 0 || hlen > INT_MAX )) { |
|---|
| 5282 | | - XFS_ERROR_REPORT("xlog_valid_rec_header(2)", |
|---|
| 5283 | | - XFS_ERRLEVEL_LOW, log->l_mp); |
|---|
| 2910 | + if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize)) |
|---|
| 5284 | 2911 | return -EFSCORRUPTED; |
|---|
| 5285 | | - } |
|---|
| 5286 | | - if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { |
|---|
| 5287 | | - XFS_ERROR_REPORT("xlog_valid_rec_header(3)", |
|---|
| 5288 | | - XFS_ERRLEVEL_LOW, log->l_mp); |
|---|
| 2912 | + |
|---|
| 2913 | + if (XFS_IS_CORRUPT(log->l_mp, |
|---|
| 2914 | + blkno > log->l_logBBsize || blkno > INT_MAX)) |
|---|
| 5289 | 2915 | return -EFSCORRUPTED; |
|---|
| 5290 | | - } |
|---|
| 5291 | 2916 | return 0; |
|---|
| 5292 | 2917 | } |
|---|
| 5293 | 2918 | |
|---|
| .. | .. |
|---|
| 5311 | 2936 | xfs_daddr_t blk_no, rblk_no; |
|---|
| 5312 | 2937 | xfs_daddr_t rhead_blk; |
|---|
| 5313 | 2938 | char *offset; |
|---|
| 5314 | | - xfs_buf_t *hbp, *dbp; |
|---|
| 2939 | + char *hbp, *dbp; |
|---|
| 5315 | 2940 | int error = 0, h_size, h_len; |
|---|
| 5316 | 2941 | int error2 = 0; |
|---|
| 5317 | 2942 | int bblks, split_bblks; |
|---|
| .. | .. |
|---|
| 5336 | 2961 | * iclog header and extract the header size from it. Get a |
|---|
| 5337 | 2962 | * new hbp that is the correct size. |
|---|
| 5338 | 2963 | */ |
|---|
| 5339 | | - hbp = xlog_get_bp(log, 1); |
|---|
| 2964 | + hbp = xlog_alloc_buffer(log, 1); |
|---|
| 5340 | 2965 | if (!hbp) |
|---|
| 5341 | 2966 | return -ENOMEM; |
|---|
| 5342 | 2967 | |
|---|
| .. | .. |
|---|
| 5345 | 2970 | goto bread_err1; |
|---|
| 5346 | 2971 | |
|---|
| 5347 | 2972 | rhead = (xlog_rec_header_t *)offset; |
|---|
| 5348 | | - error = xlog_valid_rec_header(log, rhead, tail_blk); |
|---|
| 5349 | | - if (error) |
|---|
| 5350 | | - goto bread_err1; |
|---|
| 5351 | 2973 | |
|---|
| 5352 | 2974 | /* |
|---|
| 5353 | 2975 | * xfsprogs has a bug where record length is based on lsunit but |
|---|
| .. | .. |
|---|
| 5362 | 2984 | */ |
|---|
| 5363 | 2985 | h_size = be32_to_cpu(rhead->h_size); |
|---|
| 5364 | 2986 | h_len = be32_to_cpu(rhead->h_len); |
|---|
| 5365 | | - if (h_len > h_size) { |
|---|
| 5366 | | - if (h_len <= log->l_mp->m_logbsize && |
|---|
| 5367 | | - be32_to_cpu(rhead->h_num_logops) == 1) { |
|---|
| 5368 | | - xfs_warn(log->l_mp, |
|---|
| 2987 | + if (h_len > h_size && h_len <= log->l_mp->m_logbsize && |
|---|
| 2988 | + rhead->h_num_logops == cpu_to_be32(1)) { |
|---|
| 2989 | + xfs_warn(log->l_mp, |
|---|
| 5369 | 2990 | "invalid iclog size (%d bytes), using lsunit (%d bytes)", |
|---|
| 5370 | | - h_size, log->l_mp->m_logbsize); |
|---|
| 5371 | | - h_size = log->l_mp->m_logbsize; |
|---|
| 5372 | | - } else |
|---|
| 5373 | | - return -EFSCORRUPTED; |
|---|
| 2991 | + h_size, log->l_mp->m_logbsize); |
|---|
| 2992 | + h_size = log->l_mp->m_logbsize; |
|---|
| 5374 | 2993 | } |
|---|
| 5375 | 2994 | |
|---|
| 5376 | | - if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && |
|---|
| 5377 | | - (h_size > XLOG_HEADER_CYCLE_SIZE)) { |
|---|
| 5378 | | - hblks = h_size / XLOG_HEADER_CYCLE_SIZE; |
|---|
| 5379 | | - if (h_size % XLOG_HEADER_CYCLE_SIZE) |
|---|
| 5380 | | - hblks++; |
|---|
| 5381 | | - xlog_put_bp(hbp); |
|---|
| 5382 | | - hbp = xlog_get_bp(log, hblks); |
|---|
| 5383 | | - } else { |
|---|
| 5384 | | - hblks = 1; |
|---|
| 2995 | + error = xlog_valid_rec_header(log, rhead, tail_blk, h_size); |
|---|
| 2996 | + if (error) |
|---|
| 2997 | + goto bread_err1; |
|---|
| 2998 | + |
|---|
| 2999 | + hblks = xlog_logrec_hblks(log, rhead); |
|---|
| 3000 | + if (hblks != 1) { |
|---|
| 3001 | + kmem_free(hbp); |
|---|
| 3002 | + hbp = xlog_alloc_buffer(log, hblks); |
|---|
| 5385 | 3003 | } |
|---|
| 5386 | 3004 | } else { |
|---|
| 5387 | 3005 | ASSERT(log->l_sectBBsize == 1); |
|---|
| 5388 | 3006 | hblks = 1; |
|---|
| 5389 | | - hbp = xlog_get_bp(log, 1); |
|---|
| 3007 | + hbp = xlog_alloc_buffer(log, 1); |
|---|
| 5390 | 3008 | h_size = XLOG_BIG_RECORD_BSIZE; |
|---|
| 5391 | 3009 | } |
|---|
| 5392 | 3010 | |
|---|
| 5393 | 3011 | if (!hbp) |
|---|
| 5394 | 3012 | return -ENOMEM; |
|---|
| 5395 | | - dbp = xlog_get_bp(log, BTOBB(h_size)); |
|---|
| 3013 | + dbp = xlog_alloc_buffer(log, BTOBB(h_size)); |
|---|
| 5396 | 3014 | if (!dbp) { |
|---|
| 5397 | | - xlog_put_bp(hbp); |
|---|
| 3015 | + kmem_free(hbp); |
|---|
| 5398 | 3016 | return -ENOMEM; |
|---|
| 5399 | 3017 | } |
|---|
| 5400 | 3018 | |
|---|
| .. | .. |
|---|
| 5409 | 3027 | /* |
|---|
| 5410 | 3028 | * Check for header wrapping around physical end-of-log |
|---|
| 5411 | 3029 | */ |
|---|
| 5412 | | - offset = hbp->b_addr; |
|---|
| 3030 | + offset = hbp; |
|---|
| 5413 | 3031 | split_hblks = 0; |
|---|
| 5414 | 3032 | wrapped_hblks = 0; |
|---|
| 5415 | 3033 | if (blk_no + hblks <= log->l_logBBsize) { |
|---|
| .. | .. |
|---|
| 5445 | 3063 | * - order is important. |
|---|
| 5446 | 3064 | */ |
|---|
| 5447 | 3065 | wrapped_hblks = hblks - split_hblks; |
|---|
| 5448 | | - error = xlog_bread_offset(log, 0, |
|---|
| 5449 | | - wrapped_hblks, hbp, |
|---|
| 3066 | + error = xlog_bread_noalign(log, 0, |
|---|
| 3067 | + wrapped_hblks, |
|---|
| 5450 | 3068 | offset + BBTOB(split_hblks)); |
|---|
| 5451 | 3069 | if (error) |
|---|
| 5452 | 3070 | goto bread_err2; |
|---|
| 5453 | 3071 | } |
|---|
| 5454 | 3072 | rhead = (xlog_rec_header_t *)offset; |
|---|
| 5455 | 3073 | error = xlog_valid_rec_header(log, rhead, |
|---|
| 5456 | | - split_hblks ? blk_no : 0); |
|---|
| 3074 | + split_hblks ? blk_no : 0, h_size); |
|---|
| 5457 | 3075 | if (error) |
|---|
| 5458 | 3076 | goto bread_err2; |
|---|
| 5459 | 3077 | |
|---|
| .. | .. |
|---|
| 5477 | 3095 | } else { |
|---|
| 5478 | 3096 | /* This log record is split across the |
|---|
| 5479 | 3097 | * physical end of log */ |
|---|
| 5480 | | - offset = dbp->b_addr; |
|---|
| 3098 | + offset = dbp; |
|---|
| 5481 | 3099 | split_bblks = 0; |
|---|
| 5482 | 3100 | if (blk_no != log->l_logBBsize) { |
|---|
| 5483 | 3101 | /* some data is before the physical |
|---|
| .. | .. |
|---|
| 5506 | 3124 | * _first_, then the log start (LR header end) |
|---|
| 5507 | 3125 | * - order is important. |
|---|
| 5508 | 3126 | */ |
|---|
| 5509 | | - error = xlog_bread_offset(log, 0, |
|---|
| 5510 | | - bblks - split_bblks, dbp, |
|---|
| 3127 | + error = xlog_bread_noalign(log, 0, |
|---|
| 3128 | + bblks - split_bblks, |
|---|
| 5511 | 3129 | offset + BBTOB(split_bblks)); |
|---|
| 5512 | 3130 | if (error) |
|---|
| 5513 | 3131 | goto bread_err2; |
|---|
| .. | .. |
|---|
| 5534 | 3152 | goto bread_err2; |
|---|
| 5535 | 3153 | |
|---|
| 5536 | 3154 | rhead = (xlog_rec_header_t *)offset; |
|---|
| 5537 | | - error = xlog_valid_rec_header(log, rhead, blk_no); |
|---|
| 3155 | + error = xlog_valid_rec_header(log, rhead, blk_no, h_size); |
|---|
| 5538 | 3156 | if (error) |
|---|
| 5539 | 3157 | goto bread_err2; |
|---|
| 5540 | 3158 | |
|---|
| .. | .. |
|---|
| 5555 | 3173 | } |
|---|
| 5556 | 3174 | |
|---|
| 5557 | 3175 | bread_err2: |
|---|
| 5558 | | - xlog_put_bp(dbp); |
|---|
| 3176 | + kmem_free(dbp); |
|---|
| 5559 | 3177 | bread_err1: |
|---|
| 5560 | | - xlog_put_bp(hbp); |
|---|
| 3178 | + kmem_free(hbp); |
|---|
| 5561 | 3179 | |
|---|
| 5562 | 3180 | /* |
|---|
| 5563 | 3181 | * Submit buffers that have been added from the last record processed, |
|---|
| .. | .. |
|---|
| 5614 | 3232 | */ |
|---|
| 5615 | 3233 | log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE * |
|---|
| 5616 | 3234 | sizeof(struct list_head), |
|---|
| 5617 | | - KM_SLEEP); |
|---|
| 3235 | + 0); |
|---|
| 5618 | 3236 | for (i = 0; i < XLOG_BC_TABLE_SIZE; i++) |
|---|
| 5619 | 3237 | INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); |
|---|
| 5620 | 3238 | |
|---|
| .. | .. |
|---|
| 5651 | 3269 | */ |
|---|
| 5652 | 3270 | STATIC int |
|---|
| 5653 | 3271 | xlog_do_recover( |
|---|
| 5654 | | - struct xlog *log, |
|---|
| 5655 | | - xfs_daddr_t head_blk, |
|---|
| 5656 | | - xfs_daddr_t tail_blk) |
|---|
| 3272 | + struct xlog *log, |
|---|
| 3273 | + xfs_daddr_t head_blk, |
|---|
| 3274 | + xfs_daddr_t tail_blk) |
|---|
| 5657 | 3275 | { |
|---|
| 5658 | | - struct xfs_mount *mp = log->l_mp; |
|---|
| 5659 | | - int error; |
|---|
| 5660 | | - xfs_buf_t *bp; |
|---|
| 5661 | | - xfs_sb_t *sbp; |
|---|
| 3276 | + struct xfs_mount *mp = log->l_mp; |
|---|
| 3277 | + struct xfs_buf *bp = mp->m_sb_bp; |
|---|
| 3278 | + struct xfs_sb *sbp = &mp->m_sb; |
|---|
| 3279 | + int error; |
|---|
| 5662 | 3280 | |
|---|
| 5663 | 3281 | trace_xfs_log_recover(log, head_blk, tail_blk); |
|---|
| 5664 | 3282 | |
|---|
| .. | .. |
|---|
| 5672 | 3290 | /* |
|---|
| 5673 | 3291 | * If IO errors happened during recovery, bail out. |
|---|
| 5674 | 3292 | */ |
|---|
| 5675 | | - if (XFS_FORCED_SHUTDOWN(mp)) { |
|---|
| 3293 | + if (XFS_FORCED_SHUTDOWN(mp)) |
|---|
| 5676 | 3294 | return -EIO; |
|---|
| 5677 | | - } |
|---|
| 5678 | 3295 | |
|---|
| 5679 | 3296 | /* |
|---|
| 5680 | 3297 | * We now update the tail_lsn since much of the recovery has completed |
|---|
| .. | .. |
|---|
| 5688 | 3305 | xlog_assign_tail_lsn(mp); |
|---|
| 5689 | 3306 | |
|---|
| 5690 | 3307 | /* |
|---|
| 5691 | | - * Now that we've finished replaying all buffer and inode |
|---|
| 5692 | | - * updates, re-read in the superblock and reverify it. |
|---|
| 3308 | + * Now that we've finished replaying all buffer and inode updates, |
|---|
| 3309 | + * re-read the superblock and reverify it. |
|---|
| 5693 | 3310 | */ |
|---|
| 5694 | | - bp = xfs_getsb(mp, 0); |
|---|
| 5695 | | - bp->b_flags &= ~(XBF_DONE | XBF_ASYNC); |
|---|
| 5696 | | - ASSERT(!(bp->b_flags & XBF_WRITE)); |
|---|
| 5697 | | - bp->b_flags |= XBF_READ; |
|---|
| 5698 | | - bp->b_ops = &xfs_sb_buf_ops; |
|---|
| 5699 | | - |
|---|
| 5700 | | - error = xfs_buf_submit(bp); |
|---|
| 3311 | + xfs_buf_lock(bp); |
|---|
| 3312 | + xfs_buf_hold(bp); |
|---|
| 3313 | + error = _xfs_buf_read(bp, XBF_READ); |
|---|
| 5701 | 3314 | if (error) { |
|---|
| 5702 | 3315 | if (!XFS_FORCED_SHUTDOWN(mp)) { |
|---|
| 5703 | | - xfs_buf_ioerror_alert(bp, __func__); |
|---|
| 3316 | + xfs_buf_ioerror_alert(bp, __this_address); |
|---|
| 5704 | 3317 | ASSERT(0); |
|---|
| 5705 | 3318 | } |
|---|
| 5706 | 3319 | xfs_buf_relse(bp); |
|---|
| .. | .. |
|---|
| 5708 | 3321 | } |
|---|
| 5709 | 3322 | |
|---|
| 5710 | 3323 | /* Convert superblock from on-disk format */ |
|---|
| 5711 | | - sbp = &mp->m_sb; |
|---|
| 5712 | | - xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); |
|---|
| 3324 | + xfs_sb_from_disk(sbp, bp->b_addr); |
|---|
| 5713 | 3325 | xfs_buf_relse(bp); |
|---|
| 5714 | 3326 | |
|---|
| 5715 | 3327 | /* re-initialise in-core superblock and geometry structures */ |
|---|
| .. | .. |
|---|
| 5838 | 3450 | int error; |
|---|
| 5839 | 3451 | error = xlog_recover_process_intents(log); |
|---|
| 5840 | 3452 | if (error) { |
|---|
| 3453 | + /* |
|---|
| 3454 | + * Cancel all the unprocessed intent items now so that |
|---|
| 3455 | + * we don't leave them pinned in the AIL. This can |
|---|
| 3456 | + * cause the AIL to livelock on the pinned item if |
|---|
| 3457 | + * anyone tries to push the AIL (inode reclaim does |
|---|
| 3458 | + * this) before we get around to xfs_log_mount_cancel. |
|---|
| 3459 | + */ |
|---|
| 3460 | + xlog_recover_cancel_intents(log); |
|---|
| 3461 | + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); |
|---|
| 5841 | 3462 | xfs_alert(log->l_mp, "Failed to recover intents"); |
|---|
| 5842 | 3463 | return error; |
|---|
| 5843 | 3464 | } |
|---|
| .. | .. |
|---|
| 5864 | 3485 | return 0; |
|---|
| 5865 | 3486 | } |
|---|
| 5866 | 3487 | |
|---|
| 5867 | | -int |
|---|
| 3488 | +void |
|---|
| 5868 | 3489 | xlog_recover_cancel( |
|---|
| 5869 | 3490 | struct xlog *log) |
|---|
| 5870 | 3491 | { |
|---|
| 5871 | | - int error = 0; |
|---|
| 5872 | | - |
|---|
| 5873 | 3492 | if (log->l_flags & XLOG_RECOVERY_NEEDED) |
|---|
| 5874 | | - error = xlog_recover_cancel_intents(log); |
|---|
| 5875 | | - |
|---|
| 5876 | | - return error; |
|---|
| 3493 | + xlog_recover_cancel_intents(log); |
|---|
| 5877 | 3494 | } |
|---|
| 5878 | 3495 | |
|---|
| 5879 | 3496 | #if defined(DEBUG) |
|---|
| .. | .. |
|---|
| 5886 | 3503 | struct xlog *log) |
|---|
| 5887 | 3504 | { |
|---|
| 5888 | 3505 | xfs_mount_t *mp; |
|---|
| 5889 | | - xfs_agf_t *agfp; |
|---|
| 5890 | 3506 | xfs_buf_t *agfbp; |
|---|
| 5891 | 3507 | xfs_buf_t *agibp; |
|---|
| 5892 | 3508 | xfs_agnumber_t agno; |
|---|
| .. | .. |
|---|
| 5906 | 3522 | xfs_alert(mp, "%s agf read failed agno %d error %d", |
|---|
| 5907 | 3523 | __func__, agno, error); |
|---|
| 5908 | 3524 | } else { |
|---|
| 5909 | | - agfp = XFS_BUF_TO_AGF(agfbp); |
|---|
| 3525 | + struct xfs_agf *agfp = agfbp->b_addr; |
|---|
| 3526 | + |
|---|
| 5910 | 3527 | freeblks += be32_to_cpu(agfp->agf_freeblks) + |
|---|
| 5911 | 3528 | be32_to_cpu(agfp->agf_flcount); |
|---|
| 5912 | 3529 | xfs_buf_relse(agfbp); |
|---|
| .. | .. |
|---|
| 5917 | 3534 | xfs_alert(mp, "%s agi read failed agno %d error %d", |
|---|
| 5918 | 3535 | __func__, agno, error); |
|---|
| 5919 | 3536 | } else { |
|---|
| 5920 | | - struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); |
|---|
| 3537 | + struct xfs_agi *agi = agibp->b_addr; |
|---|
| 5921 | 3538 | |
|---|
| 5922 | 3539 | itotal += be32_to_cpu(agi->agi_count); |
|---|
| 5923 | 3540 | ifree += be32_to_cpu(agi->agi_freecount); |
|---|