.. | .. |
---|
5 | 5 | */ |
---|
6 | 6 | #include "xfs.h" |
---|
7 | 7 | #include "xfs_fs.h" |
---|
| 8 | +#include "xfs_shared.h" |
---|
8 | 9 | #include "xfs_format.h" |
---|
9 | 10 | #include "xfs_log_format.h" |
---|
10 | 11 | #include "xfs_trans_resv.h" |
---|
.. | .. |
---|
12 | 13 | #include "xfs_inode.h" |
---|
13 | 14 | #include "xfs_trans.h" |
---|
14 | 15 | #include "xfs_inode_item.h" |
---|
15 | | -#include "xfs_error.h" |
---|
16 | 16 | #include "xfs_trace.h" |
---|
17 | 17 | #include "xfs_trans_priv.h" |
---|
18 | 18 | #include "xfs_buf_item.h" |
---|
19 | 19 | #include "xfs_log.h" |
---|
| 20 | +#include "xfs_error.h" |
---|
20 | 21 | |
---|
21 | 22 | #include <linux/iversion.h> |
---|
22 | 23 | |
---|
.. | .. |
---|
27 | 28 | return container_of(lip, struct xfs_inode_log_item, ili_item); |
---|
28 | 29 | } |
---|
29 | 30 | |
---|
| 31 | +/* |
---|
| 32 | + * The logged size of an inode fork is always the current size of the inode |
---|
| 33 | + * fork. This means that when an inode fork is relogged, the size of the logged |
---|
| 34 | + * region is determined by the current state, not the combination of the |
---|
| 35 | + * previously logged state + the current state. This is different relogging |
---|
| 36 | + * behaviour to most other log items which will retain the size of the |
---|
| 37 | + * previously logged changes when smaller regions are relogged. |
---|
| 38 | + * |
---|
| 39 | + * Hence, for operations that remove data from the inode fork (e.g. shortform |
---|
| 40 | + * dir/attr remove, extent form extent removal, etc), the size of the relogged |
---|
| 41 | + * inode gets -smaller- rather than staying the same as the previously logged |
---|
| 42 | + * size, and this can result in the committing transaction reducing the amount |
---|
| 43 | + * of space being consumed by the CIL. |
---|
| 44 | + */ |
---|
30 | 45 | STATIC void |
---|
31 | 46 | xfs_inode_item_data_fork_size( |
---|
32 | 47 | struct xfs_inode_log_item *iip, |
---|
.. | .. |
---|
35 | 50 | { |
---|
36 | 51 | struct xfs_inode *ip = iip->ili_inode; |
---|
37 | 52 | |
---|
38 | | - switch (ip->i_d.di_format) { |
---|
| 53 | + switch (ip->i_df.if_format) { |
---|
39 | 54 | case XFS_DINODE_FMT_EXTENTS: |
---|
40 | 55 | if ((iip->ili_fields & XFS_ILOG_DEXT) && |
---|
41 | | - ip->i_d.di_nextents > 0 && |
---|
| 56 | + ip->i_df.if_nextents > 0 && |
---|
42 | 57 | ip->i_df.if_bytes > 0) { |
---|
43 | 58 | /* worst case, doesn't subtract delalloc extents */ |
---|
44 | 59 | *nbytes += XFS_IFORK_DSIZE(ip); |
---|
.. | .. |
---|
76 | 91 | { |
---|
77 | 92 | struct xfs_inode *ip = iip->ili_inode; |
---|
78 | 93 | |
---|
79 | | - switch (ip->i_d.di_aformat) { |
---|
| 94 | + switch (ip->i_afp->if_format) { |
---|
80 | 95 | case XFS_DINODE_FMT_EXTENTS: |
---|
81 | 96 | if ((iip->ili_fields & XFS_ILOG_AEXT) && |
---|
82 | | - ip->i_d.di_anextents > 0 && |
---|
| 97 | + ip->i_afp->if_nextents > 0 && |
---|
83 | 98 | ip->i_afp->if_bytes > 0) { |
---|
84 | 99 | /* worst case, doesn't subtract unused space */ |
---|
85 | 100 | *nbytes += XFS_IFORK_ASIZE(ip); |
---|
.. | .. |
---|
124 | 139 | |
---|
125 | 140 | *nvecs += 2; |
---|
126 | 141 | *nbytes += sizeof(struct xfs_inode_log_format) + |
---|
127 | | - xfs_log_dinode_size(ip->i_d.di_version); |
---|
| 142 | + xfs_log_dinode_size(ip->i_mount); |
---|
128 | 143 | |
---|
129 | 144 | xfs_inode_item_data_fork_size(iip, nvecs, nbytes); |
---|
130 | 145 | if (XFS_IFORK_Q(ip)) |
---|
.. | .. |
---|
141 | 156 | struct xfs_inode *ip = iip->ili_inode; |
---|
142 | 157 | size_t data_bytes; |
---|
143 | 158 | |
---|
144 | | - switch (ip->i_d.di_format) { |
---|
| 159 | + switch (ip->i_df.if_format) { |
---|
145 | 160 | case XFS_DINODE_FMT_EXTENTS: |
---|
146 | 161 | iip->ili_fields &= |
---|
147 | 162 | ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV); |
---|
148 | 163 | |
---|
149 | 164 | if ((iip->ili_fields & XFS_ILOG_DEXT) && |
---|
150 | | - ip->i_d.di_nextents > 0 && |
---|
| 165 | + ip->i_df.if_nextents > 0 && |
---|
151 | 166 | ip->i_df.if_bytes > 0) { |
---|
152 | 167 | struct xfs_bmbt_rec *p; |
---|
153 | 168 | |
---|
.. | .. |
---|
190 | 205 | ip->i_df.if_bytes > 0) { |
---|
191 | 206 | /* |
---|
192 | 207 | * Round i_bytes up to a word boundary. |
---|
193 | | - * The underlying memory is guaranteed to |
---|
| 208 | + * The underlying memory is guaranteed |
---|
194 | 209 | * to be there by xfs_idata_realloc(). |
---|
195 | 210 | */ |
---|
196 | 211 | data_bytes = roundup(ip->i_df.if_bytes, 4); |
---|
.. | .. |
---|
226 | 241 | struct xfs_inode *ip = iip->ili_inode; |
---|
227 | 242 | size_t data_bytes; |
---|
228 | 243 | |
---|
229 | | - switch (ip->i_d.di_aformat) { |
---|
| 244 | + switch (ip->i_afp->if_format) { |
---|
230 | 245 | case XFS_DINODE_FMT_EXTENTS: |
---|
231 | 246 | iip->ili_fields &= |
---|
232 | 247 | ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT); |
---|
233 | 248 | |
---|
234 | 249 | if ((iip->ili_fields & XFS_ILOG_AEXT) && |
---|
235 | | - ip->i_d.di_anextents > 0 && |
---|
| 250 | + ip->i_afp->if_nextents > 0 && |
---|
236 | 251 | ip->i_afp->if_bytes > 0) { |
---|
237 | 252 | struct xfs_bmbt_rec *p; |
---|
238 | 253 | |
---|
239 | 254 | ASSERT(xfs_iext_count(ip->i_afp) == |
---|
240 | | - ip->i_d.di_anextents); |
---|
| 255 | + ip->i_afp->if_nextents); |
---|
241 | 256 | |
---|
242 | 257 | p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); |
---|
243 | 258 | data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); |
---|
.. | .. |
---|
274 | 289 | ip->i_afp->if_bytes > 0) { |
---|
275 | 290 | /* |
---|
276 | 291 | * Round i_bytes up to a word boundary. |
---|
277 | | - * The underlying memory is guaranteed to |
---|
| 292 | + * The underlying memory is guaranteed |
---|
278 | 293 | * to be there by xfs_idata_realloc(). |
---|
279 | 294 | */ |
---|
280 | 295 | data_bytes = roundup(ip->i_afp->if_bytes, 4); |
---|
.. | .. |
---|
294 | 309 | } |
---|
295 | 310 | } |
---|
296 | 311 | |
---|
| 312 | +/* |
---|
| 313 | + * Convert an incore timestamp to a log timestamp. Note that the log format |
---|
| 314 | + * specifies host endian format! |
---|
| 315 | + */ |
---|
| 316 | +static inline xfs_ictimestamp_t |
---|
| 317 | +xfs_inode_to_log_dinode_ts( |
---|
| 318 | + struct xfs_inode *ip, |
---|
| 319 | + const struct timespec64 tv) |
---|
| 320 | +{ |
---|
| 321 | + struct xfs_legacy_ictimestamp *lits; |
---|
| 322 | + xfs_ictimestamp_t its; |
---|
| 323 | + |
---|
| 324 | + if (xfs_inode_has_bigtime(ip)) |
---|
| 325 | + return xfs_inode_encode_bigtime(tv); |
---|
| 326 | + |
---|
| 327 | + lits = (struct xfs_legacy_ictimestamp *)&its; |
---|
| 328 | + lits->t_sec = tv.tv_sec; |
---|
| 329 | + lits->t_nsec = tv.tv_nsec; |
---|
| 330 | + |
---|
| 331 | + return its; |
---|
| 332 | +} |
---|
| 333 | + |
---|
297 | 334 | static void |
---|
298 | 335 | xfs_inode_to_log_dinode( |
---|
299 | 336 | struct xfs_inode *ip, |
---|
.. | .. |
---|
304 | 341 | struct inode *inode = VFS_I(ip); |
---|
305 | 342 | |
---|
306 | 343 | to->di_magic = XFS_DINODE_MAGIC; |
---|
307 | | - |
---|
308 | | - to->di_version = from->di_version; |
---|
309 | | - to->di_format = from->di_format; |
---|
310 | | - to->di_uid = from->di_uid; |
---|
311 | | - to->di_gid = from->di_gid; |
---|
312 | | - to->di_projid_lo = from->di_projid_lo; |
---|
313 | | - to->di_projid_hi = from->di_projid_hi; |
---|
| 344 | + to->di_format = xfs_ifork_format(&ip->i_df); |
---|
| 345 | + to->di_uid = i_uid_read(inode); |
---|
| 346 | + to->di_gid = i_gid_read(inode); |
---|
| 347 | + to->di_projid_lo = from->di_projid & 0xffff; |
---|
| 348 | + to->di_projid_hi = from->di_projid >> 16; |
---|
314 | 349 | |
---|
315 | 350 | memset(to->di_pad, 0, sizeof(to->di_pad)); |
---|
316 | 351 | memset(to->di_pad3, 0, sizeof(to->di_pad3)); |
---|
317 | | - to->di_atime.t_sec = inode->i_atime.tv_sec; |
---|
318 | | - to->di_atime.t_nsec = inode->i_atime.tv_nsec; |
---|
319 | | - to->di_mtime.t_sec = inode->i_mtime.tv_sec; |
---|
320 | | - to->di_mtime.t_nsec = inode->i_mtime.tv_nsec; |
---|
321 | | - to->di_ctime.t_sec = inode->i_ctime.tv_sec; |
---|
322 | | - to->di_ctime.t_nsec = inode->i_ctime.tv_nsec; |
---|
| 352 | + to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime); |
---|
| 353 | + to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime); |
---|
| 354 | + to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode->i_ctime); |
---|
323 | 355 | to->di_nlink = inode->i_nlink; |
---|
324 | 356 | to->di_gen = inode->i_generation; |
---|
325 | 357 | to->di_mode = inode->i_mode; |
---|
.. | .. |
---|
327 | 359 | to->di_size = from->di_size; |
---|
328 | 360 | to->di_nblocks = from->di_nblocks; |
---|
329 | 361 | to->di_extsize = from->di_extsize; |
---|
330 | | - to->di_nextents = from->di_nextents; |
---|
331 | | - to->di_anextents = from->di_anextents; |
---|
| 362 | + to->di_nextents = xfs_ifork_nextents(&ip->i_df); |
---|
| 363 | + to->di_anextents = xfs_ifork_nextents(ip->i_afp); |
---|
332 | 364 | to->di_forkoff = from->di_forkoff; |
---|
333 | | - to->di_aformat = from->di_aformat; |
---|
| 365 | + to->di_aformat = xfs_ifork_format(ip->i_afp); |
---|
334 | 366 | to->di_dmevmask = from->di_dmevmask; |
---|
335 | 367 | to->di_dmstate = from->di_dmstate; |
---|
336 | 368 | to->di_flags = from->di_flags; |
---|
.. | .. |
---|
338 | 370 | /* log a dummy value to ensure log structure is fully initialised */ |
---|
339 | 371 | to->di_next_unlinked = NULLAGINO; |
---|
340 | 372 | |
---|
341 | | - if (from->di_version == 3) { |
---|
| 373 | + if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { |
---|
| 374 | + to->di_version = 3; |
---|
342 | 375 | to->di_changecount = inode_peek_iversion(inode); |
---|
343 | | - to->di_crtime.t_sec = from->di_crtime.t_sec; |
---|
344 | | - to->di_crtime.t_nsec = from->di_crtime.t_nsec; |
---|
| 376 | + to->di_crtime = xfs_inode_to_log_dinode_ts(ip, from->di_crtime); |
---|
345 | 377 | to->di_flags2 = from->di_flags2; |
---|
346 | 378 | to->di_cowextsize = from->di_cowextsize; |
---|
347 | 379 | to->di_ino = ip->i_ino; |
---|
.. | .. |
---|
350 | 382 | uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); |
---|
351 | 383 | to->di_flushiter = 0; |
---|
352 | 384 | } else { |
---|
| 385 | + to->di_version = 2; |
---|
353 | 386 | to->di_flushiter = from->di_flushiter; |
---|
354 | 387 | } |
---|
355 | 388 | } |
---|
.. | .. |
---|
369 | 402 | |
---|
370 | 403 | dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE); |
---|
371 | 404 | xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn); |
---|
372 | | - xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version)); |
---|
| 405 | + xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_mount)); |
---|
373 | 406 | } |
---|
374 | 407 | |
---|
375 | 408 | /* |
---|
.. | .. |
---|
393 | 426 | struct xfs_inode *ip = iip->ili_inode; |
---|
394 | 427 | struct xfs_log_iovec *vecp = NULL; |
---|
395 | 428 | struct xfs_inode_log_format *ilf; |
---|
396 | | - |
---|
397 | | - ASSERT(ip->i_d.di_version > 1); |
---|
398 | 429 | |
---|
399 | 430 | ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); |
---|
400 | 431 | ilf->ilf_type = XFS_LI_INODE; |
---|
.. | .. |
---|
440 | 471 | struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode; |
---|
441 | 472 | |
---|
442 | 473 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
---|
| 474 | + ASSERT(lip->li_buf); |
---|
443 | 475 | |
---|
444 | 476 | trace_xfs_inode_pin(ip, _RET_IP_); |
---|
445 | 477 | atomic_inc(&ip->i_pincount); |
---|
.. | .. |
---|
451 | 483 | * item which was previously pinned with a call to xfs_inode_item_pin(). |
---|
452 | 484 | * |
---|
453 | 485 | * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0. |
---|
| 486 | + * |
---|
| 487 | + * Note that unpin can race with inode cluster buffer freeing, which marks the |
---|
| 488 | + * buffer stale. In that case, flush completions are run from the buffer unpin |
---|
| 489 | + * call, which may happen before the inode is unpinned. If we lose the race, |
---|
| 490 | + * there will be no buffer attached to the log item, but the inode will be |
---|
| 491 | + * marked XFS_ISTALE. |
---|
454 | 492 | */ |
---|
455 | 493 | STATIC void |
---|
456 | 494 | xfs_inode_item_unpin( |
---|
.. | .. |
---|
460 | 498 | struct xfs_inode *ip = INODE_ITEM(lip)->ili_inode; |
---|
461 | 499 | |
---|
462 | 500 | trace_xfs_inode_unpin(ip, _RET_IP_); |
---|
| 501 | + ASSERT(lip->li_buf || xfs_iflags_test(ip, XFS_ISTALE)); |
---|
463 | 502 | ASSERT(atomic_read(&ip->i_pincount) > 0); |
---|
464 | 503 | if (atomic_dec_and_test(&ip->i_pincount)) |
---|
465 | 504 | wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT); |
---|
466 | | -} |
---|
467 | | - |
---|
468 | | -/* |
---|
469 | | - * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer |
---|
470 | | - * have been failed during writeback |
---|
471 | | - * |
---|
472 | | - * This informs the AIL that the inode is already flush locked on the next push, |
---|
473 | | - * and acquires a hold on the buffer to ensure that it isn't reclaimed before |
---|
474 | | - * dirty data makes it to disk. |
---|
475 | | - */ |
---|
476 | | -STATIC void |
---|
477 | | -xfs_inode_item_error( |
---|
478 | | - struct xfs_log_item *lip, |
---|
479 | | - struct xfs_buf *bp) |
---|
480 | | -{ |
---|
481 | | - ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode)); |
---|
482 | | - xfs_set_li_failed(lip, bp); |
---|
483 | 505 | } |
---|
484 | 506 | |
---|
485 | 507 | STATIC uint |
---|
.. | .. |
---|
495 | 517 | uint rval = XFS_ITEM_SUCCESS; |
---|
496 | 518 | int error; |
---|
497 | 519 | |
---|
498 | | - if (xfs_ipincount(ip) > 0) |
---|
| 520 | + ASSERT(iip->ili_item.li_buf); |
---|
| 521 | + |
---|
| 522 | + if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp) || |
---|
| 523 | + (ip->i_flags & XFS_ISTALE)) |
---|
499 | 524 | return XFS_ITEM_PINNED; |
---|
500 | 525 | |
---|
501 | | - /* |
---|
502 | | - * The buffer containing this item failed to be written back |
---|
503 | | - * previously. Resubmit the buffer for IO. |
---|
504 | | - */ |
---|
505 | | - if (test_bit(XFS_LI_FAILED, &lip->li_flags)) { |
---|
506 | | - if (!xfs_buf_trylock(bp)) |
---|
507 | | - return XFS_ITEM_LOCKED; |
---|
| 526 | + if (xfs_iflags_test(ip, XFS_IFLUSHING)) |
---|
| 527 | + return XFS_ITEM_FLUSHING; |
---|
508 | 528 | |
---|
509 | | - if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) |
---|
510 | | - rval = XFS_ITEM_FLUSHING; |
---|
511 | | - |
---|
512 | | - xfs_buf_unlock(bp); |
---|
513 | | - return rval; |
---|
514 | | - } |
---|
515 | | - |
---|
516 | | - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) |
---|
| 529 | + if (!xfs_buf_trylock(bp)) |
---|
517 | 530 | return XFS_ITEM_LOCKED; |
---|
518 | | - |
---|
519 | | - /* |
---|
520 | | - * Re-check the pincount now that we stabilized the value by |
---|
521 | | - * taking the ilock. |
---|
522 | | - */ |
---|
523 | | - if (xfs_ipincount(ip) > 0) { |
---|
524 | | - rval = XFS_ITEM_PINNED; |
---|
525 | | - goto out_unlock; |
---|
526 | | - } |
---|
527 | | - |
---|
528 | | - /* |
---|
529 | | - * Stale inode items should force out the iclog. |
---|
530 | | - */ |
---|
531 | | - if (ip->i_flags & XFS_ISTALE) { |
---|
532 | | - rval = XFS_ITEM_PINNED; |
---|
533 | | - goto out_unlock; |
---|
534 | | - } |
---|
535 | | - |
---|
536 | | - /* |
---|
537 | | - * Someone else is already flushing the inode. Nothing we can do |
---|
538 | | - * here but wait for the flush to finish and remove the item from |
---|
539 | | - * the AIL. |
---|
540 | | - */ |
---|
541 | | - if (!xfs_iflock_nowait(ip)) { |
---|
542 | | - rval = XFS_ITEM_FLUSHING; |
---|
543 | | - goto out_unlock; |
---|
544 | | - } |
---|
545 | | - |
---|
546 | | - ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); |
---|
547 | | - ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); |
---|
548 | 531 | |
---|
549 | 532 | spin_unlock(&lip->li_ailp->ail_lock); |
---|
550 | 533 | |
---|
551 | | - error = xfs_iflush(ip, &bp); |
---|
| 534 | + /* |
---|
| 535 | + * We need to hold a reference for flushing the cluster buffer as it may |
---|
| 536 | + * fail the buffer without IO submission. In that case, we had better |
---|
| 537 | + * get a reference for that completion, because otherwise we don't get |
---|
| 538 | + * a reference for IO until we queue the buffer for delwri submission. |
---|
| 539 | + */ |
---|
| 540 | + xfs_buf_hold(bp); |
---|
| 541 | + error = xfs_iflush_cluster(bp); |
---|
552 | 542 | if (!error) { |
---|
553 | 543 | if (!xfs_buf_delwri_queue(bp, buffer_list)) |
---|
554 | 544 | rval = XFS_ITEM_FLUSHING; |
---|
555 | 545 | xfs_buf_relse(bp); |
---|
| 546 | + } else { |
---|
| 547 | + /* |
---|
| 548 | + * Release the buffer if we were unable to flush anything. On |
---|
| 549 | + * any other error, the buffer has already been released. |
---|
| 550 | + */ |
---|
| 551 | + if (error == -EAGAIN) |
---|
| 552 | + xfs_buf_relse(bp); |
---|
| 553 | + rval = XFS_ITEM_LOCKED; |
---|
556 | 554 | } |
---|
557 | 555 | |
---|
558 | 556 | spin_lock(&lip->li_ailp->ail_lock); |
---|
559 | | -out_unlock: |
---|
560 | | - xfs_iunlock(ip, XFS_ILOCK_SHARED); |
---|
561 | 557 | return rval; |
---|
562 | 558 | } |
---|
563 | 559 | |
---|
.. | .. |
---|
565 | 561 | * Unlock the inode associated with the inode log item. |
---|
566 | 562 | */ |
---|
567 | 563 | STATIC void |
---|
568 | | -xfs_inode_item_unlock( |
---|
| 564 | +xfs_inode_item_release( |
---|
569 | 565 | struct xfs_log_item *lip) |
---|
570 | 566 | { |
---|
571 | 567 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); |
---|
.. | .. |
---|
621 | 617 | STATIC void |
---|
622 | 618 | xfs_inode_item_committing( |
---|
623 | 619 | struct xfs_log_item *lip, |
---|
624 | | - xfs_lsn_t lsn) |
---|
| 620 | + xfs_csn_t seq) |
---|
625 | 621 | { |
---|
626 | | - INODE_ITEM(lip)->ili_last_lsn = lsn; |
---|
| 622 | + INODE_ITEM(lip)->ili_commit_seq = seq; |
---|
| 623 | + return xfs_inode_item_release(lip); |
---|
627 | 624 | } |
---|
628 | 625 | |
---|
629 | | -/* |
---|
630 | | - * This is the ops vector shared by all buf log items. |
---|
631 | | - */ |
---|
632 | 626 | static const struct xfs_item_ops xfs_inode_item_ops = { |
---|
633 | 627 | .iop_size = xfs_inode_item_size, |
---|
634 | 628 | .iop_format = xfs_inode_item_format, |
---|
635 | 629 | .iop_pin = xfs_inode_item_pin, |
---|
636 | 630 | .iop_unpin = xfs_inode_item_unpin, |
---|
637 | | - .iop_unlock = xfs_inode_item_unlock, |
---|
| 631 | + .iop_release = xfs_inode_item_release, |
---|
638 | 632 | .iop_committed = xfs_inode_item_committed, |
---|
639 | 633 | .iop_push = xfs_inode_item_push, |
---|
640 | | - .iop_committing = xfs_inode_item_committing, |
---|
641 | | - .iop_error = xfs_inode_item_error |
---|
| 634 | + .iop_committing = xfs_inode_item_committing, |
---|
642 | 635 | }; |
---|
643 | 636 | |
---|
644 | 637 | |
---|
.. | .. |
---|
653 | 646 | struct xfs_inode_log_item *iip; |
---|
654 | 647 | |
---|
655 | 648 | ASSERT(ip->i_itemp == NULL); |
---|
656 | | - iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); |
---|
| 649 | + iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_zone, |
---|
| 650 | + GFP_KERNEL | __GFP_NOFAIL); |
---|
657 | 651 | |
---|
658 | 652 | iip->ili_inode = ip; |
---|
| 653 | + spin_lock_init(&iip->ili_lock); |
---|
659 | 654 | xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE, |
---|
660 | 655 | &xfs_inode_item_ops); |
---|
661 | 656 | } |
---|
.. | .. |
---|
665 | 660 | */ |
---|
666 | 661 | void |
---|
667 | 662 | xfs_inode_item_destroy( |
---|
668 | | - xfs_inode_t *ip) |
---|
| 663 | + struct xfs_inode *ip) |
---|
669 | 664 | { |
---|
670 | | - kmem_free(ip->i_itemp->ili_item.li_lv_shadow); |
---|
671 | | - kmem_zone_free(xfs_ili_zone, ip->i_itemp); |
---|
| 665 | + struct xfs_inode_log_item *iip = ip->i_itemp; |
---|
| 666 | + |
---|
| 667 | + ASSERT(iip->ili_item.li_buf == NULL); |
---|
| 668 | + |
---|
| 669 | + ip->i_itemp = NULL; |
---|
| 670 | + kmem_free(iip->ili_item.li_lv_shadow); |
---|
| 671 | + kmem_cache_free(xfs_ili_zone, iip); |
---|
672 | 672 | } |
---|
673 | 673 | |
---|
674 | 674 | |
---|
675 | 675 | /* |
---|
676 | | - * This is the inode flushing I/O completion routine. It is called |
---|
677 | | - * from interrupt level when the buffer containing the inode is |
---|
678 | | - * flushed to disk. It is responsible for removing the inode item |
---|
679 | | - * from the AIL if it has not been re-logged, and unlocking the inode's |
---|
680 | | - * flush lock. |
---|
681 | | - * |
---|
682 | | - * To reduce AIL lock traffic as much as possible, we scan the buffer log item |
---|
683 | | - * list for other inodes that will run this function. We remove them from the |
---|
684 | | - * buffer list so we can process all the inode IO completions in one AIL lock |
---|
685 | | - * traversal. |
---|
| 676 | + * We only want to pull the item from the AIL if it is actually there |
---|
| 677 | + * and its location in the log has not changed since we started the |
---|
| 678 | + * flush. Thus, we only bother if the inode's lsn has not changed. |
---|
686 | 679 | */ |
---|
687 | | -void |
---|
688 | | -xfs_iflush_done( |
---|
689 | | - struct xfs_buf *bp, |
---|
690 | | - struct xfs_log_item *lip) |
---|
| 680 | +static void |
---|
| 681 | +xfs_iflush_ail_updates( |
---|
| 682 | + struct xfs_ail *ailp, |
---|
| 683 | + struct list_head *list) |
---|
691 | 684 | { |
---|
692 | | - struct xfs_inode_log_item *iip; |
---|
693 | | - struct xfs_log_item *blip, *n; |
---|
694 | | - struct xfs_ail *ailp = lip->li_ailp; |
---|
695 | | - int need_ail = 0; |
---|
696 | | - LIST_HEAD(tmp); |
---|
| 685 | + struct xfs_log_item *lip; |
---|
| 686 | + xfs_lsn_t tail_lsn = 0; |
---|
697 | 687 | |
---|
698 | | - /* |
---|
699 | | - * Scan the buffer IO completions for other inodes being completed and |
---|
700 | | - * attach them to the current inode log item. |
---|
701 | | - */ |
---|
| 688 | + /* this is an opencoded batch version of xfs_trans_ail_delete */ |
---|
| 689 | + spin_lock(&ailp->ail_lock); |
---|
| 690 | + list_for_each_entry(lip, list, li_bio_list) { |
---|
| 691 | + xfs_lsn_t lsn; |
---|
702 | 692 | |
---|
703 | | - list_add_tail(&lip->li_bio_list, &tmp); |
---|
704 | | - |
---|
705 | | - list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) { |
---|
706 | | - if (lip->li_cb != xfs_iflush_done) |
---|
| 693 | + clear_bit(XFS_LI_FAILED, &lip->li_flags); |
---|
| 694 | + if (INODE_ITEM(lip)->ili_flush_lsn != lip->li_lsn) |
---|
707 | 695 | continue; |
---|
708 | 696 | |
---|
709 | | - list_move_tail(&blip->li_bio_list, &tmp); |
---|
710 | | - /* |
---|
711 | | - * while we have the item, do the unlocked check for needing |
---|
712 | | - * the AIL lock. |
---|
713 | | - */ |
---|
714 | | - iip = INODE_ITEM(blip); |
---|
715 | | - if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || |
---|
716 | | - test_bit(XFS_LI_FAILED, &blip->li_flags)) |
---|
717 | | - need_ail++; |
---|
| 697 | + lsn = xfs_ail_delete_one(ailp, lip); |
---|
| 698 | + if (!tail_lsn && lsn) |
---|
| 699 | + tail_lsn = lsn; |
---|
718 | 700 | } |
---|
719 | | - |
---|
720 | | - /* make sure we capture the state of the initial inode. */ |
---|
721 | | - iip = INODE_ITEM(lip); |
---|
722 | | - if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) || |
---|
723 | | - test_bit(XFS_LI_FAILED, &lip->li_flags)) |
---|
724 | | - need_ail++; |
---|
725 | | - |
---|
726 | | - /* |
---|
727 | | - * We only want to pull the item from the AIL if it is |
---|
728 | | - * actually there and its location in the log has not |
---|
729 | | - * changed since we started the flush. Thus, we only bother |
---|
730 | | - * if the ili_logged flag is set and the inode's lsn has not |
---|
731 | | - * changed. First we check the lsn outside |
---|
732 | | - * the lock since it's cheaper, and then we recheck while |
---|
733 | | - * holding the lock before removing the inode from the AIL. |
---|
734 | | - */ |
---|
735 | | - if (need_ail) { |
---|
736 | | - bool mlip_changed = false; |
---|
737 | | - |
---|
738 | | - /* this is an opencoded batch version of xfs_trans_ail_delete */ |
---|
739 | | - spin_lock(&ailp->ail_lock); |
---|
740 | | - list_for_each_entry(blip, &tmp, li_bio_list) { |
---|
741 | | - if (INODE_ITEM(blip)->ili_logged && |
---|
742 | | - blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) |
---|
743 | | - mlip_changed |= xfs_ail_delete_one(ailp, blip); |
---|
744 | | - else { |
---|
745 | | - xfs_clear_li_failed(blip); |
---|
746 | | - } |
---|
747 | | - } |
---|
748 | | - |
---|
749 | | - if (mlip_changed) { |
---|
750 | | - if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount)) |
---|
751 | | - xlog_assign_tail_lsn_locked(ailp->ail_mount); |
---|
752 | | - if (list_empty(&ailp->ail_head)) |
---|
753 | | - wake_up_all(&ailp->ail_empty); |
---|
754 | | - } |
---|
755 | | - spin_unlock(&ailp->ail_lock); |
---|
756 | | - |
---|
757 | | - if (mlip_changed) |
---|
758 | | - xfs_log_space_wake(ailp->ail_mount); |
---|
759 | | - } |
---|
760 | | - |
---|
761 | | - /* |
---|
762 | | - * clean up and unlock the flush lock now we are done. We can clear the |
---|
763 | | - * ili_last_fields bits now that we know that the data corresponding to |
---|
764 | | - * them is safely on disk. |
---|
765 | | - */ |
---|
766 | | - list_for_each_entry_safe(blip, n, &tmp, li_bio_list) { |
---|
767 | | - list_del_init(&blip->li_bio_list); |
---|
768 | | - iip = INODE_ITEM(blip); |
---|
769 | | - iip->ili_logged = 0; |
---|
770 | | - iip->ili_last_fields = 0; |
---|
771 | | - xfs_ifunlock(iip->ili_inode); |
---|
772 | | - } |
---|
773 | | - list_del(&tmp); |
---|
| 701 | + xfs_ail_update_finish(ailp, tail_lsn); |
---|
774 | 702 | } |
---|
775 | 703 | |
---|
776 | 704 | /* |
---|
777 | | - * This is the inode flushing abort routine. It is called from xfs_iflush when |
---|
| 705 | + * Walk the list of inodes that have completed their IOs. If they are clean, |
---|
| 706 | + * remove them from the list and dissociate them from the buffer. Inodes that |
---|
| 707 | + * are still dirty remain linked to the buffer and on the list. Caller must |
---|
| 708 | + * handle them appropriately. |
---|
| 709 | + */ |
---|
| 710 | +static void |
---|
| 711 | +xfs_iflush_finish( |
---|
| 712 | + struct xfs_buf *bp, |
---|
| 713 | + struct list_head *list) |
---|
| 714 | +{ |
---|
| 715 | + struct xfs_log_item *lip, *n; |
---|
| 716 | + |
---|
| 717 | + list_for_each_entry_safe(lip, n, list, li_bio_list) { |
---|
| 718 | + struct xfs_inode_log_item *iip = INODE_ITEM(lip); |
---|
| 719 | + bool drop_buffer = false; |
---|
| 720 | + |
---|
| 721 | + spin_lock(&iip->ili_lock); |
---|
| 722 | + |
---|
| 723 | + /* |
---|
| 724 | + * Remove the reference to the cluster buffer if the inode is |
---|
| 725 | + * clean in memory and drop the buffer reference once we've |
---|
| 726 | + * dropped the locks we hold. |
---|
| 727 | + */ |
---|
| 728 | + ASSERT(iip->ili_item.li_buf == bp); |
---|
| 729 | + if (!iip->ili_fields) { |
---|
| 730 | + iip->ili_item.li_buf = NULL; |
---|
| 731 | + list_del_init(&lip->li_bio_list); |
---|
| 732 | + drop_buffer = true; |
---|
| 733 | + } |
---|
| 734 | + iip->ili_last_fields = 0; |
---|
| 735 | + iip->ili_flush_lsn = 0; |
---|
| 736 | + spin_unlock(&iip->ili_lock); |
---|
| 737 | + xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING); |
---|
| 738 | + if (drop_buffer) |
---|
| 739 | + xfs_buf_rele(bp); |
---|
| 740 | + } |
---|
| 741 | +} |
---|
| 742 | + |
---|
| 743 | +/* |
---|
| 744 | + * Inode buffer IO completion routine. It is responsible for removing inodes |
---|
| 745 | + * attached to the buffer from the AIL if they have not been re-logged and |
---|
| 746 | + * completing the inode flush. |
---|
| 747 | + */ |
---|
| 748 | +void |
---|
| 749 | +xfs_buf_inode_iodone( |
---|
| 750 | + struct xfs_buf *bp) |
---|
| 751 | +{ |
---|
| 752 | + struct xfs_log_item *lip, *n; |
---|
| 753 | + LIST_HEAD(flushed_inodes); |
---|
| 754 | + LIST_HEAD(ail_updates); |
---|
| 755 | + |
---|
| 756 | + /* |
---|
| 757 | + * Pull the attached inodes from the buffer one at a time and take the |
---|
| 758 | + * appropriate action on them. |
---|
| 759 | + */ |
---|
| 760 | + list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) { |
---|
| 761 | + struct xfs_inode_log_item *iip = INODE_ITEM(lip); |
---|
| 762 | + |
---|
| 763 | + if (xfs_iflags_test(iip->ili_inode, XFS_ISTALE)) { |
---|
| 764 | + xfs_iflush_abort(iip->ili_inode); |
---|
| 765 | + continue; |
---|
| 766 | + } |
---|
| 767 | + if (!iip->ili_last_fields) |
---|
| 768 | + continue; |
---|
| 769 | + |
---|
| 770 | + /* Do an unlocked check for needing the AIL lock. */ |
---|
| 771 | + if (iip->ili_flush_lsn == lip->li_lsn || |
---|
| 772 | + test_bit(XFS_LI_FAILED, &lip->li_flags)) |
---|
| 773 | + list_move_tail(&lip->li_bio_list, &ail_updates); |
---|
| 774 | + else |
---|
| 775 | + list_move_tail(&lip->li_bio_list, &flushed_inodes); |
---|
| 776 | + } |
---|
| 777 | + |
---|
| 778 | + if (!list_empty(&ail_updates)) { |
---|
| 779 | + xfs_iflush_ail_updates(bp->b_mount->m_ail, &ail_updates); |
---|
| 780 | + list_splice_tail(&ail_updates, &flushed_inodes); |
---|
| 781 | + } |
---|
| 782 | + |
---|
| 783 | + xfs_iflush_finish(bp, &flushed_inodes); |
---|
| 784 | + if (!list_empty(&flushed_inodes)) |
---|
| 785 | + list_splice_tail(&flushed_inodes, &bp->b_li_list); |
---|
| 786 | +} |
---|
| 787 | + |
---|
| 788 | +void |
---|
| 789 | +xfs_buf_inode_io_fail( |
---|
| 790 | + struct xfs_buf *bp) |
---|
| 791 | +{ |
---|
| 792 | + struct xfs_log_item *lip; |
---|
| 793 | + |
---|
| 794 | + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) |
---|
| 795 | + set_bit(XFS_LI_FAILED, &lip->li_flags); |
---|
| 796 | +} |
---|
| 797 | + |
---|
| 798 | +/* |
---|
| 799 | + * This is the inode flushing abort routine. It is called when |
---|
778 | 800 | * the filesystem is shutting down to clean up the inode state. It is |
---|
779 | 801 | * responsible for removing the inode item from the AIL if it has not been |
---|
780 | | - * re-logged, and unlocking the inode's flush lock. |
---|
| 802 | + * re-logged and clearing the inode's flush state. |
---|
781 | 803 | */ |
---|
782 | 804 | void |
---|
783 | 805 | xfs_iflush_abort( |
---|
784 | | - xfs_inode_t *ip, |
---|
785 | | - bool stale) |
---|
| 806 | + struct xfs_inode *ip) |
---|
786 | 807 | { |
---|
787 | | - xfs_inode_log_item_t *iip = ip->i_itemp; |
---|
| 808 | + struct xfs_inode_log_item *iip = ip->i_itemp; |
---|
| 809 | + struct xfs_buf *bp = NULL; |
---|
788 | 810 | |
---|
789 | 811 | if (iip) { |
---|
790 | | - if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)) { |
---|
791 | | - xfs_trans_ail_remove(&iip->ili_item, |
---|
792 | | - stale ? SHUTDOWN_LOG_IO_ERROR : |
---|
793 | | - SHUTDOWN_CORRUPT_INCORE); |
---|
794 | | - } |
---|
795 | | - iip->ili_logged = 0; |
---|
796 | 812 | /* |
---|
797 | | - * Clear the ili_last_fields bits now that we know that the |
---|
798 | | - * data corresponding to them is safely on disk. |
---|
| 813 | + * Clear the failed bit before removing the item from the AIL so |
---|
| 814 | + * xfs_trans_ail_delete() doesn't try to clear and release the |
---|
| 815 | + * buffer attached to the log item before we are done with it. |
---|
799 | 816 | */ |
---|
800 | | - iip->ili_last_fields = 0; |
---|
| 817 | + clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags); |
---|
| 818 | + xfs_trans_ail_delete(&iip->ili_item, 0); |
---|
| 819 | + |
---|
801 | 820 | /* |
---|
802 | 821 | * Clear the inode logging fields so no more flushes are |
---|
803 | 822 | * attempted. |
---|
804 | 823 | */ |
---|
| 824 | + spin_lock(&iip->ili_lock); |
---|
| 825 | + iip->ili_last_fields = 0; |
---|
805 | 826 | iip->ili_fields = 0; |
---|
806 | 827 | iip->ili_fsync_fields = 0; |
---|
| 828 | + iip->ili_flush_lsn = 0; |
---|
| 829 | + bp = iip->ili_item.li_buf; |
---|
| 830 | + iip->ili_item.li_buf = NULL; |
---|
| 831 | + list_del_init(&iip->ili_item.li_bio_list); |
---|
| 832 | + spin_unlock(&iip->ili_lock); |
---|
807 | 833 | } |
---|
808 | | - /* |
---|
809 | | - * Release the inode's flush lock since we're done with it. |
---|
810 | | - */ |
---|
811 | | - xfs_ifunlock(ip); |
---|
812 | | -} |
---|
813 | | - |
---|
814 | | -void |
---|
815 | | -xfs_istale_done( |
---|
816 | | - struct xfs_buf *bp, |
---|
817 | | - struct xfs_log_item *lip) |
---|
818 | | -{ |
---|
819 | | - xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true); |
---|
| 834 | + xfs_iflags_clear(ip, XFS_IFLUSHING); |
---|
| 835 | + if (bp) |
---|
| 836 | + xfs_buf_rele(bp); |
---|
820 | 837 | } |
---|
821 | 838 | |
---|
822 | 839 | /* |
---|
.. | .. |
---|
830 | 847 | { |
---|
831 | 848 | struct xfs_inode_log_format_32 *in_f32 = buf->i_addr; |
---|
832 | 849 | |
---|
833 | | - if (buf->i_len != sizeof(*in_f32)) |
---|
| 850 | + if (buf->i_len != sizeof(*in_f32)) { |
---|
| 851 | + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); |
---|
834 | 852 | return -EFSCORRUPTED; |
---|
| 853 | + } |
---|
835 | 854 | |
---|
836 | 855 | in_f->ilf_type = in_f32->ilf_type; |
---|
837 | 856 | in_f->ilf_size = in_f32->ilf_size; |
---|