..
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
-#include <linux/log2.h>
#include <linux/iversion.h>

#include "xfs.h"
..
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
-#include "xfs_attr_sf.h"
#include "xfs_attr.h"
#include "xfs_trans_space.h"
#include "xfs_trans.h"
..
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_filestream.h"
-#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
..
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
-#include "xfs_dir2_priv.h"

kmem_zone_t		*xfs_inode_zone;

..
 */
#define	XFS_ITRUNC_MAX_EXTENTS	2

-STATIC int xfs_iflush_int(struct xfs_inode *, struct xfs_buf *);
STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *);

..
xfs_get_extsz_hint(
	struct xfs_inode	*ip)
{
+	/*
+	 * No point in aligning allocations if we need to COW to actually
+	 * write to them.
+	 */
+	if (xfs_is_always_cow_inode(ip))
+		return 0;
	if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
		return ip->i_d.di_extsize;
	if (XFS_IS_REALTIME_INODE(ip))
..
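The always-COW check added above short-circuits the hint lookup entirely: if every write must COW, aligned allocations buy nothing. As a reading aid, a minimal sketch of the resulting precedence; the realtime and default returns describe the elided tail of xfs_get_extsz_hint() and are assumptions, not part of this hunk:

static xfs_extlen_t
extsz_hint_sketch(struct xfs_inode *ip)		/* illustrative only */
{
	if (xfs_is_always_cow_inode(ip))	/* COW defeats alignment */
		return 0;
	if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
		return ip->i_d.di_extsize;	/* per-inode hint wins */
	if (XFS_IS_REALTIME_INODE(ip))
		return ip->i_mount->m_sb.sb_rextsize;	/* assumed rt fallback */
	return 0;				/* assumed default */
}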
---|
{
	uint			lock_mode = XFS_ILOCK_SHARED;

-	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+	if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE &&
	    (ip->i_df.if_flags & XFS_IFEXTENTS) == 0)
		lock_mode = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lock_mode);
..
{
	uint			lock_mode = XFS_ILOCK_SHARED;

-	if (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE &&
+	if (ip->i_afp &&
+	    ip->i_afp->if_format == XFS_DINODE_FMT_BTREE &&
	    (ip->i_afp->if_flags & XFS_IFEXTENTS) == 0)
		lock_mode = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lock_mode);
..
 *
 * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
 *
- * mmap_sem locking order:
+ * mmap_lock locking order:
 *
- * i_rwsem -> page lock -> mmap_sem
- * mmap_sem -> i_mmap_lock -> page_lock
+ * i_rwsem -> page lock -> mmap_lock
+ * mmap_lock -> i_mmap_lock -> page_lock
 *
- * The difference in mmap_sem locking order mean that we cannot hold the
+ * The difference in mmap_lock locking order means that we cannot hold the
 * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
- * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
+ * fault in pages during copy in/out (for buffered IO) or require the mmap_lock
 * in get_user_pages() to map the user pages into the kernel address space for
 * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
- * page faults already hold the mmap_sem.
+ * page faults already hold the mmap_lock.
 *
 * Hence to serialise fully against both syscall and mmap based IO, we need to
 * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
..
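To make the ordering rules in the comment above concrete, here is a hedged sketch of the two entry points they describe. xfs_ilock() and the XFS_IOLOCK_*/XFS_MMAPLOCK_* flags are real; the helper functions themselves are purely illustrative:

/*
 * Syscall IO: i_rwsem (XFS_IOLOCK) is taken first, and faulting in user
 * pages during the copy may take mmap_lock, so i_mmap_lock must not be
 * held here.
 */
static void syscall_write_locking_sketch(struct xfs_inode *ip)
{
	xfs_ilock(ip, XFS_IOLOCK_EXCL);		/* i_rwsem */
	/* ... copy_from_user() may fault and take mmap_lock ... */
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
}

/*
 * Page fault: mmap_lock is already held by the fault handler, so only
 * i_mmap_lock (XFS_MMAPLOCK_*) can be taken safely here.
 */
static void page_fault_locking_sketch(struct xfs_inode *ip)
{
	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);	/* i_mmap_lock */
	/* ... install the page ... */
	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
}

Operations that must fence off both paths at once (truncate, hole punch) therefore take the IO lock and then the mmap lock, exactly the both-locks rule the comment spells out.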
---|
 */
static void
xfs_lock_inodes(
-	xfs_inode_t		**ips,
-	int			inodes,
-	uint			lock_mode)
+	struct xfs_inode	**ips,
+	int			inodes,
+	uint			lock_mode)
{
-	int			attempts = 0, i, j, try_lock;
-	xfs_log_item_t		*lp;
+	int			attempts = 0, i, j, try_lock;
+	struct xfs_log_item	*lp;

	/*
	 * Currently supports between 2 and 5 inodes with exclusive locking. We
	 * support an arbitrary depth of locking here, but absolute limits on
-	 * inodes depend on the the type of locking and the limits placed by
+	 * inodes depend on the type of locking and the limits placed by
	 * lockdep annotations in xfs_lock_inumorder. These are all checked by
	 * the asserts.
	 */
..
	 */
	if (!try_lock) {
		for (j = (i - 1); j >= 0 && !try_lock; j--) {
-			lp = (xfs_log_item_t *)ips[j]->i_itemp;
+			lp = &ips[j]->i_itemp->ili_item;
			if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
				try_lock++;
		}
..
	struct xfs_inode	*temp;
	uint			mode_temp;
	int			attempts = 0;
-	xfs_log_item_t		*lp;
+	struct xfs_log_item	*lp;

	ASSERT(hweight32(ip0_mode) == 1);
	ASSERT(hweight32(ip1_mode) == 1);
..
	 * the second lock. If we can't get it, we must release the first one
	 * and try again.
	 */
-	lp = (xfs_log_item_t *)ip0->i_itemp;
+	lp = &ip0->i_itemp->ili_item;
	if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
			xfs_iunlock(ip0, ip0_mode);
..
	} else {
		xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1));
	}
-}
-
-void
-__xfs_iflock(
-	struct xfs_inode	*ip)
-{
-	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
-	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
-
-	do {
-		prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-		if (xfs_isiflocked(ip))
-			io_schedule();
-	} while (!xfs_iflock_nowait(ip));
-
-	finish_wait(wq, &wait.wq_entry);
}

STATIC uint
..
	return error;
}

+/* Propagate di_flags from a parent inode to a child inode. */
+static void
+xfs_inode_inherit_flags(
+	struct xfs_inode	*ip,
+	const struct xfs_inode	*pip)
+{
+	unsigned int		di_flags = 0;
+	umode_t			mode = VFS_I(ip)->i_mode;
+
+	if (S_ISDIR(mode)) {
+		if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
+			di_flags |= XFS_DIFLAG_RTINHERIT;
+		if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+			di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+			ip->i_d.di_extsize = pip->i_d.di_extsize;
+		}
+		if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+			di_flags |= XFS_DIFLAG_PROJINHERIT;
+	} else if (S_ISREG(mode)) {
+		if ((pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT) &&
+		    xfs_sb_version_hasrealtime(&ip->i_mount->m_sb))
+			di_flags |= XFS_DIFLAG_REALTIME;
+		if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
+			di_flags |= XFS_DIFLAG_EXTSIZE;
+			ip->i_d.di_extsize = pip->i_d.di_extsize;
+		}
+	}
+	if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
+	    xfs_inherit_noatime)
+		di_flags |= XFS_DIFLAG_NOATIME;
+	if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
+	    xfs_inherit_nodump)
+		di_flags |= XFS_DIFLAG_NODUMP;
+	if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
+	    xfs_inherit_sync)
+		di_flags |= XFS_DIFLAG_SYNC;
+	if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
+	    xfs_inherit_nosymlinks)
+		di_flags |= XFS_DIFLAG_NOSYMLINKS;
+	if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
+	    xfs_inherit_nodefrag)
+		di_flags |= XFS_DIFLAG_NODEFRAG;
+	if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
+		di_flags |= XFS_DIFLAG_FILESTREAM;
+
+	ip->i_d.di_flags |= di_flags;
+}
+
+/* Propagate di_flags2 from a parent inode to a child inode. */
+static void
+xfs_inode_inherit_flags2(
+	struct xfs_inode	*ip,
+	const struct xfs_inode	*pip)
+{
+	if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
+		ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+		ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
+	}
+	if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+		ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX;
+}
+
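Taken together, the two helpers above reproduce the inheritance matrix that used to be open-coded in xfs_ialloc() (see the S_IFREG/S_IFDIR hunk further below, where they are called). One concrete expectation, written as an illustrative check rather than kernel code: a parent carrying XFS_DIFLAG_EXTSZINHERIT with di_extsize = 16 yields a child directory that re-inherits XFS_DIFLAG_EXTSZINHERIT and a child regular file that gets XFS_DIFLAG_EXTSIZE, with the extent size copied in both cases:

/* Illustrative check only -- exercising xfs_inode_inherit_flags(). */
static void check_extsize_inheritance(struct xfs_inode *file,
		struct xfs_inode *dir, const struct xfs_inode *parent)
{
	/* parent: XFS_DIFLAG_EXTSZINHERIT set, di_extsize == 16 */
	ASSERT(dir->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT);	/* dirs re-inherit */
	ASSERT(file->i_d.di_flags & XFS_DIFLAG_EXTSIZE);	/* files get the hint */
	ASSERT(file->i_d.di_extsize == parent->i_d.di_extsize);
	ASSERT(dir->i_d.di_extsize == parent->i_d.di_extsize);
}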
---|
/*
 * Allocate an inode on disk and return a copy of its in-core version.
 * The in-core inode is locked exclusively.  Set mode, nlink, and rdev
..
	xfs_buf_t	**ialloc_context,
	xfs_inode_t	**ipp)
{
+	struct inode	*dir = pip ? VFS_I(pip) : NULL;
	struct xfs_mount *mp = tp->t_mountp;
	xfs_ino_t	ino;
	xfs_inode_t	*ip;
..
		return error;
	ASSERT(ip != NULL);
	inode = VFS_I(ip);
-
-	/*
-	 * We always convert v1 inodes to v2 now - we only support filesystems
-	 * with >= v2 inode capability, so there is no reason for ever leaving
-	 * an inode in v1 format.
-	 */
-	if (ip->i_d.di_version == 1)
-		ip->i_d.di_version = 2;
-
-	inode->i_mode = mode;
	set_nlink(inode, nlink);
-	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
-	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
	inode->i_rdev = rdev;
-	xfs_set_projid(ip, prid);
+	ip->i_d.di_projid = prid;

-	if (pip && XFS_INHERIT_GID(pip)) {
-		ip->i_d.di_gid = pip->i_d.di_gid;
-		if ((VFS_I(pip)->i_mode & S_ISGID) && S_ISDIR(mode))
-			inode->i_mode |= S_ISGID;
+	if (dir && !(dir->i_mode & S_ISGID) &&
+	    (mp->m_flags & XFS_MOUNT_GRPID)) {
+		inode->i_uid = current_fsuid();
+		inode->i_gid = dir->i_gid;
+		inode->i_mode = mode;
+	} else {
+		inode_init_owner(inode, dir, mode);
	}

	/*
..
	 * ID or one of the supplementary group IDs, the S_ISGID bit is cleared
	 * (and only if the irix_sgid_inherit compatibility variable is set).
	 */
---|
-	if ((irix_sgid_inherit) &&
-	    (inode->i_mode & S_ISGID) &&
-	    (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid))))
+	if (irix_sgid_inherit &&
+	    (inode->i_mode & S_ISGID) && !in_group_p(inode->i_gid))
		inode->i_mode &= ~S_ISGID;

	ip->i_d.di_size = 0;
-	ip->i_d.di_nextents = 0;
+	ip->i_df.if_nextents = 0;
	ASSERT(ip->i_d.di_nblocks == 0);

	tv = current_time(inode);
..
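The ownership rewrite a couple of hunks above now has only two cases: on an XFS_MOUNT_GRPID (bsdgroups) mount against a non-setgid directory the new branch copies the parent directory's gid directly, and everything else is delegated to the VFS helper inode_init_owner(), which implements the usual setgid-directory semantics. A summary of what that implies, as an illustrative comment; the inode_init_owner() behaviour described here is standard VFS and not shown in this diff:

/*
 * Illustrative outcome table for the new ownership logic, not kernel code.
 *
 *   mount    parent dir    child gid     child S_ISGID (for dirs)
 *   normal   not setgid    fsgid         no   (inode_init_owner)
 *   normal   setgid        parent gid    yes  (inode_init_owner)
 *   grpid    not setgid    parent gid    no   (the new branch above)
 *   grpid    setgid        parent gid    yes  (inode_init_owner)
 */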
---|
	ip->i_d.di_dmstate = 0;
	ip->i_d.di_flags = 0;

-	if (ip->i_d.di_version == 3) {
+	if (xfs_sb_version_has_v3inode(&mp->m_sb)) {
		inode_set_iversion(inode, 1);
-		ip->i_d.di_flags2 = 0;
+		ip->i_d.di_flags2 = mp->m_ino_geo.new_diflags2;
		ip->i_d.di_cowextsize = 0;
-		ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
-		ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
+		ip->i_d.di_crtime = tv;
	}
-

	flags = XFS_ILOG_CORE;
	switch (mode & S_IFMT) {
..
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
-		ip->i_d.di_format = XFS_DINODE_FMT_DEV;
+		ip->i_df.if_format = XFS_DINODE_FMT_DEV;
		ip->i_df.if_flags = 0;
		flags |= XFS_ILOG_DEV;
		break;
	case S_IFREG:
	case S_IFDIR:
-		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
-			uint		di_flags = 0;
-
-			if (S_ISDIR(mode)) {
-				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
-					di_flags |= XFS_DIFLAG_RTINHERIT;
-				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
-					di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-					ip->i_d.di_extsize = pip->i_d.di_extsize;
-				}
-				if (pip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-					di_flags |= XFS_DIFLAG_PROJINHERIT;
-			} else if (S_ISREG(mode)) {
-				if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
-					di_flags |= XFS_DIFLAG_REALTIME;
-				if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
-					di_flags |= XFS_DIFLAG_EXTSIZE;
-					ip->i_d.di_extsize = pip->i_d.di_extsize;
-				}
-			}
-			if ((pip->i_d.di_flags & XFS_DIFLAG_NOATIME) &&
-			    xfs_inherit_noatime)
-				di_flags |= XFS_DIFLAG_NOATIME;
-			if ((pip->i_d.di_flags & XFS_DIFLAG_NODUMP) &&
-			    xfs_inherit_nodump)
-				di_flags |= XFS_DIFLAG_NODUMP;
-			if ((pip->i_d.di_flags & XFS_DIFLAG_SYNC) &&
-			    xfs_inherit_sync)
-				di_flags |= XFS_DIFLAG_SYNC;
-			if ((pip->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) &&
-			    xfs_inherit_nosymlinks)
-				di_flags |= XFS_DIFLAG_NOSYMLINKS;
-			if ((pip->i_d.di_flags & XFS_DIFLAG_NODEFRAG) &&
-			    xfs_inherit_nodefrag)
-				di_flags |= XFS_DIFLAG_NODEFRAG;
-			if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
-				di_flags |= XFS_DIFLAG_FILESTREAM;
-
-			ip->i_d.di_flags |= di_flags;
-		}
-		if (pip &&
-		    (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY) &&
-		    pip->i_d.di_version == 3 &&
-		    ip->i_d.di_version == 3) {
-			uint64_t	di_flags2 = 0;
-
-			if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
-				di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
-				ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
-			}
-			if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
-				di_flags2 |= XFS_DIFLAG2_DAX;
-
-			ip->i_d.di_flags2 |= di_flags2;
-		}
+		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY))
+			xfs_inode_inherit_flags(ip, pip);
+		if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY))
+			xfs_inode_inherit_flags2(ip, pip);
		/* FALLTHROUGH */
	case S_IFLNK:
-		ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
+		ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
		ip->i_df.if_flags = XFS_IFEXTENTS;
		ip->i_df.if_bytes = 0;
		ip->i_df.if_u1.if_root = NULL;
..
	default:
		ASSERT(0);
	}
-	/*
-	 * Attribute fork settings for new inode.
-	 */
-	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
-	ip->i_d.di_anextents = 0;

	/*
	 * Log the new values stuffed into the inode.
..
/*
 * Increment the link count on an inode & log the change.
 */
-static int
+static void
xfs_bumplink(
	xfs_trans_t *tp,
	xfs_inode_t *ip)
{
	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);

-	ASSERT(ip->i_d.di_version > 1);
	inc_nlink(VFS_I(ip));
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	return 0;
}

int
..
	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
-	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
-					xfs_kgid_to_gid(current_fsgid()), prid,
+	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
			&udqp, &gdqp, &pdqp);
	if (error)
..
	unlock_dp_on_error = false;

	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
-				   resblks ?
-					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
+				   resblks - XFS_IALLOC_SPACE_RES(mp));
	if (error) {
		ASSERT(error != -ENOSPC);
		goto out_trans_cancel;
..
	if (error)
		goto out_trans_cancel;

-		error = xfs_bumplink(tp, dp);
-		if (error)
-			goto out_trans_cancel;
+		xfs_bumplink(tp, dp);
	}

	/*
..
	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
-	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
-					xfs_kgid_to_gid(current_fsgid()), prid,
+	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
			&udqp, &gdqp, &pdqp);
	if (error)
..
	 * the tree quota mechanism could be circumvented.
	 */
	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
-		     (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
+		     tdp->i_d.di_projid != sip->i_d.di_projid)) {
		error = -EXDEV;
		goto error_return;
	}
..
	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);

-	error = xfs_bumplink(tp, sip);
-	if (error)
-		goto error_return;
+	xfs_bumplink(tp, sip);

	/*
	 * If this is a synchronous mount, make sure that the
..
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp = *tpp;
	xfs_fileoff_t		first_unmap_block;
-	xfs_fileoff_t		last_block;
	xfs_filblks_t		unmap_len;
	int			error = 0;
-	int			done = 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
..
	 * the end of the file (in a crash where the space is allocated
	 * but the inode size is not yet updated), simply remove any
	 * blocks which show up between the new EOF and the maximum
-	 * possible file size.  If the first block to be removed is
-	 * beyond the maximum file size (ie it is the same as last_block),
-	 * then there is nothing to do.
+	 * possible file size.
+	 *
+	 * We have to free all the blocks to the bmbt maximum offset, even if
+	 * the page cache can't scale that far.
	 */
	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
-	last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
-	if (first_unmap_block == last_block)
+	if (first_unmap_block >= XFS_MAX_FILEOFF) {
+		WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
		return 0;
+	}

-	ASSERT(first_unmap_block < last_block);
-	unmap_len = last_block - first_unmap_block + 1;
-	while (!done) {
+	unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
+	while (unmap_len > 0) {
		ASSERT(tp->t_firstblock == NULLFSBLOCK);
-		error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
-				    XFS_ITRUNC_MAX_EXTENTS, &done);
+		error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
+				      flags, XFS_ITRUNC_MAX_EXTENTS);
		if (error)
			goto out;

-		/*
-		 * Duplicate the transaction that has the permanent
-		 * reservation and commit the old transaction.
-		 */
+		/* free the just unmapped extents */
		error = xfs_defer_finish(&tp);
-		if (error)
-			goto out;
-
-		error = xfs_trans_roll_inode(&tp, ip);
		if (error)
			goto out;
	}
..
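The rewritten truncate loop above leans on two properties: __xfs_bunmapi() writes the remaining length back through its pointer argument, so the separate done flag disappears and the loop simply runs until unmap_len reaches zero; and xfs_defer_finish() rolls the transaction itself, which is why the explicit xfs_trans_roll_inode() call could be dropped. The shape of the new loop, restated as a sketch with the surrounding declarations assumed:

	/* Illustrative restatement of the new unmap loop above. */
	unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
	while (unmap_len > 0) {
		/* unmaps up to XFS_ITRUNC_MAX_EXTENTS, shrinking unmap_len */
		error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
				flags, XFS_ITRUNC_MAX_EXTENTS);
		if (error)
			break;
		/* finish deferred frees; rolls the transaction internally */
		error = xfs_defer_finish(&tp);
		if (error)
			break;
	}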
---|
	if (whichfork == XFS_DATA_FORK) {
		/* Remove all pending CoW reservations. */
		error = xfs_reflink_cancel_cow_blocks(ip, &tp,
-				first_unmap_block, last_block, true);
+				first_unmap_block, XFS_MAX_FILEOFF, true);
		if (error)
			goto out;

..
		return 0;
	/*
	 * If we can't get the iolock just skip truncating the blocks
-	 * past EOF because we could deadlock with the mmap_sem
+	 * past EOF because we could deadlock with the mmap_lock
	 * otherwise. We'll get another chance to drop them once the
	 * last reference to the inode is dropped, so we'll never leak
	 * blocks permanently.
..
	if (error)
		goto error_trans_cancel;

-	ASSERT(ip->i_d.di_nextents == 0);
+	ASSERT(ip->i_df.if_nextents == 0);

	error = xfs_trans_commit(tp);
	if (error)
..

	if (S_ISREG(VFS_I(ip)->i_mode) &&
	    (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
-	     ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
+	     ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
		truncate = 1;

	error = xfs_qm_dqattach(ip);
..
	}

	ASSERT(!ip->i_afp);
-	ASSERT(ip->i_d.di_anextents == 0);
	ASSERT(ip->i_d.di_forkoff == 0);

	/*
..
	}

	/*
+ * In-Core Unlinked List Lookups
+ * =============================
+ *
+ * Every inode is supposed to be reachable from some other piece of metadata
+ * with the exception of the root directory.  Inodes with a connection to a
+ * file descriptor but not linked from anywhere in the on-disk directory tree
+ * are collectively known as unlinked inodes, though the filesystem itself
+ * maintains links to these inodes so that on-disk metadata are consistent.
+ *
+ * XFS implements a per-AG on-disk hash table of unlinked inodes.  The AGI
+ * header contains a number of buckets that point to an inode, and each inode
+ * record has a pointer to the next inode in the hash chain.  This
+ * singly-linked list causes scaling problems in the iunlink remove function
+ * because we must walk that list to find the inode that points to the inode
+ * being removed from the unlinked hash bucket list.
+ *
+ * What if we modelled the unlinked list as a collection of records capturing
+ * "X.next_unlinked = Y" relations?  If we indexed those records on Y, we'd
+ * have a fast way to look up unlinked list predecessors, which avoids the
+ * slow list walk.  That's exactly what we do here (in-core) with a per-AG
+ * rhashtable.
+ *
+ * Because this is a backref cache, we ignore operational failures since the
+ * iunlink code can fall back to the slow bucket walk.  The only errors that
+ * should bubble out are for obviously incorrect situations.
+ *
+ * All users of the backref cache MUST hold the AGI buffer lock to serialize
+ * access or have otherwise provided for concurrency control.
+ */
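Before the implementation, a toy model of the backref idea may help: the cache stores the inverse edge keyed on the successor inode, so the predecessor lookup that the on-disk format forces into an O(n) bucket walk becomes a single keyed lookup. Plain C illustration only, not the rhashtable-based code that follows:

/*
 * Toy model: on disk we can only follow X -> X.next_unlinked.  The cache
 * records { iu_agino = X, iu_next_unlinked = Y } keyed on Y, so asking
 * "who points at Y?" no longer requires walking the bucket from its head.
 */
struct toy_backref {
	xfs_agino_t	iu_agino;		/* X */
	xfs_agino_t	iu_next_unlinked;	/* Y, the lookup key */
};

static xfs_agino_t toy_predecessor(const struct toy_backref *tbl,
		unsigned int nr, xfs_agino_t y)
{
	unsigned int	i;

	for (i = 0; i < nr; i++)	/* stand-in for the hash lookup */
		if (tbl[i].iu_next_unlinked == y)
			return tbl[i].iu_agino;
	return NULLAGINO;		/* no such relation cached */
}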
---|
+
+/* Capture a "X.next_unlinked = Y" relationship. */
+struct xfs_iunlink {
+	struct rhash_head	iu_rhash_head;
+	xfs_agino_t		iu_agino;		/* X */
+	xfs_agino_t		iu_next_unlinked;	/* Y */
+};
+
+/* Unlinked list predecessor lookup hashtable construction */
+static int
+xfs_iunlink_obj_cmpfn(
+	struct rhashtable_compare_arg	*arg,
+	const void			*obj)
+{
+	const xfs_agino_t	*key = arg->key;
+	const struct xfs_iunlink *iu = obj;
+
+	if (iu->iu_next_unlinked != *key)
+		return 1;
+	return 0;
+}
+
+static const struct rhashtable_params xfs_iunlink_hash_params = {
+	.min_size		= XFS_AGI_UNLINKED_BUCKETS,
+	.key_len		= sizeof(xfs_agino_t),
+	.key_offset		= offsetof(struct xfs_iunlink,
+					   iu_next_unlinked),
+	.head_offset		= offsetof(struct xfs_iunlink, iu_rhash_head),
+	.automatic_shrinking	= true,
+	.obj_cmpfn		= xfs_iunlink_obj_cmpfn,
+};
+
+/*
+ * Return X, where X.next_unlinked == @agino.  Returns NULLAGINO if no such
+ * relation is found.
+ */
+static xfs_agino_t
+xfs_iunlink_lookup_backref(
+	struct xfs_perag	*pag,
+	xfs_agino_t		agino)
+{
+	struct xfs_iunlink	*iu;
+
+	iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
+			xfs_iunlink_hash_params);
+	return iu ? iu->iu_agino : NULLAGINO;
+}
+
+/*
+ * Take ownership of an iunlink cache entry and insert it into the hash table.
+ * If successful, the entry will be owned by the cache; if not, it is freed.
+ * Either way, the caller does not own @iu after this call.
+ */
+static int
+xfs_iunlink_insert_backref(
+	struct xfs_perag	*pag,
+	struct xfs_iunlink	*iu)
+{
+	int			error;
+
+	error = rhashtable_insert_fast(&pag->pagi_unlinked_hash,
+			&iu->iu_rhash_head, xfs_iunlink_hash_params);
+	/*
+	 * Fail loudly if there already was an entry because that's a sign of
+	 * corruption of in-memory data.  Also fail loudly if we see an error
+	 * code we didn't anticipate from the rhashtable code.  Currently we
+	 * only anticipate ENOMEM.
+	 */
+	if (error) {
+		WARN(error != -ENOMEM, "iunlink cache insert error %d", error);
+		kmem_free(iu);
+	}
+	/*
+	 * Absorb any runtime errors that aren't a result of corruption because
+	 * this is a cache and we can always fall back to bucket list scanning.
+	 */
+	if (error != 0 && error != -EEXIST)
+		error = 0;
+	return error;
+}
+
+/* Remember that @prev_agino.next_unlinked = @this_agino. */
+static int
+xfs_iunlink_add_backref(
+	struct xfs_perag	*pag,
+	xfs_agino_t		prev_agino,
+	xfs_agino_t		this_agino)
+{
+	struct xfs_iunlink	*iu;
+
+	if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK))
+		return 0;
+
+	iu = kmem_zalloc(sizeof(*iu), KM_NOFS);
+	iu->iu_agino = prev_agino;
+	iu->iu_next_unlinked = this_agino;
+
+	return xfs_iunlink_insert_backref(pag, iu);
+}
+
+/*
+ * Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked.
+ * If @next_unlinked is NULLAGINO, we drop the backref and exit.  If there
+ * wasn't any such entry then we don't bother.
+ */
+static int
+xfs_iunlink_change_backref(
+	struct xfs_perag	*pag,
+	xfs_agino_t		agino,
+	xfs_agino_t		next_unlinked)
+{
+	struct xfs_iunlink	*iu;
+	int			error;
+
+	/* Look up the old entry; if there wasn't one then exit. */
+	iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino,
+			xfs_iunlink_hash_params);
+	if (!iu)
+		return 0;
+
+	/*
+	 * Remove the entry.  This shouldn't ever return an error, but if we
+	 * couldn't remove the old entry we don't want to add it again to the
+	 * hash table, and if the entry disappeared on us then someone's
+	 * violated the locking rules and we need to fail loudly.  Either way
+	 * we cannot remove the inode because internal state is or would have
+	 * been corrupt.
+	 */
+	error = rhashtable_remove_fast(&pag->pagi_unlinked_hash,
+			&iu->iu_rhash_head, xfs_iunlink_hash_params);
+	if (error)
+		return error;
+
+	/* If there is no new next entry just free our item and return. */
+	if (next_unlinked == NULLAGINO) {
+		kmem_free(iu);
+		return 0;
+	}
+
+	/* Update the entry and re-add it to the hash table. */
+	iu->iu_next_unlinked = next_unlinked;
+	return xfs_iunlink_insert_backref(pag, iu);
+}
+
+/* Set up the in-core predecessor structures. */
+int
+xfs_iunlink_init(
+	struct xfs_perag	*pag)
+{
+	return rhashtable_init(&pag->pagi_unlinked_hash,
+			&xfs_iunlink_hash_params);
+}
+
+/* Free the in-core predecessor structures. */
+static void
+xfs_iunlink_free_item(
+	void			*ptr,
+	void			*arg)
+{
+	struct xfs_iunlink	*iu = ptr;
+	bool			*freed_anything = arg;
+
+	*freed_anything = true;
+	kmem_free(iu);
+}
+
+void
+xfs_iunlink_destroy(
+	struct xfs_perag	*pag)
+{
+	bool			freed_anything = false;
+
+	rhashtable_free_and_destroy(&pag->pagi_unlinked_hash,
+			xfs_iunlink_free_item, &freed_anything);
+
+	ASSERT(freed_anything == false || XFS_FORCED_SHUTDOWN(pag->pag_mount));
+}
+
---|
+/*
+ * Point the AGI unlinked bucket at an inode and log the results.  The caller
+ * is responsible for validating the old value.
+ */
+STATIC int
+xfs_iunlink_update_bucket(
+	struct xfs_trans	*tp,
+	xfs_agnumber_t		agno,
+	struct xfs_buf		*agibp,
+	unsigned int		bucket_index,
+	xfs_agino_t		new_agino)
+{
+	struct xfs_agi		*agi = agibp->b_addr;
+	xfs_agino_t		old_value;
+	int			offset;
+
+	ASSERT(xfs_verify_agino_or_null(tp->t_mountp, agno, new_agino));
+
+	old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+	trace_xfs_iunlink_update_bucket(tp->t_mountp, agno, bucket_index,
+			old_value, new_agino);
+
+	/*
+	 * We should never find the head of the list already set to the value
+	 * passed in because either we're adding or removing ourselves from the
+	 * head of the list.
+	 */
+	if (old_value == new_agino) {
+		xfs_buf_mark_corrupt(agibp);
+		return -EFSCORRUPTED;
+	}
+
+	agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
+	offset = offsetof(struct xfs_agi, agi_unlinked) +
+			(sizeof(xfs_agino_t) * bucket_index);
+	xfs_trans_log_buf(tp, agibp, offset, offset + sizeof(xfs_agino_t) - 1);
+	return 0;
+}
+
+/* Set an on-disk inode's next_unlinked pointer. */
+STATIC void
+xfs_iunlink_update_dinode(
+	struct xfs_trans	*tp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		agino,
+	struct xfs_buf		*ibp,
+	struct xfs_dinode	*dip,
+	struct xfs_imap		*imap,
+	xfs_agino_t		next_agino)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	int			offset;
+
+	ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
+
+	trace_xfs_iunlink_update_dinode(mp, agno, agino,
+			be32_to_cpu(dip->di_next_unlinked), next_agino);
+
+	dip->di_next_unlinked = cpu_to_be32(next_agino);
+	offset = imap->im_boffset +
+			offsetof(struct xfs_dinode, di_next_unlinked);
+
+	/* need to recalc the inode CRC if appropriate */
+	xfs_dinode_calc_crc(mp, dip);
+	xfs_trans_inode_buf(tp, ibp);
+	xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1);
+}
+
+/* Set an in-core inode's unlinked pointer and return the old value. */
+STATIC int
+xfs_iunlink_update_inode(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		next_agino,
+	xfs_agino_t		*old_next_agino)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_dinode	*dip;
+	struct xfs_buf		*ibp;
+	xfs_agino_t		old_value;
+	int			error;
+
+	ASSERT(xfs_verify_agino_or_null(mp, agno, next_agino));
+
+	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp, 0);
+	if (error)
+		return error;
+
+	/* Make sure the old pointer isn't garbage. */
+	old_value = be32_to_cpu(dip->di_next_unlinked);
+	if (!xfs_verify_agino_or_null(mp, agno, old_value)) {
+		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
+				sizeof(*dip), __this_address);
+		error = -EFSCORRUPTED;
+		goto out;
+	}
+
+	/*
+	 * Since we're updating a linked list, we should never find that the
+	 * current pointer is the same as the new value, unless we're
+	 * terminating the list.
+	 */
+	*old_next_agino = old_value;
+	if (old_value == next_agino) {
+		if (next_agino != NULLAGINO) {
+			xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
+					dip, sizeof(*dip), __this_address);
+			error = -EFSCORRUPTED;
+		}
+		goto out;
+	}
+
+	/* Ok, update the new pointer. */
+	xfs_iunlink_update_dinode(tp, agno, XFS_INO_TO_AGINO(mp, ip->i_ino),
+			ibp, dip, &ip->i_imap, next_agino);
+	return 0;
+out:
+	xfs_trans_brelse(tp, ibp);
+	return error;
+}
+
+/*
 * This is called when the inode's link count has gone to 0 or we are creating
 * a tmpfile via O_TMPFILE. The inode @ip must have nlink == 0.
 *
..
 */
STATIC int
xfs_iunlink(
-	struct xfs_trans *tp,
-	struct xfs_inode *ip)
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
{
-	xfs_mount_t	*mp = tp->t_mountp;
-	xfs_agi_t	*agi;
-	xfs_dinode_t	*dip;
-	xfs_buf_t	*agibp;
-	xfs_buf_t	*ibp;
-	xfs_agino_t	agino;
-	short		bucket_index;
-	int		offset;
-	int		error;
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_agi		*agi;
+	struct xfs_buf		*agibp;
+	xfs_agino_t		next_agino;
+	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+	short			bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
+	int			error;

	ASSERT(VFS_I(ip)->i_nlink == 0);
	ASSERT(VFS_I(ip)->i_mode != 0);
+	trace_xfs_iunlink(ip);

-	/*
-	 * Get the agi buffer first.  It ensures lock ordering
-	 * on the list.
-	 */
-	error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
+	/* Get the agi buffer first.  It ensures lock ordering on the list. */
+	error = xfs_read_agi(mp, tp, agno, &agibp);
	if (error)
		return error;
-	agi = XFS_BUF_TO_AGI(agibp);
+	agi = agibp->b_addr;

	/*
-	 * Get the index into the agi hash table for the
-	 * list this inode will go on.
+	 * Get the index into the agi hash table for the list this inode will
+	 * go on.  Make sure the pointer isn't garbage and that this inode
+	 * isn't already on the list.
	 */
-	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
-	ASSERT(agino != 0);
-	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
-	ASSERT(agi->agi_unlinked[bucket_index]);
-	ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
+	next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+	if (next_agino == agino ||
+	    !xfs_verify_agino_or_null(mp, agno, next_agino)) {
+		xfs_buf_mark_corrupt(agibp);
+		return -EFSCORRUPTED;
+	}

-	if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
+	if (next_agino != NULLAGINO) {
+		xfs_agino_t		old_agino;
+
		/*
-		 * There is already another inode in the bucket we need
-		 * to add ourselves to.  Add us at the front of the list.
-		 * Here we put the head pointer into our next pointer,
-		 * and then we fall through to point the head at us.
+		 * There is already another inode in the bucket, so point this
+		 * inode to the current head of the list.
		 */
-		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
-				       0, 0);
+		error = xfs_iunlink_update_inode(tp, ip, agno, next_agino,
+				&old_agino);
+		if (error)
+			return error;
+		ASSERT(old_agino == NULLAGINO);
+
+		/*
+		 * agino has been unlinked, add a backref from the next inode
+		 * back to agino.
+		 */
+		error = xfs_iunlink_add_backref(agibp->b_pag, agino, next_agino);
+		if (error)
+			return error;
+	}
+
+	/* Point the head of the list to point to this inode. */
+	return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, agino);
+}
+
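Summing up the rewritten xfs_iunlink() above: pushing inode A onto bucket b is now three explicit steps plus a cache update. This sketch of the sequence is a restatement of the calls above, not additional code:

/*
 * Before:  AGI[b] -> H -> ... ;  A.next_unlinked == NULLAGINO
 * Step 1:  A.next_unlinked = H    (xfs_iunlink_update_inode)
 * Step 2:  cache: pred(H) = A     (xfs_iunlink_add_backref)
 * Step 3:  AGI[b] = A             (xfs_iunlink_update_bucket)
 * After:   AGI[b] -> A -> H -> ...
 */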
---|
+/* Return the imap, dinode pointer, and buffer for an inode. */
+STATIC int
+xfs_iunlink_map_ino(
+	struct xfs_trans	*tp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		agino,
+	struct xfs_imap		*imap,
+	struct xfs_dinode	**dipp,
+	struct xfs_buf		**bpp)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	int			error;
+
+	imap->im_blkno = 0;
+	error = xfs_imap(mp, tp, XFS_AGINO_TO_INO(mp, agno, agino), imap, 0);
+	if (error) {
+		xfs_warn(mp, "%s: xfs_imap returned error %d.",
+				__func__, error);
+		return error;
+	}
+
+	error = xfs_imap_to_bp(mp, tp, imap, dipp, bpp, 0);
+	if (error) {
+		xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
+				__func__, error);
+		return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the unlinked chain from @head_agino until we find the inode that
+ * points to @target_agino.  Return the inode number, map, dinode pointer,
+ * and inode cluster buffer of that inode as @agino, @imap, @dipp, and @bpp.
+ *
+ * @tp, @pag, @head_agino, and @target_agino are input parameters.
+ * @agino, @imap, @dipp, and @bpp are all output parameters.
+ *
+ * Do not call this function if @target_agino is the head of the list.
+ */
+STATIC int
+xfs_iunlink_map_prev(
+	struct xfs_trans	*tp,
+	xfs_agnumber_t		agno,
+	xfs_agino_t		head_agino,
+	xfs_agino_t		target_agino,
+	xfs_agino_t		*agino,
+	struct xfs_imap		*imap,
+	struct xfs_dinode	**dipp,
+	struct xfs_buf		**bpp,
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	xfs_agino_t		next_agino;
+	int			error;
+
+	ASSERT(head_agino != target_agino);
+	*bpp = NULL;
+
+	/* See if our backref cache can find it faster. */
+	*agino = xfs_iunlink_lookup_backref(pag, target_agino);
+	if (*agino != NULLAGINO) {
+		error = xfs_iunlink_map_ino(tp, agno, *agino, imap, dipp, bpp);
		if (error)
			return error;

-		ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
-		dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
-		offset = ip->i_imap.im_boffset +
-			offsetof(xfs_dinode_t, di_next_unlinked);
+		if (be32_to_cpu((*dipp)->di_next_unlinked) == target_agino)
+			return 0;

-		/* need to recalc the inode CRC if appropriate */
-		xfs_dinode_calc_crc(mp, dip);
-
-		xfs_trans_inode_buf(tp, ibp);
-		xfs_trans_log_buf(tp, ibp, offset,
-				  (offset + sizeof(xfs_agino_t) - 1));
-		xfs_inobp_check(mp, ibp);
+		/*
+		 * If we get here the cache contents were corrupt, so drop the
+		 * buffer and fall back to walking the bucket list.
+		 */
+		xfs_trans_brelse(tp, *bpp);
+		*bpp = NULL;
+		WARN_ON_ONCE(1);
	}

-	/*
-	 * Point the bucket head pointer at the inode being inserted.
-	 */
-	ASSERT(agino != 0);
-	agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
-	offset = offsetof(xfs_agi_t, agi_unlinked) +
-		(sizeof(xfs_agino_t) * bucket_index);
-	xfs_trans_log_buf(tp, agibp, offset,
-			  (offset + sizeof(xfs_agino_t) - 1));
+	trace_xfs_iunlink_map_prev_fallback(mp, agno);
+
+	/* Otherwise, walk the entire bucket until we find it. */
+	next_agino = head_agino;
+	while (next_agino != target_agino) {
+		xfs_agino_t	unlinked_agino;
+
+		if (*bpp)
+			xfs_trans_brelse(tp, *bpp);
+
+		*agino = next_agino;
+		error = xfs_iunlink_map_ino(tp, agno, next_agino, imap, dipp,
+				bpp);
+		if (error)
+			return error;
+
+		unlinked_agino = be32_to_cpu((*dipp)->di_next_unlinked);
+		/*
+		 * Make sure this pointer is valid and isn't an obvious
+		 * infinite loop.
+		 */
+		if (!xfs_verify_agino(mp, agno, unlinked_agino) ||
+		    next_agino == unlinked_agino) {
+			XFS_CORRUPTION_ERROR(__func__,
+					XFS_ERRLEVEL_LOW, mp,
+					*dipp, sizeof(**dipp));
+			error = -EFSCORRUPTED;
+			return error;
+		}
+		next_agino = unlinked_agino;
+	}
+
	return 0;
}

..
---|
 */
STATIC int
xfs_iunlink_remove(
-	xfs_trans_t		*tp,
-	xfs_inode_t		*ip)
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
{
-	xfs_ino_t	next_ino;
-	xfs_mount_t	*mp;
-	xfs_agi_t	*agi;
-	xfs_dinode_t	*dip;
-	xfs_buf_t	*agibp;
-	xfs_buf_t	*ibp;
-	xfs_agnumber_t	agno;
-	xfs_agino_t	agino;
-	xfs_agino_t	next_agino;
-	xfs_buf_t	*last_ibp;
-	xfs_dinode_t	*last_dip = NULL;
-	short		bucket_index;
-	int		offset, last_offset = 0;
-	int		error;
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_agi		*agi;
+	struct xfs_buf		*agibp;
+	struct xfs_buf		*last_ibp;
+	struct xfs_dinode	*last_dip = NULL;
+	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+	xfs_agino_t		next_agino;
+	xfs_agino_t		head_agino;
+	short			bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
+	int			error;

-	mp = tp->t_mountp;
-	agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	trace_xfs_iunlink_remove(ip);

-	/*
-	 * Get the agi buffer first.  It ensures lock ordering
-	 * on the list.
-	 */
+	/* Get the agi buffer first.  It ensures lock ordering on the list. */
	error = xfs_read_agi(mp, tp, agno, &agibp);
	if (error)
		return error;
-
-	agi = XFS_BUF_TO_AGI(agibp);
+	agi = agibp->b_addr;

	/*
-	 * Get the index into the agi hash table for the
-	 * list this inode will go on.
+	 * Get the index into the agi hash table for the list this inode will
+	 * go on.  Make sure the head pointer isn't garbage.
	 */
-	agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
-	if (!xfs_verify_agino(mp, agno, agino))
-		return -EFSCORRUPTED;
-	bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
-	if (!xfs_verify_agino(mp, agno,
-			be32_to_cpu(agi->agi_unlinked[bucket_index]))) {
+	head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+	if (!xfs_verify_agino(mp, agno, head_agino)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
				agi, sizeof(*agi));
		return -EFSCORRUPTED;
	}

-	if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
-		/*
-		 * We're at the head of the list.  Get the inode's on-disk
-		 * buffer to see if there is anyone after us on the list.
-		 * Only modify our next pointer if it is not already NULLAGINO.
-		 * This saves us the overhead of dealing with the buffer when
-		 * there is no need to change it.
-		 */
-		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
-				       0, 0);
-		if (error) {
-			xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
-				__func__, error);
+	/*
+	 * Set our inode's next_unlinked pointer to NULL and then return
+	 * the old pointer value so that we can update whatever was previous
+	 * to us in the list to point to whatever was next in the list.
+	 */
+	error = xfs_iunlink_update_inode(tp, ip, agno, NULLAGINO, &next_agino);
+	if (error)
+		return error;
+
+	/*
+	 * If there was a backref pointing from the next inode back to this
+	 * one, remove it because we've removed this inode from the list.
+	 *
+	 * Later, if this inode was in the middle of the list we'll update
+	 * this inode's backref to point from the next inode.
+	 */
+	if (next_agino != NULLAGINO) {
+		error = xfs_iunlink_change_backref(agibp->b_pag, next_agino,
+				NULLAGINO);
+		if (error)
			return error;
-		}
-		next_agino = be32_to_cpu(dip->di_next_unlinked);
-		ASSERT(next_agino != 0);
-		if (next_agino != NULLAGINO) {
-			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
-			offset = ip->i_imap.im_boffset +
-				offsetof(xfs_dinode_t, di_next_unlinked);
-
-			/* need to recalc the inode CRC if appropriate */
-			xfs_dinode_calc_crc(mp, dip);
-
-			xfs_trans_inode_buf(tp, ibp);
-			xfs_trans_log_buf(tp, ibp, offset,
-					  (offset + sizeof(xfs_agino_t) - 1));
-			xfs_inobp_check(mp, ibp);
-		} else {
-			xfs_trans_brelse(tp, ibp);
-		}
-		/*
-		 * Point the bucket head pointer at the next inode.
-		 */
-		ASSERT(next_agino != 0);
-		ASSERT(next_agino != agino);
-		agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
-		offset = offsetof(xfs_agi_t, agi_unlinked) +
-			(sizeof(xfs_agino_t) * bucket_index);
-		xfs_trans_log_buf(tp, agibp, offset,
-				  (offset + sizeof(xfs_agino_t) - 1));
-	} else {
-		/*
-		 * We need to search the list for the inode being freed.
-		 */
-		next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
-		last_ibp = NULL;
-		while (next_agino != agino) {
-			struct xfs_imap	imap;
-
-			if (last_ibp)
-				xfs_trans_brelse(tp, last_ibp);
-
-			imap.im_blkno = 0;
-			next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
-
-			error = xfs_imap(mp, tp, next_ino, &imap, 0);
-			if (error) {
-				xfs_warn(mp,
-					"%s: xfs_imap returned error %d.",
-					__func__, error);
-				return error;
-			}
-
-			error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
-					       &last_ibp, 0, 0);
-			if (error) {
-				xfs_warn(mp,
-					"%s: xfs_imap_to_bp returned error %d.",
-					__func__, error);
-				return error;
-			}
-
-			last_offset = imap.im_boffset;
-			next_agino = be32_to_cpu(last_dip->di_next_unlinked);
-			if (!xfs_verify_agino(mp, agno, next_agino)) {
-				XFS_CORRUPTION_ERROR(__func__,
-						XFS_ERRLEVEL_LOW, mp,
-						last_dip, sizeof(*last_dip));
-				return -EFSCORRUPTED;
-			}
-		}
-
-		/*
-		 * Now last_ibp points to the buffer previous to us on the
-		 * unlinked list.  Pull us from the list.
-		 */
-		error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
-				       0, 0);
-		if (error) {
-			xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
-				__func__, error);
-			return error;
-		}
-		next_agino = be32_to_cpu(dip->di_next_unlinked);
-		ASSERT(next_agino != 0);
-		ASSERT(next_agino != agino);
-		if (next_agino != NULLAGINO) {
-			dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
-			offset = ip->i_imap.im_boffset +
-				offsetof(xfs_dinode_t, di_next_unlinked);
-
-			/* need to recalc the inode CRC if appropriate */
-			xfs_dinode_calc_crc(mp, dip);
-
-			xfs_trans_inode_buf(tp, ibp);
-			xfs_trans_log_buf(tp, ibp, offset,
-					  (offset + sizeof(xfs_agino_t) - 1));
-			xfs_inobp_check(mp, ibp);
-		} else {
-			xfs_trans_brelse(tp, ibp);
-		}
-		/*
-		 * Point the previous inode on the list to the next inode.
-		 */
-		last_dip->di_next_unlinked = cpu_to_be32(next_agino);
-		ASSERT(next_agino != 0);
-		offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
---|
2180 | | - |
---|
2181 | | - /* need to recalc the inode CRC if appropriate */ |
---|
2182 | | - xfs_dinode_calc_crc(mp, last_dip); |
---|
2183 | | - |
---|
2184 | | - xfs_trans_inode_buf(tp, last_ibp); |
---|
2185 | | - xfs_trans_log_buf(tp, last_ibp, offset, |
---|
2186 | | - (offset + sizeof(xfs_agino_t) - 1)); |
---|
2187 | | - xfs_inobp_check(mp, last_ibp); |
---|
2188 | 2462 | } |
---|
2189 | | - return 0; |
---|
| 2463 | + |
---|
| 2464 | + if (head_agino != agino) { |
---|
| 2465 | + struct xfs_imap imap; |
---|
| 2466 | + xfs_agino_t prev_agino; |
---|
| 2467 | + |
---|
| 2468 | + /* We need to search the list for the inode being freed. */ |
---|
| 2469 | + error = xfs_iunlink_map_prev(tp, agno, head_agino, agino, |
---|
| 2470 | + &prev_agino, &imap, &last_dip, &last_ibp, |
---|
| 2471 | + agibp->b_pag); |
---|
| 2472 | + if (error) |
---|
| 2473 | + return error; |
---|
| 2474 | + |
---|
| 2475 | + /* Point the previous inode on the list to the next inode. */ |
---|
| 2476 | + xfs_iunlink_update_dinode(tp, agno, prev_agino, last_ibp, |
---|
| 2477 | + last_dip, &imap, next_agino); |
---|
| 2478 | + |
---|
| 2479 | + /* |
---|
| 2480 | + * Now we deal with the backref for this inode. If this inode |
---|
| 2481 | + * pointed at a real inode, change the backref that pointed to |
---|
| 2482 | + * us to point to our old next. If this inode was the end of |
---|
| 2483 | + * the list, delete the backref that pointed to us. Note that |
---|
| 2484 | + * change_backref takes care of deleting the backref if |
---|
| 2485 | + * next_agino is NULLAGINO. |
---|
| 2486 | + */ |
---|
| 2487 | + return xfs_iunlink_change_backref(agibp->b_pag, agino, |
---|
| 2488 | + next_agino); |
---|
| 2489 | + } |
---|
| 2490 | + |
---|
| 2491 | + /* Point the head of the list to the next unlinked inode. */ |
---|
| 2492 | + return xfs_iunlink_update_bucket(tp, agno, agibp, bucket_index, |
---|
| 2493 | + next_agino); |
---|
| 2494 | +} |
---|
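The rewritten removal path above has two cases: if the inode sits at the head of its bucket, xfs_iunlink_update_bucket() repoints the AGI head; otherwise xfs_iunlink_map_prev() finds the predecessor (using the in-memory backref hash as a hint) and xfs_iunlink_update_dinode() splices around the departing inode. Below is a minimal userspace sketch of the same list surgery. Everything in it is illustrative: a toy bucket[] array stands in for the on-disk AGI, plain integers stand in for inode numbers, and a linear walk stands in where the kernel consults the per-AG backref hash to avoid exactly that walk.

#include <assert.h>
#include <stdio.h>

#define NBUCKETS	64	/* stands in for XFS_AGI_UNLINKED_BUCKETS */
#define NULLAGINO	(~0u)	/* list terminator, as in the real format */

static unsigned bucket[NBUCKETS];	/* AGI head pointers */
static unsigned next_unlinked[1024];	/* per-inode di_next_unlinked */

static void iunlink_add(unsigned agino)
{
	unsigned b = agino % NBUCKETS;

	/* new entries are always pushed on the head of the bucket */
	next_unlinked[agino] = bucket[b];
	bucket[b] = agino;
}

static void iunlink_remove(unsigned agino)
{
	unsigned b = agino % NBUCKETS;
	unsigned next = next_unlinked[agino];

	next_unlinked[agino] = NULLAGINO;
	if (bucket[b] == agino) {
		bucket[b] = next;	/* head: repoint the bucket */
		return;
	}
	/* middle/tail: find the predecessor and splice around us */
	for (unsigned prev = bucket[b]; prev != NULLAGINO;
	     prev = next_unlinked[prev]) {
		if (next_unlinked[prev] == agino) {
			next_unlinked[prev] = next;
			return;
		}
	}
	assert(0);	/* inode was not on the list: corruption */
}

int main(void)
{
	for (unsigned i = 0; i < NBUCKETS; i++)
		bucket[i] = NULLAGINO;
	iunlink_add(7);
	iunlink_add(71);	/* 71 % 64 == 7: same bucket as 7 */
	iunlink_add(135);	/* 135 % 64 == 7 as well */
	iunlink_remove(71);	/* middle of the list */
	iunlink_remove(135);	/* head of the list */
	printf("bucket 7 head: %u\n", bucket[7]);	/* prints 7 */
	return 0;
}

The kernel keeps the backref hash current as inodes come and go, so the predecessor lookup stays O(1) even for long buckets.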
| 2495 | + |
---|
| 2496 | +/* |
---|
| 2497 | + * Look up the inode number specified and if it is not already marked XFS_ISTALE |
---|
| 2498 | + * mark it stale. We should only find clean inodes in this lookup that aren't |
---|
| 2499 | + * already stale. |
---|
| 2500 | + */ |
---|
| 2501 | +static void |
---|
| 2502 | +xfs_ifree_mark_inode_stale( |
---|
| 2503 | + struct xfs_buf *bp, |
---|
| 2504 | + struct xfs_inode *free_ip, |
---|
| 2505 | + xfs_ino_t inum) |
---|
| 2506 | +{ |
---|
| 2507 | + struct xfs_mount *mp = bp->b_mount; |
---|
| 2508 | + struct xfs_perag *pag = bp->b_pag; |
---|
| 2509 | + struct xfs_inode_log_item *iip; |
---|
| 2510 | + struct xfs_inode *ip; |
---|
| 2511 | + |
---|
| 2512 | +retry: |
---|
| 2513 | + rcu_read_lock(); |
---|
| 2514 | + ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum)); |
---|
| 2515 | + |
---|
| 2516 | + /* Inode not in memory, nothing to do */ |
---|
| 2517 | + if (!ip) { |
---|
| 2518 | + rcu_read_unlock(); |
---|
| 2519 | + return; |
---|
| 2520 | + } |
---|
| 2521 | + |
---|
| 2522 | + /* |
---|
| 2523 | + * Because this is an RCU-protected lookup, we could find a recently |
---|
| 2524 | + * freed or even reallocated inode during the lookup. We need to check |
---|
| 2525 | + * under the i_flags_lock for a valid inode here. Skip it if it is not |
---|
| 2526 | + * valid, the wrong inode or stale. |
---|
| 2527 | + */ |
---|
| 2528 | + spin_lock(&ip->i_flags_lock); |
---|
| 2529 | + if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) |
---|
| 2530 | + goto out_iflags_unlock; |
---|
| 2531 | + |
---|
| 2532 | + /* |
---|
| 2533 | + * Don't try to lock/unlock the current inode, but we _cannot_ skip the |
---|
| 2534 | + * other inodes that we did not find in the list attached to the buffer |
---|
| 2535 | + * and are not already marked stale. If we can't lock it, back off and |
---|
| 2536 | + * retry. |
---|
| 2537 | + */ |
---|
| 2538 | + if (ip != free_ip) { |
---|
| 2539 | + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { |
---|
| 2540 | + spin_unlock(&ip->i_flags_lock); |
---|
| 2541 | + rcu_read_unlock(); |
---|
| 2542 | + delay(1); |
---|
| 2543 | + goto retry; |
---|
| 2544 | + } |
---|
| 2545 | + } |
---|
| 2546 | + ip->i_flags |= XFS_ISTALE; |
---|
| 2547 | + |
---|
| 2548 | + /* |
---|
| 2549 | + * If the inode is flushing, it is already attached to the buffer. All |
---|
| 2550 | + * we needed to do here is mark the inode stale so buffer IO completion |
---|
| 2551 | + * will remove it from the AIL. |
---|
| 2552 | + */ |
---|
| 2553 | + iip = ip->i_itemp; |
---|
| 2554 | + if (__xfs_iflags_test(ip, XFS_IFLUSHING)) { |
---|
| 2555 | + ASSERT(!list_empty(&iip->ili_item.li_bio_list)); |
---|
| 2556 | + ASSERT(iip->ili_last_fields); |
---|
| 2557 | + goto out_iunlock; |
---|
| 2558 | + } |
---|
| 2559 | + |
---|
| 2560 | + /* |
---|
| 2561 | + * Inodes not attached to the buffer can be released immediately. |
---|
| 2562 | + * Everything else has to go through xfs_iflush_abort() on journal |
---|
| 2563 | + * commit as the flock synchronises removal of the inode from the |
---|
| 2564 | + * cluster buffer against inode reclaim. |
---|
| 2565 | + */ |
---|
| 2566 | + if (!iip || list_empty(&iip->ili_item.li_bio_list)) |
---|
| 2567 | + goto out_iunlock; |
---|
| 2568 | + |
---|
| 2569 | + __xfs_iflags_set(ip, XFS_IFLUSHING); |
---|
| 2570 | + spin_unlock(&ip->i_flags_lock); |
---|
| 2571 | + rcu_read_unlock(); |
---|
| 2572 | + |
---|
| 2573 | + /* we have a dirty inode in memory that has not yet been flushed. */ |
---|
| 2574 | + spin_lock(&iip->ili_lock); |
---|
| 2575 | + iip->ili_last_fields = iip->ili_fields; |
---|
| 2576 | + iip->ili_fields = 0; |
---|
| 2577 | + iip->ili_fsync_fields = 0; |
---|
| 2578 | + spin_unlock(&iip->ili_lock); |
---|
| 2579 | + ASSERT(iip->ili_last_fields); |
---|
| 2580 | + |
---|
| 2581 | + if (ip != free_ip) |
---|
| 2582 | + xfs_iunlock(ip, XFS_ILOCK_EXCL); |
---|
| 2583 | + return; |
---|
| 2584 | + |
---|
| 2585 | +out_iunlock: |
---|
| 2586 | + if (ip != free_ip) |
---|
| 2587 | + xfs_iunlock(ip, XFS_ILOCK_EXCL); |
---|
| 2588 | +out_iflags_unlock: |
---|
| 2589 | + spin_unlock(&ip->i_flags_lock); |
---|
| 2590 | + rcu_read_unlock(); |
---|
2190 | 2591 | } |
---|
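xfs_ifree_mark_inode_stale() shows a lookup pattern that recurs throughout this file: find the object under RCU, revalidate its identity under i_flags_lock (it may have been freed or reused between lookup and lock), and if a further lock cannot be taken without blocking, drop everything and retry from the top. A hedged pthreads sketch of just that trylock-and-backoff shape follows; struct toy_inode and mark_stale() are inventions for illustration, standing in for the radix-tree lookup and RCU machinery.

#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_inode {
	pthread_spinlock_t flags_lock;	/* models ip->i_flags_lock */
	pthread_mutex_t ilock;		/* models XFS_ILOCK_EXCL */
	unsigned long ino;
	bool stale;
};

static void mark_stale(struct toy_inode *ip, unsigned long inum)
{
retry:
	pthread_spin_lock(&ip->flags_lock);
	/* revalidate: the slot may now hold a different or stale inode */
	if (ip->ino != inum || ip->stale) {
		pthread_spin_unlock(&ip->flags_lock);
		return;
	}
	/* must not sleep here, so trylock; on failure back off and retry */
	if (pthread_mutex_trylock(&ip->ilock) != 0) {
		pthread_spin_unlock(&ip->flags_lock);
		sched_yield();	/* stands in for delay(1) */
		goto retry;
	}
	ip->stale = true;
	pthread_mutex_unlock(&ip->ilock);
	pthread_spin_unlock(&ip->flags_lock);
}

int main(void)
{
	struct toy_inode inode = { .ino = 42 };

	pthread_spin_init(&inode.flags_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_mutex_init(&inode.ilock, NULL);
	mark_stale(&inode, 42);
	printf("stale: %d\n", inode.stale);
	return 0;
}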
2191 | 2592 | |
---|
2192 | 2593 | /* |
---|
.. | .. |
---|
2196 | 2597 | */ |
---|
2197 | 2598 | STATIC int |
---|
2198 | 2599 | xfs_ifree_cluster( |
---|
2199 | | - xfs_inode_t *free_ip, |
---|
2200 | | - xfs_trans_t *tp, |
---|
| 2600 | + struct xfs_inode *free_ip, |
---|
| 2601 | + struct xfs_trans *tp, |
---|
2201 | 2602 | struct xfs_icluster *xic) |
---|
2202 | 2603 | { |
---|
2203 | | - xfs_mount_t *mp = free_ip->i_mount; |
---|
2204 | | - int blks_per_cluster; |
---|
2205 | | - int inodes_per_cluster; |
---|
| 2604 | + struct xfs_mount *mp = free_ip->i_mount; |
---|
| 2605 | + struct xfs_ino_geometry *igeo = M_IGEO(mp); |
---|
| 2606 | + struct xfs_buf *bp; |
---|
| 2607 | + xfs_daddr_t blkno; |
---|
| 2608 | + xfs_ino_t inum = xic->first_ino; |
---|
2206 | 2609 | int nbufs; |
---|
2207 | 2610 | int i, j; |
---|
2208 | 2611 | int ioffset; |
---|
2209 | | - xfs_daddr_t blkno; |
---|
2210 | | - xfs_buf_t *bp; |
---|
2211 | | - xfs_inode_t *ip; |
---|
2212 | | - xfs_inode_log_item_t *iip; |
---|
2213 | | - struct xfs_log_item *lip; |
---|
2214 | | - struct xfs_perag *pag; |
---|
2215 | | - xfs_ino_t inum; |
---|
| 2612 | + int error; |
---|
2216 | 2613 | |
---|
2217 | | - inum = xic->first_ino; |
---|
2218 | | - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); |
---|
2219 | | - blks_per_cluster = xfs_icluster_size_fsb(mp); |
---|
2220 | | - inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; |
---|
2221 | | - nbufs = mp->m_ialloc_blks / blks_per_cluster; |
---|
| 2614 | + nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster; |
---|
2222 | 2615 | |
---|
2223 | | - for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) { |
---|
| 2616 | + for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) { |
---|
2224 | 2617 | /* |
---|
2225 | 2618 | * The allocation bitmap tells us which inodes of the chunk were |
---|
2226 | 2619 | * physically allocated. Skip the cluster if an inode falls into |
---|
.. | .. |
---|
2228 | 2621 | */ |
---|
2229 | 2622 | ioffset = inum - xic->first_ino; |
---|
2230 | 2623 | if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) { |
---|
2231 | | - ASSERT(ioffset % inodes_per_cluster == 0); |
---|
| 2624 | + ASSERT(ioffset % igeo->inodes_per_cluster == 0); |
---|
2232 | 2625 | continue; |
---|
2233 | 2626 | } |
---|
2234 | 2627 | |
---|
.. | .. |
---|
2237 | 2630 | |
---|
2238 | 2631 | /* |
---|
2239 | 2632 | * We obtain and lock the backing buffer first in the process |
---|
2240 | | - * here, as we have to ensure that any dirty inode that we |
---|
2241 | | - * can't get the flush lock on is attached to the buffer. |
---|
| 2633 | + * here to ensure dirty inodes attached to the buffer remain in |
---|
| 2634 | + * the flushing state while we mark them stale. |
---|
| 2635 | + * |
---|
2242 | 2636 | * If we scan the in-memory inodes first, then buffer IO can |
---|
2243 | 2637 | * complete before we get a lock on it, and hence we may fail |
---|
2244 | 2638 | * to mark all the active inodes on the buffer stale. |
---|
2245 | 2639 | */ |
---|
2246 | | - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, |
---|
2247 | | - mp->m_bsize * blks_per_cluster, |
---|
2248 | | - XBF_UNMAPPED); |
---|
2249 | | - |
---|
2250 | | - if (!bp) |
---|
2251 | | - return -ENOMEM; |
---|
| 2640 | + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, |
---|
| 2641 | + mp->m_bsize * igeo->blocks_per_cluster, |
---|
| 2642 | + XBF_UNMAPPED, &bp); |
---|
| 2643 | + if (error) |
---|
| 2644 | + return error; |
---|
2252 | 2645 | |
---|
2253 | 2646 | /* |
---|
2254 | 2647 | * This buffer may not have been correctly initialised as we |
---|
.. | .. |
---|
2259 | 2652 | * want it to fail. We can achieve this by adding a write |
---|
2260 | 2653 | * verifier to the buffer. |
---|
2261 | 2654 | */ |
---|
2262 | | - bp->b_ops = &xfs_inode_buf_ops; |
---|
| 2655 | + bp->b_ops = &xfs_inode_buf_ops; |
---|
2263 | 2656 | |
---|
2264 | 2657 | /* |
---|
2265 | | - * Walk the inodes already attached to the buffer and mark them |
---|
2266 | | - * stale. These will all have the flush locks held, so an |
---|
2267 | | - * in-memory inode walk can't lock them. By marking them all |
---|
2268 | | - * stale first, we will not attempt to lock them in the loop |
---|
2269 | | - * below as the XFS_ISTALE flag will be set. |
---|
| 2658 | + * Now we need to set all the cached clean inodes as XFS_ISTALE, |
---|
| 2659 | + * too. This requires lookups, and will skip inodes that we've |
---|
| 2660 | + * already marked XFS_ISTALE. |
---|
2270 | 2661 | */ |
---|
2271 | | - list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { |
---|
2272 | | - if (lip->li_type == XFS_LI_INODE) { |
---|
2273 | | - iip = (xfs_inode_log_item_t *)lip; |
---|
2274 | | - ASSERT(iip->ili_logged == 1); |
---|
2275 | | - lip->li_cb = xfs_istale_done; |
---|
2276 | | - xfs_trans_ail_copy_lsn(mp->m_ail, |
---|
2277 | | - &iip->ili_flush_lsn, |
---|
2278 | | - &iip->ili_item.li_lsn); |
---|
2279 | | - xfs_iflags_set(iip->ili_inode, XFS_ISTALE); |
---|
2280 | | - } |
---|
2281 | | - } |
---|
2282 | | - |
---|
2283 | | - |
---|
2284 | | - /* |
---|
2285 | | - * For each inode in memory attempt to add it to the inode |
---|
2286 | | - * buffer and set it up for being staled on buffer IO |
---|
2287 | | - * completion. This is safe as we've locked out tail pushing |
---|
2288 | | - * and flushing by locking the buffer. |
---|
2289 | | - * |
---|
2290 | | - * We have already marked every inode that was part of a |
---|
2291 | | - * transaction stale above, which means there is no point in |
---|
2292 | | - * even trying to lock them. |
---|
2293 | | - */ |
---|
2294 | | - for (i = 0; i < inodes_per_cluster; i++) { |
---|
2295 | | -retry: |
---|
2296 | | - rcu_read_lock(); |
---|
2297 | | - ip = radix_tree_lookup(&pag->pag_ici_root, |
---|
2298 | | - XFS_INO_TO_AGINO(mp, (inum + i))); |
---|
2299 | | - |
---|
2300 | | - /* Inode not in memory, nothing to do */ |
---|
2301 | | - if (!ip) { |
---|
2302 | | - rcu_read_unlock(); |
---|
2303 | | - continue; |
---|
2304 | | - } |
---|
2305 | | - |
---|
2306 | | - /* |
---|
2307 | | - * because this is an RCU protected lookup, we could |
---|
2308 | | - * find a recently freed or even reallocated inode |
---|
2309 | | - * during the lookup. We need to check under the |
---|
2310 | | - * i_flags_lock for a valid inode here. Skip it if it |
---|
2311 | | - * is not valid, the wrong inode or stale. |
---|
2312 | | - */ |
---|
2313 | | - spin_lock(&ip->i_flags_lock); |
---|
2314 | | - if (ip->i_ino != inum + i || |
---|
2315 | | - __xfs_iflags_test(ip, XFS_ISTALE)) { |
---|
2316 | | - spin_unlock(&ip->i_flags_lock); |
---|
2317 | | - rcu_read_unlock(); |
---|
2318 | | - continue; |
---|
2319 | | - } |
---|
2320 | | - spin_unlock(&ip->i_flags_lock); |
---|
2321 | | - |
---|
2322 | | - /* |
---|
2323 | | - * Don't try to lock/unlock the current inode, but we |
---|
2324 | | - * _cannot_ skip the other inodes that we did not find |
---|
2325 | | - * in the list attached to the buffer and are not |
---|
2326 | | - * already marked stale. If we can't lock it, back off |
---|
2327 | | - * and retry. |
---|
2328 | | - */ |
---|
2329 | | - if (ip != free_ip) { |
---|
2330 | | - if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { |
---|
2331 | | - rcu_read_unlock(); |
---|
2332 | | - delay(1); |
---|
2333 | | - goto retry; |
---|
2334 | | - } |
---|
2335 | | - |
---|
2336 | | - /* |
---|
2337 | | - * Check the inode number again in case we're |
---|
2338 | | - * racing with freeing in xfs_reclaim_inode(). |
---|
2339 | | - * See the comments in that function for more |
---|
2340 | | - * information as to why the initial check is |
---|
2341 | | - * not sufficient. |
---|
2342 | | - */ |
---|
2343 | | - if (ip->i_ino != inum + i) { |
---|
2344 | | - xfs_iunlock(ip, XFS_ILOCK_EXCL); |
---|
2345 | | - rcu_read_unlock(); |
---|
2346 | | - continue; |
---|
2347 | | - } |
---|
2348 | | - } |
---|
2349 | | - rcu_read_unlock(); |
---|
2350 | | - |
---|
2351 | | - xfs_iflock(ip); |
---|
2352 | | - xfs_iflags_set(ip, XFS_ISTALE); |
---|
2353 | | - |
---|
2354 | | - /* |
---|
2355 | | - * we don't need to attach clean inodes or those only |
---|
2356 | | - * with unlogged changes (which we throw away, anyway). |
---|
2357 | | - */ |
---|
2358 | | - iip = ip->i_itemp; |
---|
2359 | | - if (!iip || xfs_inode_clean(ip)) { |
---|
2360 | | - ASSERT(ip != free_ip); |
---|
2361 | | - xfs_ifunlock(ip); |
---|
2362 | | - xfs_iunlock(ip, XFS_ILOCK_EXCL); |
---|
2363 | | - continue; |
---|
2364 | | - } |
---|
2365 | | - |
---|
2366 | | - iip->ili_last_fields = iip->ili_fields; |
---|
2367 | | - iip->ili_fields = 0; |
---|
2368 | | - iip->ili_fsync_fields = 0; |
---|
2369 | | - iip->ili_logged = 1; |
---|
2370 | | - xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, |
---|
2371 | | - &iip->ili_item.li_lsn); |
---|
2372 | | - |
---|
2373 | | - xfs_buf_attach_iodone(bp, xfs_istale_done, |
---|
2374 | | - &iip->ili_item); |
---|
2375 | | - |
---|
2376 | | - if (ip != free_ip) |
---|
2377 | | - xfs_iunlock(ip, XFS_ILOCK_EXCL); |
---|
2378 | | - } |
---|
| 2662 | + for (i = 0; i < igeo->inodes_per_cluster; i++) |
---|
| 2663 | + xfs_ifree_mark_inode_stale(bp, free_ip, inum + i); |
---|
2379 | 2664 | |
---|
2380 | 2665 | xfs_trans_stale_inode_buf(tp, bp); |
---|
2381 | 2666 | xfs_trans_binval(tp, bp); |
---|
2382 | 2667 | } |
---|
2383 | | - |
---|
2384 | | - xfs_perag_put(pag); |
---|
2385 | 2668 | return 0; |
---|
2386 | 2669 | } |
---|
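The loop bounds in xfs_ifree_cluster() are pure geometry: an inode chunk spans igeo->ialloc_blks blocks, each cluster buffer covers igeo->blocks_per_cluster of them, so nbufs = ialloc_blks / blocks_per_cluster, and inum advances by inodes_per_cluster per buffer. The sparse-cluster skip then tests one bit of the allocation bitmap per cluster. A small standalone sketch with made-up geometry values (the constants here are illustrative, not real XFS defaults):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* illustrative geometry: 64-inode chunk, 16 inodes per cluster */
	const unsigned inodes_per_chunk = 64;
	const unsigned inodes_per_cluster = 16;
	/* allocation bitmap: bit n set => inode n of the chunk exists */
	const uint64_t alloc = 0x00000000ffff00ffULL;

	for (unsigned ioffset = 0; ioffset < inodes_per_chunk;
	     ioffset += inodes_per_cluster) {
		/* models (xic->alloc & XFS_INOBT_MASK(ioffset)) == 0 */
		if (!(alloc & ((uint64_t)1 << ioffset))) {
			printf("offset %2u: sparse cluster, skipped\n",
			       ioffset);
			continue;
		}
		printf("offset %2u: stale and free %u inodes\n",
		       ioffset, inodes_per_cluster);
	}
	return 0;
}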
2387 | 2670 | |
---|
2388 | 2671 | /* |
---|
2389 | | - * Free any local-format buffers sitting around before we reset to |
---|
2390 | | - * extents format. |
---|
2391 | | - */ |
---|
2392 | | -static inline void |
---|
2393 | | -xfs_ifree_local_data( |
---|
2394 | | - struct xfs_inode *ip, |
---|
2395 | | - int whichfork) |
---|
2396 | | -{ |
---|
2397 | | - struct xfs_ifork *ifp; |
---|
2398 | | - |
---|
2399 | | - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) |
---|
2400 | | - return; |
---|
2401 | | - |
---|
2402 | | - ifp = XFS_IFORK_PTR(ip, whichfork); |
---|
2403 | | - xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); |
---|
2404 | | -} |
---|
2405 | | - |
---|
2406 | | -/* |
---|
2407 | | - * This is called to return an inode to the inode free list. |
---|
2408 | | - * The inode should already be truncated to 0 length and have |
---|
2409 | | - * no pages associated with it. This routine also assumes that |
---|
2410 | | - * the inode is already a part of the transaction. |
---|
| 2672 | + * This is called to return an inode to the inode free list. The inode should |
---|
| 2673 | + * already be truncated to 0 length and have no pages associated with it. This |
---|
| 2674 | + * routine also assumes that the inode is already a part of the transaction. |
---|
2411 | 2675 | * |
---|
2412 | | - * The on-disk copy of the inode will have been added to the list |
---|
2413 | | - * of unlinked inodes in the AGI. We need to remove the inode from |
---|
2414 | | - * that list atomically with respect to freeing it here. |
---|
| 2676 | + * The on-disk copy of the inode will have been added to the list of unlinked |
---|
| 2677 | + * inodes in the AGI. We need to remove the inode from that list atomically with |
---|
| 2678 | + * respect to freeing it here. |
---|
2415 | 2679 | */ |
---|
2416 | 2680 | int |
---|
2417 | 2681 | xfs_ifree( |
---|
.. | .. |
---|
2420 | 2684 | { |
---|
2421 | 2685 | int error; |
---|
2422 | 2686 | struct xfs_icluster xic = { 0 }; |
---|
| 2687 | + struct xfs_inode_log_item *iip = ip->i_itemp; |
---|
2423 | 2688 | |
---|
2424 | 2689 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
---|
2425 | 2690 | ASSERT(VFS_I(ip)->i_nlink == 0); |
---|
2426 | | - ASSERT(ip->i_d.di_nextents == 0); |
---|
2427 | | - ASSERT(ip->i_d.di_anextents == 0); |
---|
| 2691 | + ASSERT(ip->i_df.if_nextents == 0); |
---|
2428 | 2692 | ASSERT(ip->i_d.di_size == 0 || !S_ISREG(VFS_I(ip)->i_mode)); |
---|
2429 | 2693 | ASSERT(ip->i_d.di_nblocks == 0); |
---|
2430 | 2694 | |
---|
2431 | 2695 | /* |
---|
2432 | | - * Pull the on-disk inode from the AGI unlinked list. |
---|
| 2696 | + * Free the inode first so that we guarantee that the AGI lock is going |
---|
| 2697 | + * to be taken before we remove the inode from the unlinked list. This |
---|
| 2698 | + * makes the AGI lock -> unlinked list modification order the same as |
---|
| 2699 | + * used in O_TMPFILE creation. |
---|
2433 | 2700 | */ |
---|
2434 | | - error = xfs_iunlink_remove(tp, ip); |
---|
2435 | | - if (error) |
---|
2436 | | - return error; |
---|
2437 | | - |
---|
2438 | 2701 | error = xfs_difree(tp, ip->i_ino, &xic); |
---|
2439 | 2702 | if (error) |
---|
2440 | 2703 | return error; |
---|
2441 | 2704 | |
---|
2442 | | - xfs_ifree_local_data(ip, XFS_DATA_FORK); |
---|
2443 | | - xfs_ifree_local_data(ip, XFS_ATTR_FORK); |
---|
| 2705 | + error = xfs_iunlink_remove(tp, ip); |
---|
| 2706 | + if (error) |
---|
| 2707 | + return error; |
---|
| 2708 | + |
---|
| 2709 | + /* |
---|
| 2710 | + * Free any local-format data sitting around before we reset the |
---|
| 2711 | + * data fork to extents format. Note that the attr fork data has |
---|
| 2712 | + * already been freed by xfs_attr_inactive. |
---|
| 2713 | + */ |
---|
| 2714 | + if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) { |
---|
| 2715 | + kmem_free(ip->i_df.if_u1.if_data); |
---|
| 2716 | + ip->i_df.if_u1.if_data = NULL; |
---|
| 2717 | + ip->i_df.if_bytes = 0; |
---|
| 2718 | + } |
---|
2444 | 2719 | |
---|
2445 | 2720 | VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ |
---|
2446 | 2721 | ip->i_d.di_flags = 0; |
---|
2447 | | - ip->i_d.di_flags2 = 0; |
---|
| 2722 | + ip->i_d.di_flags2 = ip->i_mount->m_ino_geo.new_diflags2; |
---|
2448 | 2723 | ip->i_d.di_dmevmask = 0; |
---|
2449 | 2724 | ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ |
---|
2450 | | - ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; |
---|
2451 | | - ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; |
---|
| 2725 | + ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS; |
---|
2452 | 2726 | |
---|
2453 | 2727 | /* Don't attempt to replay owner changes for a deleted inode */ |
---|
2454 | | - ip->i_itemp->ili_fields &= ~(XFS_ILOG_AOWNER|XFS_ILOG_DOWNER); |
---|
| 2728 | + spin_lock(&iip->ili_lock); |
---|
| 2729 | + iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER); |
---|
| 2730 | + spin_unlock(&iip->ili_lock); |
---|
2455 | 2731 | |
---|
2456 | 2732 | /* |
---|
2457 | 2733 | * Bump the generation count so no one will be confused |
---|
.. | .. |
---|
2480 | 2756 | trace_xfs_inode_unpin_nowait(ip, _RET_IP_); |
---|
2481 | 2757 | |
---|
2482 | 2758 | /* Give the log a push to start the unpinning I/O */ |
---|
2483 | | - xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL); |
---|
| 2759 | + xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL); |
---|
2484 | 2760 | |
---|
2485 | 2761 | } |
---|
2486 | 2762 | |
---|
.. | .. |
---|
2769 | 3045 | error = xfs_droplink(tp, dp2); |
---|
2770 | 3046 | if (error) |
---|
2771 | 3047 | goto out_trans_abort; |
---|
2772 | | - error = xfs_bumplink(tp, dp1); |
---|
2773 | | - if (error) |
---|
2774 | | - goto out_trans_abort; |
---|
| 3048 | + xfs_bumplink(tp, dp1); |
---|
2775 | 3049 | } |
---|
2776 | 3050 | |
---|
2777 | 3051 | /* |
---|
.. | .. |
---|
2795 | 3069 | error = xfs_droplink(tp, dp1); |
---|
2796 | 3070 | if (error) |
---|
2797 | 3071 | goto out_trans_abort; |
---|
2798 | | - error = xfs_bumplink(tp, dp2); |
---|
2799 | | - if (error) |
---|
2800 | | - goto out_trans_abort; |
---|
| 3072 | + xfs_bumplink(tp, dp2); |
---|
2801 | 3073 | } |
---|
2802 | 3074 | |
---|
2803 | 3075 | /* |
---|
.. | .. |
---|
2835 | 3107 | /* |
---|
2836 | 3108 | * xfs_rename_alloc_whiteout() |
---|
2837 | 3109 | * |
---|
2838 | | - * Return a referenced, unlinked, unlocked inode that that can be used as a |
---|
| 3110 | + * Return a referenced, unlinked, unlocked inode that can be used as a |
---|
2839 | 3111 | * whiteout in a rename transaction. We use a tmpfile inode here so that if we |
---|
2840 | 3112 | * crash between allocating the inode and linking it into the rename transaction |
---|
2841 | 3113 | * recovery will free the inode and we won't leak it. |
---|
.. | .. |
---|
2882 | 3154 | struct xfs_trans *tp; |
---|
2883 | 3155 | struct xfs_inode *wip = NULL; /* whiteout inode */ |
---|
2884 | 3156 | struct xfs_inode *inodes[__XFS_SORT_INODES]; |
---|
| 3157 | + int i; |
---|
2885 | 3158 | int num_inodes = __XFS_SORT_INODES; |
---|
2886 | 3159 | bool new_parent = (src_dp != target_dp); |
---|
2887 | 3160 | bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); |
---|
.. | .. |
---|
2899 | 3172 | * appropriately. |
---|
2900 | 3173 | */ |
---|
2901 | 3174 | if (flags & RENAME_WHITEOUT) { |
---|
2902 | | - ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); |
---|
2903 | 3175 | error = xfs_rename_alloc_whiteout(target_dp, &wip); |
---|
2904 | 3176 | if (error) |
---|
2905 | 3177 | return error; |
---|
.. | .. |
---|
2956 | 3228 | * tree quota mechanism would be circumvented. |
---|
2957 | 3229 | */ |
---|
2958 | 3230 | if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
---|
2959 | | - (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { |
---|
| 3231 | + target_dp->i_d.di_projid != src_ip->i_d.di_projid)) { |
---|
2960 | 3232 | error = -EXDEV; |
---|
2961 | 3233 | goto out_trans_cancel; |
---|
2962 | 3234 | } |
---|
.. | .. |
---|
2995 | 3267 | } |
---|
2996 | 3268 | |
---|
2997 | 3269 | /* |
---|
| 3270 | + * Lock the AGI buffers we need to handle bumping the nlink of the |
---|
| 3271 | + * whiteout inode off the unlinked list and to handle dropping the |
---|
| 3272 | + * nlink of the target inode. Per locking order rules, do this in |
---|
| 3273 | + * increasing AG order and before directory block allocation tries to |
---|
| 3274 | + * grab AGFs because we grab AGIs before AGFs. |
---|
| 3275 | + * |
---|
| 3276 | + * The (vfs) caller must ensure that if src is a directory then |
---|
| 3277 | + * target_ip is either null or an empty directory. |
---|
| 3278 | + */ |
---|
| 3279 | + for (i = 0; i < num_inodes && inodes[i] != NULL; i++) { |
---|
| 3280 | + if (inodes[i] == wip || |
---|
| 3281 | + (inodes[i] == target_ip && |
---|
| 3282 | + (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) { |
---|
| 3283 | + struct xfs_buf *bp; |
---|
| 3284 | + xfs_agnumber_t agno; |
---|
| 3285 | + |
---|
| 3286 | + agno = XFS_INO_TO_AGNO(mp, inodes[i]->i_ino); |
---|
| 3287 | + error = xfs_read_agi(mp, tp, agno, &bp); |
---|
| 3288 | + if (error) |
---|
| 3289 | + goto out_trans_cancel; |
---|
| 3290 | + } |
---|
| 3291 | + } |
---|
| 3292 | + |
---|
| 3293 | + /* |
---|
2998 | 3294 | * Directory entry creation below may acquire the AGF. Remove |
---|
2999 | 3295 | * the whiteout from the unlinked list first to preserve correct |
---|
3000 | 3296 | * AGI/AGF locking order. This dirties the transaction so failures |
---|
.. | .. |
---|
3013 | 3309 | goto out_trans_cancel; |
---|
3014 | 3310 | |
---|
3015 | 3311 | xfs_bumplink(tp, wip); |
---|
3016 | | - xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); |
---|
3017 | 3312 | VFS_I(wip)->i_state &= ~I_LINKABLE; |
---|
3018 | 3313 | } |
---|
3019 | 3314 | |
---|
.. | .. |
---|
3035 | 3330 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
---|
3036 | 3331 | |
---|
3037 | 3332 | if (new_parent && src_is_directory) { |
---|
3038 | | - error = xfs_bumplink(tp, target_dp); |
---|
3039 | | - if (error) |
---|
3040 | | - goto out_trans_cancel; |
---|
| 3333 | + xfs_bumplink(tp, target_dp); |
---|
3041 | 3334 | } |
---|
3042 | 3335 | } else { /* target_ip != NULL */ |
---|
3043 | 3336 | /* |
---|
.. | .. |
---|
3148 | 3441 | return error; |
---|
3149 | 3442 | } |
---|
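The AGI pre-locking loop added to xfs_rename() is a classic deadlock-avoidance move: every path that can take more than one AGI lock takes them in increasing AG number (the inodes[] array is sorted earlier in the function, outside this hunk), and AGIs are always taken before AGFs. A hedged sketch of that discipline, with pthread mutexes standing in for locked AGI buffers; lock_agis() and NAGS are inventions for illustration.

#include <pthread.h>
#include <stdio.h>

#define NAGS 4

static pthread_mutex_t agi_lock[NAGS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

/* Take the AGI locks for a sorted list of AG numbers, skipping
 * duplicates, so that all callers agree on the acquisition order. */
static void lock_agis(const int *agno, int n)
{
	int last = -1;

	for (int i = 0; i < n; i++) {
		if (agno[i] == last)
			continue;	/* two inodes in the same AG */
		pthread_mutex_lock(&agi_lock[agno[i]]);
		last = agno[i];
	}
}

int main(void)
{
	int ags[] = { 0, 2, 2, 3 };	/* already sorted ascending */

	lock_agis(ags, 4);
	printf("holding AGIs 0, 2, 3 in ascending order\n");
	return 0;
}

Because every caller acquires in the same global order, no two tasks can ever hold a pair of these locks in opposite orders.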
3150 | 3443 | |
---|
3151 | | -STATIC int |
---|
3152 | | -xfs_iflush_cluster( |
---|
3153 | | - struct xfs_inode *ip, |
---|
3154 | | - struct xfs_buf *bp) |
---|
3155 | | -{ |
---|
3156 | | - struct xfs_mount *mp = ip->i_mount; |
---|
3157 | | - struct xfs_perag *pag; |
---|
3158 | | - unsigned long first_index, mask; |
---|
3159 | | - unsigned long inodes_per_cluster; |
---|
3160 | | - int cilist_size; |
---|
3161 | | - struct xfs_inode **cilist; |
---|
3162 | | - struct xfs_inode *cip; |
---|
3163 | | - int nr_found; |
---|
3164 | | - int clcount = 0; |
---|
3165 | | - int i; |
---|
3166 | | - |
---|
3167 | | - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); |
---|
3168 | | - |
---|
3169 | | - inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; |
---|
3170 | | - cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); |
---|
3171 | | - cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS); |
---|
3172 | | - if (!cilist) |
---|
3173 | | - goto out_put; |
---|
3174 | | - |
---|
3175 | | - mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); |
---|
3176 | | - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; |
---|
3177 | | - rcu_read_lock(); |
---|
3178 | | - /* really need a gang lookup range call here */ |
---|
3179 | | - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist, |
---|
3180 | | - first_index, inodes_per_cluster); |
---|
3181 | | - if (nr_found == 0) |
---|
3182 | | - goto out_free; |
---|
3183 | | - |
---|
3184 | | - for (i = 0; i < nr_found; i++) { |
---|
3185 | | - cip = cilist[i]; |
---|
3186 | | - if (cip == ip) |
---|
3187 | | - continue; |
---|
3188 | | - |
---|
3189 | | - /* |
---|
3190 | | - * because this is an RCU protected lookup, we could find a |
---|
3191 | | - * recently freed or even reallocated inode during the lookup. |
---|
3192 | | - * We need to check under the i_flags_lock for a valid inode |
---|
3193 | | - * here. Skip it if it is not valid or the wrong inode. |
---|
3194 | | - */ |
---|
3195 | | - spin_lock(&cip->i_flags_lock); |
---|
3196 | | - if (!cip->i_ino || |
---|
3197 | | - __xfs_iflags_test(cip, XFS_ISTALE)) { |
---|
3198 | | - spin_unlock(&cip->i_flags_lock); |
---|
3199 | | - continue; |
---|
3200 | | - } |
---|
3201 | | - |
---|
3202 | | - /* |
---|
3203 | | - * Once we fall off the end of the cluster, no point checking |
---|
3204 | | - * any more inodes in the list because they will also all be |
---|
3205 | | - * outside the cluster. |
---|
3206 | | - */ |
---|
3207 | | - if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) { |
---|
3208 | | - spin_unlock(&cip->i_flags_lock); |
---|
3209 | | - break; |
---|
3210 | | - } |
---|
3211 | | - spin_unlock(&cip->i_flags_lock); |
---|
3212 | | - |
---|
3213 | | - /* |
---|
3214 | | - * Do an un-protected check to see if the inode is dirty and |
---|
3215 | | - * is a candidate for flushing. These checks will be repeated |
---|
3216 | | - * later after the appropriate locks are acquired. |
---|
3217 | | - */ |
---|
3218 | | - if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0) |
---|
3219 | | - continue; |
---|
3220 | | - |
---|
3221 | | - /* |
---|
3222 | | - * Try to get locks. If any are unavailable or it is pinned, |
---|
3223 | | - * then this inode cannot be flushed and is skipped. |
---|
3224 | | - */ |
---|
3225 | | - |
---|
3226 | | - if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED)) |
---|
3227 | | - continue; |
---|
3228 | | - if (!xfs_iflock_nowait(cip)) { |
---|
3229 | | - xfs_iunlock(cip, XFS_ILOCK_SHARED); |
---|
3230 | | - continue; |
---|
3231 | | - } |
---|
3232 | | - if (xfs_ipincount(cip)) { |
---|
3233 | | - xfs_ifunlock(cip); |
---|
3234 | | - xfs_iunlock(cip, XFS_ILOCK_SHARED); |
---|
3235 | | - continue; |
---|
3236 | | - } |
---|
3237 | | - |
---|
3238 | | - |
---|
3239 | | - /* |
---|
3240 | | - * Check the inode number again, just to be certain we are not |
---|
3241 | | - * racing with freeing in xfs_reclaim_inode(). See the comments |
---|
3242 | | - * in that function for more information as to why the initial |
---|
3243 | | - * check is not sufficient. |
---|
3244 | | - */ |
---|
3245 | | - if (!cip->i_ino) { |
---|
3246 | | - xfs_ifunlock(cip); |
---|
3247 | | - xfs_iunlock(cip, XFS_ILOCK_SHARED); |
---|
3248 | | - continue; |
---|
3249 | | - } |
---|
3250 | | - |
---|
3251 | | - /* |
---|
3252 | | - * arriving here means that this inode can be flushed. First |
---|
3253 | | - * re-check that it's dirty before flushing. |
---|
3254 | | - */ |
---|
3255 | | - if (!xfs_inode_clean(cip)) { |
---|
3256 | | - int error; |
---|
3257 | | - error = xfs_iflush_int(cip, bp); |
---|
3258 | | - if (error) { |
---|
3259 | | - xfs_iunlock(cip, XFS_ILOCK_SHARED); |
---|
3260 | | - goto cluster_corrupt_out; |
---|
3261 | | - } |
---|
3262 | | - clcount++; |
---|
3263 | | - } else { |
---|
3264 | | - xfs_ifunlock(cip); |
---|
3265 | | - } |
---|
3266 | | - xfs_iunlock(cip, XFS_ILOCK_SHARED); |
---|
3267 | | - } |
---|
3268 | | - |
---|
3269 | | - if (clcount) { |
---|
3270 | | - XFS_STATS_INC(mp, xs_icluster_flushcnt); |
---|
3271 | | - XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount); |
---|
3272 | | - } |
---|
3273 | | - |
---|
3274 | | -out_free: |
---|
3275 | | - rcu_read_unlock(); |
---|
3276 | | - kmem_free(cilist); |
---|
3277 | | -out_put: |
---|
3278 | | - xfs_perag_put(pag); |
---|
3279 | | - return 0; |
---|
3280 | | - |
---|
3281 | | - |
---|
3282 | | -cluster_corrupt_out: |
---|
3283 | | - /* |
---|
3284 | | - * Corruption detected in the clustering loop. Invalidate the |
---|
3285 | | - * inode buffer and shut down the filesystem. |
---|
3286 | | - */ |
---|
3287 | | - rcu_read_unlock(); |
---|
3288 | | - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
---|
3289 | | - |
---|
3290 | | - /* |
---|
3291 | | - * We'll always have an inode attached to the buffer for completion |
---|
3292 | | - * process by the time we are called from xfs_iflush(). Hence we have |
---|
3293 | | - * always need to do IO completion processing to abort the inodes |
---|
3294 | | - * attached to the buffer. handle them just like the shutdown case in |
---|
3295 | | - * xfs_buf_submit(). |
---|
3296 | | - */ |
---|
3297 | | - ASSERT(bp->b_iodone); |
---|
3298 | | - bp->b_flags &= ~XBF_DONE; |
---|
3299 | | - xfs_buf_stale(bp); |
---|
3300 | | - xfs_buf_ioerror(bp, -EIO); |
---|
3301 | | - xfs_buf_ioend(bp); |
---|
3302 | | - |
---|
3303 | | - /* abort the corrupt inode, as it was not attached to the buffer */ |
---|
3304 | | - xfs_iflush_abort(cip, false); |
---|
3305 | | - kmem_free(cilist); |
---|
3306 | | - xfs_perag_put(pag); |
---|
3307 | | - return -EFSCORRUPTED; |
---|
3308 | | -} |
---|
3309 | | - |
---|
3310 | | -/* |
---|
3311 | | - * Flush dirty inode metadata into the backing buffer. |
---|
3312 | | - * |
---|
3313 | | - * The caller must have the inode lock and the inode flush lock held. The |
---|
3314 | | - * inode lock will still be held upon return to the caller, and the inode |
---|
3315 | | - * flush lock will be released after the inode has reached the disk. |
---|
3316 | | - * |
---|
3317 | | - * The caller must write out the buffer returned in *bpp and release it. |
---|
3318 | | - */ |
---|
3319 | | -int |
---|
| 3444 | +static int |
---|
3320 | 3445 | xfs_iflush( |
---|
3321 | | - struct xfs_inode *ip, |
---|
3322 | | - struct xfs_buf **bpp) |
---|
3323 | | -{ |
---|
3324 | | - struct xfs_mount *mp = ip->i_mount; |
---|
3325 | | - struct xfs_buf *bp = NULL; |
---|
3326 | | - struct xfs_dinode *dip; |
---|
3327 | | - int error; |
---|
3328 | | - |
---|
3329 | | - XFS_STATS_INC(mp, xs_iflush_count); |
---|
3330 | | - |
---|
3331 | | - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
---|
3332 | | - ASSERT(xfs_isiflocked(ip)); |
---|
3333 | | - ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
---|
3334 | | - ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
---|
3335 | | - |
---|
3336 | | - *bpp = NULL; |
---|
3337 | | - |
---|
3338 | | - xfs_iunpin_wait(ip); |
---|
3339 | | - |
---|
3340 | | - /* |
---|
3341 | | - * For stale inodes we cannot rely on the backing buffer remaining |
---|
3342 | | - * stale in cache for the remaining life of the stale inode and so |
---|
3343 | | - * xfs_imap_to_bp() below may give us a buffer that no longer contains |
---|
3344 | | - * inodes below. We have to check this after ensuring the inode is |
---|
3345 | | - * unpinned so that it is safe to reclaim the stale inode after the |
---|
3346 | | - * flush call. |
---|
3347 | | - */ |
---|
3348 | | - if (xfs_iflags_test(ip, XFS_ISTALE)) { |
---|
3349 | | - xfs_ifunlock(ip); |
---|
3350 | | - return 0; |
---|
3351 | | - } |
---|
3352 | | - |
---|
3353 | | - /* |
---|
3354 | | - * This may have been unpinned because the filesystem is shutting |
---|
3355 | | - * down forcibly. If that's the case we must not write this inode |
---|
3356 | | - * to disk, because the log record didn't make it to disk. |
---|
3357 | | - * |
---|
3358 | | - * We also have to remove the log item from the AIL in this case, |
---|
3359 | | - * as we wait for an empty AIL as part of the unmount process. |
---|
3360 | | - */ |
---|
3361 | | - if (XFS_FORCED_SHUTDOWN(mp)) { |
---|
3362 | | - error = -EIO; |
---|
3363 | | - goto abort_out; |
---|
3364 | | - } |
---|
3365 | | - |
---|
3366 | | - /* |
---|
3367 | | - * Get the buffer containing the on-disk inode. We are doing a try-lock |
---|
3368 | | - * operation here, so we may get an EAGAIN error. In that case, we |
---|
3369 | | - * simply want to return with the inode still dirty. |
---|
3370 | | - * |
---|
3371 | | - * If we get any other error, we effectively have a corruption situation |
---|
3372 | | - * and we cannot flush the inode, so we treat it the same as failing |
---|
3373 | | - * xfs_iflush_int(). |
---|
3374 | | - */ |
---|
3375 | | - error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK, |
---|
3376 | | - 0); |
---|
3377 | | - if (error == -EAGAIN) { |
---|
3378 | | - xfs_ifunlock(ip); |
---|
3379 | | - return error; |
---|
3380 | | - } |
---|
3381 | | - if (error) |
---|
3382 | | - goto corrupt_out; |
---|
3383 | | - |
---|
3384 | | - /* |
---|
3385 | | - * First flush out the inode that xfs_iflush was called with. |
---|
3386 | | - */ |
---|
3387 | | - error = xfs_iflush_int(ip, bp); |
---|
3388 | | - if (error) |
---|
3389 | | - goto corrupt_out; |
---|
3390 | | - |
---|
3391 | | - /* |
---|
3392 | | - * If the buffer is pinned then push on the log now so we won't |
---|
3393 | | - * get stuck waiting in the write for too long. |
---|
3394 | | - */ |
---|
3395 | | - if (xfs_buf_ispinned(bp)) |
---|
3396 | | - xfs_log_force(mp, 0); |
---|
3397 | | - |
---|
3398 | | - /* |
---|
3399 | | - * inode clustering: try to gather other inodes into this write |
---|
3400 | | - * |
---|
3401 | | - * Note: Any error during clustering will result in the filesystem |
---|
3402 | | - * being shut down and completion callbacks run on the cluster buffer. |
---|
3403 | | - * As we have already flushed and attached this inode to the buffer, |
---|
3404 | | - * it has already been aborted and released by xfs_iflush_cluster() and |
---|
3405 | | - * so we have no further error handling to do here. |
---|
3406 | | - */ |
---|
3407 | | - error = xfs_iflush_cluster(ip, bp); |
---|
3408 | | - if (error) |
---|
3409 | | - return error; |
---|
3410 | | - |
---|
3411 | | - *bpp = bp; |
---|
3412 | | - return 0; |
---|
3413 | | - |
---|
3414 | | -corrupt_out: |
---|
3415 | | - if (bp) |
---|
3416 | | - xfs_buf_relse(bp); |
---|
3417 | | - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
---|
3418 | | -abort_out: |
---|
3419 | | - /* abort the corrupt inode, as it was not attached to the buffer */ |
---|
3420 | | - xfs_iflush_abort(ip, false); |
---|
3421 | | - return error; |
---|
3422 | | -} |
---|
3423 | | - |
---|
3424 | | -/* |
---|
3425 | | - * If there are inline format data / attr forks attached to this inode, |
---|
3426 | | - * make sure they're not corrupt. |
---|
3427 | | - */ |
---|
3428 | | -bool |
---|
3429 | | -xfs_inode_verify_forks( |
---|
3430 | | - struct xfs_inode *ip) |
---|
3431 | | -{ |
---|
3432 | | - struct xfs_ifork *ifp; |
---|
3433 | | - xfs_failaddr_t fa; |
---|
3434 | | - |
---|
3435 | | - fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops); |
---|
3436 | | - if (fa) { |
---|
3437 | | - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
---|
3438 | | - xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", |
---|
3439 | | - ifp->if_u1.if_data, ifp->if_bytes, fa); |
---|
3440 | | - return false; |
---|
3441 | | - } |
---|
3442 | | - |
---|
3443 | | - fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops); |
---|
3444 | | - if (fa) { |
---|
3445 | | - ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); |
---|
3446 | | - xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", |
---|
3447 | | - ifp ? ifp->if_u1.if_data : NULL, |
---|
3448 | | - ifp ? ifp->if_bytes : 0, fa); |
---|
3449 | | - return false; |
---|
3450 | | - } |
---|
3451 | | - return true; |
---|
3452 | | -} |
---|
3453 | | - |
---|
3454 | | -STATIC int |
---|
3455 | | -xfs_iflush_int( |
---|
3456 | 3446 | struct xfs_inode *ip, |
---|
3457 | 3447 | struct xfs_buf *bp) |
---|
3458 | 3448 | { |
---|
3459 | 3449 | struct xfs_inode_log_item *iip = ip->i_itemp; |
---|
3460 | 3450 | struct xfs_dinode *dip; |
---|
3461 | 3451 | struct xfs_mount *mp = ip->i_mount; |
---|
| 3452 | + int error; |
---|
3462 | 3453 | |
---|
3463 | 3454 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); |
---|
3464 | | - ASSERT(xfs_isiflocked(ip)); |
---|
3465 | | - ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
---|
3466 | | - ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
---|
3467 | | - ASSERT(iip != NULL && iip->ili_fields != 0); |
---|
3468 | | - ASSERT(ip->i_d.di_version > 1); |
---|
| 3455 | + ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING)); |
---|
| 3456 | + ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE || |
---|
| 3457 | + ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
---|
| 3458 | + ASSERT(iip->ili_item.li_buf == bp); |
---|
3469 | 3459 | |
---|
3470 | | - /* set *dip = inode's place in the buffer */ |
---|
3471 | 3460 | dip = xfs_buf_offset(bp, ip->i_imap.im_boffset); |
---|
3472 | 3461 | |
---|
| 3462 | + /* |
---|
| 3463 | + * We don't flush the inode if any of the following checks fail, but we |
---|
| 3464 | + * do still update the log item and attach to the backing buffer as if |
---|
| 3465 | + * the flush happened. This is a formality to facilitate predictable |
---|
| 3466 | + * error handling as the caller will shutdown and fail the buffer. |
---|
| 3467 | + */ |
---|
| 3468 | + error = -EFSCORRUPTED; |
---|
3473 | 3469 | if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), |
---|
3474 | 3470 | mp, XFS_ERRTAG_IFLUSH_1)) { |
---|
3475 | 3471 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
---|
3476 | 3472 | "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT, |
---|
3477 | 3473 | __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); |
---|
3478 | | - goto corrupt_out; |
---|
| 3474 | + goto flush_out; |
---|
3479 | 3475 | } |
---|
3480 | 3476 | if (S_ISREG(VFS_I(ip)->i_mode)) { |
---|
3481 | 3477 | if (XFS_TEST_ERROR( |
---|
3482 | | - (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && |
---|
3483 | | - (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), |
---|
| 3478 | + ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && |
---|
| 3479 | + ip->i_df.if_format != XFS_DINODE_FMT_BTREE, |
---|
3484 | 3480 | mp, XFS_ERRTAG_IFLUSH_3)) { |
---|
3485 | 3481 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
---|
3486 | 3482 | "%s: Bad regular inode %Lu, ptr "PTR_FMT, |
---|
3487 | 3483 | __func__, ip->i_ino, ip); |
---|
3488 | | - goto corrupt_out; |
---|
| 3484 | + goto flush_out; |
---|
3489 | 3485 | } |
---|
3490 | 3486 | } else if (S_ISDIR(VFS_I(ip)->i_mode)) { |
---|
3491 | 3487 | if (XFS_TEST_ERROR( |
---|
3492 | | - (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && |
---|
3493 | | - (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && |
---|
3494 | | - (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), |
---|
| 3488 | + ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && |
---|
| 3489 | + ip->i_df.if_format != XFS_DINODE_FMT_BTREE && |
---|
| 3490 | + ip->i_df.if_format != XFS_DINODE_FMT_LOCAL, |
---|
3495 | 3491 | mp, XFS_ERRTAG_IFLUSH_4)) { |
---|
3496 | 3492 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
---|
3497 | 3493 | "%s: Bad directory inode %Lu, ptr "PTR_FMT, |
---|
3498 | 3494 | __func__, ip->i_ino, ip); |
---|
3499 | | - goto corrupt_out; |
---|
| 3495 | + goto flush_out; |
---|
3500 | 3496 | } |
---|
3501 | 3497 | } |
---|
3502 | | - if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > |
---|
| 3498 | + if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp) > |
---|
3503 | 3499 | ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) { |
---|
3504 | 3500 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
---|
3505 | 3501 | "%s: detected corrupt incore inode %Lu, " |
---|
3506 | 3502 | "total extents = %d, nblocks = %Ld, ptr "PTR_FMT, |
---|
3507 | 3503 | __func__, ip->i_ino, |
---|
3508 | | - ip->i_d.di_nextents + ip->i_d.di_anextents, |
---|
| 3504 | + ip->i_df.if_nextents + xfs_ifork_nextents(ip->i_afp), |
---|
3509 | 3505 | ip->i_d.di_nblocks, ip); |
---|
3510 | | - goto corrupt_out; |
---|
| 3506 | + goto flush_out; |
---|
3511 | 3507 | } |
---|
3512 | 3508 | if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, |
---|
3513 | 3509 | mp, XFS_ERRTAG_IFLUSH_6)) { |
---|
3514 | 3510 | xfs_alert_tag(mp, XFS_PTAG_IFLUSH, |
---|
3515 | 3511 | "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT, |
---|
3516 | 3512 | __func__, ip->i_ino, ip->i_d.di_forkoff, ip); |
---|
3517 | | - goto corrupt_out; |
---|
| 3513 | + goto flush_out; |
---|
3518 | 3514 | } |
---|
3519 | 3515 | |
---|
3520 | 3516 | /* |
---|
.. | .. |
---|
3526 | 3522 | * backwards compatibility with old kernels that predate logging all |
---|
3527 | 3523 | * inode changes. |
---|
3528 | 3524 | */ |
---|
3529 | | - if (ip->i_d.di_version < 3) |
---|
| 3525 | + if (!xfs_sb_version_has_v3inode(&mp->m_sb)) |
---|
3530 | 3526 | ip->i_d.di_flushiter++; |
---|
3531 | 3527 | |
---|
3532 | | - /* Check the inline fork data before we write out. */ |
---|
3533 | | - if (!xfs_inode_verify_forks(ip)) |
---|
3534 | | - goto corrupt_out; |
---|
| 3528 | + /* |
---|
| 3529 | + * If there are inline format data / attr forks attached to this inode, |
---|
| 3530 | + * make sure they are not corrupt. |
---|
| 3531 | + */ |
---|
| 3532 | + if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL && |
---|
| 3533 | + xfs_ifork_verify_local_data(ip)) |
---|
| 3534 | + goto flush_out; |
---|
| 3535 | + if (ip->i_afp && ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL && |
---|
| 3536 | + xfs_ifork_verify_local_attr(ip)) |
---|
| 3537 | + goto flush_out; |
---|
3535 | 3538 | |
---|
3536 | 3539 | /* |
---|
3537 | 3540 | * Copy the dirty parts of the inode into the on-disk inode. We always |
---|
.. | .. |
---|
3547 | 3550 | xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); |
---|
3548 | 3551 | if (XFS_IFORK_Q(ip)) |
---|
3549 | 3552 | xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); |
---|
3550 | | - xfs_inobp_check(mp, bp); |
---|
3551 | 3553 | |
---|
3552 | 3554 | /* |
---|
3553 | 3555 | * We've recorded everything logged in the inode, so we'd like to clear |
---|
.. | .. |
---|
3560 | 3562 | * |
---|
3561 | 3563 | * What we do is move the bits to the ili_last_fields field. When |
---|
3562 | 3564 | * logging the inode, these bits are moved back to the ili_fields field. |
---|
3563 | | - * In the xfs_iflush_done() routine we clear ili_last_fields, since we |
---|
3564 | | - * know that the information those bits represent is permanently on |
---|
| 3565 | + * In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since |
---|
| 3566 | + * we know that the information those bits represent is permanently on |
---|
3565 | 3567 | * disk. As long as the flush completes before the inode is logged |
---|
3566 | 3568 | * again, then both ili_fields and ili_last_fields will be cleared. |
---|
3567 | | - * |
---|
3568 | | - * We can play with the ili_fields bits here, because the inode lock |
---|
3569 | | - * must be held exclusively in order to set bits there and the flush |
---|
3570 | | - * lock protects the ili_last_fields bits. Set ili_logged so the flush |
---|
3571 | | - * done routine can tell whether or not to look in the AIL. Also, store |
---|
3572 | | - * the current LSN of the inode so that we can tell whether the item has |
---|
3573 | | - * moved in the AIL from xfs_iflush_done(). In order to read the lsn we |
---|
3574 | | - * need the AIL lock, because it is a 64 bit value that cannot be read |
---|
3575 | | - * atomically. |
---|
3576 | 3569 | */ |
---|
| 3570 | + error = 0; |
---|
| 3571 | +flush_out: |
---|
| 3572 | + spin_lock(&iip->ili_lock); |
---|
3577 | 3573 | iip->ili_last_fields = iip->ili_fields; |
---|
3578 | 3574 | iip->ili_fields = 0; |
---|
3579 | 3575 | iip->ili_fsync_fields = 0; |
---|
3580 | | - iip->ili_logged = 1; |
---|
| 3576 | + spin_unlock(&iip->ili_lock); |
---|
3581 | 3577 | |
---|
| 3578 | + /* |
---|
| 3579 | + * Store the current LSN of the inode so that we can tell whether the |
---|
| 3580 | + * item has moved in the AIL from xfs_buf_inode_iodone(). |
---|
| 3581 | + */ |
---|
3582 | 3582 | xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, |
---|
3583 | 3583 | &iip->ili_item.li_lsn); |
---|
3584 | 3584 | |
---|
3585 | | - /* |
---|
3586 | | - * Attach the function xfs_iflush_done to the inode's |
---|
3587 | | - * buffer. This will remove the inode from the AIL |
---|
3588 | | - * and unlock the inode's flush lock when the inode is |
---|
3589 | | - * completely written to disk. |
---|
3590 | | - */ |
---|
3591 | | - xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); |
---|
3592 | | - |
---|
3593 | 3585 | /* generate the checksum. */ |
---|
3594 | 3586 | xfs_dinode_calc_crc(mp, dip); |
---|
| 3587 | + return error; |
---|
| 3588 | +} |
---|
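The flush_out handoff at the bottom of xfs_iflush() is the heart of the inode writeback bookkeeping: under ili_lock, the accumulated dirty mask moves from ili_fields to ili_last_fields; buffer I/O completion later retires ili_last_fields, and any re-log that happens while the flush is in flight simply dirties ili_fields again. A compact model of that two-stage handoff, with toy masks instead of XFS_ILOG_* flags; struct toy_log_item and both helpers are illustrative only.

#include <pthread.h>
#include <stdio.h>

struct toy_log_item {
	pthread_spinlock_t lock;	/* models iip->ili_lock */
	unsigned fields;		/* dirtied since flush started */
	unsigned last_fields;		/* owned by the in-flight flush */
};

static void flush_start(struct toy_log_item *iip)
{
	pthread_spin_lock(&iip->lock);
	iip->last_fields = iip->fields;	/* hand the dirty bits to the I/O */
	iip->fields = 0;
	pthread_spin_unlock(&iip->lock);
}

static int flush_done(struct toy_log_item *iip)
{
	int clean;

	pthread_spin_lock(&iip->lock);
	iip->last_fields = 0;		/* those bits are now on disk */
	clean = (iip->fields == 0);	/* false if relogged mid-flight */
	pthread_spin_unlock(&iip->lock);
	return clean;
}

int main(void)
{
	struct toy_log_item iip = { .fields = 0x3 };

	pthread_spin_init(&iip.lock, PTHREAD_PROCESS_PRIVATE);
	flush_start(&iip);
	iip.fields |= 0x4;	/* inode relogged while flush in flight */
	printf("clean after completion: %d\n", flush_done(&iip));
	return 0;
}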
3595 | 3589 | |
---|
3596 | | - ASSERT(!list_empty(&bp->b_li_list)); |
---|
3597 | | - ASSERT(bp->b_iodone != NULL); |
---|
| 3590 | +/* |
---|
| 3591 | + * Non-blocking flush of dirty inode metadata into the backing buffer. |
---|
| 3592 | + * |
---|
| 3593 | + * The caller must have a reference to the inode and hold the cluster buffer |
---|
| 3594 | + * locked. The function will walk across all the inodes on the cluster buffer it |
---|
| 3595 | + * can find and lock without blocking, and flush them to the cluster buffer. |
---|
| 3596 | + * |
---|
| 3597 | + * On successful flushing of at least one inode, the caller must write out the |
---|
| 3598 | + * buffer and release it. If no inodes are flushed, -EAGAIN will be returned and |
---|
| 3599 | + * the caller needs to release the buffer. On failure, the filesystem will be |
---|
| 3600 | + * shut down, the buffer will have been unlocked and released, and EFSCORRUPTED |
---|
| 3601 | + * will be returned. |
---|
| 3602 | + */ |
---|
| 3603 | +int |
---|
| 3604 | +xfs_iflush_cluster( |
---|
| 3605 | + struct xfs_buf *bp) |
---|
| 3606 | +{ |
---|
| 3607 | + struct xfs_mount *mp = bp->b_mount; |
---|
| 3608 | + struct xfs_log_item *lip, *n; |
---|
| 3609 | + struct xfs_inode *ip; |
---|
| 3610 | + struct xfs_inode_log_item *iip; |
---|
| 3611 | + int clcount = 0; |
---|
| 3612 | + int error = 0; |
---|
| 3613 | + |
---|
| 3614 | + /* |
---|
| 3615 | + * We must use the safe variant here as on shutdown xfs_iflush_abort() |
---|
| 3616 | + * can remove itself from the list. |
---|
| 3617 | + */ |
---|
| 3618 | + list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) { |
---|
| 3619 | + iip = (struct xfs_inode_log_item *)lip; |
---|
| 3620 | + ip = iip->ili_inode; |
---|
| 3621 | + |
---|
| 3622 | + /* |
---|
| 3623 | + * Quick and dirty check to avoid locks if possible. |
---|
| 3624 | + */ |
---|
| 3625 | + if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) |
---|
| 3626 | + continue; |
---|
| 3627 | + if (xfs_ipincount(ip)) |
---|
| 3628 | + continue; |
---|
| 3629 | + |
---|
| 3630 | + /* |
---|
| 3631 | + * The inode is still attached to the buffer, which means it is |
---|
| 3632 | + * dirty but reclaim might try to grab it. Check carefully for |
---|
| 3633 | + * that, and grab the ilock while still holding the i_flags_lock |
---|
| 3634 | + * to guarantee reclaim will not be able to reclaim this inode |
---|
| 3635 | + * once we drop the i_flags_lock. |
---|
| 3636 | + */ |
---|
| 3637 | + spin_lock(&ip->i_flags_lock); |
---|
| 3638 | + ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE)); |
---|
| 3639 | + if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) { |
---|
| 3640 | + spin_unlock(&ip->i_flags_lock); |
---|
| 3641 | + continue; |
---|
| 3642 | + } |
---|
| 3643 | + |
---|
| 3644 | + /* |
---|
| 3645 | + * ILOCK will pin the inode against reclaim and prevent |
---|
| 3646 | + * concurrent transactions modifying the inode while we are |
---|
| 3647 | + * flushing the inode. If we get the lock, set the flushing |
---|
| 3648 | + * state before we drop the i_flags_lock. |
---|
| 3649 | + */ |
---|
| 3650 | + if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { |
---|
| 3651 | + spin_unlock(&ip->i_flags_lock); |
---|
| 3652 | + continue; |
---|
| 3653 | + } |
---|
| 3654 | + __xfs_iflags_set(ip, XFS_IFLUSHING); |
---|
| 3655 | + spin_unlock(&ip->i_flags_lock); |
---|
| 3656 | + |
---|
| 3657 | + /* |
---|
| 3658 | + * Abort flushing this inode if we are shut down because the |
---|
| 3659 | + * inode may not currently be in the AIL. This can occur when |
---|
| 3660 | + * log I/O failure unpins the inode without inserting into the |
---|
| 3661 | + * AIL, leaving a dirty/unpinned inode attached to the buffer |
---|
| 3662 | + * that otherwise looks like it should be flushed. |
---|
| 3663 | + */ |
---|
| 3664 | + if (XFS_FORCED_SHUTDOWN(mp)) { |
---|
| 3665 | + xfs_iunpin_wait(ip); |
---|
| 3666 | + xfs_iflush_abort(ip); |
---|
| 3667 | + xfs_iunlock(ip, XFS_ILOCK_SHARED); |
---|
| 3668 | + error = -EIO; |
---|
| 3669 | + continue; |
---|
| 3670 | + } |
---|
| 3671 | + |
---|
| 3672 | + /* don't block waiting on a log force to unpin dirty inodes */ |
---|
| 3673 | + if (xfs_ipincount(ip)) { |
---|
| 3674 | + xfs_iflags_clear(ip, XFS_IFLUSHING); |
---|
| 3675 | + xfs_iunlock(ip, XFS_ILOCK_SHARED); |
---|
| 3676 | + continue; |
---|
| 3677 | + } |
---|
| 3678 | + |
---|
| 3679 | + if (!xfs_inode_clean(ip)) |
---|
| 3680 | + error = xfs_iflush(ip, bp); |
---|
| 3681 | + else |
---|
| 3682 | + xfs_iflags_clear(ip, XFS_IFLUSHING); |
---|
| 3683 | + xfs_iunlock(ip, XFS_ILOCK_SHARED); |
---|
| 3684 | + if (error) |
---|
| 3685 | + break; |
---|
| 3686 | + clcount++; |
---|
| 3687 | + } |
---|
| 3688 | + |
---|
| 3689 | + if (error) { |
---|
| 3690 | + bp->b_flags |= XBF_ASYNC; |
---|
| 3691 | + xfs_buf_ioend_fail(bp); |
---|
| 3692 | + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
---|
| 3693 | + return error; |
---|
| 3694 | + } |
---|
| 3695 | + |
---|
| 3696 | + if (!clcount) |
---|
| 3697 | + return -EAGAIN; |
---|
| 3698 | + |
---|
| 3699 | + XFS_STATS_INC(mp, xs_icluster_flushcnt); |
---|
| 3700 | + XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount); |
---|
3598 | 3701 | return 0; |
---|
3599 | 3702 | |
---|
3600 | | -corrupt_out: |
---|
3601 | | - return -EFSCORRUPTED; |
---|
3602 | 3703 | } |
---|
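The heart of the flush loop above is a lock-ordering idiom: test the inode state flags and take the ILOCK without sleeping, all while holding i_flags_lock, and only then publish XFS_IFLUSHING so that reclaim can never win the race once the spinlock drops. Below is a minimal userspace sketch of that same idiom, using hypothetical names (struct obj, OBJ_RECLAIM, OBJ_FLUSHING) and pthread primitives rather than the kernel API; it illustrates the ordering guarantee, not the XFS implementation.

```c
#include <stdbool.h>
#include <pthread.h>

struct obj {
	pthread_spinlock_t	flags_lock;	/* plays the role of ip->i_flags_lock */
	pthread_mutex_t		lock;		/* plays the role of the XFS ILOCK */
	unsigned int		flags;
};

#define OBJ_RECLAIM	(1u << 0)
#define OBJ_FLUSHING	(1u << 1)

/*
 * Returns true if the caller now owns o->lock with OBJ_FLUSHING set,
 * mirroring the "check flags, trylock, publish flushing state" sequence.
 */
static bool obj_start_flush(struct obj *o)
{
	pthread_spin_lock(&o->flags_lock);
	if (o->flags & (OBJ_RECLAIM | OBJ_FLUSHING)) {
		pthread_spin_unlock(&o->flags_lock);
		return false;
	}
	/* Trylock only: we must not sleep while holding the spinlock. */
	if (pthread_mutex_trylock(&o->lock) != 0) {
		pthread_spin_unlock(&o->flags_lock);
		return false;
	}
	/* Publish the flushing state before the spinlock is dropped. */
	o->flags |= OBJ_FLUSHING;
	pthread_spin_unlock(&o->flags_lock);
	return true;
}
```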
3603 | 3704 | |
---|
3604 | 3705 | /* Release an inode. */ |
---|
.. | .. |
---|
3609 | 3710 | trace_xfs_irele(ip, _RET_IP_); |
---|
3610 | 3711 | iput(VFS_I(ip)); |
---|
3611 | 3712 | } |
---|
| 3713 | + |
---|
| 3714 | +/* |
---|
| 3715 | + * Ensure all committed transactions touching the inode are written to the log. |
---|
| 3716 | + */ |
---|
| 3717 | +int |
---|
| 3718 | +xfs_log_force_inode( |
---|
| 3719 | + struct xfs_inode *ip) |
---|
| 3720 | +{ |
---|
| 3721 | + xfs_csn_t seq = 0; |
---|
| 3722 | + |
---|
| 3723 | + xfs_ilock(ip, XFS_ILOCK_SHARED); |
---|
| 3724 | + if (xfs_ipincount(ip)) |
---|
| 3725 | + seq = ip->i_itemp->ili_commit_seq; |
---|
| 3726 | + xfs_iunlock(ip, XFS_ILOCK_SHARED); |
---|
| 3727 | + |
---|
| 3728 | + if (!seq) |
---|
| 3729 | + return 0; |
---|
| 3730 | + return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL); |
---|
| 3731 | +} |
---|
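xfs_log_force_inode() samples the commit sequence under the cheap shared ILOCK and performs the expensive log force only after dropping it. Here is a compilable sketch of that sample-then-act shape; the names (struct obj, obj_force_journal, journal_force_to) are invented for illustration and are not the XFS API.

```c
#include <stdint.h>
#include <pthread.h>

struct obj {
	pthread_rwlock_t lock;		/* plays the role of the shared ILOCK */
	int		 pinned;	/* like xfs_ipincount() != 0 */
	uint64_t	 commit_seq;	/* like ili_commit_seq */
};

/* Stand-in for xfs_log_force_seq(): flush the journal up to seq. */
static int journal_force_to(uint64_t seq)
{
	(void)seq;
	return 0;
}

static int obj_force_journal(struct obj *o)
{
	uint64_t seq = 0;

	/* Snapshot under the shared lock; never force while holding it. */
	pthread_rwlock_rdlock(&o->lock);
	if (o->pinned)
		seq = o->commit_seq;
	pthread_rwlock_unlock(&o->lock);

	if (!seq)
		return 0;	/* nothing committed and still unwritten */
	return journal_force_to(seq);
}
```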
| 3732 | + |
---|
| 3733 | +/* |
---|
| 3734 | + * Grab the exclusive iolock for a data copy from src to dest, making sure to |
---|
| 3735 | + * abide by the VFS locking order (lowest pointer value goes first) and to |
---|
| 3736 | + * break the layout leases before proceeding. The loop is needed because we |
---|
| 3737 | + * cannot call the blocking break_layout() with the iolocks held, and |
---|
| 3738 | + * therefore have to back out both locks. |
---|
| 3739 | + */ |
---|
| 3740 | +static int |
---|
| 3741 | +xfs_iolock_two_inodes_and_break_layout( |
---|
| 3742 | + struct inode *src, |
---|
| 3743 | + struct inode *dest) |
---|
| 3744 | +{ |
---|
| 3745 | + int error; |
---|
| 3746 | + |
---|
| 3747 | + if (src > dest) |
---|
| 3748 | + swap(src, dest); |
---|
| 3749 | + |
---|
| 3750 | +retry: |
---|
| 3751 | + /* Wait to break both inodes' layouts before we start locking. */ |
---|
| 3752 | + error = break_layout(src, true); |
---|
| 3753 | + if (error) |
---|
| 3754 | + return error; |
---|
| 3755 | + if (src != dest) { |
---|
| 3756 | + error = break_layout(dest, true); |
---|
| 3757 | + if (error) |
---|
| 3758 | + return error; |
---|
| 3759 | + } |
---|
| 3760 | + |
---|
| 3761 | + /* Lock one inode and make sure nobody got in and leased it. */ |
---|
| 3762 | + inode_lock(src); |
---|
| 3763 | + error = break_layout(src, false); |
---|
| 3764 | + if (error) { |
---|
| 3765 | + inode_unlock(src); |
---|
| 3766 | + if (error == -EWOULDBLOCK) |
---|
| 3767 | + goto retry; |
---|
| 3768 | + return error; |
---|
| 3769 | + } |
---|
| 3770 | + |
---|
| 3771 | + if (src == dest) |
---|
| 3772 | + return 0; |
---|
| 3773 | + |
---|
| 3774 | + /* Lock the other inode and make sure nobody got in and leased it. */ |
---|
| 3775 | + inode_lock_nested(dest, I_MUTEX_NONDIR2); |
---|
| 3776 | + error = break_layout(dest, false); |
---|
| 3777 | + if (error) { |
---|
| 3778 | + inode_unlock(src); |
---|
| 3779 | + inode_unlock(dest); |
---|
| 3780 | + if (error == -EWOULDBLOCK) |
---|
| 3781 | + goto retry; |
---|
| 3782 | + return error; |
---|
| 3783 | + } |
---|
| 3784 | + |
---|
| 3785 | + return 0; |
---|
| 3786 | +} |
---|
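The retry loop above exists because a blocking lease break cannot be done with the iolocks held: on contention both locks are backed out and the whole sequence restarts from the top. A self-contained sketch of that pattern follows, with hypothetical stand-ins (res_wait for the blocking break_layout() call, res_revalidate for the non-blocking one); it mirrors the shape of the function, not its details.

```c
#include <errno.h>
#include <pthread.h>

struct res {
	pthread_mutex_t	lock;
	int		leased;	/* stands in for an active layout lease */
};

/* Stand-in for the non-blocking break_layout(): fail rather than sleep. */
static int res_revalidate(struct res *r)
{
	return r->leased ? -EWOULDBLOCK : 0;
}

/* Stand-in for the blocking break_layout(): "waits" until the lease is gone. */
static void res_wait(struct res *r)
{
	r->leased = 0;
}

static int lock_two_resources(struct res *a, struct res *b)
{
	int error;

	if (a > b) {			/* lowest pointer value locks first */
		struct res *tmp = a;

		a = b;
		b = tmp;
	}
retry:
	res_wait(a);
	if (a != b)
		res_wait(b);

	pthread_mutex_lock(&a->lock);
	error = res_revalidate(a);
	if (error) {
		pthread_mutex_unlock(&a->lock);
		if (error == -EWOULDBLOCK)
			goto retry;
		return error;
	}
	if (a == b)
		return 0;

	pthread_mutex_lock(&b->lock);
	error = res_revalidate(b);
	if (error) {
		pthread_mutex_unlock(&b->lock);
		pthread_mutex_unlock(&a->lock);
		if (error == -EWOULDBLOCK)
			goto retry;
	}
	return error;
}
```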
| 3787 | + |
---|
| 3788 | +/* |
---|
| 3789 | + * Lock two inodes so that userspace cannot initiate I/O via file syscalls or |
---|
| 3790 | + * mmap activity. |
---|
| 3791 | + */ |
---|
| 3792 | +int |
---|
| 3793 | +xfs_ilock2_io_mmap( |
---|
| 3794 | + struct xfs_inode *ip1, |
---|
| 3795 | + struct xfs_inode *ip2) |
---|
| 3796 | +{ |
---|
| 3797 | + int ret; |
---|
| 3798 | + |
---|
| 3799 | + ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2)); |
---|
| 3800 | + if (ret) |
---|
| 3801 | + return ret; |
---|
| 3802 | + if (ip1 == ip2) |
---|
| 3803 | + xfs_ilock(ip1, XFS_MMAPLOCK_EXCL); |
---|
| 3804 | + else |
---|
| 3805 | + xfs_lock_two_inodes(ip1, XFS_MMAPLOCK_EXCL, |
---|
| 3806 | + ip2, XFS_MMAPLOCK_EXCL); |
---|
| 3807 | + return 0; |
---|
| 3808 | +} |
---|
| 3809 | + |
---|
| 3810 | +/* Unlock both inodes to allow I/O and mmap activity. */ |
---|
| 3811 | +void |
---|
| 3812 | +xfs_iunlock2_io_mmap( |
---|
| 3813 | + struct xfs_inode *ip1, |
---|
| 3814 | + struct xfs_inode *ip2) |
---|
| 3815 | +{ |
---|
| 3816 | + bool same_inode = (ip1 == ip2); |
---|
| 3817 | + |
---|
| 3818 | + xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); |
---|
| 3819 | + if (!same_inode) |
---|
| 3820 | + xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL); |
---|
| 3821 | + inode_unlock(VFS_I(ip2)); |
---|
| 3822 | + if (!same_inode) |
---|
| 3823 | + inode_unlock(VFS_I(ip1)); |
---|
| 3824 | +} |
---|
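For completeness, the intended calling convention pairs the two helpers around a section that must exclude both file I/O and page faults on the inodes (in mainline, the remap/reflink preparation path uses them this way). A hedged usage sketch, where the caller name example_remap_range is invented for illustration:

```c
/* Hypothetical caller: the name and body are illustrative only. */
static int
example_remap_range(
	struct xfs_inode	*src,
	struct xfs_inode	*dest)
{
	int			error;

	error = xfs_ilock2_io_mmap(src, dest);
	if (error)
		return error;

	/* ... operate on both inodes with I/O and mmap activity excluded ... */

	xfs_iunlock2_io_mmap(src, dest);
	return 0;
}
```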