| .. | .. | 
|---|
| 9 | 9 | #include "xfs_format.h" | 
|---|
| 10 | 10 | #include "xfs_log_format.h" | 
|---|
| 11 | 11 | #include "xfs_trans_resv.h" | 
|---|
| 12 |  | -#include "xfs_bit.h" | 
|---|
| 13 | 12 | #include "xfs_mount.h" | 
|---|
|  | 13 | +#include "xfs_inode.h" | 
|---|
| 14 | 14 | #include "xfs_trans.h" | 
|---|
| 15 | 15 | #include "xfs_trans_priv.h" | 
|---|
| 16 |  | -#include "xfs_error.h" | 
|---|
| 17 | 16 | #include "xfs_icreate_item.h" | 
|---|
| 18 | 17 | #include "xfs_log.h" | 
|---|
|  | 18 | +#include "xfs_log_priv.h" | 
|---|
|  | 19 | +#include "xfs_log_recover.h" | 
|---|
|  | 20 | +#include "xfs_ialloc.h" | 
|---|
|  | 21 | +#include "xfs_trace.h" | 
|---|
| 19 | 22 |  | 
|---|
| 20 | 23 | kmem_zone_t	*xfs_icreate_zone;		/* inode create item zone */ | 
|---|
| 21 | 24 |  | 
|---|
| .. | .. | 
|---|
| 56 | 59 | sizeof(struct xfs_icreate_log)); | 
|---|
| 57 | 60 | } | 
|---|
| 58 | 61 |  | 
|---|
| 59 |  | - | 
|---|
| 60 |  | -/* Pinning has no meaning for the create item, so just return. */ | 
|---|
| 61 | 62 | STATIC void | 
|---|
| 62 |  | -xfs_icreate_item_pin( | 
|---|
|  | 63 | +xfs_icreate_item_release( | 
|---|
| 63 | 64 | struct xfs_log_item	*lip) | 
|---|
| 64 | 65 | { | 
|---|
|  | 66 | +	kmem_cache_free(xfs_icreate_zone, ICR_ITEM(lip)); | 
|---|
| 65 | 67 | } | 
|---|
| 66 | 68 |  | 
|---|
| 67 |  | - | 
|---|
| 68 |  | -/* pinning has no meaning for the create item, so just return. */ | 
|---|
| 69 |  | -STATIC void | 
|---|
| 70 |  | -xfs_icreate_item_unpin( | 
|---|
| 71 |  | -	struct xfs_log_item	*lip, | 
|---|
| 72 |  | -	int			remove) | 
|---|
| 73 |  | -{ | 
|---|
| 74 |  | -} | 
|---|
| 75 |  | - | 
|---|
| 76 |  | -STATIC void | 
|---|
| 77 |  | -xfs_icreate_item_unlock( | 
|---|
| 78 |  | -	struct xfs_log_item	*lip) | 
|---|
| 79 |  | -{ | 
|---|
| 80 |  | -	struct xfs_icreate_item	*icp = ICR_ITEM(lip); | 
|---|
| 81 |  | - | 
|---|
| 82 |  | -	if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) | 
|---|
| 83 |  | -		kmem_zone_free(xfs_icreate_zone, icp); | 
|---|
| 84 |  | -	return; | 
|---|
| 85 |  | -} | 
|---|
| 86 |  | - | 
|---|
| 87 |  | -/* | 
|---|
| 88 |  | - * Because we have ordered buffers being tracked in the AIL for the inode | 
|---|
| 89 |  | - * creation, we don't need the create item after this. Hence we can free | 
|---|
| 90 |  | - * the log item and return -1 to tell the caller we're done with the item. | 
|---|
| 91 |  | - */ | 
|---|
| 92 |  | -STATIC xfs_lsn_t | 
|---|
| 93 |  | -xfs_icreate_item_committed( | 
|---|
| 94 |  | -	struct xfs_log_item	*lip, | 
|---|
| 95 |  | -	xfs_lsn_t		lsn) | 
|---|
| 96 |  | -{ | 
|---|
| 97 |  | -	struct xfs_icreate_item	*icp = ICR_ITEM(lip); | 
|---|
| 98 |  | - | 
|---|
| 99 |  | -	kmem_zone_free(xfs_icreate_zone, icp); | 
|---|
| 100 |  | -	return (xfs_lsn_t)-1; | 
|---|
| 101 |  | -} | 
|---|
| 102 |  | - | 
|---|
| 103 |  | -/* item can never get into the AIL */ | 
|---|
| 104 |  | -STATIC uint | 
|---|
| 105 |  | -xfs_icreate_item_push( | 
|---|
| 106 |  | -	struct xfs_log_item	*lip, | 
|---|
| 107 |  | -	struct list_head	*buffer_list) | 
|---|
| 108 |  | -{ | 
|---|
| 109 |  | -	ASSERT(0); | 
|---|
| 110 |  | -	return XFS_ITEM_SUCCESS; | 
|---|
| 111 |  | -} | 
|---|
| 112 |  | - | 
|---|
| 113 |  | -/* Ordered buffers do the dependency tracking here, so this does nothing. */ | 
|---|
| 114 |  | -STATIC void | 
|---|
| 115 |  | -xfs_icreate_item_committing( | 
|---|
| 116 |  | -	struct xfs_log_item	*lip, | 
|---|
| 117 |  | -	xfs_lsn_t		lsn) | 
|---|
| 118 |  | -{ | 
|---|
| 119 |  | -} | 
|---|
| 120 |  | - | 
|---|
| 121 |  | -/* | 
|---|
| 122 |  | - * This is the ops vector shared by all buf log items. | 
|---|
| 123 |  | - */ | 
|---|
| 124 | 69 | static const struct xfs_item_ops xfs_icreate_item_ops = { | 
|---|
|  | 70 | +	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED, | 
|---|
| 125 | 71 | .iop_size	= xfs_icreate_item_size, | 
|---|
| 126 | 72 | .iop_format	= xfs_icreate_item_format, | 
|---|
| 127 |  | -	.iop_pin	= xfs_icreate_item_pin, | 
|---|
| 128 |  | -	.iop_unpin	= xfs_icreate_item_unpin, | 
|---|
| 129 |  | -	.iop_push	= xfs_icreate_item_push, | 
|---|
| 130 |  | -	.iop_unlock	= xfs_icreate_item_unlock, | 
|---|
| 131 |  | -	.iop_committed	= xfs_icreate_item_committed, | 
|---|
| 132 |  | -	.iop_committing = xfs_icreate_item_committing, | 
|---|
|  | 73 | +	.iop_release	= xfs_icreate_item_release, | 
|---|
| 133 | 74 | }; | 
|---|
| 134 | 75 |  | 
|---|
| 135 | 76 |  | 
|---|
| .. | .. | 
|---|
| 156 | 97 | { | 
|---|
| 157 | 98 | struct xfs_icreate_item	*icp; | 
|---|
| 158 | 99 |  | 
|---|
| 159 |  | -	icp = kmem_zone_zalloc(xfs_icreate_zone, KM_SLEEP); | 
|---|
|  | 100 | +	icp = kmem_cache_zalloc(xfs_icreate_zone, GFP_KERNEL | __GFP_NOFAIL); | 
|---|
| 160 | 101 |  | 
|---|
| 161 | 102 | xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, | 
|---|
| 162 | 103 | &xfs_icreate_item_ops); | 
|---|
| .. | .. | 
|---|
| 174 | 115 | tp->t_flags |= XFS_TRANS_DIRTY; | 
|---|
| 175 | 116 | set_bit(XFS_LI_DIRTY, &icp->ic_item.li_flags); | 
|---|
| 176 | 117 | } | 
|---|
|  | 118 | + | 
|---|
|  | 119 | +static enum xlog_recover_reorder | 
|---|
|  | 120 | +xlog_recover_icreate_reorder( | 
|---|
|  | 121 | +		struct xlog_recover_item *item) | 
|---|
|  | 122 | +{ | 
|---|
|  | 123 | +	/* | 
|---|
|  | 124 | +	 * Inode allocation buffers must be replayed before subsequent inode | 
|---|
|  | 125 | +	 * items try to modify those buffers.  ICREATE items are the logical | 
|---|
|  | 126 | +	 * equivalent of logging a newly initialized inode buffer, so recover | 
|---|
|  | 127 | +	 * these at the same time that we recover logged buffers. | 
|---|
|  | 128 | +	 */ | 
|---|
|  | 129 | +	return XLOG_REORDER_BUFFER_LIST; | 
|---|
|  | 130 | +} | 
|---|
|  | 131 | + | 
|---|
|  | 132 | +/* | 
|---|
|  | 133 | + * This routine is called when an inode create format structure is found in a | 
|---|
|  | 134 | + * committed transaction in the log.  Its purpose is to initialise the inodes | 
|---|
|  | 135 | + * being allocated on disk. This requires us to get inode cluster buffers that | 
|---|
|  | 136 | + * match the range to be initialised, stamped with inode templates and written | 
|---|
|  | 137 | + * by delayed write so that subsequent modifications will hit the cached buffer | 
|---|
|  | 138 | + * and only need writing out at the end of recovery. | 
|---|
|  | 139 | + */ | 
|---|
|  | 140 | +STATIC int | 
|---|
|  | 141 | +xlog_recover_icreate_commit_pass2( | 
|---|
|  | 142 | +	struct xlog			*log, | 
|---|
|  | 143 | +	struct list_head		*buffer_list, | 
|---|
|  | 144 | +	struct xlog_recover_item	*item, | 
|---|
|  | 145 | +	xfs_lsn_t			lsn) | 
|---|
|  | 146 | +{ | 
|---|
|  | 147 | +	struct xfs_mount		*mp = log->l_mp; | 
|---|
|  | 148 | +	struct xfs_icreate_log		*icl; | 
|---|
|  | 149 | +	struct xfs_ino_geometry		*igeo = M_IGEO(mp); | 
|---|
|  | 150 | +	xfs_agnumber_t			agno; | 
|---|
|  | 151 | +	xfs_agblock_t			agbno; | 
|---|
|  | 152 | +	unsigned int			count; | 
|---|
|  | 153 | +	unsigned int			isize; | 
|---|
|  | 154 | +	xfs_agblock_t			length; | 
|---|
|  | 155 | +	int				bb_per_cluster; | 
|---|
|  | 156 | +	int				cancel_count; | 
|---|
|  | 157 | +	int				nbufs; | 
|---|
|  | 158 | +	int				i; | 
|---|
|  | 159 | + | 
|---|
|  | 160 | +	icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; | 
|---|
|  | 161 | +	if (icl->icl_type != XFS_LI_ICREATE) { | 
|---|
|  | 162 | +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); | 
|---|
|  | 163 | +		return -EINVAL; | 
|---|
|  | 164 | +	} | 
|---|
|  | 165 | + | 
|---|
|  | 166 | +	if (icl->icl_size != 1) { | 
|---|
|  | 167 | +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); | 
|---|
|  | 168 | +		return -EINVAL; | 
|---|
|  | 169 | +	} | 
|---|
|  | 170 | + | 
|---|
|  | 171 | +	agno = be32_to_cpu(icl->icl_ag); | 
|---|
|  | 172 | +	if (agno >= mp->m_sb.sb_agcount) { | 
|---|
|  | 173 | +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); | 
|---|
|  | 174 | +		return -EINVAL; | 
|---|
|  | 175 | +	} | 
|---|
|  | 176 | +	agbno = be32_to_cpu(icl->icl_agbno); | 
|---|
|  | 177 | +	if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { | 
|---|
|  | 178 | +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); | 
|---|
|  | 179 | +		return -EINVAL; | 
|---|
|  | 180 | +	} | 
|---|
|  | 181 | +	isize = be32_to_cpu(icl->icl_isize); | 
|---|
|  | 182 | +	if (isize != mp->m_sb.sb_inodesize) { | 
|---|
|  | 183 | +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); | 
|---|
|  | 184 | +		return -EINVAL; | 
|---|
|  | 185 | +	} | 
|---|
|  | 186 | +	count = be32_to_cpu(icl->icl_count); | 
|---|
|  | 187 | +	if (!count) { | 
|---|
|  | 188 | +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); | 
|---|
|  | 189 | +		return -EINVAL; | 
|---|
|  | 190 | +	} | 
|---|
|  | 191 | +	length = be32_to_cpu(icl->icl_length); | 
|---|
|  | 192 | +	if (!length || length >= mp->m_sb.sb_agblocks) { | 
|---|
|  | 193 | +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); | 
|---|
|  | 194 | +		return -EINVAL; | 
|---|
|  | 195 | +	} | 
|---|
|  | 196 | + | 
|---|
|  | 197 | +	/* | 
|---|
|  | 198 | +	 * The inode chunk is either full or sparse and we only support | 
|---|
|  | 199 | +	 * m_ino_geo.ialloc_min_blks sized sparse allocations at this time. | 
|---|
|  | 200 | +	 */ | 
|---|
|  | 201 | +	if (length != igeo->ialloc_blks && | 
|---|
|  | 202 | +	    length != igeo->ialloc_min_blks) { | 
|---|
|  | 203 | +		xfs_warn(log->l_mp, | 
|---|
|  | 204 | +			 "%s: unsupported chunk length", __FUNCTION__); | 
|---|
|  | 205 | +		return -EINVAL; | 
|---|
|  | 206 | +	} | 
|---|
|  | 207 | + | 
|---|
|  | 208 | +	/* verify inode count is consistent with extent length */ | 
|---|
|  | 209 | +	if ((count >> mp->m_sb.sb_inopblog) != length) { | 
|---|
|  | 210 | +		xfs_warn(log->l_mp, | 
|---|
|  | 211 | +			 "%s: inconsistent inode count and chunk length", | 
|---|
|  | 212 | +			 __FUNCTION__); | 
|---|
|  | 213 | +		return -EINVAL; | 
|---|
|  | 214 | +	} | 
|---|
|  | 215 | + | 
|---|
|  | 216 | +	/* | 
|---|
|  | 217 | +	 * The icreate transaction can cover multiple cluster buffers and these | 
|---|
|  | 218 | +	 * buffers could have been freed and reused. Check the individual | 
|---|
|  | 219 | +	 * buffers for cancellation so we don't overwrite anything written after | 
|---|
|  | 220 | +	 * a cancellation. | 
|---|
|  | 221 | +	 */ | 
|---|
|  | 222 | +	bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster); | 
|---|
|  | 223 | +	nbufs = length / igeo->blocks_per_cluster; | 
|---|
|  | 224 | +	for (i = 0, cancel_count = 0; i < nbufs; i++) { | 
|---|
|  | 225 | +		xfs_daddr_t	daddr; | 
|---|
|  | 226 | + | 
|---|
|  | 227 | +		daddr = XFS_AGB_TO_DADDR(mp, agno, | 
|---|
|  | 228 | +				agbno + i * igeo->blocks_per_cluster); | 
|---|
|  | 229 | +		if (xlog_is_buffer_cancelled(log, daddr, bb_per_cluster)) | 
|---|
|  | 230 | +			cancel_count++; | 
|---|
|  | 231 | +	} | 
|---|
|  | 232 | + | 
|---|
|  | 233 | +	/* | 
|---|
|  | 234 | +	 * We currently only use icreate for a single allocation at a time. This | 
|---|
|  | 235 | +	 * means we should expect either all or none of the buffers to be | 
|---|
|  | 236 | +	 * cancelled. Be conservative and skip replay if at least one buffer is | 
|---|
|  | 237 | +	 * cancelled, but warn the user that something is awry if the buffers | 
|---|
|  | 238 | +	 * are not consistent. | 
|---|
|  | 239 | +	 * | 
|---|
|  | 240 | +	 * XXX: This must be refined to only skip cancelled clusters once we use | 
|---|
|  | 241 | +	 * icreate for multiple chunk allocations. | 
|---|
|  | 242 | +	 */ | 
|---|
|  | 243 | +	ASSERT(!cancel_count || cancel_count == nbufs); | 
|---|
|  | 244 | +	if (cancel_count) { | 
|---|
|  | 245 | +		if (cancel_count != nbufs) | 
|---|
|  | 246 | +			xfs_warn(mp, | 
|---|
|  | 247 | +	"WARNING: partial inode chunk cancellation, skipped icreate."); | 
|---|
|  | 248 | +		trace_xfs_log_recover_icreate_cancel(log, icl); | 
|---|
|  | 249 | +		return 0; | 
|---|
|  | 250 | +	} | 
|---|
|  | 251 | + | 
|---|
|  | 252 | +	trace_xfs_log_recover_icreate_recover(log, icl); | 
|---|
|  | 253 | +	return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, | 
|---|
|  | 254 | +				     length, be32_to_cpu(icl->icl_gen)); | 
|---|
|  | 255 | +} | 
|---|
|  | 256 | + | 
|---|
|  | 257 | +const struct xlog_recover_item_ops xlog_icreate_item_ops = { | 
|---|
|  | 258 | +	.item_type		= XFS_LI_ICREATE, | 
|---|
|  | 259 | +	.reorder		= xlog_recover_icreate_reorder, | 
|---|
|  | 260 | +	.commit_pass2		= xlog_recover_icreate_commit_pass2, | 
|---|
|  | 261 | +}; | 
|---|