hc
2024-05-11 04dd17822334871b23ea2862f7798fb0e0007777
kernel/fs/xfs/xfs_bmap_util.c
....@@ -12,12 +12,10 @@
1212 #include "xfs_trans_resv.h"
1313 #include "xfs_bit.h"
1414 #include "xfs_mount.h"
15
-#include "xfs_da_format.h"
1615 #include "xfs_defer.h"
1716 #include "xfs_inode.h"
1817 #include "xfs_btree.h"
1918 #include "xfs_trans.h"
20
-#include "xfs_extfree_item.h"
2119 #include "xfs_alloc.h"
2220 #include "xfs_bmap.h"
2321 #include "xfs_bmap_util.h"
....@@ -28,11 +26,8 @@
2826 #include "xfs_trans_space.h"
2927 #include "xfs_trace.h"
3028 #include "xfs_icache.h"
31
-#include "xfs_log.h"
32
-#include "xfs_rmap_btree.h"
3329 #include "xfs_iomap.h"
3430 #include "xfs_reflink.h"
35
-#include "xfs_refcount.h"
3631
3732 /* Kernel only BMAP related definitions and functions */
3833
....@@ -44,9 +39,9 @@
4439 xfs_daddr_t
4540 xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
4641 {
47
- return (XFS_IS_REALTIME_INODE(ip) ? \
48
- (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
49
- XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
42
+ if (XFS_IS_REALTIME_INODE(ip))
43
+ return XFS_FSB_TO_BB(ip->i_mount, fsb);
44
+ return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
5045 }
5146
5247 /*
....@@ -58,15 +53,16 @@
5853 */
5954 int
6055 xfs_zero_extent(
61
- struct xfs_inode *ip,
62
- xfs_fsblock_t start_fsb,
63
- xfs_off_t count_fsb)
56
+ struct xfs_inode *ip,
57
+ xfs_fsblock_t start_fsb,
58
+ xfs_off_t count_fsb)
6459 {
65
- struct xfs_mount *mp = ip->i_mount;
66
- xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
67
- sector_t block = XFS_BB_TO_FSBT(mp, sector);
60
+ struct xfs_mount *mp = ip->i_mount;
61
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
62
+ xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
63
+ sector_t block = XFS_BB_TO_FSBT(mp, sector);
6864
69
- return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)),
65
+ return blkdev_issue_zeroout(target->bt_bdev,
7066 block << (mp->m_super->s_blocksize_bits - 9),
7167 count_fsb << (mp->m_super->s_blocksize_bits - 9),
7268 GFP_NOFS, 0);
....@@ -130,7 +126,7 @@
130126 * pick an extent that will space things out in the rt area.
131127 */
132128 if (ap->eof && ap->offset == 0) {
133
- xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
129
+ xfs_rtblock_t rtx; /* realtime extent no */
134130
135131 error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
136132 if (error)
....@@ -169,42 +165,12 @@
169165 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
170166 ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
171167 XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
172
-
173
- /* Zero the extent if we were asked to do so */
174
- if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) {
175
- error = xfs_zero_extent(ap->ip, ap->blkno, ap->length);
176
- if (error)
177
- return error;
178
- }
179168 } else {
180169 ap->length = 0;
181170 }
182171 return 0;
183172 }
184173 #endif /* CONFIG_XFS_RT */
185
-
186
-/*
187
- * Check if the endoff is outside the last extent. If so the caller will grow
188
- * the allocation to a stripe unit boundary. All offsets are considered outside
189
- * the end of file for an empty fork, so 1 is returned in *eof in that case.
190
- */
191
-int
192
-xfs_bmap_eof(
193
- struct xfs_inode *ip,
194
- xfs_fileoff_t endoff,
195
- int whichfork,
196
- int *eof)
197
-{
198
- struct xfs_bmbt_irec rec;
199
- int error;
200
-
201
- error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
202
- if (error || *eof)
203
- return error;
204
-
205
- *eof = endoff >= rec.br_startoff + rec.br_blockcount;
206
- return 0;
207
-}
208174
209175 /*
210176 * Extent tree block counting routines.
....@@ -234,106 +200,6 @@
234200 }
235201
236202 /*
237
- * Count leaf blocks given a range of extent records originally
238
- * in btree format.
239
- */
240
-STATIC void
241
-xfs_bmap_disk_count_leaves(
242
- struct xfs_mount *mp,
243
- struct xfs_btree_block *block,
244
- int numrecs,
245
- xfs_filblks_t *count)
246
-{
247
- int b;
248
- xfs_bmbt_rec_t *frp;
249
-
250
- for (b = 1; b <= numrecs; b++) {
251
- frp = XFS_BMBT_REC_ADDR(mp, block, b);
252
- *count += xfs_bmbt_disk_get_blockcount(frp);
253
- }
254
-}
255
-
256
-/*
257
- * Recursively walks each level of a btree
258
- * to count total fsblocks in use.
259
- */
260
-STATIC int
261
-xfs_bmap_count_tree(
262
- struct xfs_mount *mp,
263
- struct xfs_trans *tp,
264
- struct xfs_ifork *ifp,
265
- xfs_fsblock_t blockno,
266
- int levelin,
267
- xfs_extnum_t *nextents,
268
- xfs_filblks_t *count)
269
-{
270
- int error;
271
- struct xfs_buf *bp, *nbp;
272
- int level = levelin;
273
- __be64 *pp;
274
- xfs_fsblock_t bno = blockno;
275
- xfs_fsblock_t nextbno;
276
- struct xfs_btree_block *block, *nextblock;
277
- int numrecs;
278
-
279
- error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
280
- &xfs_bmbt_buf_ops);
281
- if (error)
282
- return error;
283
- *count += 1;
284
- block = XFS_BUF_TO_BLOCK(bp);
285
-
286
- if (--level) {
287
- /* Not at node above leaves, count this level of nodes */
288
- nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
289
- while (nextbno != NULLFSBLOCK) {
290
- error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
291
- XFS_BMAP_BTREE_REF,
292
- &xfs_bmbt_buf_ops);
293
- if (error)
294
- return error;
295
- *count += 1;
296
- nextblock = XFS_BUF_TO_BLOCK(nbp);
297
- nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
298
- xfs_trans_brelse(tp, nbp);
299
- }
300
-
301
- /* Dive to the next level */
302
- pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
303
- bno = be64_to_cpu(*pp);
304
- error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, nextents,
305
- count);
306
- if (error) {
307
- xfs_trans_brelse(tp, bp);
308
- XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
309
- XFS_ERRLEVEL_LOW, mp);
310
- return -EFSCORRUPTED;
311
- }
312
- xfs_trans_brelse(tp, bp);
313
- } else {
314
- /* count all level 1 nodes and their leaves */
315
- for (;;) {
316
- nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
317
- numrecs = be16_to_cpu(block->bb_numrecs);
318
- (*nextents) += numrecs;
319
- xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
320
- xfs_trans_brelse(tp, bp);
321
- if (nextbno == NULLFSBLOCK)
322
- break;
323
- bno = nextbno;
324
- error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
325
- XFS_BMAP_BTREE_REF,
326
- &xfs_bmbt_buf_ops);
327
- if (error)
328
- return error;
329
- *count += 1;
330
- block = XFS_BUF_TO_BLOCK(bp);
331
- }
332
- }
333
- return 0;
334
-}
335
-
336
-/*
337203 * Count fsblocks of the given fork. Delayed allocation extents are
338204 * not counted towards the totals.
339205 */
....@@ -345,26 +211,19 @@
345211 xfs_extnum_t *nextents,
346212 xfs_filblks_t *count)
347213 {
348
- struct xfs_mount *mp; /* file system mount structure */
349
- __be64 *pp; /* pointer to block address */
350
- struct xfs_btree_block *block; /* current btree block */
351
- struct xfs_ifork *ifp; /* fork structure */
352
- xfs_fsblock_t bno; /* block # of "block" */
353
- int level; /* btree level, for checking */
214
+ struct xfs_mount *mp = ip->i_mount;
215
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
216
+ struct xfs_btree_cur *cur;
217
+ xfs_extlen_t btblocks = 0;
354218 int error;
355219
356
- bno = NULLFSBLOCK;
357
- mp = ip->i_mount;
358220 *nextents = 0;
359221 *count = 0;
360
- ifp = XFS_IFORK_PTR(ip, whichfork);
222
+
361223 if (!ifp)
362224 return 0;
363225
364
- switch (XFS_IFORK_FORMAT(ip, whichfork)) {
365
- case XFS_DINODE_FMT_EXTENTS:
366
- *nextents = xfs_bmap_count_leaves(ifp, count);
367
- return 0;
226
+ switch (ifp->if_format) {
368227 case XFS_DINODE_FMT_BTREE:
369228 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
370229 error = xfs_iread_extents(tp, ip, whichfork);
....@@ -372,26 +231,23 @@
372231 return error;
373232 }
374233
375
- /*
376
- * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
377
- */
378
- block = ifp->if_broot;
379
- level = be16_to_cpu(block->bb_level);
380
- ASSERT(level > 0);
381
- pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
382
- bno = be64_to_cpu(*pp);
383
- ASSERT(bno != NULLFSBLOCK);
384
- ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
385
- ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
234
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
235
+ error = xfs_btree_count_blocks(cur, &btblocks);
236
+ xfs_btree_del_cursor(cur, error);
237
+ if (error)
238
+ return error;
386239
387
- error = xfs_bmap_count_tree(mp, tp, ifp, bno, level,
388
- nextents, count);
389
- if (error) {
390
- XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)",
391
- XFS_ERRLEVEL_LOW, mp);
392
- return -EFSCORRUPTED;
393
- }
394
- return 0;
240
+ /*
241
+ * xfs_btree_count_blocks includes the root block contained in
242
+ * the inode fork in @btblocks, so subtract one because we're
243
+ * only interested in allocated disk blocks.
244
+ */
245
+ *count += btblocks - 1;
246
+
247
+ /* fall through */
248
+ case XFS_DINODE_FMT_EXTENTS:
249
+ *nextents = xfs_bmap_count_leaves(ifp, count);
250
+ break;
395251 }
396252
397253 return 0;
....@@ -406,10 +262,10 @@
406262 struct xfs_bmbt_irec *got)
407263 {
408264 struct kgetbmap *p = out + bmv->bmv_entries;
409
- bool shared = false, trimmed = false;
265
+ bool shared = false;
410266 int error;
411267
412
- error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
268
+ error = xfs_reflink_trim_around_shared(ip, got, &shared);
413269 if (error)
414270 return error;
415271
....@@ -593,7 +449,7 @@
593449 break;
594450 }
595451
596
- switch (XFS_IFORK_FORMAT(ip, whichfork)) {
452
+ switch (ifp->if_format) {
597453 case XFS_DINODE_FMT_EXTENTS:
598454 case XFS_DINODE_FMT_BTREE:
599455 break;
....@@ -869,6 +725,7 @@
869725 xfs_filblks_t allocatesize_fsb;
870726 xfs_extlen_t extsz, temp;
871727 xfs_fileoff_t startoffset_fsb;
728
+ xfs_fileoff_t endoffset_fsb;
872729 int nimaps;
873730 int quota_flag;
874731 int rt;
....@@ -896,7 +753,8 @@
896753 imapp = &imaps[0];
897754 nimaps = 1;
898755 startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
899
- allocatesize_fsb = XFS_B_TO_FSB(mp, count);
756
+ endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
757
+ allocatesize_fsb = endoffset_fsb - startoffset_fsb;
900758
901759 /*
902760 * Allocate file space until done or until there is an error
....@@ -942,9 +800,6 @@
942800 quota_flag = XFS_QMOPT_RES_REGBLKS;
943801 }
944802
945
- /*
946
- * Allocate and setup the transaction.
947
- */
948803 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
949804 resrtextents, 0, &tp);
950805
....@@ -967,14 +822,14 @@
967822 xfs_trans_ijoin(tp, ip, 0);
968823
969824 error = xfs_bmapi_write(tp, ip, startoffset_fsb,
970
- allocatesize_fsb, alloc_type, resblks,
971
- imapp, &nimaps);
825
+ allocatesize_fsb, alloc_type, 0, imapp,
826
+ &nimaps);
972827 if (error)
973828 goto error0;
974829
975
- /*
976
- * Complete the transaction
977
- */
830
+ ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
831
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
832
+
978833 error = xfs_trans_commit(tp);
979834 xfs_iunlock(ip, XFS_ILOCK_EXCL);
980835 if (error)
....@@ -1042,44 +897,7 @@
1042897 goto out_unlock;
1043898 }
1044899
1045
-static int
1046
-xfs_adjust_extent_unmap_boundaries(
1047
- struct xfs_inode *ip,
1048
- xfs_fileoff_t *startoffset_fsb,
1049
- xfs_fileoff_t *endoffset_fsb)
1050
-{
1051
- struct xfs_mount *mp = ip->i_mount;
1052
- struct xfs_bmbt_irec imap;
1053
- int nimap, error;
1054
- xfs_extlen_t mod = 0;
1055
-
1056
- nimap = 1;
1057
- error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
1058
- if (error)
1059
- return error;
1060
-
1061
- if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
1062
- ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1063
- div_u64_rem(imap.br_startblock, mp->m_sb.sb_rextsize, &mod);
1064
- if (mod)
1065
- *startoffset_fsb += mp->m_sb.sb_rextsize - mod;
1066
- }
1067
-
1068
- nimap = 1;
1069
- error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
1070
- if (error)
1071
- return error;
1072
-
1073
- if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
1074
- ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1075
- mod++;
1076
- if (mod && mod != mp->m_sb.sb_rextsize)
1077
- *endoffset_fsb -= mod;
1078
- }
1079
-
1080
- return 0;
1081
-}
1082
-
900
+/* Caller must first wait for the completion of any pending DIOs if required. */
1083901 int
1084902 xfs_flush_unmap_range(
1085903 struct xfs_inode *ip,
....@@ -1090,9 +908,6 @@
1090908 struct inode *inode = VFS_I(ip);
1091909 xfs_off_t rounding, start, end;
1092910 int error;
1093
-
1094
- /* wait for the completion of any pending DIOs */
1095
- inode_dio_wait(inode);
1096911
1097912 rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
1098913 start = round_down(offset, rounding);
....@@ -1125,27 +940,20 @@
1125940 if (len <= 0) /* if nothing being freed */
1126941 return 0;
1127942
1128
- error = xfs_flush_unmap_range(ip, offset, len);
1129
- if (error)
1130
- return error;
1131
-
1132943 startoffset_fsb = XFS_B_TO_FSB(mp, offset);
1133944 endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
1134945
1135
- /*
1136
- * Need to zero the stuff we're not freeing, on disk. If it's a RT file
1137
- * and we can't use unwritten extents then we actually need to ensure
1138
- * to zero the whole extent, otherwise we just need to take of block
1139
- * boundaries, and xfs_bunmapi will handle the rest.
1140
- */
1141
- if (XFS_IS_REALTIME_INODE(ip) &&
1142
- !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
1143
- error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
1144
- &endoffset_fsb);
1145
- if (error)
1146
- return error;
946
+ /* We can only free complete realtime extents. */
947
+ if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) {
948
+ startoffset_fsb = roundup_64(startoffset_fsb,
949
+ mp->m_sb.sb_rextsize);
950
+ endoffset_fsb = rounddown_64(endoffset_fsb,
951
+ mp->m_sb.sb_rextsize);
1147952 }
1148953
954
+ /*
955
+ * Need to zero the stuff we're not freeing, on disk.
956
+ */
1149957 if (endoffset_fsb > startoffset_fsb) {
1150958 while (!done) {
1151959 error = xfs_unmap_extent(ip, startoffset_fsb,
....@@ -1165,7 +973,8 @@
1165973 return 0;
1166974 if (offset + len > XFS_ISIZE(ip))
1167975 len = XFS_ISIZE(ip) - offset;
1168
- error = iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
976
+ error = iomap_zero_range(VFS_I(ip), offset, len, NULL,
977
+ &xfs_buffered_write_iomap_ops);
1169978 if (error)
1170979 return error;
1171980
....@@ -1183,51 +992,12 @@
1183992 return error;
1184993 }
1185994
1186
-/*
1187
- * Preallocate and zero a range of a file. This mechanism has the allocation
1188
- * semantics of fallocate and in addition converts data in the range to zeroes.
1189
- */
1190
-int
1191
-xfs_zero_file_space(
1192
- struct xfs_inode *ip,
1193
- xfs_off_t offset,
1194
- xfs_off_t len)
1195
-{
1196
- struct xfs_mount *mp = ip->i_mount;
1197
- uint blksize;
1198
- int error;
1199
-
1200
- trace_xfs_zero_file_space(ip);
1201
-
1202
- blksize = 1 << mp->m_sb.sb_blocklog;
1203
-
1204
- /*
1205
- * Punch a hole and prealloc the range. We use hole punch rather than
1206
- * unwritten extent conversion for two reasons:
1207
- *
1208
- * 1.) Hole punch handles partial block zeroing for us.
1209
- *
1210
- * 2.) If prealloc returns ENOSPC, the file range is still zero-valued
1211
- * by virtue of the hole punch.
1212
- */
1213
- error = xfs_free_file_space(ip, offset, len);
1214
- if (error)
1215
- goto out;
1216
-
1217
- error = xfs_alloc_file_space(ip, round_down(offset, blksize),
1218
- round_up(offset + len, blksize) -
1219
- round_down(offset, blksize),
1220
- XFS_BMAPI_PREALLOC);
1221
-out:
1222
- return error;
1223
-
1224
-}
1225
-
1226995 static int
1227996 xfs_prepare_shift(
1228997 struct xfs_inode *ip,
1229998 loff_t offset)
1230999 {
1000
+ struct xfs_mount *mp = ip->i_mount;
12311001 int error;
12321002
12331003 /*
....@@ -1239,6 +1009,17 @@
12391009 if (error)
12401010 return error;
12411011 }
1012
+
1013
+ /*
1014
+ * Shift operations must stabilize the start block offset boundary along
1015
+ * with the full range of the operation. If we don't, a COW writeback
1016
+ * completion could race with an insert, front merge with the start
1017
+ * extent (after split) during the shift and corrupt the file. Start
1018
+ * with the block just prior to the start to stabilize the boundary.
1019
+ */
1020
+ offset = round_down(offset, 1 << mp->m_sb.sb_blocklog);
1021
+ if (offset)
1022
+ offset -= (1 << mp->m_sb.sb_blocklog);
12421023
12431024 /*
12441025 * Writeback and invalidate cache for the remainder of the file as we're
....@@ -1286,7 +1067,6 @@
12861067 int error;
12871068 xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len);
12881069 xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len);
1289
- uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
12901070 bool done = false;
12911071
12921072 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
....@@ -1302,32 +1082,34 @@
13021082 if (error)
13031083 return error;
13041084
1305
- while (!error && !done) {
1306
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
1307
- &tp);
1308
- if (error)
1309
- break;
1085
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
1086
+ if (error)
1087
+ return error;
13101088
1311
- xfs_ilock(ip, XFS_ILOCK_EXCL);
1312
- error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
1313
- ip->i_gdquot, ip->i_pdquot, resblks, 0,
1314
- XFS_QMOPT_RES_REGBLKS);
1315
- if (error)
1316
- goto out_trans_cancel;
1317
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1089
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
1090
+ xfs_trans_ijoin(tp, ip, 0);
13181091
1092
+ while (!done) {
13191093 error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
13201094 &done);
13211095 if (error)
13221096 goto out_trans_cancel;
1097
+ if (done)
1098
+ break;
13231099
1324
- error = xfs_trans_commit(tp);
1100
+ /* finish any deferred frees and roll the transaction */
1101
+ error = xfs_defer_finish(&tp);
1102
+ if (error)
1103
+ goto out_trans_cancel;
13251104 }
13261105
1106
+ error = xfs_trans_commit(tp);
1107
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
13271108 return error;
13281109
13291110 out_trans_cancel:
13301111 xfs_trans_cancel(tp);
1112
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
13311113 return error;
13321114 }
13331115
....@@ -1370,35 +1152,41 @@
13701152 if (error)
13711153 return error;
13721154
1155
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
1156
+ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
1157
+ if (error)
1158
+ return error;
1159
+
1160
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
1161
+ xfs_trans_ijoin(tp, ip, 0);
1162
+
13731163 /*
13741164 * The extent shifting code works on extent granularity. So, if stop_fsb
13751165 * is not the starting block of extent, we need to split the extent at
13761166 * stop_fsb.
13771167 */
1378
- error = xfs_bmap_split_extent(ip, stop_fsb);
1168
+ error = xfs_bmap_split_extent(tp, ip, stop_fsb);
13791169 if (error)
1380
- return error;
1170
+ goto out_trans_cancel;
13811171
1382
- while (!error && !done) {
1383
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0,
1384
- &tp);
1172
+ do {
1173
+ error = xfs_defer_finish(&tp);
13851174 if (error)
1386
- break;
1175
+ goto out_trans_cancel;
13871176
1388
- xfs_ilock(ip, XFS_ILOCK_EXCL);
1389
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
13901177 error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
13911178 &done, stop_fsb);
13921179 if (error)
13931180 goto out_trans_cancel;
1181
+ } while (!done);
13941182
1395
- error = xfs_trans_commit(tp);
1396
- }
1397
-
1183
+ error = xfs_trans_commit(tp);
1184
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
13981185 return error;
13991186
14001187 out_trans_cancel:
14011188 xfs_trans_cancel(tp);
1189
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
14021190 return error;
14031191 }
14041192
....@@ -1427,17 +1215,26 @@
14271215 struct xfs_inode *ip, /* target inode */
14281216 struct xfs_inode *tip) /* tmp inode */
14291217 {
1218
+ struct xfs_ifork *ifp = &ip->i_df;
1219
+ struct xfs_ifork *tifp = &tip->i_df;
1220
+
1221
+ /* User/group/project quota ids must match if quotas are enforced. */
1222
+ if (XFS_IS_QUOTA_ON(ip->i_mount) &&
1223
+ (!uid_eq(VFS_I(ip)->i_uid, VFS_I(tip)->i_uid) ||
1224
+ !gid_eq(VFS_I(ip)->i_gid, VFS_I(tip)->i_gid) ||
1225
+ ip->i_d.di_projid != tip->i_d.di_projid))
1226
+ return -EINVAL;
14301227
14311228 /* Should never get a local format */
1432
- if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
1433
- tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
1229
+ if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
1230
+ tifp->if_format == XFS_DINODE_FMT_LOCAL)
14341231 return -EINVAL;
14351232
14361233 /*
14371234 * if the target inode has less extents that then temporary inode then
14381235 * why did userspace call us?
14391236 */
1440
- if (ip->i_d.di_nextents < tip->i_d.di_nextents)
1237
+ if (ifp->if_nextents < tifp->if_nextents)
14411238 return -EINVAL;
14421239
14431240 /*
....@@ -1452,20 +1249,18 @@
14521249 * form then we will end up with the target inode in the wrong format
14531250 * as we already know there are less extents in the temp inode.
14541251 */
1455
- if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1456
- tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
1252
+ if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1253
+ tifp->if_format == XFS_DINODE_FMT_BTREE)
14571254 return -EINVAL;
14581255
14591256 /* Check temp in extent form to max in target */
1460
- if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1461
- XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
1462
- XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
1257
+ if (tifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1258
+ tifp->if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
14631259 return -EINVAL;
14641260
14651261 /* Check target in extent form to max in temp */
1466
- if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1467
- XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
1468
- XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
1262
+ if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1263
+ ifp->if_nextents > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
14691264 return -EINVAL;
14701265
14711266 /*
....@@ -1477,22 +1272,20 @@
14771272 * (a common defrag case) which will occur when the temp inode is in
14781273 * extent format...
14791274 */
1480
- if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
1275
+ if (tifp->if_format == XFS_DINODE_FMT_BTREE) {
14811276 if (XFS_IFORK_Q(ip) &&
1482
- XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
1277
+ XFS_BMAP_BMDR_SPACE(tifp->if_broot) > XFS_IFORK_BOFF(ip))
14831278 return -EINVAL;
1484
- if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
1485
- XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
1279
+ if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
14861280 return -EINVAL;
14871281 }
14881282
14891283 /* Reciprocal target->temp btree format checks */
1490
- if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
1284
+ if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
14911285 if (XFS_IFORK_Q(tip) &&
14921286 XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
14931287 return -EINVAL;
1494
- if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
1495
- XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
1288
+ if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
14961289 return -EINVAL;
14971290 }
14981291
....@@ -1589,24 +1382,16 @@
15891382 trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
15901383
15911384 /* Remove the mapping from the donor file. */
1592
- error = xfs_bmap_unmap_extent(tp, tip, &uirec);
1593
- if (error)
1594
- goto out;
1385
+ xfs_bmap_unmap_extent(tp, tip, &uirec);
15951386
15961387 /* Remove the mapping from the source file. */
1597
- error = xfs_bmap_unmap_extent(tp, ip, &irec);
1598
- if (error)
1599
- goto out;
1388
+ xfs_bmap_unmap_extent(tp, ip, &irec);
16001389
16011390 /* Map the donor file's blocks into the source file. */
1602
- error = xfs_bmap_map_extent(tp, ip, &uirec);
1603
- if (error)
1604
- goto out;
1391
+ xfs_bmap_map_extent(tp, ip, &uirec);
16051392
16061393 /* Map the source file's blocks into the donor file. */
1607
- error = xfs_bmap_map_extent(tp, tip, &irec);
1608
- if (error)
1609
- goto out;
1394
+ xfs_bmap_map_extent(tp, tip, &irec);
16101395
16111396 error = xfs_defer_finish(tpp);
16121397 tp = *tpp;
....@@ -1652,15 +1437,15 @@
16521437 /*
16531438 * Count the number of extended attribute blocks
16541439 */
1655
- if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
1656
- (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
1440
+ if (XFS_IFORK_Q(ip) && ip->i_afp->if_nextents > 0 &&
1441
+ ip->i_afp->if_format != XFS_DINODE_FMT_LOCAL) {
16571442 error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
16581443 &aforkblks);
16591444 if (error)
16601445 return error;
16611446 }
1662
- if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
1663
- (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
1447
+ if (XFS_IFORK_Q(tip) && tip->i_afp->if_nextents > 0 &&
1448
+ tip->i_afp->if_format != XFS_DINODE_FMT_LOCAL) {
16641449 error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
16651450 &taforkblks);
16661451 if (error)
....@@ -1674,12 +1459,12 @@
16741459 * event of a crash. Set the owner change log flags now and leave the
16751460 * bmbt scan as the last step.
16761461 */
1677
- if (ip->i_d.di_version == 3 &&
1678
- ip->i_d.di_format == XFS_DINODE_FMT_BTREE)
1679
- (*target_log_flags) |= XFS_ILOG_DOWNER;
1680
- if (tip->i_d.di_version == 3 &&
1681
- tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
1682
- (*src_log_flags) |= XFS_ILOG_DOWNER;
1462
+ if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
1463
+ if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE)
1464
+ (*target_log_flags) |= XFS_ILOG_DOWNER;
1465
+ if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE)
1466
+ (*src_log_flags) |= XFS_ILOG_DOWNER;
1467
+ }
16831468
16841469 /*
16851470 * Swap the data forks of the inodes
....@@ -1692,9 +1477,6 @@
16921477 tmp = (uint64_t)ip->i_d.di_nblocks;
16931478 ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
16941479 tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
1695
-
1696
- swap(ip->i_d.di_nextents, tip->i_d.di_nextents);
1697
- swap(ip->i_d.di_format, tip->i_d.di_format);
16981480
16991481 /*
17001482 * The extents in the source inode could still contain speculative
....@@ -1709,24 +1491,24 @@
17091491 tip->i_delayed_blks = ip->i_delayed_blks;
17101492 ip->i_delayed_blks = 0;
17111493
1712
- switch (ip->i_d.di_format) {
1494
+ switch (ip->i_df.if_format) {
17131495 case XFS_DINODE_FMT_EXTENTS:
17141496 (*src_log_flags) |= XFS_ILOG_DEXT;
17151497 break;
17161498 case XFS_DINODE_FMT_BTREE:
1717
- ASSERT(ip->i_d.di_version < 3 ||
1499
+ ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) ||
17181500 (*src_log_flags & XFS_ILOG_DOWNER));
17191501 (*src_log_flags) |= XFS_ILOG_DBROOT;
17201502 break;
17211503 }
17221504
1723
- switch (tip->i_d.di_format) {
1505
+ switch (tip->i_df.if_format) {
17241506 case XFS_DINODE_FMT_EXTENTS:
17251507 (*target_log_flags) |= XFS_ILOG_DEXT;
17261508 break;
17271509 case XFS_DINODE_FMT_BTREE:
17281510 (*target_log_flags) |= XFS_ILOG_DBROOT;
1729
- ASSERT(tip->i_d.di_version < 3 ||
1511
+ ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) ||
17301512 (*target_log_flags & XFS_ILOG_DOWNER));
17311513 break;
17321514 }
....@@ -1790,6 +1572,7 @@
17901572 int lock_flags;
17911573 uint64_t f;
17921574 int resblks = 0;
1575
+ unsigned int flags = 0;
17931576
17941577 /*
17951578 * Lock the inodes against other IO, page faults and truncate to
....@@ -1813,6 +1596,14 @@
18131596 goto out_unlock;
18141597 }
18151598
1599
+ error = xfs_qm_dqattach(ip);
1600
+ if (error)
1601
+ goto out_unlock;
1602
+
1603
+ error = xfs_qm_dqattach(tip);
1604
+ if (error)
1605
+ goto out_unlock;
1606
+
18161607 error = xfs_swap_extent_flush(ip);
18171608 if (error)
18181609 goto out_unlock;
....@@ -1832,9 +1623,9 @@
18321623 * performed with log redo items!
18331624 */
18341625 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
1835
- int w = XFS_DATA_FORK;
1836
- uint32_t ipnext = XFS_IFORK_NEXTENTS(ip, w);
1837
- uint32_t tipnext = XFS_IFORK_NEXTENTS(tip, w);
1626
+ int w = XFS_DATA_FORK;
1627
+ uint32_t ipnext = ip->i_df.if_nextents;
1628
+ uint32_t tipnext = tip->i_df.if_nextents;
18381629
18391630 /*
18401631 * Conceptually this shouldn't affect the shape of either bmbt,
....@@ -1845,17 +1636,16 @@
18451636 resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w);
18461637
18471638 /*
1848
- * Handle the corner case where either inode might straddle the
1849
- * btree format boundary. If so, the inode could bounce between
1850
- * btree <-> extent format on unmap -> remap cycles, freeing and
1851
- * allocating a bmapbt block each time.
1639
+ * If either inode straddles a bmapbt block allocation boundary,
1640
+ * the rmapbt algorithm triggers repeated allocs and frees as
1641
+ * extents are remapped. This can exhaust the block reservation
1642
+ * prematurely and cause shutdown. Return freed blocks to the
1643
+ * transaction reservation to counter this behavior.
18521644 */
1853
- if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1))
1854
- resblks += XFS_IFORK_MAXEXT(ip, w);
1855
- if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1))
1856
- resblks += XFS_IFORK_MAXEXT(tip, w);
1645
+ flags |= XFS_TRANS_RES_FDBLKS;
18571646 }
1858
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
1647
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, flags,
1648
+ &tp);
18591649 if (error)
18601650 goto out_unlock;
18611651
....@@ -1934,10 +1724,11 @@
19341724
19351725 /* Swap the cow forks. */
19361726 if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1937
- ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS);
1938
- ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS);
1727
+ ASSERT(!ip->i_cowfp ||
1728
+ ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
1729
+ ASSERT(!tip->i_cowfp ||
1730
+ tip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
19391731
1940
- swap(ip->i_cnextents, tip->i_cnextents);
19411732 swap(ip->i_cowfp, tip->i_cowfp);
19421733
19431734 if (ip->i_cowfp && ip->i_cowfp->if_bytes)