2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/fs/xfs/xfs_iomap.c
@@ -4,7 +4,6 @@
  * Copyright (c) 2016-2018 Christoph Hellwig.
  * All Rights Reserved.
  */
-#include <linux/iomap.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
@@ -12,7 +11,6 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_bmap_btree.h"
@@ -25,28 +23,50 @@
 #include "xfs_inode_item.h"
 #include "xfs_iomap.h"
 #include "xfs_trace.h"
-#include "xfs_icache.h"
 #include "xfs_quota.h"
 #include "xfs_dquot_item.h"
 #include "xfs_dquot.h"
 #include "xfs_reflink.h"


-#define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> mp->m_writeio_log) \
-					<< mp->m_writeio_log)
+#define XFS_ALLOC_ALIGN(mp, off) \
+	(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)

-void
+static int
+xfs_alert_fsblock_zero(
+	xfs_inode_t		*ip,
+	xfs_bmbt_irec_t		*imap)
+{
+	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
+			"Access to block zero in inode %llu "
+			"start_block: %llx start_off: %llx "
+			"blkcnt: %llx extent-state: %x",
+		(unsigned long long)ip->i_ino,
+		(unsigned long long)imap->br_startblock,
+		(unsigned long long)imap->br_startoff,
+		(unsigned long long)imap->br_blockcount,
+		imap->br_state);
+	return -EFSCORRUPTED;
+}
+
+int
 xfs_bmbt_to_iomap(
 	struct xfs_inode	*ip,
 	struct iomap		*iomap,
-	struct xfs_bmbt_irec	*imap)
+	struct xfs_bmbt_irec	*imap,
+	u16			flags)
 {
 	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
+
+	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
+		return xfs_alert_fsblock_zero(ip, imap);

 	if (imap->br_startblock == HOLESTARTBLOCK) {
 		iomap->addr = IOMAP_NULL_ADDR;
 		iomap->type = IOMAP_HOLE;
-	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
+	} else if (imap->br_startblock == DELAYSTARTBLOCK ||
+		   isnullstartblock(imap->br_startblock)) {
 		iomap->addr = IOMAP_NULL_ADDR;
 		iomap->type = IOMAP_DELALLOC;
 	} else {
@@ -58,14 +78,47 @@
 	}
 	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
 	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
-	iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
-	iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
+	iomap->bdev = target->bt_bdev;
+	iomap->dax_dev = target->bt_daxdev;
+	iomap->flags = flags;
+
+	if (xfs_ipincount(ip) &&
+	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
+		iomap->flags |= IOMAP_F_DIRTY;
+	return 0;
 }

-xfs_extlen_t
-xfs_eof_alignment(
+static void
+xfs_hole_to_iomap(
 	struct xfs_inode	*ip,
-	xfs_extlen_t		extsize)
+	struct iomap		*iomap,
+	xfs_fileoff_t		offset_fsb,
+	xfs_fileoff_t		end_fsb)
+{
+	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
+
+	iomap->addr = IOMAP_NULL_ADDR;
+	iomap->type = IOMAP_HOLE;
+	iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb);
+	iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb);
+	iomap->bdev = target->bt_bdev;
+	iomap->dax_dev = target->bt_daxdev;
+}
+
+static inline xfs_fileoff_t
+xfs_iomap_end_fsb(
+	struct xfs_mount	*mp,
+	loff_t			offset,
+	loff_t			count)
+{
+	ASSERT(offset <= mp->m_super->s_maxbytes);
+	return min(XFS_B_TO_FSB(mp, offset + count),
+		   XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
+}
+
+static xfs_extlen_t
+xfs_eof_alignment(
+	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	xfs_extlen_t		align = 0;
@@ -88,128 +141,80 @@
 		align = 0;
 	}

-	/*
-	 * Always round up the allocation request to an extent boundary
-	 * (when file on a real-time subvolume or has di_extsize hint).
-	 */
-	if (extsize) {
-		if (align)
-			align = roundup_64(align, extsize);
-		else
-			align = extsize;
-	}
-
 	return align;
 }

-STATIC int
+/*
+ * Check if last_fsb is outside the last extent, and if so grow it to the next
+ * stripe unit boundary.
+ */
+xfs_fileoff_t
 xfs_iomap_eof_align_last_fsb(
 	struct xfs_inode	*ip,
-	xfs_extlen_t		extsize,
-	xfs_fileoff_t		*last_fsb)
+	xfs_fileoff_t		end_fsb)
 {
-	xfs_extlen_t		align = xfs_eof_alignment(ip, extsize);
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	xfs_extlen_t		extsz = xfs_get_extsz_hint(ip);
+	xfs_extlen_t		align = xfs_eof_alignment(ip);
+	struct xfs_bmbt_irec	irec;
+	struct xfs_iext_cursor	icur;
+
+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+
+	/*
+	 * Always round up the allocation request to the extent hint boundary.
+	 */
+	if (extsz) {
+		if (align)
+			align = roundup_64(align, extsz);
+		else
+			align = extsz;
+	}

 	if (align) {
-		xfs_fileoff_t	new_last_fsb = roundup_64(*last_fsb, align);
-		int		eof, error;
+		xfs_fileoff_t	aligned_end_fsb = roundup_64(end_fsb, align);

-		error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
-		if (error)
-			return error;
-		if (eof)
-			*last_fsb = new_last_fsb;
+		xfs_iext_last(ifp, &icur);
+		if (!xfs_iext_get_extent(ifp, &icur, &irec) ||
+		    aligned_end_fsb >= irec.br_startoff + irec.br_blockcount)
+			return aligned_end_fsb;
 	}
-	return 0;
-}

-STATIC int
-xfs_alert_fsblock_zero(
-	xfs_inode_t	*ip,
-	xfs_bmbt_irec_t	*imap)
-{
-	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
-			"Access to block zero in inode %llu "
-			"start_block: %llx start_off: %llx "
-			"blkcnt: %llx extent-state: %x",
-		(unsigned long long)ip->i_ino,
-		(unsigned long long)imap->br_startblock,
-		(unsigned long long)imap->br_startoff,
-		(unsigned long long)imap->br_blockcount,
-		imap->br_state);
-	return -EFSCORRUPTED;
+	return end_fsb;
 }

 int
 xfs_iomap_write_direct(
-	xfs_inode_t	*ip,
-	xfs_off_t	offset,
-	size_t		count,
-	xfs_bmbt_irec_t *imap,
-	int		nmaps)
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		offset_fsb,
+	xfs_fileoff_t		count_fsb,
+	struct xfs_bmbt_irec	*imap)
 {
-	xfs_mount_t	*mp = ip->i_mount;
-	xfs_fileoff_t	offset_fsb;
-	xfs_fileoff_t	last_fsb;
-	xfs_filblks_t	count_fsb, resaligned;
-	xfs_extlen_t	extsz;
-	int		nimaps;
-	int		quota_flag;
-	int		rt;
-	xfs_trans_t	*tp;
-	uint		qblocks, resblks, resrtextents;
-	int		error;
-	int		lockmode;
-	int		bmapi_flags = XFS_BMAPI_PREALLOC;
-	uint		tflags = 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	xfs_filblks_t		resaligned;
+	int			nimaps;
+	int			quota_flag;
+	uint			qblocks, resblks;
+	unsigned int		resrtextents = 0;
+	int			error;
+	int			bmapi_flags = XFS_BMAPI_PREALLOC;
+	uint			tflags = 0;

-	rt = XFS_IS_REALTIME_INODE(ip);
-	extsz = xfs_get_extsz_hint(ip);
-	lockmode = XFS_ILOCK_SHARED;	/* locked by caller */
-
-	ASSERT(xfs_isilocked(ip, lockmode));
-
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
-	if ((offset + count) > XFS_ISIZE(ip)) {
-		/*
-		 * Assert that the in-core extent list is present since this can
-		 * call xfs_iread_extents() and we only have the ilock shared.
-		 * This should be safe because the lock was held around a bmapi
-		 * call in the caller and we only need it to access the in-core
-		 * list.
-		 */
-		ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
-								XFS_IFEXTENTS);
-		error = xfs_iomap_eof_align_last_fsb(ip, extsz, &last_fsb);
-		if (error)
-			goto out_unlock;
-	} else {
-		if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
-			last_fsb = min(last_fsb, (xfs_fileoff_t)
-					imap->br_blockcount +
-					imap->br_startoff);
-	}
-	count_fsb = last_fsb - offset_fsb;
 	ASSERT(count_fsb > 0);
-	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz);

-	if (unlikely(rt)) {
+	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
+					   xfs_get_extsz_hint(ip));
+	if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
 		resrtextents = qblocks = resaligned;
 		resrtextents /= mp->m_sb.sb_rextsize;
 		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
 		quota_flag = XFS_QMOPT_RES_RTBLKS;
 	} else {
-		resrtextents = 0;
 		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
 		quota_flag = XFS_QMOPT_RES_REGBLKS;
 	}

-	/*
-	 * Drop the shared lock acquired by the caller, attach the dquot if
-	 * necessary and move on to transaction setup.
-	 */
-	xfs_iunlock(ip, lockmode);
 	error = xfs_qm_dqattach(ip);
 	if (error)
 		return error;
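
Note (illustration only, not part of the patch): the EOF alignment that xfs_iomap_eof_align_last_fsb() applies above is plain round-up arithmetic. A minimal userspace sketch with invented numbers and a stand-in roundup_64():

#include <stdint.h>
#include <stdio.h>

/* stand-in for the kernel's roundup_64(): round x up to a multiple of y */
static uint64_t roundup_64(uint64_t x, uint64_t y)
{
	return ((x + y - 1) / y) * y;
}

int main(void)
{
	uint64_t align = 16;	/* stripe unit in fs blocks (invented) */
	uint64_t extsz = 12;	/* extent size hint in fs blocks (invented) */
	uint64_t end_fsb = 100;	/* requested allocation end (invented) */

	/* an extent size hint rounds the stripe alignment up further */
	if (extsz)
		align = align ? roundup_64(align, extsz) : extsz;

	/* align becomes 24, so 100 rounds up to 120 */
	printf("aligned end_fsb = %llu\n",
	       (unsigned long long)roundup_64(end_fsb, align));
	return 0;
}
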
@@ -239,8 +244,7 @@
 	if (error)
 		return error;

-	lockmode = XFS_ILOCK_EXCL;
-	xfs_ilock(ip, lockmode);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);

 	error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
 	if (error)
@@ -253,8 +257,8 @@
 	 * caller gave to us.
 	 */
 	nimaps = 1;
-	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
-				bmapi_flags, resblks, imap, &nimaps);
+	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0,
+				imap, &nimaps);
 	if (error)
 		goto out_res_cancel;

@@ -273,11 +277,11 @@
 		goto out_unlock;
 	}

-	if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
+	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
 		error = xfs_alert_fsblock_zero(ip, imap);

 out_unlock:
-	xfs_iunlock(ip, lockmode);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;

 out_res_cancel:
@@ -289,11 +293,11 @@

 STATIC bool
 xfs_quota_need_throttle(
-	struct xfs_inode *ip,
-	int type,
-	xfs_fsblock_t alloc_blocks)
+	struct xfs_inode	*ip,
+	xfs_dqtype_t		type,
+	xfs_fsblock_t		alloc_blocks)
 {
-	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
+	struct xfs_dquot	*dq = xfs_inode_dquot(ip, type);

 	if (!dq || !xfs_this_quota_on(ip->i_mount, type))
 		return false;
@@ -303,7 +307,7 @@
 		return false;

 	/* under the lo watermark, no throttle */
-	if (dq->q_res_bcount + alloc_blocks < dq->q_prealloc_lo_wmark)
+	if (dq->q_blk.reserved + alloc_blocks < dq->q_prealloc_lo_wmark)
 		return false;

 	return true;
@@ -311,24 +315,24 @@

 STATIC void
 xfs_quota_calc_throttle(
-	struct xfs_inode *ip,
-	int type,
-	xfs_fsblock_t *qblocks,
-	int *qshift,
-	int64_t	*qfreesp)
+	struct xfs_inode	*ip,
+	xfs_dqtype_t		type,
+	xfs_fsblock_t		*qblocks,
+	int			*qshift,
+	int64_t			*qfreesp)
 {
-	int64_t freesp;
-	int shift = 0;
-	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
+	struct xfs_dquot	*dq = xfs_inode_dquot(ip, type);
+	int64_t			freesp;
+	int			shift = 0;

 	/* no dq, or over hi wmark, squash the prealloc completely */
-	if (!dq || dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
+	if (!dq || dq->q_blk.reserved >= dq->q_prealloc_hi_wmark) {
 		*qblocks = 0;
 		*qfreesp = 0;
 		return;
 	}

-	freesp = dq->q_prealloc_hi_wmark - dq->q_res_bcount;
+	freesp = dq->q_prealloc_hi_wmark - dq->q_blk.reserved;
 	if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
 		shift = 2;
 		if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
@@ -348,87 +352,83 @@
 }

 /*
- * If we are doing a write at the end of the file and there are no allocations
- * past this one, then extend the allocation out to the file system's write
- * iosize.
- *
  * If we don't have a user specified preallocation size, dynamically increase
  * the preallocation size as the size of the file grows. Cap the maximum size
  * at a single extent or less if the filesystem is near full. The closer the
- * filesystem is to full, the smaller the maximum prealocation.
- *
- * As an exception we don't do any preallocation at all if the file is smaller
- * than the minimum preallocation and we are using the default dynamic
- * preallocation scheme, as it is likely this is the only write to the file that
- * is going to be done.
- *
- * We clean up any extra space left over when the file is closed in
- * xfs_inactive().
+ * filesystem is to being full, the smaller the maximum preallocation.
  */
 STATIC xfs_fsblock_t
 xfs_iomap_prealloc_size(
 	struct xfs_inode	*ip,
+	int			whichfork,
 	loff_t			offset,
 	loff_t			count,
 	struct xfs_iext_cursor	*icur)
 {
+	struct xfs_iext_cursor	ncur = *icur;
+	struct xfs_bmbt_irec	prev, got;
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	struct xfs_bmbt_irec	prev;
-	int			shift = 0;
 	int64_t			freesp;
 	xfs_fsblock_t		qblocks;
-	int			qshift = 0;
 	xfs_fsblock_t		alloc_blocks = 0;
+	xfs_extlen_t		plen;
+	int			shift = 0;
+	int			qshift = 0;

-	if (offset + count <= XFS_ISIZE(ip))
-		return 0;
-
-	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) &&
-	    (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)))
+	/*
+	 * As an exception we don't do any preallocation at all if the file is
+	 * smaller than the minimum preallocation and we are using the default
+	 * dynamic preallocation scheme, as it is likely this is the only write
+	 * to the file that is going to be done.
+	 */
+	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks))
 		return 0;

 	/*
-	 * If an explicit allocsize is set, the file is small, or we
-	 * are writing behind a hole, then use the minimum prealloc:
+	 * Use the minimum preallocation size for small files or if we are
+	 * writing right after a hole.
 	 */
-	if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
-	    XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
-	    !xfs_iext_peek_prev_extent(ifp, icur, &prev) ||
+	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
+	    !xfs_iext_prev_extent(ifp, &ncur, &prev) ||
 	    prev.br_startoff + prev.br_blockcount < offset_fsb)
-		return mp->m_writeio_blocks;
+		return mp->m_allocsize_blocks;

 	/*
-	 * Determine the initial size of the preallocation. We are beyond the
-	 * current EOF here, but we need to take into account whether this is
-	 * a sparse write or an extending write when determining the
-	 * preallocation size.  Hence we need to look up the extent that ends
-	 * at the current write offset and use the result to determine the
-	 * preallocation size.
-	 *
-	 * If the extent is a hole, then preallocation is essentially disabled.
-	 * Otherwise we take the size of the preceding data extent as the basis
-	 * for the preallocation size. If the size of the extent is greater than
-	 * half the maximum extent length, then use the current offset as the
-	 * basis. This ensures that for large files the preallocation size
-	 * always extends to MAXEXTLEN rather than falling short due to things
-	 * like stripe unit/width alignment of real extents.
+	 * Take the size of the preceding data extents as the basis for the
+	 * preallocation size. Note that we don't care if the previous extents
+	 * are written or not.
 	 */
-	if (prev.br_blockcount <= (MAXEXTLEN >> 1))
-		alloc_blocks = prev.br_blockcount << 1;
-	else
+	plen = prev.br_blockcount;
+	while (xfs_iext_prev_extent(ifp, &ncur, &got)) {
+		if (plen > MAXEXTLEN / 2 ||
+		    isnullstartblock(got.br_startblock) ||
+		    got.br_startoff + got.br_blockcount != prev.br_startoff ||
+		    got.br_startblock + got.br_blockcount != prev.br_startblock)
+			break;
+		plen += got.br_blockcount;
+		prev = got;
+	}
+
+	/*
+	 * If the size of the extents is greater than half the maximum extent
+	 * length, then use the current offset as the basis. This ensures that
+	 * for large files the preallocation size always extends to MAXEXTLEN
+	 * rather than falling short due to things like stripe unit/width
+	 * alignment of real extents.
+	 */
+	alloc_blocks = plen * 2;
+	if (alloc_blocks > MAXEXTLEN)
 		alloc_blocks = XFS_B_TO_FSB(mp, offset);
-	if (!alloc_blocks)
-		goto check_writeio;
 	qblocks = alloc_blocks;

 	/*
 	 * MAXEXTLEN is not a power of two value but we round the prealloc down
 	 * to the nearest power of two value after throttling. To prevent the
-	 * round down from unconditionally reducing the maximum supported prealloc
-	 * size, we round up first, apply appropriate throttling, round down and
-	 * cap the value to MAXEXTLEN.
+	 * round down from unconditionally reducing the maximum supported
+	 * prealloc size, we round up first, apply appropriate throttling,
+	 * round down and cap the value to MAXEXTLEN.
 	 */
 	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
 				       alloc_blocks);
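
Note (illustration only, not part of the patch): the round-up / throttle / round-down / cap ordering described in the comment above can be sketched in plain userspace C. The numbers and the fixed throttle shift below are invented; the real function derives the shift from free space and quota watermarks.

#include <stdint.h>
#include <stdio.h>

#define MAXEXTLEN	((1ULL << 21) - 1)	/* 2097151 blocks, not a power of two */

static uint64_t roundup_pow_of_two(uint64_t x)
{
	uint64_t r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

static uint64_t rounddown_pow_of_two(uint64_t x)
{
	uint64_t r = 1;

	while (r * 2 <= x)
		r <<= 1;
	return r;
}

int main(void)
{
	uint64_t alloc_blocks = MAXEXTLEN;	/* wanted prealloc (invented) */
	int shift = 2;				/* pretend free space is below the 5% mark */

	/* round up first so the later round down cannot needlessly halve the maximum */
	alloc_blocks = roundup_pow_of_two(alloc_blocks);	/* 2097152 */
	alloc_blocks >>= shift;					/* throttle: 524288 */
	alloc_blocks = rounddown_pow_of_two(alloc_blocks);	/* already a power of two */
	if (alloc_blocks > MAXEXTLEN)				/* finally cap to MAXEXTLEN */
		alloc_blocks = MAXEXTLEN;

	printf("throttled prealloc = %llu blocks\n",
	       (unsigned long long)alloc_blocks);		/* 524288 */
	return 0;
}
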
@@ -450,14 +450,14 @@
 	 * Check each quota to cap the prealloc size, provide a shift value to
 	 * throttle with and adjust amount of available space.
 	 */
-	if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
-		xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift,
+	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_USER, alloc_blocks))
+		xfs_quota_calc_throttle(ip, XFS_DQTYPE_USER, &qblocks, &qshift,
 					&freesp);
-	if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
-		xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift,
+	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_GROUP, alloc_blocks))
+		xfs_quota_calc_throttle(ip, XFS_DQTYPE_GROUP, &qblocks, &qshift,
 					&freesp);
-	if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
-		xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift,
+	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_PROJ, alloc_blocks))
+		xfs_quota_calc_throttle(ip, XFS_DQTYPE_PROJ, &qblocks, &qshift,
 					&freesp);

 	/*
....@@ -489,317 +489,11 @@
489489 */
490490 while (alloc_blocks && alloc_blocks >= freesp)
491491 alloc_blocks >>= 4;
492
-check_writeio:
493
- if (alloc_blocks < mp->m_writeio_blocks)
494
- alloc_blocks = mp->m_writeio_blocks;
492
+ if (alloc_blocks < mp->m_allocsize_blocks)
493
+ alloc_blocks = mp->m_allocsize_blocks;
495494 trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
496
- mp->m_writeio_blocks);
495
+ mp->m_allocsize_blocks);
497496 return alloc_blocks;
498
-}
499
-
500
-static int
501
-xfs_file_iomap_begin_delay(
502
- struct inode *inode,
503
- loff_t offset,
504
- loff_t count,
505
- struct iomap *iomap)
506
-{
507
- struct xfs_inode *ip = XFS_I(inode);
508
- struct xfs_mount *mp = ip->i_mount;
509
- struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
510
- xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
511
- xfs_fileoff_t maxbytes_fsb =
512
- XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
513
- xfs_fileoff_t end_fsb;
514
- int error = 0, eof = 0;
515
- struct xfs_bmbt_irec got;
516
- struct xfs_iext_cursor icur;
517
- xfs_fsblock_t prealloc_blocks = 0;
518
-
519
- ASSERT(!XFS_IS_REALTIME_INODE(ip));
520
- ASSERT(!xfs_get_extsz_hint(ip));
521
-
522
- xfs_ilock(ip, XFS_ILOCK_EXCL);
523
-
524
- if (unlikely(XFS_TEST_ERROR(
525
- (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
526
- XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
527
- mp, XFS_ERRTAG_BMAPIFORMAT))) {
528
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
529
- error = -EFSCORRUPTED;
530
- goto out_unlock;
531
- }
532
-
533
- XFS_STATS_INC(mp, xs_blk_mapw);
534
-
535
- if (!(ifp->if_flags & XFS_IFEXTENTS)) {
536
- error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
537
- if (error)
538
- goto out_unlock;
539
- }
540
-
541
- eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got);
542
- if (!eof && got.br_startoff <= offset_fsb) {
543
- if (xfs_is_reflink_inode(ip)) {
544
- bool shared;
545
-
546
- end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
547
- maxbytes_fsb);
548
- xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
549
- error = xfs_reflink_reserve_cow(ip, &got, &shared);
550
- if (error)
551
- goto out_unlock;
552
- }
553
-
554
- trace_xfs_iomap_found(ip, offset, count, 0, &got);
555
- goto done;
556
- }
557
-
558
- error = xfs_qm_dqattach_locked(ip, false);
559
- if (error)
560
- goto out_unlock;
561
-
562
- /*
563
- * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages
564
- * to keep the chunks of work done where somewhat symmetric with the
565
- * work writeback does. This is a completely arbitrary number pulled
566
- * out of thin air as a best guess for initial testing.
567
- *
568
- * Note that the values needs to be less than 32-bits wide until
569
- * the lower level functions are updated.
570
- */
571
- count = min_t(loff_t, count, 1024 * PAGE_SIZE);
572
- end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
573
-
574
- if (eof) {
575
- prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count,
576
- &icur);
577
- if (prealloc_blocks) {
578
- xfs_extlen_t align;
579
- xfs_off_t end_offset;
580
- xfs_fileoff_t p_end_fsb;
581
-
582
- end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1);
583
- p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
584
- prealloc_blocks;
585
-
586
- align = xfs_eof_alignment(ip, 0);
587
- if (align)
588
- p_end_fsb = roundup_64(p_end_fsb, align);
589
-
590
- p_end_fsb = min(p_end_fsb, maxbytes_fsb);
591
- ASSERT(p_end_fsb > offset_fsb);
592
- prealloc_blocks = p_end_fsb - end_fsb;
593
- }
594
- }
595
-
596
-retry:
597
- error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
598
- end_fsb - offset_fsb, prealloc_blocks, &got, &icur,
599
- eof);
600
- switch (error) {
601
- case 0:
602
- break;
603
- case -ENOSPC:
604
- case -EDQUOT:
605
- /* retry without any preallocation */
606
- trace_xfs_delalloc_enospc(ip, offset, count);
607
- if (prealloc_blocks) {
608
- prealloc_blocks = 0;
609
- goto retry;
610
- }
611
- /*FALLTHRU*/
612
- default:
613
- goto out_unlock;
614
- }
615
-
616
- /*
617
- * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
618
- * them out if the write happens to fail.
619
- */
620
- iomap->flags |= IOMAP_F_NEW;
621
- trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
622
-done:
623
- if (isnullstartblock(got.br_startblock))
624
- got.br_startblock = DELAYSTARTBLOCK;
625
-
626
- if (!got.br_startblock) {
627
- error = xfs_alert_fsblock_zero(ip, &got);
628
- if (error)
629
- goto out_unlock;
630
- }
631
-
632
- xfs_bmbt_to_iomap(ip, iomap, &got);
633
-
634
-out_unlock:
635
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
636
- return error;
637
-}
638
-
639
-/*
640
- * Pass in a delayed allocate extent, convert it to real extents;
641
- * return to the caller the extent we create which maps on top of
642
- * the originating callers request.
643
- *
644
- * Called without a lock on the inode.
645
- *
646
- * We no longer bother to look at the incoming map - all we have to
647
- * guarantee is that whatever we allocate fills the required range.
648
- */
649
-int
650
-xfs_iomap_write_allocate(
651
- xfs_inode_t *ip,
652
- int whichfork,
653
- xfs_off_t offset,
654
- xfs_bmbt_irec_t *imap,
655
- unsigned int *cow_seq)
656
-{
657
- xfs_mount_t *mp = ip->i_mount;
658
- struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
659
- xfs_fileoff_t offset_fsb, last_block;
660
- xfs_fileoff_t end_fsb, map_start_fsb;
661
- xfs_filblks_t count_fsb;
662
- xfs_trans_t *tp;
663
- int nimaps;
664
- int error = 0;
665
- int flags = XFS_BMAPI_DELALLOC;
666
- int nres;
667
-
668
- if (whichfork == XFS_COW_FORK)
669
- flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
670
-
671
- /*
672
- * Make sure that the dquots are there.
673
- */
674
- error = xfs_qm_dqattach(ip);
675
- if (error)
676
- return error;
677
-
678
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
679
- count_fsb = imap->br_blockcount;
680
- map_start_fsb = imap->br_startoff;
681
-
682
- XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
683
-
684
- while (count_fsb != 0) {
685
- /*
686
- * Set up a transaction with which to allocate the
687
- * backing store for the file. Do allocations in a
688
- * loop until we get some space in the range we are
689
- * interested in. The other space that might be allocated
690
- * is in the delayed allocation extent on which we sit
691
- * but before our buffer starts.
692
- */
693
- nimaps = 0;
694
- while (nimaps == 0) {
695
- nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
696
- /*
697
- * We have already reserved space for the extent and any
698
- * indirect blocks when creating the delalloc extent,
699
- * there is no need to reserve space in this transaction
700
- * again.
701
- */
702
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0,
703
- 0, XFS_TRANS_RESERVE, &tp);
704
- if (error)
705
- return error;
706
-
707
- xfs_ilock(ip, XFS_ILOCK_EXCL);
708
- xfs_trans_ijoin(tp, ip, 0);
709
-
710
- /*
711
- * it is possible that the extents have changed since
712
- * we did the read call as we dropped the ilock for a
713
- * while. We have to be careful about truncates or hole
714
- * punchs here - we are not allowed to allocate
715
- * non-delalloc blocks here.
716
- *
717
- * The only protection against truncation is the pages
718
- * for the range we are being asked to convert are
719
- * locked and hence a truncate will block on them
720
- * first.
721
- *
722
- * As a result, if we go beyond the range we really
723
- * need and hit an delalloc extent boundary followed by
724
- * a hole while we have excess blocks in the map, we
725
- * will fill the hole incorrectly and overrun the
726
- * transaction reservation.
727
- *
728
- * Using a single map prevents this as we are forced to
729
- * check each map we look for overlap with the desired
730
- * range and abort as soon as we find it. Also, given
731
- * that we only return a single map, having one beyond
732
- * what we can return is probably a bit silly.
733
- *
734
- * We also need to check that we don't go beyond EOF;
735
- * this is a truncate optimisation as a truncate sets
736
- * the new file size before block on the pages we
737
- * currently have locked under writeback. Because they
738
- * are about to be tossed, we don't need to write them
739
- * back....
740
- */
741
- nimaps = 1;
742
- end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
743
- error = xfs_bmap_last_offset(ip, &last_block,
744
- XFS_DATA_FORK);
745
- if (error)
746
- goto trans_cancel;
747
-
748
- last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
749
- if ((map_start_fsb + count_fsb) > last_block) {
750
- count_fsb = last_block - map_start_fsb;
751
- if (count_fsb == 0) {
752
- error = -EAGAIN;
753
- goto trans_cancel;
754
- }
755
- }
756
-
757
- /*
758
- * From this point onwards we overwrite the imap
759
- * pointer that the caller gave to us.
760
- */
761
- error = xfs_bmapi_write(tp, ip, map_start_fsb,
762
- count_fsb, flags, nres, imap,
763
- &nimaps);
764
- if (error)
765
- goto trans_cancel;
766
-
767
- error = xfs_trans_commit(tp);
768
- if (error)
769
- goto error0;
770
-
771
- if (whichfork == XFS_COW_FORK)
772
- *cow_seq = READ_ONCE(ifp->if_seq);
773
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
774
- }
775
-
776
- /*
777
- * See if we were able to allocate an extent that
778
- * covers at least part of the callers request
779
- */
780
- if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
781
- return xfs_alert_fsblock_zero(ip, imap);
782
-
783
- if ((offset_fsb >= imap->br_startoff) &&
784
- (offset_fsb < (imap->br_startoff +
785
- imap->br_blockcount))) {
786
- XFS_STATS_INC(mp, xs_xstrat_quick);
787
- return 0;
788
- }
789
-
790
- /*
791
- * So far we have not mapped the requested part of the
792
- * file, just surrounding data, try again.
793
- */
794
- count_fsb -= imap->br_blockcount;
795
- map_start_fsb = imap->br_startoff + imap->br_blockcount;
796
- }
797
-
798
-trans_cancel:
799
- xfs_trans_cancel(tp);
800
-error0:
801
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
802
- return error;
803497 }
804498
805499 int
....@@ -839,6 +533,11 @@
839533 */
840534 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
841535
536
+ /* Attach dquots so that bmbt splits are accounted correctly. */
537
+ error = xfs_qm_dqattach(ip);
538
+ if (error)
539
+ return error;
540
+
842541 do {
843542 /*
844543 * Set up a transaction to convert the range of extents
....@@ -850,12 +549,17 @@
850549 * complete here and might deadlock on the iolock.
851550 */
852551 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
853
- XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
552
+ XFS_TRANS_RESERVE, &tp);
854553 if (error)
855554 return error;
856555
857556 xfs_ilock(ip, XFS_ILOCK_EXCL);
858557 xfs_trans_ijoin(tp, ip, 0);
558
+
559
+ error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
560
+ XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES);
561
+ if (error)
562
+ goto error_on_bmapi_transaction;
859563
860564 /*
861565 * Modify the unwritten extent state of the buffer.
....@@ -888,7 +592,7 @@
888592 if (error)
889593 return error;
890594
891
- if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
595
+ if (unlikely(!xfs_valid_startblock(ip, imap.br_startblock)))
892596 return xfs_alert_fsblock_zero(ip, &imap);
893597
894598 if ((numblks_fsb = imap.br_blockcount) == 0) {
....@@ -914,23 +618,42 @@
914618 static inline bool
915619 imap_needs_alloc(
916620 struct inode *inode,
621
+ unsigned flags,
917622 struct xfs_bmbt_irec *imap,
918623 int nimaps)
919624 {
920
- return !nimaps ||
921
- imap->br_startblock == HOLESTARTBLOCK ||
922
- imap->br_startblock == DELAYSTARTBLOCK ||
923
- (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
625
+ /* don't allocate blocks when just zeroing */
626
+ if (flags & IOMAP_ZERO)
627
+ return false;
628
+ if (!nimaps ||
629
+ imap->br_startblock == HOLESTARTBLOCK ||
630
+ imap->br_startblock == DELAYSTARTBLOCK)
631
+ return true;
632
+ /* we convert unwritten extents before copying the data for DAX */
633
+ if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN)
634
+ return true;
635
+ return false;
924636 }
925637
926638 static inline bool
927
-needs_cow_for_zeroing(
639
+imap_needs_cow(
640
+ struct xfs_inode *ip,
641
+ unsigned int flags,
928642 struct xfs_bmbt_irec *imap,
929643 int nimaps)
930644 {
931
- return nimaps &&
932
- imap->br_startblock != HOLESTARTBLOCK &&
933
- imap->br_state != XFS_EXT_UNWRITTEN;
645
+ if (!xfs_is_cow_inode(ip))
646
+ return false;
647
+
648
+ /* when zeroing we don't have to COW holes or unwritten extents */
649
+ if (flags & IOMAP_ZERO) {
650
+ if (!nimaps ||
651
+ imap->br_startblock == HOLESTARTBLOCK ||
652
+ imap->br_state == XFS_EXT_UNWRITTEN)
653
+ return false;
654
+ }
655
+
656
+ return true;
934657 }
935658
936659 static int
....@@ -946,15 +669,8 @@
946669 * COW writes may allocate delalloc space or convert unwritten COW
947670 * extents, so we need to make sure to take the lock exclusively here.
948671 */
949
- if (xfs_is_reflink_inode(ip) && is_write) {
950
- /*
951
- * FIXME: It could still overwrite on unshared extents and not
952
- * need allocation.
953
- */
954
- if (flags & IOMAP_NOWAIT)
955
- return -EAGAIN;
672
+ if (xfs_is_cow_inode(ip) && is_write)
956673 mode = XFS_ILOCK_EXCL;
957
- }
958674
959675 /*
960676 * Extents not yet cached requires exclusive access, don't block. This
....@@ -980,7 +696,7 @@
980696 * check, so if we got ILOCK_SHARED for a write and but we're now a
981697 * reflink inode we have to switch to ILOCK_EXCL and relock.
982698 */
983
- if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_reflink_inode(ip)) {
699
+ if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_cow_inode(ip)) {
984700 xfs_iunlock(ip, mode);
985701 mode = XFS_ILOCK_EXCL;
986702 goto relock;
....@@ -990,102 +706,103 @@
990706 return 0;
991707 }
992708
709
+/*
710
+ * Check that the imap we are going to return to the caller spans the entire
711
+ * range that the caller requested for the IO.
712
+ */
713
+static bool
714
+imap_spans_range(
715
+ struct xfs_bmbt_irec *imap,
716
+ xfs_fileoff_t offset_fsb,
717
+ xfs_fileoff_t end_fsb)
718
+{
719
+ if (imap->br_startoff > offset_fsb)
720
+ return false;
721
+ if (imap->br_startoff + imap->br_blockcount < end_fsb)
722
+ return false;
723
+ return true;
724
+}
725
+
993726 static int
994
-xfs_file_iomap_begin(
727
+xfs_direct_write_iomap_begin(
995728 struct inode *inode,
996729 loff_t offset,
997730 loff_t length,
998731 unsigned flags,
999
- struct iomap *iomap)
732
+ struct iomap *iomap,
733
+ struct iomap *srcmap)
1000734 {
1001735 struct xfs_inode *ip = XFS_I(inode);
1002736 struct xfs_mount *mp = ip->i_mount;
1003
- struct xfs_bmbt_irec imap;
1004
- xfs_fileoff_t offset_fsb, end_fsb;
737
+ struct xfs_bmbt_irec imap, cmap;
738
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
739
+ xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
1005740 int nimaps = 1, error = 0;
1006
- bool shared = false, trimmed = false;
741
+ bool shared = false;
742
+ u16 iomap_flags = 0;
1007743 unsigned lockmode;
744
+
745
+ ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
1008746
1009747 if (XFS_FORCED_SHUTDOWN(mp))
1010748 return -EIO;
1011749
1012
- if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
1013
- !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
1014
- /* Reserve delalloc blocks for regular writeback. */
1015
- return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
1016
- }
1017
-
1018750 /*
1019
- * Lock the inode in the manner required for the specified operation and
1020
- * check for as many conditions that would result in blocking as
1021
- * possible. This removes most of the non-blocking checks from the
1022
- * mapping code below.
751
+ * Writes that span EOF might trigger an IO size update on completion,
752
+ * so consider them to be dirty for the purposes of O_DSYNC even if
753
+ * there is no other metadata changes pending or have been made here.
1023754 */
755
+ if (offset + length > i_size_read(inode))
756
+ iomap_flags |= IOMAP_F_DIRTY;
757
+
1024758 error = xfs_ilock_for_iomap(ip, flags, &lockmode);
1025759 if (error)
1026760 return error;
1027
-
1028
- ASSERT(offset <= mp->m_super->s_maxbytes);
1029
- if (offset > mp->m_super->s_maxbytes - length)
1030
- length = mp->m_super->s_maxbytes - offset;
1031
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
1032
- end_fsb = XFS_B_TO_FSB(mp, offset + length);
1033761
1034762 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
1035763 &nimaps, 0);
1036764 if (error)
1037765 goto out_unlock;
1038766
1039
- if (flags & IOMAP_REPORT) {
1040
- /* Trim the mapping to the nearest shared extent boundary. */
1041
- error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
1042
- &trimmed);
767
+ if (imap_needs_cow(ip, flags, &imap, nimaps)) {
768
+ error = -EAGAIN;
769
+ if (flags & IOMAP_NOWAIT)
770
+ goto out_unlock;
771
+
772
+ /* may drop and re-acquire the ilock */
773
+ error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
774
+ &lockmode, flags & IOMAP_DIRECT);
1043775 if (error)
1044776 goto out_unlock;
1045
- }
1046
-
1047
- /* Non-modifying mapping requested, so we are done */
1048
- if (!(flags & (IOMAP_WRITE | IOMAP_ZERO)))
1049
- goto out_found;
1050
-
1051
- /*
1052
- * Break shared extents if necessary. Checks for non-blocking IO have
1053
- * been done up front, so we don't need to do them here.
1054
- */
1055
- if (xfs_is_reflink_inode(ip)) {
1056
- /* if zeroing doesn't need COW allocation, then we are done. */
1057
- if ((flags & IOMAP_ZERO) &&
1058
- !needs_cow_for_zeroing(&imap, nimaps))
1059
- goto out_found;
1060
-
1061
- if (flags & IOMAP_DIRECT) {
1062
- /* may drop and re-acquire the ilock */
1063
- error = xfs_reflink_allocate_cow(ip, &imap, &shared,
1064
- &lockmode);
1065
- if (error)
1066
- goto out_unlock;
1067
- } else {
1068
- error = xfs_reflink_reserve_cow(ip, &imap, &shared);
1069
- if (error)
1070
- goto out_unlock;
1071
- }
1072
-
777
+ if (shared)
778
+ goto out_found_cow;
1073779 end_fsb = imap.br_startoff + imap.br_blockcount;
1074780 length = XFS_FSB_TO_B(mp, end_fsb) - offset;
1075781 }
1076782
1077
- /* Don't need to allocate over holes when doing zeroing operations. */
1078
- if (flags & IOMAP_ZERO)
1079
- goto out_found;
783
+ if (imap_needs_alloc(inode, flags, &imap, nimaps))
784
+ goto allocate_blocks;
1080785
1081
- if (!imap_needs_alloc(inode, &imap, nimaps))
1082
- goto out_found;
1083
-
1084
- /* If nowait is set bail since we are going to make allocations. */
1085
- if (flags & IOMAP_NOWAIT) {
786
+ /*
787
+ * NOWAIT IO needs to span the entire requested IO with a single map so
788
+ * that we avoid partial IO failures due to the rest of the IO range not
789
+ * covered by this map triggering an EAGAIN condition when it is
790
+ * subsequently mapped and aborting the IO.
791
+ */
792
+ if ((flags & IOMAP_NOWAIT) &&
793
+ !imap_spans_range(&imap, offset_fsb, end_fsb)) {
1086794 error = -EAGAIN;
1087795 goto out_unlock;
1088796 }
797
+
798
+ xfs_iunlock(ip, lockmode);
799
+ trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
800
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
801
+
802
+allocate_blocks:
803
+ error = -EAGAIN;
804
+ if (flags & IOMAP_NOWAIT)
805
+ goto out_unlock;
1089806
1090807 /*
1091808 * We cap the maximum length we map to a sane size to keep the chunks
....@@ -1097,55 +814,282 @@
1097814 * lower level functions are updated.
1098815 */
1099816 length = min_t(loff_t, length, 1024 * PAGE_SIZE);
817
+ end_fsb = xfs_iomap_end_fsb(mp, offset, length);
1100818
1101
- /*
1102
- * xfs_iomap_write_direct() expects the shared lock. It is unlocked on
1103
- * return.
1104
- */
1105
- if (lockmode == XFS_ILOCK_EXCL)
1106
- xfs_ilock_demote(ip, lockmode);
1107
- error = xfs_iomap_write_direct(ip, offset, length, &imap,
1108
- nimaps);
819
+ if (offset + length > XFS_ISIZE(ip))
820
+ end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb);
821
+ else if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
822
+ end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount);
823
+ xfs_iunlock(ip, lockmode);
824
+
825
+ error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
826
+ &imap);
1109827 if (error)
1110828 return error;
1111829
1112
- iomap->flags |= IOMAP_F_NEW;
1113
- trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
830
+ trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
831
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW);
1114832
1115
-out_finish:
1116
- if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields
1117
- & ~XFS_ILOG_TIMESTAMP))
1118
- iomap->flags |= IOMAP_F_DIRTY;
1119
-
1120
- xfs_bmbt_to_iomap(ip, iomap, &imap);
1121
-
1122
- if (shared)
1123
- iomap->flags |= IOMAP_F_SHARED;
1124
- return 0;
1125
-
1126
-out_found:
1127
- ASSERT(nimaps);
833
+out_found_cow:
1128834 xfs_iunlock(ip, lockmode);
1129
- trace_xfs_iomap_found(ip, offset, length, 0, &imap);
1130
- goto out_finish;
835
+ length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
836
+ trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
837
+ if (imap.br_startblock != HOLESTARTBLOCK) {
838
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
839
+ if (error)
840
+ return error;
841
+ }
842
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
1131843
1132844 out_unlock:
1133845 xfs_iunlock(ip, lockmode);
1134846 return error;
1135847 }
1136848
849
+const struct iomap_ops xfs_direct_write_iomap_ops = {
850
+ .iomap_begin = xfs_direct_write_iomap_begin,
851
+};
852
+
1137853 static int
1138
-xfs_file_iomap_end_delalloc(
1139
- struct xfs_inode *ip,
854
+xfs_buffered_write_iomap_begin(
855
+ struct inode *inode,
856
+ loff_t offset,
857
+ loff_t count,
858
+ unsigned flags,
859
+ struct iomap *iomap,
860
+ struct iomap *srcmap)
861
+{
862
+ struct xfs_inode *ip = XFS_I(inode);
863
+ struct xfs_mount *mp = ip->i_mount;
864
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
865
+ xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, count);
866
+ struct xfs_bmbt_irec imap, cmap;
867
+ struct xfs_iext_cursor icur, ccur;
868
+ xfs_fsblock_t prealloc_blocks = 0;
869
+ bool eof = false, cow_eof = false, shared = false;
870
+ int allocfork = XFS_DATA_FORK;
871
+ int error = 0;
872
+
873
+ if (XFS_FORCED_SHUTDOWN(mp))
874
+ return -EIO;
875
+
876
+ /* we can't use delayed allocations when using extent size hints */
877
+ if (xfs_get_extsz_hint(ip))
878
+ return xfs_direct_write_iomap_begin(inode, offset, count,
879
+ flags, iomap, srcmap);
880
+
881
+ ASSERT(!XFS_IS_REALTIME_INODE(ip));
882
+
883
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
884
+
885
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
886
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
887
+ error = -EFSCORRUPTED;
888
+ goto out_unlock;
889
+ }
890
+
891
+ XFS_STATS_INC(mp, xs_blk_mapw);
892
+
893
+ if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
894
+ error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
895
+ if (error)
896
+ goto out_unlock;
897
+ }
898
+
899
+ /*
900
+ * Search the data fork first to look up our source mapping. We
901
+ * always need the data fork map, as we have to return it to the
902
+ * iomap code so that the higher level write code can read data in to
903
+ * perform read-modify-write cycles for unaligned writes.
904
+ */
905
+ eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
906
+ if (eof)
907
+ imap.br_startoff = end_fsb; /* fake hole until the end */
908
+
909
+ /* We never need to allocate blocks for zeroing a hole. */
910
+ if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
911
+ xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
912
+ goto out_unlock;
913
+ }
914
+
915
+ /*
916
+ * Search the COW fork extent list even if we did not find a data fork
917
+ * extent. This serves two purposes: first this implements the
918
+ * speculative preallocation using cowextsize, so that we also unshare
919
+ * block adjacent to shared blocks instead of just the shared blocks
920
+ * themselves. Second the lookup in the extent list is generally faster
921
+ * than going out to the shared extent tree.
922
+ */
923
+ if (xfs_is_cow_inode(ip)) {
924
+ if (!ip->i_cowfp) {
925
+ ASSERT(!xfs_is_reflink_inode(ip));
926
+ xfs_ifork_init_cow(ip);
927
+ }
928
+ cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
929
+ &ccur, &cmap);
930
+ if (!cow_eof && cmap.br_startoff <= offset_fsb) {
931
+ trace_xfs_reflink_cow_found(ip, &cmap);
932
+ goto found_cow;
933
+ }
934
+ }
935
+
936
+ if (imap.br_startoff <= offset_fsb) {
937
+ /*
938
+ * For reflink files we may need a delalloc reservation when
939
+ * overwriting shared extents. This includes zeroing of
940
+ * existing extents that contain data.
941
+ */
942
+ if (!xfs_is_cow_inode(ip) ||
943
+ ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
944
+ trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
945
+ &imap);
946
+ goto found_imap;
947
+ }
948
+
949
+ xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);
950
+
951
+ /* Trim the mapping to the nearest shared extent boundary. */
952
+ error = xfs_bmap_trim_cow(ip, &imap, &shared);
953
+ if (error)
954
+ goto out_unlock;
955
+
956
+ /* Not shared? Just report the (potentially capped) extent. */
957
+ if (!shared) {
958
+ trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
959
+ &imap);
960
+ goto found_imap;
961
+ }
962
+
963
+ /*
964
+ * Fork all the shared blocks from our write offset until the
965
+ * end of the extent.
966
+ */
967
+ allocfork = XFS_COW_FORK;
968
+ end_fsb = imap.br_startoff + imap.br_blockcount;
969
+ } else {
970
+ /*
971
+ * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
972
+ * pages to keep the chunks of work done where somewhat
973
+ * symmetric with the work writeback does. This is a completely
974
+ * arbitrary number pulled out of thin air.
975
+ *
976
+ * Note that the values needs to be less than 32-bits wide until
977
+ * the lower level functions are updated.
978
+ */
979
+ count = min_t(loff_t, count, 1024 * PAGE_SIZE);
980
+ end_fsb = xfs_iomap_end_fsb(mp, offset, count);
981
+
982
+ if (xfs_is_always_cow_inode(ip))
983
+ allocfork = XFS_COW_FORK;
984
+ }
985
+
986
+ error = xfs_qm_dqattach_locked(ip, false);
987
+ if (error)
988
+ goto out_unlock;
989
+
990
+ if (eof && offset + count > XFS_ISIZE(ip)) {
991
+ /*
992
+ * Determine the initial size of the preallocation.
993
+ * We clean up any extra preallocation when the file is closed.
994
+ */
995
+ if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
996
+ prealloc_blocks = mp->m_allocsize_blocks;
997
+ else
998
+ prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork,
999
+ offset, count, &icur);
1000
+ if (prealloc_blocks) {
1001
+ xfs_extlen_t align;
1002
+ xfs_off_t end_offset;
1003
+ xfs_fileoff_t p_end_fsb;
1004
+
1005
+ end_offset = XFS_ALLOC_ALIGN(mp, offset + count - 1);
1006
+ p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
1007
+ prealloc_blocks;
1008
+
1009
+ align = xfs_eof_alignment(ip);
1010
+ if (align)
1011
+ p_end_fsb = roundup_64(p_end_fsb, align);
1012
+
1013
+ p_end_fsb = min(p_end_fsb,
1014
+ XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
1015
+ ASSERT(p_end_fsb > offset_fsb);
1016
+ prealloc_blocks = p_end_fsb - end_fsb;
1017
+ }
1018
+ }
1019
+
1020
+retry:
1021
+ error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
1022
+ end_fsb - offset_fsb, prealloc_blocks,
1023
+ allocfork == XFS_DATA_FORK ? &imap : &cmap,
1024
+ allocfork == XFS_DATA_FORK ? &icur : &ccur,
1025
+ allocfork == XFS_DATA_FORK ? eof : cow_eof);
1026
+ switch (error) {
1027
+ case 0:
1028
+ break;
1029
+ case -ENOSPC:
1030
+ case -EDQUOT:
1031
+ /* retry without any preallocation */
1032
+ trace_xfs_delalloc_enospc(ip, offset, count);
1033
+ if (prealloc_blocks) {
1034
+ prealloc_blocks = 0;
1035
+ goto retry;
1036
+ }
1037
+ /*FALLTHRU*/
1038
+ default:
1039
+ goto out_unlock;
1040
+ }
1041
+
1042
+ if (allocfork == XFS_COW_FORK) {
1043
+ trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
1044
+ goto found_cow;
1045
+ }
1046
+
1047
+ /*
1048
+ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
1049
+ * them out if the write happens to fail.
1050
+ */
1051
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
1052
+ trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
1053
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW);
1054
+
1055
+found_imap:
1056
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
1057
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
1058
+
1059
+found_cow:
1060
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
1061
+ if (imap.br_startoff <= offset_fsb) {
1062
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
1063
+ if (error)
1064
+ return error;
1065
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
1066
+ }
1067
+
1068
+ xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
1069
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, 0);
1070
+
1071
+out_unlock:
1072
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
1073
+ return error;
1074
+}
1075
+
1076
+static int
1077
+xfs_buffered_write_iomap_end(
1078
+ struct inode *inode,
11401079 loff_t offset,
11411080 loff_t length,
11421081 ssize_t written,
1082
+ unsigned flags,
11431083 struct iomap *iomap)
11441084 {
1085
+ struct xfs_inode *ip = XFS_I(inode);
11451086 struct xfs_mount *mp = ip->i_mount;
11461087 xfs_fileoff_t start_fsb;
11471088 xfs_fileoff_t end_fsb;
11481089 int error = 0;
1090
+
1091
+ if (iomap->type != IOMAP_DELALLOC)
1092
+ return 0;
11491093
11501094 /*
11511095 * Behave as if the write failed if drop writes is enabled. Set the NEW
....@@ -1191,24 +1135,137 @@
11911135 return 0;
11921136 }
11931137
1138
+const struct iomap_ops xfs_buffered_write_iomap_ops = {
1139
+ .iomap_begin = xfs_buffered_write_iomap_begin,
1140
+ .iomap_end = xfs_buffered_write_iomap_end,
1141
+};
1142
+
11941143 static int
1195
-xfs_file_iomap_end(
1144
+xfs_read_iomap_begin(
11961145 struct inode *inode,
11971146 loff_t offset,
11981147 loff_t length,
1199
- ssize_t written,
12001148 unsigned flags,
1201
- struct iomap *iomap)
1149
+ struct iomap *iomap,
1150
+ struct iomap *srcmap)
12021151 {
1203
- if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
1204
- return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
1205
- length, written, iomap);
1206
- return 0;
1152
+ struct xfs_inode *ip = XFS_I(inode);
1153
+ struct xfs_mount *mp = ip->i_mount;
1154
+ struct xfs_bmbt_irec imap;
1155
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
1156
+ xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
1157
+ int nimaps = 1, error = 0;
1158
+ bool shared = false;
1159
+ unsigned lockmode;
1160
+
1161
+ ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));
1162
+
1163
+ if (XFS_FORCED_SHUTDOWN(mp))
1164
+ return -EIO;
1165
+
1166
+ error = xfs_ilock_for_iomap(ip, flags, &lockmode);
1167
+ if (error)
1168
+ return error;
1169
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
1170
+ &nimaps, 0);
1171
+ if (!error && (flags & IOMAP_REPORT))
1172
+ error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
1173
+ xfs_iunlock(ip, lockmode);
1174
+
1175
+ if (error)
1176
+ return error;
1177
+ trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
1178
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0);
12071179 }
12081180
1209
-const struct iomap_ops xfs_iomap_ops = {
1210
- .iomap_begin = xfs_file_iomap_begin,
1211
- .iomap_end = xfs_file_iomap_end,
1181
+const struct iomap_ops xfs_read_iomap_ops = {
1182
+ .iomap_begin = xfs_read_iomap_begin,
1183
+};
1184
+
1185
+static int
1186
+xfs_seek_iomap_begin(
1187
+ struct inode *inode,
1188
+ loff_t offset,
1189
+ loff_t length,
1190
+ unsigned flags,
1191
+ struct iomap *iomap,
1192
+ struct iomap *srcmap)
1193
+{
1194
+ struct xfs_inode *ip = XFS_I(inode);
1195
+ struct xfs_mount *mp = ip->i_mount;
1196
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
1197
+ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length);
1198
+ xfs_fileoff_t cow_fsb = NULLFILEOFF, data_fsb = NULLFILEOFF;
1199
+ struct xfs_iext_cursor icur;
1200
+ struct xfs_bmbt_irec imap, cmap;
1201
+ int error = 0;
1202
+ unsigned lockmode;
1203
+
1204
+ if (XFS_FORCED_SHUTDOWN(mp))
1205
+ return -EIO;
1206
+
1207
+ lockmode = xfs_ilock_data_map_shared(ip);
1208
+ if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
1209
+ error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
1210
+ if (error)
1211
+ goto out_unlock;
1212
+ }
1213
+
1214
+ if (xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) {
1215
+ /*
1216
+ * If we found a data extent we are done.
1217
+ */
1218
+ if (imap.br_startoff <= offset_fsb)
1219
+ goto done;
1220
+ data_fsb = imap.br_startoff;
1221
+ } else {
1222
+ /*
1223
+ * Fake a hole until the end of the file.
1224
+ */
1225
+ data_fsb = xfs_iomap_end_fsb(mp, offset, length);
1226
+ }
1227
+
1228
+ /*
1229
+ * If a COW fork extent covers the hole, report it - capped to the next
1230
+ * data fork extent:
1231
+ */
1232
+ if (xfs_inode_has_cow_data(ip) &&
1233
+ xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
1234
+ cow_fsb = cmap.br_startoff;
1235
+ if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
1236
+ if (data_fsb < cow_fsb + cmap.br_blockcount)
1237
+ end_fsb = min(end_fsb, data_fsb);
1238
+ xfs_trim_extent(&cmap, offset_fsb, end_fsb);
1239
+ error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
1240
+ /*
1241
+ * This is a COW extent, so we must probe the page cache
1242
+ * because there could be dirty page cache being backed
1243
+ * by this extent.
1244
+ */
1245
+ iomap->type = IOMAP_UNWRITTEN;
1246
+ goto out_unlock;
1247
+ }
1248
+
1249
+ /*
1250
+ * Else report a hole, capped to the next found data or COW extent.
1251
+ */
1252
+ if (cow_fsb != NULLFILEOFF && cow_fsb < data_fsb)
1253
+ imap.br_blockcount = cow_fsb - offset_fsb;
1254
+ else
1255
+ imap.br_blockcount = data_fsb - offset_fsb;
1256
+ imap.br_startoff = offset_fsb;
1257
+ imap.br_startblock = HOLESTARTBLOCK;
1258
+ imap.br_state = XFS_EXT_NORM;
1259
+done:
1260
+ xfs_trim_extent(&imap, offset_fsb, end_fsb);
1261
+ error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
1262
+out_unlock:
1263
+ xfs_iunlock(ip, lockmode);
1264
+ return error;
1265
+}
1266
+
1267
+const struct iomap_ops xfs_seek_iomap_ops = {
1268
+ .iomap_begin = xfs_seek_iomap_begin,
12121269 };
12131270
12141271 static int
@@ -1217,7 +1274,8 @@
 	loff_t			offset,
 	loff_t			length,
 	unsigned		flags,
-	struct iomap		*iomap)
+	struct iomap		*iomap,
+	struct iomap		*srcmap)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1233,23 +1291,21 @@
 	lockmode = xfs_ilock_attr_map_shared(ip);

 	/* if there are no attribute fork or extents, return ENOENT */
-	if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
+	if (!XFS_IFORK_Q(ip) || !ip->i_afp->if_nextents) {
 		error = -ENOENT;
 		goto out_unlock;
 	}

-	ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
+	ASSERT(ip->i_afp->if_format != XFS_DINODE_FMT_LOCAL);
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, XFS_BMAPI_ATTRFORK);
 out_unlock:
 	xfs_iunlock(ip, lockmode);

-	if (!error) {
-		ASSERT(nimaps);
-		xfs_bmbt_to_iomap(ip, iomap, &imap);
-	}
-
-	return error;
+	if (error)
+		return error;
+	ASSERT(nimaps);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
 }

 const struct iomap_ops xfs_xattr_iomap_ops = {
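
Note (orientation only): the net effect of this series on this file is that the single xfs_iomap_ops table is gone and callers now pass one of the per-purpose tables defined above (xfs_buffered_write_iomap_ops, xfs_direct_write_iomap_ops, xfs_read_iomap_ops, xfs_seek_iomap_ops, xfs_xattr_iomap_ops). The helper below is hypothetical and not part of the patch; it only illustrates how a write path could pick a table, assuming the usual struct kiocb / IOCB_DIRECT kernel definitions.

/* hypothetical helper, for illustration of the ops split only */
static const struct iomap_ops *
xfs_iomap_ops_for_write(struct kiocb *iocb)
{
	if (iocb->ki_flags & IOCB_DIRECT)
		return &xfs_direct_write_iomap_ops;	/* O_DIRECT writes */
	return &xfs_buffered_write_iomap_ops;		/* buffered writes */
}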