2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/ext4/extents.c
@@ -28,6 +28,7 @@
 #include <linux/uaccess.h>
 #include <linux/fiemap.h>
 #include <linux/backing-dev.h>
+#include <linux/iomap.h>
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 #include "xattr.h"
@@ -83,13 +84,6 @@
 	et->et_checksum = ext4_extent_block_csum(inode, eh);
 }

-static int ext4_split_extent(handle_t *handle,
-			     struct inode *inode,
-			     struct ext4_ext_path **ppath,
-			     struct ext4_map_blocks *map,
-			     int split_flag,
-			     int flags);
-
 static int ext4_split_extent_at(handle_t *handle,
 			     struct inode *inode,
 			     struct ext4_ext_path **ppath,
@@ -97,32 +91,41 @@
 			     int split_flag,
 			     int flags);

-static int ext4_find_delayed_extent(struct inode *inode,
-				    struct extent_status *newes);
-
-static int ext4_ext_truncate_extend_restart(handle_t *handle,
-					    struct inode *inode,
-					    int needed)
+static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
 {
-	int err;
-
-	if (!ext4_handle_valid(handle))
-		return 0;
-	if (handle->h_buffer_credits >= needed)
-		return 0;
 	/*
-	 * If we need to extend the journal get a few extra blocks
-	 * while we're at it for efficiency's sake.
+	 * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
+	 * moment, get_block can be called only for blocks inside i_size since
+	 * page cache has been already dropped and writes are blocked by
+	 * i_mutex. So we can safely drop the i_data_sem here.
 	 */
-	needed += 3;
-	err = ext4_journal_extend(handle, needed - handle->h_buffer_credits);
-	if (err <= 0)
-		return err;
-	err = ext4_truncate_restart_trans(handle, inode, needed);
-	if (err == 0)
-		err = -EAGAIN;
+	BUG_ON(EXT4_JOURNAL(inode) == NULL);
+	ext4_discard_preallocations(inode, 0);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	*dropped = 1;
+	return 0;
+}

-	return err;
+/*
+ * Make sure 'handle' has at least 'check_cred' credits. If not, restart
+ * transaction with 'restart_cred' credits. The function drops i_data_sem
+ * when restarting transaction and gets it after transaction is restarted.
+ *
+ * The function returns 0 on success, 1 if transaction had to be restarted,
+ * and < 0 in case of fatal error.
+ */
+int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
+				int check_cred, int restart_cred,
+				int revoke_cred)
+{
+	int ret;
+	int dropped = 0;
+
+	ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
+			revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
+	if (dropped)
+		down_write(&EXT4_I(inode)->i_data_sem);
+	return ret;
 }

 /*
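
Note: ext4_datasem_ensure_credits() is consumed later in this patch by ext4_ext_rm_leaf(). A minimal caller-side sketch (illustrative, not part of the patch): a return of 1 means the transaction was restarted and i_data_sem was dropped and re-taken, so the caller maps it to -EAGAIN and must revalidate its extent path.

	err = ext4_datasem_ensure_credits(handle, inode, credits,
					  credits, revoke_credits);
	if (err) {
		if (err > 0)
			err = -EAGAIN;	/* restarted: extent path is stale */
		goto out;
	}
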
@@ -133,14 +136,24 @@
 static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
 				struct ext4_ext_path *path)
 {
+	int err = 0;
+
 	if (path->p_bh) {
 		/* path points to block */
 		BUFFER_TRACE(path->p_bh, "get_write_access");
-		return ext4_journal_get_write_access(handle, path->p_bh);
+		err = ext4_journal_get_write_access(handle, path->p_bh);
+		/*
+		 * The extent buffer's verified bit will be set again in
+		 * __ext4_ext_dirty(). We could leave an inconsistent
+		 * buffer if the extents updating procedure breaks off due
+		 * to some error, so force it to be checked again.
+		 */
+		if (!err)
+			clear_buffer_verified(path->p_bh);
 	}
 	/* path points to leaf/index in inode body */
 	/* we use in-core data, no need to protect them */
-	return 0;
+	return err;
 }

 /*
@@ -149,8 +162,9 @@
  * - ENOMEM
  * - EIO
  */
-int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
-		     struct inode *inode, struct ext4_ext_path *path)
+static int __ext4_ext_dirty(const char *where, unsigned int line,
+			    handle_t *handle, struct inode *inode,
+			    struct ext4_ext_path *path)
 {
 	int err;

@@ -160,12 +174,18 @@
 		/* path points to block */
 		err = __ext4_handle_dirty_metadata(where, line, handle,
 						   inode, path->p_bh);
+		/* Extents updating done, re-set verified flag */
+		if (!err)
+			set_buffer_verified(path->p_bh);
 	} else {
 		/* path points to leaf/index in inode body */
 		err = ext4_mark_inode_dirty(handle, inode);
 	}
 	return err;
 }
+
+#define ext4_ext_dirty(handle, inode, path) \
+		__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))

 static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 					struct ext4_ext_path *path,
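
Note: the two hunks above form a pair. A sketch of the resulting verified-flag protocol (illustrative; the function names are those from this patch):

	err = ext4_ext_get_access(handle, inode, path + depth);
	/* on success: clear_buffer_verified(path->p_bh) */

	/* The extent block is modified here. Any failure in between
	 * leaves the buffer unverified, so the next
	 * __read_extent_tree_block() re-runs __ext4_ext_check() instead
	 * of trusting possibly inconsistent contents. */

	err = ext4_ext_dirty(handle, inode, path + depth);
	/* on success: set_buffer_verified(path->p_bh) */
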
@@ -290,58 +310,14 @@
 {
 	struct ext4_ext_path *path = *ppath;
 	int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
+	int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;
+
+	if (nofail)
+		flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;

 	return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
 			EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
-			EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
-			(nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
-}
-
-/*
- * Calculate the number of metadata blocks needed
- * to allocate @blocks
- * Worse case is one block per extent
- */
-int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	int idxs;
-
-	idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
-		/ sizeof(struct ext4_extent_idx));
-
-	/*
-	 * If the new delayed allocation block is contiguous with the
-	 * previous da block, it can share index blocks with the
-	 * previous block, so we only need to allocate a new index
-	 * block every idxs leaf blocks. At ldxs**2 blocks, we need
-	 * an additional index block, and at ldxs**3 blocks, yet
-	 * another index blocks.
-	 */
-	if (ei->i_da_metadata_calc_len &&
-	    ei->i_da_metadata_calc_last_lblock+1 == lblock) {
-		int num = 0;
-
-		if ((ei->i_da_metadata_calc_len % idxs) == 0)
-			num++;
-		if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
-			num++;
-		if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
-			num++;
-			ei->i_da_metadata_calc_len = 0;
-		} else
-			ei->i_da_metadata_calc_len++;
-		ei->i_da_metadata_calc_last_lblock++;
-		return num;
-	}
-
-	/*
-	 * In the worst case we need a new set of index blocks at
-	 * every level of the inode's extent tree.
-	 */
-	ei->i_da_metadata_calc_len = 1;
-	ei->i_da_metadata_calc_last_lblock = lblock;
-	return ext_depth(inode) + 1;
+			flags);
 }

 static int
@@ -389,12 +365,13 @@
 }

 static int ext4_valid_extent_entries(struct inode *inode,
-				struct ext4_extent_header *eh,
-				ext4_fsblk_t *pblk, int depth)
+				     struct ext4_extent_header *eh,
+				     ext4_lblk_t lblk, ext4_fsblk_t *pblk,
+				     int depth)
 {
 	unsigned short entries;
 	ext4_lblk_t lblock = 0;
-	ext4_lblk_t prev = 0;
+	ext4_lblk_t cur = 0;

 	if (eh->eh_entries == 0)
 		return 1;
@@ -404,38 +381,51 @@
 	if (depth == 0) {
 		/* leaf entries */
 		struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
-		struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
-		ext4_fsblk_t pblock = 0;
+
+		/*
+		 * The logical block in the first entry should equal the
+		 * number in the index block.
+		 */
+		if (depth != ext_depth(inode) &&
+		    lblk != le32_to_cpu(ext->ee_block))
+			return 0;
 		while (entries) {
 			if (!ext4_valid_extent(inode, ext))
 				return 0;

 			/* Check for overlapping extents */
 			lblock = le32_to_cpu(ext->ee_block);
-			if ((lblock <= prev) && prev) {
-				pblock = ext4_ext_pblock(ext);
-				es->s_last_error_block = cpu_to_le64(pblock);
+			if (lblock < cur) {
+				*pblk = ext4_ext_pblock(ext);
 				return 0;
 			}
-			prev = lblock + ext4_ext_get_actual_len(ext) - 1;
+			cur = lblock + ext4_ext_get_actual_len(ext);
 			ext++;
 			entries--;
 		}
 	} else {
 		struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
+
+		/*
+		 * The logical block in the first entry should equal the
+		 * number in the parent index block.
+		 */
+		if (depth != ext_depth(inode) &&
+		    lblk != le32_to_cpu(ext_idx->ei_block))
+			return 0;
 		while (entries) {
 			if (!ext4_valid_extent_idx(inode, ext_idx))
 				return 0;

 			/* Check for overlapping index extents */
 			lblock = le32_to_cpu(ext_idx->ei_block);
-			if ((lblock <= prev) && prev) {
+			if (lblock < cur) {
 				*pblk = ext4_idx_pblock(ext_idx);
 				return 0;
 			}
 			ext_idx++;
 			entries--;
-			prev = lblock;
+			cur = lblock + 1;
 		}
 	}
 	return 1;
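
Note: a standalone illustration (hypothetical demo code, not kernel code) of why the check was tightened. Under the old "(lblock <= prev) && prev" test, an overlap involving an extent that starts at logical block 0 escaped detection because prev was still 0; tracking cur, the first logical block not yet coverable, rejects overlaps and out-of-order entries uniformly:

	struct demo_extent { unsigned int lblk; unsigned int len; };

	static int demo_entries_valid(const struct demo_extent *ext, int entries)
	{
		unsigned int cur = 0;	/* first logical block not yet coverable */

		while (entries--) {
			if (ext->lblk < cur)
				return 0;	/* overlap or out-of-order entry */
			cur = ext->lblk + ext->len;
			ext++;
		}
		return 1;	/* e.g. {0,10} followed by {5,10} now fails: 5 < 10 */
	}
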
@@ -443,7 +433,7 @@

 static int __ext4_ext_check(const char *function, unsigned int line,
 			    struct inode *inode, struct ext4_extent_header *eh,
-			    int depth, ext4_fsblk_t pblk)
+			    int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk)
 {
 	const char *error_msg;
 	int max = 0, err = -EFSCORRUPTED;
@@ -469,7 +459,11 @@
 		error_msg = "invalid eh_entries";
 		goto corrupted;
 	}
-	if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) {
+	if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
+		error_msg = "eh_entries is 0 but eh_depth is > 0";
+		goto corrupted;
+	}
+	if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
 		error_msg = "invalid extent entries";
 		goto corrupted;
 	}
@@ -487,18 +481,19 @@
 	return 0;

 corrupted:
-	ext4_error_inode(inode, function, line, 0,
-			 "pblk %llu bad header/extent: %s - magic %x, "
-			 "entries %u, max %u(%u), depth %u(%u)",
-			 (unsigned long long) pblk, error_msg,
-			 le16_to_cpu(eh->eh_magic),
-			 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
-			 max, le16_to_cpu(eh->eh_depth), depth);
+	ext4_error_inode_err(inode, function, line, 0, -err,
+			     "pblk %llu bad header/extent: %s - magic %x, "
+			     "entries %u, max %u(%u), depth %u(%u)",
+			     (unsigned long long) pblk, error_msg,
+			     le16_to_cpu(eh->eh_magic),
+			     le16_to_cpu(eh->eh_entries),
+			     le16_to_cpu(eh->eh_max),
+			     max, le16_to_cpu(eh->eh_depth), depth);
 	return err;
 }

 #define ext4_ext_check(inode, eh, depth, pblk) \
-	__ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk))
+	__ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0)

 int ext4_ext_check_inode(struct inode *inode)
 {
531526
532527 static struct buffer_head *
533528 __read_extent_tree_block(const char *function, unsigned int line,
534
- struct inode *inode, ext4_fsblk_t pblk, int depth,
535
- int flags)
529
+ struct inode *inode, struct ext4_extent_idx *idx,
530
+ int depth, int flags)
536531 {
537532 struct buffer_head *bh;
538533 int err;
534
+ gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS;
535
+ ext4_fsblk_t pblk;
539536
540
- bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS);
537
+ if (flags & EXT4_EX_NOFAIL)
538
+ gfp_flags |= __GFP_NOFAIL;
539
+
540
+ pblk = ext4_idx_pblock(idx);
541
+ bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
541542 if (unlikely(!bh))
542543 return ERR_PTR(-ENOMEM);
543544
544545 if (!bh_uptodate_or_lock(bh)) {
545546 trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
546
- err = bh_submit_read(bh);
547
+ err = ext4_read_bh(bh, 0, NULL);
547548 if (err < 0)
548549 goto errout;
549550 }
550551 if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
551552 return bh;
552
- err = __ext4_ext_check(function, line, inode,
553
- ext_block_hdr(bh), depth, pblk);
553
+ err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
554
+ depth, pblk, le32_to_cpu(idx->ei_block));
554555 if (err)
555556 goto errout;
556557 set_buffer_verified(bh);
....@@ -568,8 +569,8 @@
568569
569570 }
570571
571
-#define read_extent_tree_block(inode, pblk, depth, flags) \
572
- __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \
572
+#define read_extent_tree_block(inode, idx, depth, flags) \
573
+ __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \
573574 (depth), (flags))
574575
575576 /*
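
Note: passing the index entry itself (rather than a bare physical block) lets the verifier cross-check the child block's first logical block against ei_block in the parent. Caller-side shape, as converted throughout this patch (illustrative):

	bh = read_extent_tree_block(inode, path[i].p_idx, depth - i - 1, flags);
	if (IS_ERR(bh))
		return PTR_ERR(bh);	/* -EFSCORRUPTED on a mismatched child */
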
@@ -589,6 +590,12 @@
 	down_read(&ei->i_data_sem);
 	depth = ext_depth(inode);

+	/* Don't cache anything if there are no external extent blocks */
+	if (!depth) {
+		up_read(&ei->i_data_sem);
+		return ret;
+	}
+
 	path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
 		       GFP_NOFS);
 	if (path == NULL) {
@@ -596,9 +603,6 @@
 		return -ENOMEM;
 	}

-	/* Don't cache anything if there are no external extent blocks */
-	if (depth == 0)
-		goto out;
 	path[0].p_hdr = ext_inode_hdr(inode);
 	ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
 	if (ret)
@@ -616,8 +620,7 @@
 			i--;
 			continue;
 		}
-		bh = read_extent_tree_block(inode,
-					    ext4_idx_pblock(path[i].p_idx++),
+		bh = read_extent_tree_block(inode, path[i].p_idx++,
 					    depth - i - 1,
 					    EXT4_EX_FORCE_CACHE);
 		if (IS_ERR(bh)) {
@@ -642,21 +645,22 @@
 {
 	int k, l = path->p_depth;

-	ext_debug("path:");
+	ext_debug(inode, "path:");
 	for (k = 0; k <= l; k++, path++) {
 		if (path->p_idx) {
-			ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
-				  ext4_idx_pblock(path->p_idx));
+			ext_debug(inode, " %d->%llu",
+				  le32_to_cpu(path->p_idx->ei_block),
+				  ext4_idx_pblock(path->p_idx));
 		} else if (path->p_ext) {
-			ext_debug(" %d:[%d]%d:%llu ",
+			ext_debug(inode, " %d:[%d]%d:%llu ",
				  le32_to_cpu(path->p_ext->ee_block),
				  ext4_ext_is_unwritten(path->p_ext),
				  ext4_ext_get_actual_len(path->p_ext),
				  ext4_ext_pblock(path->p_ext));
 		} else
-			ext_debug(" []");
+			ext_debug(inode, " []");
 	}
-	ext_debug("\n");
+	ext_debug(inode, "\n");
 }

 static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
@@ -672,14 +676,14 @@
 	eh = path[depth].p_hdr;
 	ex = EXT_FIRST_EXTENT(eh);

-	ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);
+	ext_debug(inode, "Displaying leaf extents\n");

 	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
-		ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
+		ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
			  ext4_ext_is_unwritten(ex),
			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
 	}
-	ext_debug("\n");
+	ext_debug(inode, "\n");
 }

 static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
@@ -692,10 +696,9 @@
 		struct ext4_extent_idx *idx;
 		idx = path[level].p_idx;
 		while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
-			ext_debug("%d: move %d:%llu in new index %llu\n", level,
-				  le32_to_cpu(idx->ei_block),
-				  ext4_idx_pblock(idx),
-				  newblock);
+			ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
+				  level, le32_to_cpu(idx->ei_block),
+				  ext4_idx_pblock(idx), newblock);
 			idx++;
 		}
@@ -704,7 +707,7 @@

 	ex = path[depth].p_ext;
 	while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
-		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
+		ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
			  le32_to_cpu(ex->ee_block),
			  ext4_ext_pblock(ex),
			  ext4_ext_is_unwritten(ex),
@@ -727,11 +730,10 @@
 	if (!path)
 		return;
 	depth = path->p_depth;
-	for (i = 0; i <= depth; i++, path++)
-		if (path->p_bh) {
-			brelse(path->p_bh);
-			path->p_bh = NULL;
-		}
+	for (i = 0; i <= depth; i++, path++) {
+		brelse(path->p_bh);
+		path->p_bh = NULL;
+	}
 }

 /*
@@ -747,7 +749,7 @@
 	struct ext4_extent_idx *r, *l, *m;


-	ext_debug("binsearch for %u(idx): ", block);
+	ext_debug(inode, "binsearch for %u(idx): ", block);

 	l = EXT_FIRST_INDEX(eh) + 1;
 	r = EXT_LAST_INDEX(eh);
@@ -757,13 +759,13 @@
 			r = m - 1;
 		else
 			l = m + 1;
-		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
-				m, le32_to_cpu(m->ei_block),
-				r, le32_to_cpu(r->ei_block));
+		ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
+			  le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
+			  r, le32_to_cpu(r->ei_block));
 	}

 	path->p_idx = l - 1;
-	ext_debug(" -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
+	ext_debug(inode, " -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
		  ext4_idx_pblock(path->p_idx));

 #ifdef CHECK_BINSEARCH
@@ -773,8 +775,8 @@

 		chix = ix = EXT_FIRST_INDEX(eh);
 		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
-			if (k != 0 &&
-			    le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
+			if (k != 0 && le32_to_cpu(ix->ei_block) <=
+			    le32_to_cpu(ix[-1].ei_block)) {
				printk(KERN_DEBUG "k=%d, ix=0x%p, "
				       "first=0x%p\n", k,
				       ix, EXT_FIRST_INDEX(eh));
@@ -814,7 +816,7 @@
 		return;
 	}

-	ext_debug("binsearch for %u: ", block);
+	ext_debug(inode, "binsearch for %u: ", block);

 	l = EXT_FIRST_EXTENT(eh) + 1;
 	r = EXT_LAST_EXTENT(eh);
@@ -825,13 +827,13 @@
 			r = m - 1;
 		else
 			l = m + 1;
-		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
-				m, le32_to_cpu(m->ee_block),
-				r, le32_to_cpu(r->ee_block));
+		ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
+			  le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
+			  r, le32_to_cpu(r->ee_block));
 	}

 	path->p_ext = l - 1;
-	ext_debug(" -> %d:%llu:[%d]%d ",
+	ext_debug(inode, " -> %d:%llu:[%d]%d ",
		  le32_to_cpu(path->p_ext->ee_block),
		  ext4_ext_pblock(path->p_ext),
		  ext4_ext_is_unwritten(path->p_ext),
@@ -856,7 +858,7 @@

 }

-int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
+void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
 {
 	struct ext4_extent_header *eh;

@@ -867,7 +869,6 @@
 	eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
 	eh->eh_generation = 0;
 	ext4_mark_inode_dirty(handle, inode);
-	return 0;
 }

 struct ext4_ext_path *
@@ -879,6 +880,10 @@
 	struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
 	short int depth, i, ppos = 0;
 	int ret;
+	gfp_t gfp_flags = GFP_NOFS;
+
+	if (flags & EXT4_EX_NOFAIL)
+		gfp_flags |= __GFP_NOFAIL;

 	eh = ext_inode_hdr(inode);
 	depth = ext_depth(inode);
@@ -899,7 +904,7 @@
 	if (!path) {
 		/* account possible depth increase */
 		path = kcalloc(depth + 2, sizeof(struct ext4_ext_path),
-			       GFP_NOFS);
+			       gfp_flags);
 		if (unlikely(!path))
 			return ERR_PTR(-ENOMEM);
 		path[0].p_maxdepth = depth + 1;
@@ -912,7 +917,7 @@
 	ext4_cache_extents(inode, eh);
 	/* walk through the tree */
 	while (i) {
-		ext_debug("depth %d: num %d, max %d\n",
+		ext_debug(inode, "depth %d: num %d, max %d\n",
			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

 		ext4_ext_binsearch_idx(inode, path + ppos, block);
@@ -920,8 +925,7 @@
 		path[ppos].p_depth = i;
 		path[ppos].p_ext = NULL;

-		bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
-					    flags);
+		bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags);
 		if (IS_ERR(bh)) {
 			ret = PTR_ERR(bh);
 			goto err;
@@ -989,18 +993,20 @@

 	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
 		/* insert after */
-		ext_debug("insert new index %d after: %llu\n", logical, ptr);
+		ext_debug(inode, "insert new index %d after: %llu\n",
+			  logical, ptr);
 		ix = curp->p_idx + 1;
 	} else {
 		/* insert before */
-		ext_debug("insert new index %d before: %llu\n", logical, ptr);
+		ext_debug(inode, "insert new index %d before: %llu\n",
+			  logical, ptr);
 		ix = curp->p_idx;
 	}

 	len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
 	BUG_ON(len < 0);
 	if (len > 0) {
-		ext_debug("insert new index %d: "
+		ext_debug(inode, "insert new index %d: "
				"move %d indices from 0x%p to 0x%p\n",
				logical, len, ix, ix + 1);
		memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
@@ -1049,8 +1055,12 @@
 	ext4_fsblk_t newblock, oldblock;
 	__le32 border;
 	ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
+	gfp_t gfp_flags = GFP_NOFS;
 	int err = 0;
 	size_t ext_size = 0;
+
+	if (flags & EXT4_EX_NOFAIL)
+		gfp_flags |= __GFP_NOFAIL;

 	/* make decision: where to split? */
 	/* FIXME: now decision is simplest: at current extent */
@@ -1063,12 +1073,12 @@
 	}
 	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
 		border = path[depth].p_ext[1].ee_block;
-		ext_debug("leaf will be split."
+		ext_debug(inode, "leaf will be split."
				" next leaf starts at %d\n",
				  le32_to_cpu(border));
 	} else {
 		border = newext->ee_block;
-		ext_debug("leaf will be added."
+		ext_debug(inode, "leaf will be added."
				" next leaf starts at %d\n",
				le32_to_cpu(border));
 	}
@@ -1085,12 +1095,12 @@
 	 * We need this to handle errors and free blocks
 	 * upon them.
 	 */
-	ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), GFP_NOFS);
+	ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags);
 	if (!ablocks)
 		return -ENOMEM;

 	/* allocate all needed blocks */
-	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
+	ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
 	for (a = 0; a < depth - at; a++) {
 		newblock = ext4_ext_new_meta_block(handle, inode, path,
						   newext, &err, flags);
@@ -1177,7 +1187,7 @@
 		goto cleanup;
 	}
 	if (k)
-		ext_debug("create %d intermediate indices\n", k);
+		ext_debug(inode, "create %d intermediate indices\n", k);
 	/* insert new index into current index block */
 	/* current depth stored in i var */
 	i = depth - 1;
@@ -1205,7 +1215,7 @@
 	fidx->ei_block = border;
 	ext4_idx_store_pblock(fidx, oldblock);

-	ext_debug("int.index at %d (block %llu): %u -> %llu\n",
+	ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n",
			i, newblock, le32_to_cpu(border), oldblock);

 	/* move remainder of path[i] to the new index block */
@@ -1219,7 +1229,7 @@
 	}
 	/* start copy indexes */
 	m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
-	ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
+	ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
			EXT_MAX_INDEX(path[i].p_hdr));
 	ext4_ext_show_move(inode, path, newblock, i);
 	if (m) {
@@ -1356,13 +1366,13 @@
 		EXT_FIRST_INDEX(neh)->ei_block =
 			EXT_FIRST_EXTENT(neh)->ee_block;
 	}
-	ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
+	ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n",
		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

 	le16_add_cpu(&neh->eh_depth, 1);
-	ext4_mark_inode_dirty(handle, inode);
+	err = ext4_mark_inode_dirty(handle, inode);
 out:
 	brelse(bh);
@@ -1505,22 +1515,21 @@
 }

 /*
- * search the closest allocated block to the right for *logical
- * and returns it at @logical + it's physical address at @phys
- * if *logical is the largest allocated block, the function
- * returns 0 at @phys
- * return value contains 0 (success) or error code
+ * Search the closest allocated block to the right for *logical
+ * and returns it at @logical + its physical address at @phys.
+ * If no allocated block exists to the right, return 0 and set @phys to 0.
+ * Return 1 when an allocated block is found; *ret_ex is then valid.
+ * Otherwise return a (< 0) error code.
 */
 static int ext4_ext_search_right(struct inode *inode,
				 struct ext4_ext_path *path,
				 ext4_lblk_t *logical, ext4_fsblk_t *phys,
-				 struct ext4_extent **ret_ex)
+				 struct ext4_extent *ret_ex)
 {
 	struct buffer_head *bh = NULL;
 	struct ext4_extent_header *eh;
 	struct ext4_extent_idx *ix;
 	struct ext4_extent *ex;
-	ext4_fsblk_t block;
 	int depth;	/* Note, NOT eh_depth; depth from top of tree */
 	int ee_len;
@@ -1587,20 +1596,17 @@
 	 * follow it and find the closest allocated
 	 * block to the right */
 	ix++;
-	block = ext4_idx_pblock(ix);
 	while (++depth < path->p_depth) {
 		/* subtract from p_depth to get proper eh_depth */
-		bh = read_extent_tree_block(inode, block,
-					    path->p_depth - depth, 0);
+		bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
 		if (IS_ERR(bh))
 			return PTR_ERR(bh);
 		eh = ext_block_hdr(bh);
 		ix = EXT_FIRST_INDEX(eh);
-		block = ext4_idx_pblock(ix);
 		put_bh(bh);
 	}

-	bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0);
+	bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	eh = ext_block_hdr(bh);
@@ -1608,10 +1614,11 @@
 found_extent:
 	*logical = le32_to_cpu(ex->ee_block);
 	*phys = ext4_ext_pblock(ex);
-	*ret_ex = ex;
+	if (ret_ex)
+		*ret_ex = *ex;
 	if (bh)
 		put_bh(bh);
-	return 0;
+	return 1;
 }

@@ -1633,17 +1640,16 @@
 		return EXT_MAX_BLOCKS;

 	while (depth >= 0) {
+		struct ext4_ext_path *p = &path[depth];
+
 		if (depth == path->p_depth) {
 			/* leaf */
-			if (path[depth].p_ext &&
-				path[depth].p_ext !=
-					EXT_LAST_EXTENT(path[depth].p_hdr))
-			  return le32_to_cpu(path[depth].p_ext[1].ee_block);
+			if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
+				return le32_to_cpu(p->p_ext[1].ee_block);
 		} else {
 			/* index */
-			if (path[depth].p_idx !=
-					EXT_LAST_INDEX(path[depth].p_hdr))
-			  return le32_to_cpu(path[depth].p_idx[1].ei_block);
+			if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
+				return le32_to_cpu(p->p_idx[1].ei_block);
 		}
 		depth--;
 	}
@@ -1743,9 +1749,9 @@
 	return err;
 }

-int
-ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
-				struct ext4_extent *ex2)
+static int ext4_can_extents_be_merged(struct inode *inode,
+				      struct ext4_extent *ex1,
+				      struct ext4_extent *ex2)
 {
 	unsigned short ext1_ee_len, ext2_ee_len;

@@ -1759,23 +1765,11 @@
 	    le32_to_cpu(ex2->ee_block))
 		return 0;

-	/*
-	 * To allow future support for preallocated extents to be added
-	 * as an RO_COMPAT feature, refuse to merge to extents if
-	 * this can result in the top bit of ee_len being set.
-	 */
 	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
 		return 0;
-	/*
-	 * The check for IO to unwritten extent is somewhat racy as we
-	 * increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after
-	 * dropping i_data_sem. But reserved blocks should save us in that
-	 * case.
-	 */
+
 	if (ext4_ext_is_unwritten(ex1) &&
-	    (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
-	     atomic_read(&EXT4_I(inode)->i_unwritten) ||
-	     (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)))
+	    ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
 		return 0;
 #ifdef AGGRESSIVE_TEST
 	if (ext1_ee_len >= 4)
@@ -1853,7 +1847,8 @@
 	 * group descriptor to release the extent tree block. If we
 	 * can't get the journal credits, give up.
 	 */
-	if (ext4_journal_extend(handle, 2))
+	if (ext4_journal_extend(handle, 2,
+			ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
 		return;

 	/*
@@ -1877,13 +1872,14 @@
 }

 /*
- * This function tries to merge the @ex extent to neighbours in the tree.
- * return 1 if merge left else 0.
+ * This function tries to merge the @ex extent to neighbours in the tree, then
+ * tries to collapse the extent tree into the inode.
 */
 static void ext4_ext_try_to_merge(handle_t *handle,
				  struct inode *inode,
				  struct ext4_ext_path *path,
-				  struct ext4_extent *ex) {
+				  struct ext4_extent *ex)
+{
 	struct ext4_extent_header *eh;
 	unsigned int depth;
 	int merge_done = 0;
@@ -1954,7 +1950,7 @@

 /*
 * ext4_ext_insert_extent:
- * tries to merge requsted extent into the existing extent or
+ * tries to merge requested extent into the existing extent or
 * inserts requested extent as new one into the tree,
 * creating new leaf in the no-space case.
 */
@@ -2009,7 +2005,7 @@

 		/* Try to append newex to the ex */
 		if (ext4_can_extents_be_merged(inode, ex, newext)) {
-			ext_debug("append [%d]%d block to %u:[%d]%d"
+			ext_debug(inode, "append [%d]%d block to %u:[%d]%d"
				  "(from %llu)\n",
				  ext4_ext_is_unwritten(newext),
				  ext4_ext_get_actual_len(newext),
@@ -2034,7 +2030,7 @@
 prepend:
 		/* Try to prepend newex to the ex */
 		if (ext4_can_extents_be_merged(inode, newext, ex)) {
-			ext_debug("prepend %u[%d]%d block to %u:[%d]%d"
+			ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d"
				  "(from %llu)\n",
				  le32_to_cpu(newext->ee_block),
				  ext4_ext_is_unwritten(newext),
@@ -2072,20 +2068,20 @@
 	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
 		next = ext4_ext_next_leaf_block(path);
 	if (next != EXT_MAX_BLOCKS) {
-		ext_debug("next leaf block - %u\n", next);
+		ext_debug(inode, "next leaf block - %u\n", next);
 		BUG_ON(npath != NULL);
-		npath = ext4_find_extent(inode, next, NULL, 0);
+		npath = ext4_find_extent(inode, next, NULL, gb_flags);
 		if (IS_ERR(npath))
 			return PTR_ERR(npath);
 		BUG_ON(npath->p_depth != path->p_depth);
 		eh = npath[depth].p_hdr;
 		if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
-			ext_debug("next leaf isn't full(%d)\n",
+			ext_debug(inode, "next leaf isn't full(%d)\n",
				  le16_to_cpu(eh->eh_entries));
			path = npath;
			goto has_space;
 		}
-		ext_debug("next leaf has no free space(%d,%d)\n",
+		ext_debug(inode, "next leaf has no free space(%d,%d)\n",
			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
 	}
@@ -2111,7 +2107,7 @@

 	if (!nearex) {
 		/* there is no extent in this leaf, create first one */
-		ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
+		ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n",
				le32_to_cpu(newext->ee_block),
				ext4_ext_pblock(newext),
				ext4_ext_is_unwritten(newext),
@@ -2121,7 +2117,7 @@
 		if (le32_to_cpu(newext->ee_block)
			   > le32_to_cpu(nearex->ee_block)) {
 			/* Insert after */
-			ext_debug("insert %u:%llu:[%d]%d before: "
+			ext_debug(inode, "insert %u:%llu:[%d]%d before: "
					"nearest %p\n",
					le32_to_cpu(newext->ee_block),
					ext4_ext_pblock(newext),
@@ -2132,7 +2128,7 @@
 		} else {
 			/* Insert before */
 			BUG_ON(newext->ee_block == nearex->ee_block);
-			ext_debug("insert %u:%llu:[%d]%d after: "
+			ext_debug(inode, "insert %u:%llu:[%d]%d after: "
					"nearest %p\n",
					le32_to_cpu(newext->ee_block),
					ext4_ext_pblock(newext),
@@ -2142,7 +2138,7 @@
 		}
 		len = EXT_LAST_EXTENT(eh) - nearex + 1;
 		if (len > 0) {
-			ext_debug("insert %u:%llu:[%d]%d: "
+			ext_debug(inode, "insert %u:%llu:[%d]%d: "
					"move %d extents from 0x%p to 0x%p\n",
					le32_to_cpu(newext->ee_block),
					ext4_ext_pblock(newext),
@@ -2179,154 +2175,51 @@
 	return err;
 }

-static int ext4_fill_fiemap_extents(struct inode *inode,
-				    ext4_lblk_t block, ext4_lblk_t num,
-				    struct fiemap_extent_info *fieinfo)
+static int ext4_fill_es_cache_info(struct inode *inode,
+				   ext4_lblk_t block, ext4_lblk_t num,
+				   struct fiemap_extent_info *fieinfo)
 {
-	struct ext4_ext_path *path = NULL;
-	struct ext4_extent *ex;
+	ext4_lblk_t next, end = block + num - 1;
 	struct extent_status es;
-	ext4_lblk_t next, next_del, start = 0, end = 0;
-	ext4_lblk_t last = block + num;
-	int exists, depth = 0, err = 0;
-	unsigned int flags = 0;
 	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
+	unsigned int flags;
+	int err;

-	while (block < last && block != EXT_MAX_BLOCKS) {
-		num = last - block;
-		/* find extent for this block */
-		down_read(&EXT4_I(inode)->i_data_sem);
-
-		path = ext4_find_extent(inode, block, &path, 0);
-		if (IS_ERR(path)) {
-			up_read(&EXT4_I(inode)->i_data_sem);
-			err = PTR_ERR(path);
-			path = NULL;
-			break;
-		}
-
-		depth = ext_depth(inode);
-		if (unlikely(path[depth].p_hdr == NULL)) {
-			up_read(&EXT4_I(inode)->i_data_sem);
-			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
-			err = -EFSCORRUPTED;
-			break;
-		}
-		ex = path[depth].p_ext;
-		next = ext4_ext_next_allocated_block(path);
-
+	while (block <= end) {
+		next = 0;
 		flags = 0;
-		exists = 0;
-		if (!ex) {
-			/* there is no extent yet, so try to allocate
-			 * all requested space */
-			start = block;
-			end = block + num;
-		} else if (le32_to_cpu(ex->ee_block) > block) {
-			/* need to allocate space before found extent */
-			start = block;
-			end = le32_to_cpu(ex->ee_block);
-			if (block + num < end)
-				end = block + num;
-		} else if (block >= le32_to_cpu(ex->ee_block)
-					+ ext4_ext_get_actual_len(ex)) {
-			/* need to allocate space after found extent */
-			start = block;
-			end = block + num;
-			if (end >= next)
-				end = next;
-		} else if (block >= le32_to_cpu(ex->ee_block)) {
-			/*
-			 * some part of requested space is covered
-			 * by found extent
-			 */
-			start = block;
-			end = le32_to_cpu(ex->ee_block)
-				+ ext4_ext_get_actual_len(ex);
-			if (block + num < end)
-				end = block + num;
-			exists = 1;
-		} else {
-			BUG();
-		}
-		BUG_ON(end <= start);
-
-		if (!exists) {
-			es.es_lblk = start;
-			es.es_len = end - start;
-			es.es_pblk = 0;
-		} else {
-			es.es_lblk = le32_to_cpu(ex->ee_block);
-			es.es_len = ext4_ext_get_actual_len(ex);
-			es.es_pblk = ext4_ext_pblock(ex);
-			if (ext4_ext_is_unwritten(ex))
-				flags |= FIEMAP_EXTENT_UNWRITTEN;
-		}
-
-		/*
-		 * Find delayed extent and update es accordingly. We call
-		 * it even in !exists case to find out whether es is the
-		 * last existing extent or not.
-		 */
-		next_del = ext4_find_delayed_extent(inode, &es);
-		if (!exists && next_del) {
-			exists = 1;
+		if (!ext4_es_lookup_extent(inode, block, &next, &es))
+			break;
+		if (ext4_es_is_unwritten(&es))
+			flags |= FIEMAP_EXTENT_UNWRITTEN;
+		if (ext4_es_is_delayed(&es))
			flags |= (FIEMAP_EXTENT_DELALLOC |
				  FIEMAP_EXTENT_UNKNOWN);
-		}
-		up_read(&EXT4_I(inode)->i_data_sem);
-
-		if (unlikely(es.es_len == 0)) {
-			EXT4_ERROR_INODE(inode, "es.es_len == 0");
-			err = -EFSCORRUPTED;
-			break;
-		}
-
-		/*
-		 * This is possible iff next == next_del == EXT_MAX_BLOCKS.
-		 * we need to check next == EXT_MAX_BLOCKS because it is
-		 * possible that an extent is with unwritten and delayed
-		 * status due to when an extent is delayed allocated and
-		 * is allocated by fallocate status tree will track both of
-		 * them in a extent.
-		 *
-		 * So we could return a unwritten and delayed extent, and
-		 * its block is equal to 'next'.
-		 */
-		if (next == next_del && next == EXT_MAX_BLOCKS) {
+		if (ext4_es_is_hole(&es))
+			flags |= EXT4_FIEMAP_EXTENT_HOLE;
+		if (next == 0)
			flags |= FIEMAP_EXTENT_LAST;
-			if (unlikely(next_del != EXT_MAX_BLOCKS ||
-				     next != EXT_MAX_BLOCKS)) {
-				EXT4_ERROR_INODE(inode,
-						 "next extent == %u, next "
-						 "delalloc extent = %u",
-						 next, next_del);
-				err = -EFSCORRUPTED;
-				break;
-			}
-		}
-
-		if (exists) {
-			err = fiemap_fill_next_extent(fieinfo,
+		if (flags & (FIEMAP_EXTENT_DELALLOC|
+			     EXT4_FIEMAP_EXTENT_HOLE))
+			es.es_pblk = 0;
+		else
+			es.es_pblk = ext4_es_pblock(&es);
+		err = fiemap_fill_next_extent(fieinfo,
				(__u64)es.es_lblk << blksize_bits,
				(__u64)es.es_pblk << blksize_bits,
				(__u64)es.es_len << blksize_bits,
				flags);
-			if (err < 0)
-				break;
-			if (err == 1) {
-				err = 0;
-				break;
-			}
-		}
-
-		block = es.es_lblk + es.es_len;
+		if (next == 0)
+			break;
+		block = next;
+		if (err < 0)
+			return err;
+		if (err == 1)
+			return 0;
 	}
-
-	ext4_ext_drop_refs(path);
-	kfree(path);
-	return err;
+	return 0;
 }
+

23322225 * ext4_ext_determine_hole - determine hole around given block
....@@ -2381,15 +2274,15 @@
23812274 {
23822275 struct extent_status es;
23832276
2384
- ext4_es_find_delayed_extent_range(inode, hole_start,
2385
- hole_start + hole_len - 1, &es);
2277
+ ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
2278
+ hole_start + hole_len - 1, &es);
23862279 if (es.es_len) {
23872280 /* There's delayed extent containing lblock? */
23882281 if (es.es_lblk <= hole_start)
23892282 return;
23902283 hole_len = min(es.es_lblk - hole_start, hole_len);
23912284 }
2392
- ext_debug(" -> %u:%u\n", hole_start, hole_len);
2285
+ ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
23932286 ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
23942287 EXTENT_STATUS_HOLE);
23952288 }
....@@ -2426,7 +2319,7 @@
24262319 err = ext4_ext_dirty(handle, inode, path);
24272320 if (err)
24282321 return err;
2429
- ext_debug("index is empty, remove it, free block %llu\n", leaf);
2322
+ ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
24302323 trace_ext4_ext_rm_idx(inode, leaf);
24312324
24322325 ext4_free_blocks(handle, inode, NULL, leaf, 1,
@@ -2520,105 +2413,156 @@
 	return 0;
 }

+/*
+ * ext4_rereserve_cluster - increment the reserved cluster count when
+ *                          freeing a cluster with a pending reservation
+ *
+ * @inode - file containing the cluster
+ * @lblk - logical block in cluster to be reserved
+ *
+ * Increments the reserved cluster count and adjusts quota in a bigalloc
+ * file system when freeing a partial cluster containing at least one
+ * delayed and unwritten block. A partial cluster meeting that
+ * requirement will have a pending reservation. If so, the
+ * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
+ * defer reserved and allocated space accounting to a subsequent call
+ * to this function.
+ */
+static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_inode_info *ei = EXT4_I(inode);
+
+	dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));
+
+	spin_lock(&ei->i_block_reservation_lock);
+	ei->i_reserved_data_blocks++;
+	percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
+	spin_unlock(&ei->i_block_reservation_lock);
+
+	percpu_counter_add(&sbi->s_freeclusters_counter, 1);
+	ext4_remove_pending(inode, lblk);
+}
+
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
			      struct ext4_extent *ex,
-			      long long *partial_cluster,
+			      struct partial_cluster *partial,
			      ext4_lblk_t from, ext4_lblk_t to)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned short ee_len = ext4_ext_get_actual_len(ex);
-	ext4_fsblk_t pblk;
-	int flags = get_default_free_blocks_flags(inode);
+	ext4_fsblk_t last_pblk, pblk;
+	ext4_lblk_t num;
+	int flags;
+
+	/* only extent tail removal is allowed */
+	if (from < le32_to_cpu(ex->ee_block) ||
+	    to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
+		ext4_error(sbi->s_sb,
+			   "strange request: removal(2) %u-%u from %u:%u",
+			   from, to, le32_to_cpu(ex->ee_block), ee_len);
+		return 0;
+	}
+
+#ifdef EXTENTS_STATS
+	spin_lock(&sbi->s_ext_stats_lock);
+	sbi->s_ext_blocks += ee_len;
+	sbi->s_ext_extents++;
+	if (ee_len < sbi->s_ext_min)
+		sbi->s_ext_min = ee_len;
+	if (ee_len > sbi->s_ext_max)
+		sbi->s_ext_max = ee_len;
+	if (ext_depth(inode) > sbi->s_depth_max)
+		sbi->s_depth_max = ext_depth(inode);
+	spin_unlock(&sbi->s_ext_stats_lock);
+#endif
+
+	trace_ext4_remove_blocks(inode, ex, from, to, partial);
+
+	/*
+	 * if we have a partial cluster, and it's different from the
+	 * cluster of the last block in the extent, we free it
+	 */
+	last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
+
+	if (partial->state != initial &&
+	    partial->pclu != EXT4_B2C(sbi, last_pblk)) {
+		if (partial->state == tofree) {
+			flags = get_default_free_blocks_flags(inode);
+			if (ext4_is_pending(inode, partial->lblk))
+				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
+			ext4_free_blocks(handle, inode, NULL,
+					 EXT4_C2B(sbi, partial->pclu),
+					 sbi->s_cluster_ratio, flags);
+			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
+				ext4_rereserve_cluster(inode, partial->lblk);
+		}
+		partial->state = initial;
+	}
+
+	num = le32_to_cpu(ex->ee_block) + ee_len - from;
+	pblk = ext4_ext_pblock(ex) + ee_len - num;
+
+	/*
+	 * We free the partial cluster at the end of the extent (if any),
+	 * unless the cluster is used by another extent (partial_cluster
+	 * state is nofree). If a partial cluster exists here, it must be
+	 * shared with the last block in the extent.
+	 */
+	flags = get_default_free_blocks_flags(inode);
+
+	/* partial, left end cluster aligned, right end unaligned */
+	if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
+	    (EXT4_LBLK_CMASK(sbi, to) >= from) &&
+	    (partial->state != nofree)) {
+		if (ext4_is_pending(inode, to))
+			flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
+		ext4_free_blocks(handle, inode, NULL,
+				 EXT4_PBLK_CMASK(sbi, last_pblk),
+				 sbi->s_cluster_ratio, flags);
+		if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
+			ext4_rereserve_cluster(inode, to);
+		partial->state = initial;
+		flags = get_default_free_blocks_flags(inode);
+	}
+
+	flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;

 	/*
 	 * For bigalloc file systems, we never free a partial cluster
-	 * at the beginning of the extent. Instead, we make a note
-	 * that we tried freeing the cluster, and check to see if we
+	 * at the beginning of the extent. Instead, we check to see if we
 	 * need to free it on a subsequent call to ext4_remove_blocks,
 	 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
 	 */
 	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
+	ext4_free_blocks(handle, inode, NULL, pblk, num, flags);

-	trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);
+	/* reset the partial cluster if we've freed past it */
+	if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
+		partial->state = initial;
+
 	/*
-	 * If we have a partial cluster, and it's different from the
-	 * cluster of the last block, we need to explicitly free the
-	 * partial cluster here.
+	 * If we've freed the entire extent but the beginning is not left
+	 * cluster aligned and is not marked as ineligible for freeing we
+	 * record the partial cluster at the beginning of the extent. It
+	 * wasn't freed by the preceding ext4_free_blocks() call, and we
+	 * need to look farther to the left to determine if it's to be freed
+	 * (not shared with another extent). Else, reset the partial
+	 * cluster - we're either done freeing or the beginning of the
+	 * extent is left cluster aligned.
	 */
-	pblk = ext4_ext_pblock(ex) + ee_len - 1;
-	if (*partial_cluster > 0 &&
-	    *partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
-		ext4_free_blocks(handle, inode, NULL,
-				 EXT4_C2B(sbi, *partial_cluster),
-				 sbi->s_cluster_ratio, flags);
-		*partial_cluster = 0;
-	}
-
-#ifdef EXTENTS_STATS
-	{
-		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-		spin_lock(&sbi->s_ext_stats_lock);
-		sbi->s_ext_blocks += ee_len;
-		sbi->s_ext_extents++;
-		if (ee_len < sbi->s_ext_min)
-			sbi->s_ext_min = ee_len;
-		if (ee_len > sbi->s_ext_max)
-			sbi->s_ext_max = ee_len;
-		if (ext_depth(inode) > sbi->s_depth_max)
-			sbi->s_depth_max = ext_depth(inode);
-		spin_unlock(&sbi->s_ext_stats_lock);
-	}
-#endif
-	if (from >= le32_to_cpu(ex->ee_block)
-	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
-		/* tail removal */
-		ext4_lblk_t num;
-		long long first_cluster;
-
-		num = le32_to_cpu(ex->ee_block) + ee_len - from;
-		pblk = ext4_ext_pblock(ex) + ee_len - num;
-		/*
-		 * Usually we want to free partial cluster at the end of the
-		 * extent, except for the situation when the cluster is still
-		 * used by any other extent (partial_cluster is negative).
-		 */
-		if (*partial_cluster < 0 &&
-		    *partial_cluster == -(long long) EXT4_B2C(sbi, pblk+num-1))
-			flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
-
-		ext_debug("free last %u blocks starting %llu partial %lld\n",
-			  num, pblk, *partial_cluster);
-		ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
-		/*
-		 * If the block range to be freed didn't start at the
-		 * beginning of a cluster, and we removed the entire
-		 * extent and the cluster is not used by any other extent,
-		 * save the partial cluster here, since we might need to
-		 * delete if we determine that the truncate or punch hole
-		 * operation has removed all of the blocks in the cluster.
-		 * If that cluster is used by another extent, preserve its
-		 * negative value so it isn't freed later on.
-		 *
-		 * If the whole extent wasn't freed, we've reached the
-		 * start of the truncated/punched region and have finished
-		 * removing blocks. If there's a partial cluster here it's
-		 * shared with the remainder of the extent and is no longer
-		 * a candidate for removal.
-		 */
-		if (EXT4_PBLK_COFF(sbi, pblk) && ee_len == num) {
-			first_cluster = (long long) EXT4_B2C(sbi, pblk);
-			if (first_cluster != -*partial_cluster)
-				*partial_cluster = first_cluster;
-		} else {
-			*partial_cluster = 0;
+	if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
+		if (partial->state == initial) {
+			partial->pclu = EXT4_B2C(sbi, pblk);
+			partial->lblk = from;
+			partial->state = tofree;
 		}
-	} else
-		ext4_error(sbi->s_sb, "strange request: removal(2) "
-			   "%u-%u from %u:%u",
-			   from, to, le32_to_cpu(ex->ee_block), ee_len);
+	} else {
+		partial->state = initial;
+	}
+
	return 0;
 }
-

 /*
 * ext4_ext_rm_leaf() Removes the extents associated with the
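
Note: struct partial_cluster and its state enum come from the companion ext4_extents.h change in this series; roughly (shown here for reference, field layout per upstream):

	struct partial_cluster {
		ext4_fsblk_t pclu;	/* physical cluster number */
		ext4_lblk_t lblk;	/* logical block within logical cluster */
		enum {initial, tofree, nofree} state;
	};

initial means no partial cluster is being tracked; tofree means the recorded cluster should be freed unless a later extent turns out to share it; nofree marks it as shared and therefore ineligible for freeing.
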
@@ -2638,12 +2582,12 @@
 static int
 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
		 struct ext4_ext_path *path,
-		 long long *partial_cluster,
+		 struct partial_cluster *partial,
		 ext4_lblk_t start, ext4_lblk_t end)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int err = 0, correct_index = 0;
-	int depth = ext_depth(inode), credits;
+	int depth = ext_depth(inode), credits, revoke_credits;
 	struct ext4_extent_header *eh;
 	ext4_lblk_t a, b;
 	unsigned num;
@@ -2654,7 +2598,7 @@
 	ext4_fsblk_t pblk;

 	/* the header must be checked already in ext4_ext_remove_space() */
-	ext_debug("truncate since %u in leaf to %u\n", start, end);
+	ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
 	if (!path[depth].p_hdr)
 		path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
 	eh = path[depth].p_hdr;
@@ -2670,7 +2614,7 @@
 	ex_ee_block = le32_to_cpu(ex->ee_block);
 	ex_ee_len = ext4_ext_get_actual_len(ex);

-	trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
+	trace_ext4_ext_rm_leaf(inode, start, ex, partial);

 	while (ex >= EXT_FIRST_EXTENT(eh) &&
	       ex_ee_block + ex_ee_len > start) {
@@ -2680,7 +2624,7 @@
 		else
 			unwritten = 0;

-		ext_debug("remove ext %u:[%d]%d\n", ex_ee_block,
+		ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block,
			  unwritten, ex_ee_len);
 		path[depth].p_ext = ex;
@@ -2688,7 +2632,7 @@
 		b = ex_ee_block+ex_ee_len - 1 < end ?
			ex_ee_block+ex_ee_len - 1 : end;

-		ext_debug("  border %u:%u\n", a, b);
+		ext_debug(inode, "  border %u:%u\n", a, b);

 		/* If this extent is beyond the end of the hole, skip it */
 		if (end < ex_ee_block) {
@@ -2701,8 +2645,8 @@
			 */
			if (sbi->s_cluster_ratio > 1) {
				pblk = ext4_ext_pblock(ex);
-				*partial_cluster =
-					-(long long) EXT4_B2C(sbi, pblk);
+				partial->pclu = EXT4_B2C(sbi, pblk);
+				partial->state = nofree;
			}
			ex--;
			ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2735,17 +2679,29 @@
			credits += (ext_depth(inode)) + 1;
		}
		credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
+		/*
+		 * We may end up freeing some index blocks and data from the
+		 * punched range. Note that partial clusters are accounted for
+		 * by ext4_free_data_revoke_credits().
+		 */
+		revoke_credits =
+			ext4_free_metadata_revoke_credits(inode->i_sb,
+							  ext_depth(inode)) +
+			ext4_free_data_revoke_credits(inode, b - a + 1);

-		err = ext4_ext_truncate_extend_restart(handle, inode, credits);
-		if (err)
+		err = ext4_datasem_ensure_credits(handle, inode, credits,
+						  credits, revoke_credits);
+		if (err) {
+			if (err > 0)
+				err = -EAGAIN;
			goto out;
+		}

		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			goto out;

-		err = ext4_remove_blocks(handle, inode, ex, partial_cluster,
-					 a, b);
+		err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
		if (err)
			goto out;
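
Note: credit sizing above, as a worked outline (illustrative numbers). Removing blocks a = 100 through b = 299 of one extent in a depth-2 tree needs revoke descriptors for up to ext_depth(inode) index blocks plus the 200 data blocks (b - a + 1); per the added comment, partial clusters are already covered by ext4_free_data_revoke_credits():

	revoke_credits =
		ext4_free_metadata_revoke_credits(inode->i_sb, 2) +	/* depth */
		ext4_free_data_revoke_credits(inode, 200);		/* b - a + 1 */
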
@@ -2785,7 +2741,7 @@
		if (err)
			goto out;

-		ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num,
+		ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num,
				ext4_ext_pblock(ex));
		ex--;
		ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2799,18 +2755,23 @@
	 * If there's a partial cluster and at least one extent remains in
	 * the leaf, free the partial cluster if it isn't shared with the
	 * current extent. If it is shared with the current extent
-	 * we zero partial_cluster because we've reached the start of the
+	 * we reset the partial cluster because we've reached the start of the
	 * truncated/punched region and we're done removing blocks.
	 */
-	if (*partial_cluster > 0 && ex >= EXT_FIRST_EXTENT(eh)) {
+	if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
		pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
-		if (*partial_cluster != (long long) EXT4_B2C(sbi, pblk)) {
+		if (partial->pclu != EXT4_B2C(sbi, pblk)) {
+			int flags = get_default_free_blocks_flags(inode);
+
+			if (ext4_is_pending(inode, partial->lblk))
+				flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
			ext4_free_blocks(handle, inode, NULL,
-					 EXT4_C2B(sbi, *partial_cluster),
-					 sbi->s_cluster_ratio,
-					 get_default_free_blocks_flags(inode));
+					 EXT4_C2B(sbi, partial->pclu),
+					 sbi->s_cluster_ratio, flags);
+			if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
+				ext4_rereserve_cluster(inode, partial->lblk);
		}
-		*partial_cluster = 0;
+		partial->state = initial;
	}

	/* if this leaf is free, then we should
@@ -2849,14 +2810,20 @@
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	int depth = ext_depth(inode);
	struct ext4_ext_path *path = NULL;
-	long long partial_cluster = 0;
+	struct partial_cluster partial;
	handle_t *handle;
	int i = 0, err = 0;

-	ext_debug("truncate since %u to %u\n", start, end);
+	partial.pclu = 0;
+	partial.lblk = 0;
+	partial.state = initial;
+
+	ext_debug(inode, "truncate since %u to %u\n", start, end);

	/* probably first extent we're gonna free will be last in block */
-	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
+	handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
			depth + 1,
			ext4_free_metadata_revoke_credits(inode->i_sb, depth));
	if (IS_ERR(handle))
		return PTR_ERR(handle);
@@ -2876,7 +2843,8 @@
		ext4_fsblk_t pblk;

		/* find extent for or closest extent to this block */
-		path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
+		path = ext4_find_extent(inode, end, NULL,
+					EXT4_EX_NOCACHE | EXT4_EX_NOFAIL);
		if (IS_ERR(path)) {
			ext4_journal_stop(handle);
			return PTR_ERR(path);
@@ -2912,8 +2880,8 @@
		 */
		if (sbi->s_cluster_ratio > 1) {
			pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
-			partial_cluster =
-				-(long long) EXT4_B2C(sbi, pblk);
+			partial.pclu = EXT4_B2C(sbi, pblk);
+			partial.state = nofree;
		}

		/*
@@ -2927,23 +2895,27 @@
			if (err < 0)
				goto out;

-		} else if (sbi->s_cluster_ratio > 1 && end >= ex_end) {
+		} else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
+			   partial.state == initial) {
			/*
-			 * If there's an extent to the right its first cluster
-			 * contains the immediate right boundary of the
-			 * truncated/punched region. Set partial_cluster to
-			 * its negative value so it won't be freed if shared
-			 * with the current extent. The end < ee_block case
-			 * is handled in ext4_ext_rm_leaf().
+			 * If we're punching, there's an extent to the right.
+			 * If the partial cluster hasn't been set, set it to
+			 * that extent's first cluster and its state to nofree
+			 * so it won't be freed should it contain blocks to be
+			 * removed. If it's already set (tofree/nofree), we're
+			 * retrying and keep the original partial cluster info
+			 * so a cluster marked tofree as a result of earlier
+			 * extent removal is not lost.
			 */
			lblk = ex_end + 1;
			err = ext4_ext_search_right(inode, path, &lblk, &pblk,
-						    &ex);
-			if (err)
+						    NULL);
+			if (err < 0)
				goto out;
-			if (pblk)
-				partial_cluster =
-					-(long long) EXT4_B2C(sbi, pblk);
+			if (pblk) {
+				partial.pclu = EXT4_B2C(sbi, pblk);
+				partial.state = nofree;
+			}
		}
	}
	/*
@@ -2958,7 +2930,7 @@
			le16_to_cpu(path[k].p_hdr->eh_entries)+1;
	} else {
		path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
-			       GFP_NOFS);
+			       GFP_NOFS | __GFP_NOFAIL);
		if (path == NULL) {
			ext4_journal_stop(handle);
			return -ENOMEM;
@@ -2978,8 +2950,7 @@
		if (i == depth) {
			/* this is leaf block */
			err = ext4_ext_rm_leaf(handle, inode, path,
-					       &partial_cluster, start,
-					       end);
+					       &partial, start, end);
			/* root level has p_bh == NULL, brelse() eats this */
			brelse(path[i].p_bh);
			path[i].p_bh = NULL;
@@ -2989,7 +2960,7 @@

		/* this is index block */
		if (!path[i].p_hdr) {
-			ext_debug("initialize header\n");
+			ext_debug(inode, "initialize header\n");
			path[i].p_hdr = ext_block_hdr(path[i].p_bh);
		}

@@ -2997,7 +2968,7 @@
			/* this level hasn't been touched yet */
			path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
-			ext_debug("init index ptr: hdr 0x%p, num %d\n",
+			ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
				  path[i].p_hdr,
				  le16_to_cpu(path[i].p_hdr->eh_entries));
		} else {
@@ -3005,18 +2976,18 @@
			path[i].p_idx--;
		}

-		ext_debug("level %d - index, first 0x%p, cur 0x%p\n",
+		ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
				i, EXT_FIRST_INDEX(path[i].p_hdr),
				path[i].p_idx);
		if (ext4_ext_more_to_rm(path + i)) {
			struct buffer_head *bh;
			/* go to the next level */
-			ext_debug("move to level %d (block %llu)\n",
+			ext_debug(inode, "move to level %d (block %llu)\n",
				  i + 1, ext4_idx_pblock(path[i].p_idx));
			memset(path + i + 1, 0, sizeof(*path));
-			bh = read_extent_tree_block(inode,
-				ext4_idx_pblock(path[i].p_idx), depth - i - 1,
-				EXT4_EX_NOCACHE);
+			bh = read_extent_tree_block(inode, path[i].p_idx,
+						    depth - i - 1,
+						    EXT4_EX_NOCACHE);
			if (IS_ERR(bh)) {
				/* should we reset i_size? */
				err = PTR_ERR(bh);
@@ -3047,25 +3018,28 @@
		brelse(path[i].p_bh);
		path[i].p_bh = NULL;
		i--;
-		ext_debug("return to level %d\n", i);
+		ext_debug(inode, "return to level %d\n", i);
		}
	}

-	trace_ext4_ext_remove_space_done(inode, start, end, depth,
-					 partial_cluster, path->p_hdr->eh_entries);
+	trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
3026
+ path->p_hdr->eh_entries);
30563027
30573028 /*
3058
- * If we still have something in the partial cluster and we have removed
3059
- * even the first extent, then we should free the blocks in the partial
3060
- * cluster as well. (This code will only run when there are no leaves
3061
- * to the immediate left of the truncated/punched region.)
3029
+ * if there's a partial cluster and we have removed the first extent
3030
+ * in the file, then we also free the partial cluster, if any
30623031 */
3063
- if (partial_cluster > 0 && err == 0) {
3064
- /* don't zero partial_cluster since it's not used afterwards */
3032
+ if (partial.state == tofree && err == 0) {
3033
+ int flags = get_default_free_blocks_flags(inode);
3034
+
3035
+ if (ext4_is_pending(inode, partial.lblk))
3036
+ flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
30653037 ext4_free_blocks(handle, inode, NULL,
3066
- EXT4_C2B(sbi, partial_cluster),
3067
- sbi->s_cluster_ratio,
3068
- get_default_free_blocks_flags(inode));
3038
+ EXT4_C2B(sbi, partial.pclu),
3039
+ sbi->s_cluster_ratio, flags);
3040
+ if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
3041
+ ext4_rereserve_cluster(inode, partial.lblk);
3042
+ partial.state = initial;
30693043 }
30703044
30713045 /* TODO: flexible tree reduction should be here */
....@@ -3186,7 +3160,7 @@
31863160 *
31873161 *
31883162 * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
3189
- * of which are deterimined by split_flag.
3163
+ * of which are determined by split_flag.
31903164 *
31913165 * There are two cases:
31923166 * a> the extent is split into two extents.
....@@ -3212,8 +3186,7 @@
32123186 BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
32133187 (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
32143188
3215
- ext_debug("ext4_split_extents_at: inode %lu, logical"
3216
- "block %llu\n", inode->i_ino, (unsigned long long)split);
3189
+ ext_debug(inode, "logical block %llu\n", (unsigned long long)split);
32173190
32183191 ext4_ext_show_leaf(inode, path);
32193192
....@@ -3321,6 +3294,10 @@
33213294
33223295 fix_extent_len:
33233296 ex->ee_len = orig_ex.ee_len;
3297
+ /*
3298
+ * Ignore ext4_ext_dirty return value since we are already in the error path
3299
+ * and err is a non-zero error code.
3300
+ */
33243301 ext4_ext_dirty(handle, inode, path + path->p_depth);
33253302 return err;
33263303 out:
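
The pattern in the fix_extent_len block above (restore the old extent length, then deliberately drop the secondary ext4_ext_dirty() result so the original error is what the caller sees) is a common error-path idiom. A minimal userspace analogue, with hypothetical names:

#include <errno.h>
#include <sys/types.h>
#include <unistd.h>

/* Append a record, rolling back to old_end on a short or failed
 * write. The rollback's own result is deliberately ignored so the
 * caller sees the original error, mirroring how ext4_ext_dirty()'s
 * return value is dropped on the error path above. */
static int append_record(int fd, const void *buf, size_t len, off_t old_end)
{
	ssize_t n = pwrite(fd, buf, len, old_end);

	if (n == (ssize_t)len)
		return 0;
	(void)ftruncate(fd, old_end);	/* best effort only */
	return (n < 0) ? -errno : -EIO;
}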
....@@ -3380,7 +3357,7 @@
33803357 * Update path is required because previous ext4_split_extent_at() may
33813358 * result in split of original leaf or extent zeroout.
33823359 */
3383
- path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3360
+ path = ext4_find_extent(inode, map->m_lblk, ppath, flags);
33843361 if (IS_ERR(path))
33853362 return PTR_ERR(path);
33863363 depth = ext_depth(inode);
....@@ -3449,9 +3426,8 @@
34493426 int err = 0;
34503427 int split_flag = EXT4_EXT_DATA_VALID2;
34513428
3452
- ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
3453
- "block %llu, max_blocks %u\n", inode->i_ino,
3454
- (unsigned long long)map->m_lblk, map_len);
3429
+ ext_debug(inode, "logical block %llu, max_blocks %u\n",
3430
+ (unsigned long long)map->m_lblk, map_len);
34553431
34563432 sbi = EXT4_SB(inode->i_sb);
34573433 eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
....@@ -3583,7 +3559,7 @@
35833559 }
35843560 if (allocated) {
35853561 /* Mark the block containing both extents as dirty */
3586
- ext4_ext_dirty(handle, inode, path + depth);
3562
+ err = ext4_ext_dirty(handle, inode, path + depth);
35873563
35883564 /* Update path to point to the right extent */
35893565 path[depth].p_ext = abut_ex;
....@@ -3601,9 +3577,6 @@
36013577 if (EXT4_EXT_MAY_ZEROOUT & split_flag)
36023578 max_zeroout = sbi->s_extent_max_zeroout_kb >>
36033579 (inode->i_sb->s_blocksize_bits - 10);
3604
-
3605
- if (IS_ENCRYPTED(inode))
3606
- max_zeroout = 0;
36073580
36083581 /*
36093582 * five cases:
....@@ -3706,8 +3679,7 @@
37063679 unsigned int ee_len;
37073680 int split_flag = 0, depth;
37083681
3709
- ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n",
3710
- __func__, inode->i_ino,
3682
+ ext_debug(inode, "logical block %llu, max_blocks %u\n",
37113683 (unsigned long long)map->m_lblk, map->m_len);
37123684
37133685 eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
....@@ -3716,7 +3688,7 @@
37163688 eof_block = map->m_lblk + map->m_len;
37173689 /*
37183690 * It is safe to convert extent to initialized via explicit
3719
- * zeroout only if extent is fully insde i_size or new_size.
3691
+ * zeroout only if extent is fully inside i_size or new_size.
37203692 */
37213693 depth = ext_depth(inode);
37223694 ex = path[depth].p_ext;
....@@ -3753,8 +3725,7 @@
37533725 ee_block = le32_to_cpu(ex->ee_block);
37543726 ee_len = ext4_ext_get_actual_len(ex);
37553727
3756
- ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
3757
- "block %llu, max_blocks %u\n", inode->i_ino,
3728
+ ext_debug(inode, "logical block %llu, max_blocks %u\n",
37583729 (unsigned long long)ee_block, ee_len);
37593730
37603731 /* If extent is larger than requested it is a clear sign that we still
....@@ -3799,172 +3770,11 @@
37993770 return err;
38003771 }
38013772
3802
-/*
3803
- * Handle EOFBLOCKS_FL flag, clearing it if necessary
3804
- */
3805
-static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3806
- ext4_lblk_t lblk,
3807
- struct ext4_ext_path *path,
3808
- unsigned int len)
3809
-{
3810
- int i, depth;
3811
- struct ext4_extent_header *eh;
3812
- struct ext4_extent *last_ex;
3813
-
3814
- if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
3815
- return 0;
3816
-
3817
- depth = ext_depth(inode);
3818
- eh = path[depth].p_hdr;
3819
-
3820
- /*
3821
- * We're going to remove EOFBLOCKS_FL entirely in future so we
3822
- * do not care for this case anymore. Simply remove the flag
3823
- * if there are no extents.
3824
- */
3825
- if (unlikely(!eh->eh_entries))
3826
- goto out;
3827
- last_ex = EXT_LAST_EXTENT(eh);
3828
- /*
3829
- * We should clear the EOFBLOCKS_FL flag if we are writing the
3830
- * last block in the last extent in the file. We test this by
3831
- * first checking to see if the caller to
3832
- * ext4_ext_get_blocks() was interested in the last block (or
3833
- * a block beyond the last block) in the current extent. If
3834
- * this turns out to be false, we can bail out from this
3835
- * function immediately.
3836
- */
3837
- if (lblk + len < le32_to_cpu(last_ex->ee_block) +
3838
- ext4_ext_get_actual_len(last_ex))
3839
- return 0;
3840
- /*
3841
- * If the caller does appear to be planning to write at or
3842
- * beyond the end of the current extent, we then test to see
3843
- * if the current extent is the last extent in the file, by
3844
- * checking to make sure it was reached via the rightmost node
3845
- * at each level of the tree.
3846
- */
3847
- for (i = depth-1; i >= 0; i--)
3848
- if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
3849
- return 0;
3850
-out:
3851
- ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
3852
- return ext4_mark_inode_dirty(handle, inode);
3853
-}
3854
-
3855
-/**
3856
- * ext4_find_delalloc_range: find delayed allocated block in the given range.
3857
- *
3858
- * Return 1 if there is a delalloc block in the range, otherwise 0.
3859
- */
3860
-int ext4_find_delalloc_range(struct inode *inode,
3861
- ext4_lblk_t lblk_start,
3862
- ext4_lblk_t lblk_end)
3863
-{
3864
- struct extent_status es;
3865
-
3866
- ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es);
3867
- if (es.es_len == 0)
3868
- return 0; /* there is no delay extent in this tree */
3869
- else if (es.es_lblk <= lblk_start &&
3870
- lblk_start < es.es_lblk + es.es_len)
3871
- return 1;
3872
- else if (lblk_start <= es.es_lblk && es.es_lblk <= lblk_end)
3873
- return 1;
3874
- else
3875
- return 0;
3876
-}
3877
-
3878
-int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
3879
-{
3880
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3881
- ext4_lblk_t lblk_start, lblk_end;
3882
- lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
3883
- lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
3884
-
3885
- return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
3886
-}
3887
-
3888
-/**
3889
- * Determines how many complete clusters (out of those specified by the 'map')
3890
- * are under delalloc and were reserved quota for.
3891
- * This function is called when we are writing out the blocks that were
3892
- * originally written with their allocation delayed, but then the space was
3893
- * allocated using fallocate() before the delayed allocation could be resolved.
3894
- * The cases to look for are:
3895
- * ('=' indicated delayed allocated blocks
3896
- * '-' indicates non-delayed allocated blocks)
3897
- * (a) partial clusters towards beginning and/or end outside of allocated range
3898
- * are not delalloc'ed.
3899
- * Ex:
3900
- * |----c---=|====c====|====c====|===-c----|
3901
- * |++++++ allocated ++++++|
3902
- * ==> 4 complete clusters in above example
3903
- *
3904
- * (b) partial cluster (outside of allocated range) towards either end is
3905
- * marked for delayed allocation. In this case, we will exclude that
3906
- * cluster.
3907
- * Ex:
3908
- * |----====c========|========c========|
3909
- * |++++++ allocated ++++++|
3910
- * ==> 1 complete clusters in above example
3911
- *
3912
- * Ex:
3913
- * |================c================|
3914
- * |++++++ allocated ++++++|
3915
- * ==> 0 complete clusters in above example
3916
- *
3917
- * The ext4_da_update_reserve_space will be called only if we
3918
- * determine here that there were some "entire" clusters that span
3919
- * this 'allocated' range.
3920
- * In the non-bigalloc case, this function will just end up returning num_blks
3921
- * without ever calling ext4_find_delalloc_range.
3922
- */
3923
-static unsigned int
3924
-get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3925
- unsigned int num_blks)
3926
-{
3927
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3928
- ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
3929
- ext4_lblk_t lblk_from, lblk_to, c_offset;
3930
- unsigned int allocated_clusters = 0;
3931
-
3932
- alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
3933
- alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
3934
-
3935
- /* max possible clusters for this allocation */
3936
- allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
3937
-
3938
- trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
3939
-
3940
- /* Check towards left side */
3941
- c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
3942
- if (c_offset) {
3943
- lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
3944
- lblk_to = lblk_from + c_offset - 1;
3945
-
3946
- if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
3947
- allocated_clusters--;
3948
- }
3949
-
3950
- /* Now check towards right. */
3951
- c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
3952
- if (allocated_clusters && c_offset) {
3953
- lblk_from = lblk_start + num_blks;
3954
- lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
3955
-
3956
- if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
3957
- allocated_clusters--;
3958
- }
3959
-
3960
- return allocated_clusters;
3961
-}
3962
-
39633773 static int
39643774 convert_initialized_extent(handle_t *handle, struct inode *inode,
39653775 struct ext4_map_blocks *map,
39663776 struct ext4_ext_path **ppath,
3967
- unsigned int allocated)
3777
+ unsigned int *allocated)
39683778 {
39693779 struct ext4_ext_path *path = *ppath;
39703780 struct ext4_extent *ex;
....@@ -3985,8 +3795,7 @@
39853795 ee_block = le32_to_cpu(ex->ee_block);
39863796 ee_len = ext4_ext_get_actual_len(ex);
39873797
3988
- ext_debug("%s: inode %lu, logical"
3989
- "block %llu, max_blocks %u\n", __func__, inode->i_ino,
3798
+ ext_debug(inode, "logical block %llu, max_blocks %u\n",
39903799 (unsigned long long)ee_block, ee_len);
39913800
39923801 if (ee_block != map->m_lblk || ee_len > map->m_len) {
....@@ -4024,14 +3833,12 @@
40243833 ext4_ext_show_leaf(inode, path);
40253834
40263835 ext4_update_inode_fsync_trans(handle, inode, 1);
4027
- err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len);
4028
- if (err)
4029
- return err;
3836
+
40303837 map->m_flags |= EXT4_MAP_UNWRITTEN;
4031
- if (allocated > map->m_len)
4032
- allocated = map->m_len;
4033
- map->m_len = allocated;
4034
- return allocated;
3838
+ if (*allocated > map->m_len)
3839
+ *allocated = map->m_len;
3840
+ map->m_len = *allocated;
3841
+ return 0;
40353842 }
40363843
40373844 static int
....@@ -4040,14 +3847,13 @@
40403847 struct ext4_ext_path **ppath, int flags,
40413848 unsigned int allocated, ext4_fsblk_t newblock)
40423849 {
4043
- struct ext4_ext_path *path = *ppath;
3850
+ struct ext4_ext_path __maybe_unused *path = *ppath;
40443851 int ret = 0;
40453852 int err = 0;
40463853
4047
- ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical "
4048
- "block %llu, max_blocks %u, flags %x, allocated %u\n",
4049
- inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
4050
- flags, allocated);
3854
+ ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
3855
+ (unsigned long long)map->m_lblk, map->m_len, flags,
3856
+ allocated);
40513857 ext4_ext_show_leaf(inode, path);
40523858
40533859 /*
....@@ -4059,41 +3865,38 @@
40593865 trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
40603866 allocated, newblock);
40613867
4062
- /* get_block() before submit the IO, split the extent */
3868
+ /* get_block() before submitting IO, split the extent */
40633869 if (flags & EXT4_GET_BLOCKS_PRE_IO) {
40643870 ret = ext4_split_convert_extents(handle, inode, map, ppath,
40653871 flags | EXT4_GET_BLOCKS_CONVERT);
4066
- if (ret <= 0)
4067
- goto out;
3872
+ if (ret < 0) {
3873
+ err = ret;
3874
+ goto out2;
3875
+ }
3876
+ /*
3877
+ * shouldn't get a 0 return when splitting an extent unless
3878
+ * m_len is 0 (bug) or extent has been corrupted
3879
+ */
3880
+ if (unlikely(ret == 0)) {
3881
+ EXT4_ERROR_INODE(inode,
3882
+ "unexpected ret == 0, m_len = %u",
3883
+ map->m_len);
3884
+ err = -EFSCORRUPTED;
3885
+ goto out2;
3886
+ }
40683887 map->m_flags |= EXT4_MAP_UNWRITTEN;
40693888 goto out;
40703889 }
40713890 /* IO end_io complete, convert the filled extent to written */
40723891 if (flags & EXT4_GET_BLOCKS_CONVERT) {
4073
- if (flags & EXT4_GET_BLOCKS_ZERO) {
4074
- if (allocated > map->m_len)
4075
- allocated = map->m_len;
4076
- err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
4077
- allocated);
4078
- if (err < 0)
4079
- goto out2;
4080
- }
4081
- ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
3892
+ err = ext4_convert_unwritten_extents_endio(handle, inode, map,
40823893 ppath);
4083
- if (ret >= 0) {
4084
- ext4_update_inode_fsync_trans(handle, inode, 1);
4085
- err = check_eofblocks_fl(handle, inode, map->m_lblk,
4086
- path, map->m_len);
4087
- } else
4088
- err = ret;
4089
- map->m_flags |= EXT4_MAP_MAPPED;
4090
- map->m_pblk = newblock;
4091
- if (allocated > map->m_len)
4092
- allocated = map->m_len;
4093
- map->m_len = allocated;
4094
- goto out2;
3894
+ if (err < 0)
3895
+ goto out2;
3896
+ ext4_update_inode_fsync_trans(handle, inode, 1);
3897
+ goto map_out;
40953898 }
4096
- /* buffered IO case */
3899
+ /* buffered IO cases */
40973900 /*
40983901 * repeat fallocate creation request
40993902 * we already have an unwritten extent
....@@ -4116,62 +3919,39 @@
41163919 goto out1;
41173920 }
41183921
4119
- /* buffered write, writepage time, convert*/
3922
+ /*
3923
+ * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1.
3924
+ * For buffered writes, at writepage time, etc. Convert a
3925
+ * discovered unwritten extent to written.
3926
+ */
41203927 ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
4121
- if (ret >= 0)
4122
- ext4_update_inode_fsync_trans(handle, inode, 1);
4123
-out:
4124
- if (ret <= 0) {
3928
+ if (ret < 0) {
41253929 err = ret;
41263930 goto out2;
4127
- } else
4128
- allocated = ret;
3931
+ }
3932
+ ext4_update_inode_fsync_trans(handle, inode, 1);
3933
+ /*
3934
+ * shouldn't get a 0 return when converting an unwritten extent
3935
+ * unless m_len is 0 (bug) or extent has been corrupted
3936
+ */
3937
+ if (unlikely(ret == 0)) {
3938
+ EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u",
3939
+ map->m_len);
3940
+ err = -EFSCORRUPTED;
3941
+ goto out2;
3942
+ }
3943
+
3944
+out:
3945
+ allocated = ret;
41293946 map->m_flags |= EXT4_MAP_NEW;
4130
- /*
4131
- * if we allocated more blocks than requested
4132
- * we need to make sure we unmap the extra block
4133
- * allocated. The actual needed block will get
4134
- * unmapped later when we find the buffer_head marked
4135
- * new.
4136
- */
4137
- if (allocated > map->m_len) {
4138
- clean_bdev_aliases(inode->i_sb->s_bdev, newblock + map->m_len,
4139
- allocated - map->m_len);
4140
- allocated = map->m_len;
4141
- }
4142
- map->m_len = allocated;
4143
-
4144
- /*
4145
- * If we have done fallocate with the offset that is already
4146
- * delayed allocated, we would have block reservation
4147
- * and quota reservation done in the delayed write path.
4148
- * But fallocate would have already updated quota and block
4149
- * count for this offset. So cancel these reservation
4150
- */
4151
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
4152
- unsigned int reserved_clusters;
4153
- reserved_clusters = get_reserved_cluster_alloc(inode,
4154
- map->m_lblk, map->m_len);
4155
- if (reserved_clusters)
4156
- ext4_da_update_reserve_space(inode,
4157
- reserved_clusters,
4158
- 0);
4159
- }
4160
-
41613947 map_out:
41623948 map->m_flags |= EXT4_MAP_MAPPED;
4163
- if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) {
4164
- err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
4165
- map->m_len);
4166
- if (err < 0)
4167
- goto out2;
4168
- }
41693949 out1:
3950
+ map->m_pblk = newblock;
41703951 if (allocated > map->m_len)
41713952 allocated = map->m_len;
4172
- ext4_ext_show_leaf(inode, path);
4173
- map->m_pblk = newblock;
41743953 map->m_len = allocated;
3954
+ ext4_ext_show_leaf(inode, path);
41753955 out2:
41763956 return err ? err : allocated;
41773957 }
....@@ -4287,7 +4067,7 @@
42874067 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
42884068 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
42894069 *
4290
- * return > 0, number of of blocks already mapped/allocated
4070
+ * return > 0, number of blocks already mapped/allocated
42914071 * if create == 0 and these are pre-allocated blocks
42924072 * buffer head is unmapped
42934073 * otherwise blocks are mapped
....@@ -4301,18 +4081,16 @@
43014081 struct ext4_map_blocks *map, int flags)
43024082 {
43034083 struct ext4_ext_path *path = NULL;
4304
- struct ext4_extent newex, *ex, *ex2;
4084
+ struct ext4_extent newex, *ex, ex2;
43054085 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4306
- ext4_fsblk_t newblock = 0;
4307
- int free_on_err = 0, err = 0, depth, ret;
4086
+ ext4_fsblk_t newblock = 0, pblk;
4087
+ int err = 0, depth, ret;
43084088 unsigned int allocated = 0, offset = 0;
43094089 unsigned int allocated_clusters = 0;
43104090 struct ext4_allocation_request ar;
43114091 ext4_lblk_t cluster_offset;
4312
- bool map_from_cluster = false;
43134092
4314
- ext_debug("blocks %u/%u requested for inode %lu\n",
4315
- map->m_lblk, map->m_len, inode->i_ino);
4093
+ ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len);
43164094 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
43174095
43184096 /* find extent for this block */
....@@ -4320,7 +4098,7 @@
43204098 if (IS_ERR(path)) {
43214099 err = PTR_ERR(path);
43224100 path = NULL;
4323
- goto out2;
4101
+ goto out;
43244102 }
43254103
43264104 depth = ext_depth(inode);
....@@ -4336,7 +4114,7 @@
43364114 (unsigned long) map->m_lblk, depth,
43374115 path[depth].p_block);
43384116 err = -EFSCORRUPTED;
4339
- goto out2;
4117
+ goto out;
43404118 }
43414119
43424120 ex = path[depth].p_ext;
....@@ -4359,8 +4137,8 @@
43594137 newblock = map->m_lblk - ee_block + ee_start;
43604138 /* number of remaining blocks in the extent */
43614139 allocated = ee_len - (map->m_lblk - ee_block);
4362
- ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
4363
- ee_block, ee_len, newblock);
4140
+ ext_debug(inode, "%u fit into %u:%d -> %llu\n",
4141
+ map->m_lblk, ee_block, ee_len, newblock);
43644142
43654143 /*
43664144 * If the extent is initialized check whether the
....@@ -4368,12 +4146,18 @@
43684146 */
43694147 if ((!ext4_ext_is_unwritten(ex)) &&
43704148 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4371
- allocated = convert_initialized_extent(
4372
- handle, inode, map, &path,
4373
- allocated);
4374
- goto out2;
4375
- } else if (!ext4_ext_is_unwritten(ex))
4149
+ err = convert_initialized_extent(handle,
4150
+ inode, map, &path, &allocated);
43764151 goto out;
4152
+ } else if (!ext4_ext_is_unwritten(ex)) {
4153
+ map->m_flags |= EXT4_MAP_MAPPED;
4154
+ map->m_pblk = newblock;
4155
+ if (allocated > map->m_len)
4156
+ allocated = map->m_len;
4157
+ map->m_len = allocated;
4158
+ ext4_ext_show_leaf(inode, path);
4159
+ goto out;
4160
+ }
43774161
43784162 ret = ext4_ext_handle_unwritten_extents(
43794163 handle, inode, map, &path, flags,
....@@ -4382,7 +4166,7 @@
43824166 err = ret;
43834167 else
43844168 allocated = ret;
4385
- goto out2;
4169
+ goto out;
43864170 }
43874171 }
43884172
....@@ -4407,7 +4191,7 @@
44074191 map->m_pblk = 0;
44084192 map->m_len = min_t(unsigned int, map->m_len, hole_len);
44094193
4410
- goto out2;
4194
+ goto out;
44114195 }
44124196
44134197 /*
....@@ -4424,7 +4208,6 @@
44244208 get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
44254209 ar.len = allocated = map->m_len;
44264210 newblock = map->m_pblk;
4427
- map_from_cluster = true;
44284211 goto got_allocated_blocks;
44294212 }
44304213
....@@ -4432,20 +4215,18 @@
44324215 ar.lleft = map->m_lblk;
44334216 err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
44344217 if (err)
4435
- goto out2;
4218
+ goto out;
44364219 ar.lright = map->m_lblk;
4437
- ex2 = NULL;
44384220 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
4439
- if (err)
4440
- goto out2;
4221
+ if (err < 0)
4222
+ goto out;
44414223
44424224 /* Check if the extent after searching to the right implies a
44434225 * cluster we can use. */
4444
- if ((sbi->s_cluster_ratio > 1) && ex2 &&
4445
- get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
4226
+ if ((sbi->s_cluster_ratio > 1) && err &&
4227
+ get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
44464228 ar.len = allocated = map->m_len;
44474229 newblock = map->m_pblk;
4448
- map_from_cluster = true;
44494230 goto got_allocated_blocks;
44504231 }
44514232
....@@ -4499,124 +4280,79 @@
44994280 ar.flags |= EXT4_MB_USE_RESERVED;
45004281 newblock = ext4_mb_new_blocks(handle, &ar, &err);
45014282 if (!newblock)
4502
- goto out2;
4503
- ext_debug("allocate new block: goal %llu, found %llu/%u\n",
4504
- ar.goal, newblock, allocated);
4505
- free_on_err = 1;
4283
+ goto out;
45064284 allocated_clusters = ar.len;
45074285 ar.len = EXT4_C2B(sbi, ar.len) - offset;
4286
+ ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n",
4287
+ ar.goal, newblock, ar.len, allocated);
45084288 if (ar.len > allocated)
45094289 ar.len = allocated;
45104290
45114291 got_allocated_blocks:
45124292 /* try to insert new extent into found leaf and return */
4513
- ext4_ext_store_pblock(&newex, newblock + offset);
4293
+ pblk = newblock + offset;
4294
+ ext4_ext_store_pblock(&newex, pblk);
45144295 newex.ee_len = cpu_to_le16(ar.len);
45154296 /* Mark unwritten */
4516
- if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){
4297
+ if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
45174298 ext4_ext_mark_unwritten(&newex);
45184299 map->m_flags |= EXT4_MAP_UNWRITTEN;
45194300 }
45204301
4521
- err = 0;
4522
- if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0)
4523
- err = check_eofblocks_fl(handle, inode, map->m_lblk,
4524
- path, ar.len);
4525
- if (!err)
4526
- err = ext4_ext_insert_extent(handle, inode, &path,
4527
- &newex, flags);
4302
+ err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags);
4303
+ if (err) {
4304
+ if (allocated_clusters) {
4305
+ int fb_flags = 0;
45284306
4529
- if (err && free_on_err) {
4530
- int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
4531
- EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
4532
- /* free data blocks we just allocated */
4533
- /* not a good idea to call discard here directly,
4534
- * but otherwise we'd need to call it every free() */
4535
- ext4_discard_preallocations(inode);
4536
- ext4_free_blocks(handle, inode, NULL, newblock,
4537
- EXT4_C2B(sbi, allocated_clusters), fb_flags);
4538
- goto out2;
4307
+ /*
4308
+ * free data blocks we just allocated.
4309
+ * not a good idea to call discard here directly,
4310
+ * but otherwise we'd need to call it every free().
4311
+ */
4312
+ ext4_discard_preallocations(inode, 0);
4313
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4314
+ fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
4315
+ ext4_free_blocks(handle, inode, NULL, newblock,
4316
+ EXT4_C2B(sbi, allocated_clusters),
4317
+ fb_flags);
4318
+ }
4319
+ goto out;
45394320 }
45404321
4541
- /* previous routine could use block we allocated */
4542
- newblock = ext4_ext_pblock(&newex);
4543
- allocated = ext4_ext_get_actual_len(&newex);
4544
- if (allocated > map->m_len)
4545
- allocated = map->m_len;
4546
- map->m_flags |= EXT4_MAP_NEW;
4547
-
45484322 /*
4549
- * Update reserved blocks/metadata blocks after successful
4550
- * block allocation which had been deferred till now.
4323
+ * Reduce the reserved cluster count to reflect successful deferred
4324
+ * allocation of delayed allocated clusters or direct allocation of
4325
+ * clusters discovered to be delayed allocated. Once allocated, a
4326
+ * cluster is not included in the reserved count.
45514327 */
4552
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
4553
- unsigned int reserved_clusters;
4554
- /*
4555
- * Check how many clusters we had reserved this allocated range
4556
- */
4557
- reserved_clusters = get_reserved_cluster_alloc(inode,
4558
- map->m_lblk, allocated);
4559
- if (!map_from_cluster) {
4560
- BUG_ON(allocated_clusters < reserved_clusters);
4561
- if (reserved_clusters < allocated_clusters) {
4562
- struct ext4_inode_info *ei = EXT4_I(inode);
4563
- int reservation = allocated_clusters -
4564
- reserved_clusters;
4565
- /*
4566
- * It seems we claimed few clusters outside of
4567
- * the range of this allocation. We should give
4568
- * it back to the reservation pool. This can
4569
- * happen in the following case:
4570
- *
4571
- * * Suppose s_cluster_ratio is 4 (i.e., each
4572
- * cluster has 4 blocks. Thus, the clusters
4573
- * are [0-3],[4-7],[8-11]...
4574
- * * First comes delayed allocation write for
4575
- * logical blocks 10 & 11. Since there were no
4576
- * previous delayed allocated blocks in the
4577
- * range [8-11], we would reserve 1 cluster
4578
- * for this write.
4579
- * * Next comes write for logical blocks 3 to 8.
4580
- * In this case, we will reserve 2 clusters
4581
- * (for [0-3] and [4-7]; and not for [8-11] as
4582
- * that range has a delayed allocated blocks.
4583
- * Thus total reserved clusters now becomes 3.
4584
- * * Now, during the delayed allocation writeout
4585
- * time, we will first write blocks [3-8] and
4586
- * allocate 3 clusters for writing these
4587
- * blocks. Also, we would claim all these
4588
- * three clusters above.
4589
- * * Now when we come here to writeout the
4590
- * blocks [10-11], we would expect to claim
4591
- * the reservation of 1 cluster we had made
4592
- * (and we would claim it since there are no
4593
- * more delayed allocated blocks in the range
4594
- * [8-11]. But our reserved cluster count had
4595
- * already gone to 0.
4596
- *
4597
- * Thus, at the step 4 above when we determine
4598
- * that there are still some unwritten delayed
4599
- * allocated blocks outside of our current
4600
- * block range, we should increment the
4601
- * reserved clusters count so that when the
4602
- * remaining blocks finally gets written, we
4603
- * could claim them.
4604
- */
4605
- dquot_reserve_block(inode,
4606
- EXT4_C2B(sbi, reservation));
4607
- spin_lock(&ei->i_block_reservation_lock);
4608
- ei->i_reserved_data_blocks += reservation;
4609
- spin_unlock(&ei->i_block_reservation_lock);
4610
- }
4328
+ if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) {
4329
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
46114330 /*
4612
- * We will claim quota for all newly allocated blocks.
4613
- * We're updating the reserved space *after* the
4614
- * correction above so we do not accidentally free
4615
- * all the metadata reservation because we might
4616
- * actually need it later on.
4331
+ * When allocating delayed allocated clusters, simply
4332
+ * reduce the reserved cluster count and claim quota
46174333 */
46184334 ext4_da_update_reserve_space(inode, allocated_clusters,
46194335 1);
4336
+ } else {
4337
+ ext4_lblk_t lblk, len;
4338
+ unsigned int n;
4339
+
4340
+ /*
4341
+ * When allocating non-delayed allocated clusters
4342
+ * (from fallocate, filemap, DIO, or clusters
4343
+ * allocated when delalloc has been disabled by
4344
+ * ext4_nonda_switch), reduce the reserved cluster
4345
+ * count by the number of allocated clusters that
4346
+ * have previously been delayed allocated. Quota
4347
+ * has been claimed by ext4_mb_new_blocks() above,
4348
+ * so release the quota reservations made for any
4349
+ * previously delayed allocated clusters.
4350
+ */
4351
+ lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
4352
+ len = allocated_clusters << sbi->s_cluster_bits;
4353
+ n = ext4_es_delayed_clu(inode, lblk, len);
4354
+ if (n > 0)
4355
+ ext4_da_update_reserve_space(inode, (int) n, 0);
46204356 }
46214357 }
46224358
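
The lblk/len computation above rounds the mapped range out to whole clusters before asking the extent-status cache how many of them were delayed allocated. A runnable userspace sketch of the mask arithmetic (an EXT4_LBLK_CMASK analogue; names hypothetical):

#include <stdio.h>

typedef unsigned int ext4_lblk_t;

/* Round a logical block down to the start of its cluster, assuming
 * a power-of-two cluster ratio of 2^cluster_bits blocks. */
static ext4_lblk_t lblk_cmask(ext4_lblk_t lblk, unsigned int cluster_bits)
{
	return lblk & ~((1U << cluster_bits) - 1);
}

int main(void)
{
	unsigned int cluster_bits = 2;		/* 4 blocks per cluster */
	ext4_lblk_t lblk = 10;
	ext4_lblk_t len = 3U << cluster_bits;	/* 3 clusters worth of blocks */

	printf("range [%u, %u) starts in the cluster at block %u\n",
	       lblk, lblk + len, lblk_cmask(lblk, cluster_bits));
	return 0;				/* prints 8 */
}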
....@@ -4628,14 +4364,13 @@
46284364 ext4_update_inode_fsync_trans(handle, inode, 1);
46294365 else
46304366 ext4_update_inode_fsync_trans(handle, inode, 0);
4631
-out:
4632
- if (allocated > map->m_len)
4633
- allocated = map->m_len;
4367
+
4368
+ map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED);
4369
+ map->m_pblk = pblk;
4370
+ map->m_len = ar.len;
4371
+ allocated = map->m_len;
46344372 ext4_ext_show_leaf(inode, path);
4635
- map->m_flags |= EXT4_MAP_MAPPED;
4636
- map->m_pblk = newblock;
4637
- map->m_len = allocated;
4638
-out2:
4373
+out:
46394374 ext4_ext_drop_refs(path);
46404375 kfree(path);
46414376
....@@ -4674,7 +4409,14 @@
46744409 }
46754410 if (err)
46764411 return err;
4677
- return ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4412
+retry_remove_space:
4413
+ err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4414
+ if (err == -ENOMEM) {
4415
+ cond_resched();
4416
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
4417
+ goto retry_remove_space;
4418
+ }
4419
+ return err;
46784420 }
46794421
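
Truncate must not fail on transient memory pressure, so the new loop above retries -ENOMEM after yielding (cond_resched()) and briefly waiting for writeback congestion to clear. A userspace analogue of the same retry-with-backoff shape (names hypothetical, stub worker included so the sketch runs):

#include <errno.h>
#include <time.h>

/* Stand-in for ext4_ext_remove_space(): fails transiently. */
static int do_remove_space(void)
{
	static int calls;

	return (++calls < 3) ? -ENOMEM : 0;	/* succeeds on third try */
}

int remove_space_retry(void)
{
	int err;

	for (;;) {
		err = do_remove_space();
		if (err != -ENOMEM)
			return err;
		/* back off ~20ms, roughly HZ/50 as in congestion_wait() */
		nanosleep(&(struct timespec){ .tv_nsec = 20 * 1000 * 1000 },
			  NULL);
	}
}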
46804422 static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
....@@ -4684,7 +4426,7 @@
46844426 struct inode *inode = file_inode(file);
46854427 handle_t *handle;
46864428 int ret = 0;
4687
- int ret2 = 0;
4429
+ int ret2 = 0, ret3 = 0;
46884430 int retries = 0;
46894431 int depth = 0;
46904432 struct ext4_map_blocks map;
....@@ -4743,15 +4485,12 @@
47434485 epos = new_size;
47444486 if (ext4_update_inode_size(inode, epos) & 0x1)
47454487 inode->i_mtime = inode->i_ctime;
4746
- } else {
4747
- if (epos > inode->i_size)
4748
- ext4_set_inode_flag(inode,
4749
- EXT4_INODE_EOFBLOCKS);
47504488 }
4751
- ext4_mark_inode_dirty(handle, inode);
4489
+ ret2 = ext4_mark_inode_dirty(handle, inode);
47524490 ext4_update_inode_fsync_trans(handle, inode, 1);
4753
- ret2 = ext4_journal_stop(handle);
4754
- if (ret2)
4491
+ ret3 = ext4_journal_stop(handle);
4492
+ ret2 = ret3 ? ret3 : ret2;
4493
+ if (unlikely(ret2))
47554494 break;
47564495 }
47574496 if (ret == -ENOSPC &&
....@@ -4762,6 +4501,10 @@
47624501
47634502 return ret > 0 ? ret2 : ret;
47644503 }
4504
+
4505
+static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len);
4506
+
4507
+static int ext4_insert_range(struct file *file, loff_t offset, loff_t len);
47654508
47664509 static long ext4_zero_range(struct file *file, loff_t offset,
47674510 loff_t len, int mode)
....@@ -4780,9 +4523,6 @@
47804523
47814524 trace_ext4_zero_range(inode, offset, len, mode);
47824525
4783
- if (!S_ISREG(inode->i_mode))
4784
- return -EINVAL;
4785
-
47864526 /* Call ext4_force_commit to flush all data in case of data=journal. */
47874527 if (ext4_should_journal_data(inode)) {
47884528 ret = ext4_force_commit(inode->i_sb);
....@@ -4791,7 +4531,7 @@
47914531 }
47924532
47934533 /*
4794
- * Round up offset. This is not fallocate, we neet to zero out
4534
+ * Round up offset. This is not fallocate, we need to zero out
47954535 * blocks, so convert interior block aligned part of the range to
47964536 * unwritten and possibly manually zero out unaligned parts of the
47974537 * range.
....@@ -4814,7 +4554,7 @@
48144554 inode_lock(inode);
48154555
48164556 /*
4817
- * Indirect files do not support unwritten extnets
4557
+ * Indirect files do not support unwritten extents
48184558 */
48194559 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
48204560 ret = -EOPNOTSUPP;
....@@ -4822,7 +4562,7 @@
48224562 }
48234563
48244564 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
4825
- (offset + len > i_size_read(inode) ||
4565
+ (offset + len > inode->i_size ||
48264566 offset + len > EXT4_I(inode)->i_disksize)) {
48274567 new_size = offset + len;
48284568 ret = inode_newsize_ok(inode, new_size);
....@@ -4831,11 +4571,13 @@
48314571 }
48324572
48334573 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4834
- if (mode & FALLOC_FL_KEEP_SIZE)
4835
- flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
48364574
48374575 /* Wait all existing dio workers, newcomers will block on i_mutex */
48384576 inode_dio_wait(inode);
4577
+
4578
+ ret = file_modified(file);
4579
+ if (ret)
4580
+ goto out_mutex;
48394581
48404582 /* Preallocate the range including the unaligned edges */
48414583 if (partial_begin || partial_end) {
....@@ -4899,18 +4641,11 @@
48994641 }
49004642
49014643 inode->i_mtime = inode->i_ctime = current_time(inode);
4902
- if (new_size) {
4644
+ if (new_size)
49034645 ext4_update_inode_size(inode, new_size);
4904
- } else {
4905
- /*
4906
- * Mark that we allocate beyond EOF so the subsequent truncate
4907
- * can proceed even if the new size is the same as i_size.
4908
- */
4909
- if ((offset + len) > i_size_read(inode))
4910
- ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4911
- }
4912
- ext4_mark_inode_dirty(handle, inode);
4913
-
4646
+ ret = ext4_mark_inode_dirty(handle, inode);
4647
+ if (unlikely(ret))
4648
+ goto out_handle;
49144649 /* Zero out partial block at the edges of the range */
49154650 ret = ext4_zero_partial_blocks(handle, inode, offset, len);
49164651 if (ret >= 0)
....@@ -4919,6 +4654,7 @@
49194654 if (file->f_flags & O_SYNC)
49204655 ext4_handle_sync(handle);
49214656
4657
+out_handle:
49224658 ext4_journal_stop(handle);
49234659 out_mutex:
49244660 inode_unlock(inode);
....@@ -4947,14 +4683,9 @@
49474683 * range since we would need to re-encrypt blocks with a
49484684 * different IV or XTS tweak (which are based on the logical
49494685 * block number).
4950
- *
4951
- * XXX It's not clear why zero range isn't working, but we'll
4952
- * leave it disabled for encrypted inodes for now. This is a
4953
- * bug we should fix....
49544686 */
49554687 if (IS_ENCRYPTED(inode) &&
4956
- (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
4957
- FALLOC_FL_ZERO_RANGE)))
4688
+ (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
49584689 return -EOPNOTSUPP;
49594690
49604691 /* Return error if mode is not supported */
....@@ -4963,29 +4694,36 @@
49634694 FALLOC_FL_INSERT_RANGE))
49644695 return -EOPNOTSUPP;
49654696
4966
- if (mode & FALLOC_FL_PUNCH_HOLE)
4967
- return ext4_punch_hole(inode, offset, len);
4968
-
4697
+ inode_lock(inode);
49694698 ret = ext4_convert_inline_data(inode);
4699
+ inode_unlock(inode);
49704700 if (ret)
4971
- return ret;
4701
+ goto exit;
49724702
4973
- if (mode & FALLOC_FL_COLLAPSE_RANGE)
4974
- return ext4_collapse_range(inode, offset, len);
4703
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
4704
+ ret = ext4_punch_hole(file, offset, len);
4705
+ goto exit;
4706
+ }
49754707
4976
- if (mode & FALLOC_FL_INSERT_RANGE)
4977
- return ext4_insert_range(inode, offset, len);
4708
+ if (mode & FALLOC_FL_COLLAPSE_RANGE) {
4709
+ ret = ext4_collapse_range(file, offset, len);
4710
+ goto exit;
4711
+ }
49784712
4979
- if (mode & FALLOC_FL_ZERO_RANGE)
4980
- return ext4_zero_range(file, offset, len, mode);
4713
+ if (mode & FALLOC_FL_INSERT_RANGE) {
4714
+ ret = ext4_insert_range(file, offset, len);
4715
+ goto exit;
4716
+ }
49814717
4718
+ if (mode & FALLOC_FL_ZERO_RANGE) {
4719
+ ret = ext4_zero_range(file, offset, len, mode);
4720
+ goto exit;
4721
+ }
49824722 trace_ext4_fallocate_enter(inode, offset, len, mode);
49834723 lblk = offset >> blkbits;
49844724
49854725 max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
49864726 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4987
- if (mode & FALLOC_FL_KEEP_SIZE)
4988
- flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
49894727
49904728 inode_lock(inode);
49914729
....@@ -4998,7 +4736,7 @@
49984736 }
49994737
50004738 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5001
- (offset + len > i_size_read(inode) ||
4739
+ (offset + len > inode->i_size ||
50024740 offset + len > EXT4_I(inode)->i_disksize)) {
50034741 new_size = offset + len;
50044742 ret = inode_newsize_ok(inode, new_size);
....@@ -5009,17 +4747,22 @@
50094747 /* Wait all existing dio workers, newcomers will block on i_mutex */
50104748 inode_dio_wait(inode);
50114749
4750
+ ret = file_modified(file);
4751
+ if (ret)
4752
+ goto out;
4753
+
50124754 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
50134755 if (ret)
50144756 goto out;
50154757
50164758 if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
5017
- ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
5018
- EXT4_I(inode)->i_sync_tid);
4759
+ ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
4760
+ EXT4_I(inode)->i_sync_tid);
50194761 }
50204762 out:
50214763 inode_unlock(inode);
50224764 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
4765
+exit:
50234766 return ret;
50244767 }
50254768
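
The dispatch above is driven entirely by the mode bits of fallocate(2), so each branch is reachable from userspace. For illustration, a minimal caller exercising the FALLOC_FL_PUNCH_HOLE path (which the man page requires to be paired with FALLOC_FL_KEEP_SIZE):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>

/* Punch a hole without changing the file size; the kernel-side
 * dispatch above routes this mode to ext4_punch_hole(). */
int punch_hole(int fd, off_t offset, off_t len)
{
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      offset, len) != 0) {
		perror("fallocate");
		return -1;
	}
	return 0;
}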
....@@ -5037,26 +4780,15 @@
50374780 loff_t offset, ssize_t len)
50384781 {
50394782 unsigned int max_blocks;
5040
- int ret = 0;
5041
- int ret2 = 0;
4783
+ int ret = 0, ret2 = 0, ret3 = 0;
50424784 struct ext4_map_blocks map;
5043
- unsigned int credits, blkbits = inode->i_blkbits;
4785
+ unsigned int blkbits = inode->i_blkbits;
4786
+ unsigned int credits = 0;
50444787
50454788 map.m_lblk = offset >> blkbits;
50464789 max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
50474790
5048
- /*
5049
- * This is somewhat ugly but the idea is clear: When transaction is
5050
- * reserved, everything goes into it. Otherwise we rather start several
5051
- * smaller transactions for conversion of each extent separately.
5052
- */
5053
- if (handle) {
5054
- handle = ext4_journal_start_reserved(handle,
5055
- EXT4_HT_EXT_CONVERT);
5056
- if (IS_ERR(handle))
5057
- return PTR_ERR(handle);
5058
- credits = 0;
5059
- } else {
4791
+ if (!handle) {
50604792 /*
50614793 * credits to insert 1 extent into extent tree
50624794 */
....@@ -5081,74 +4813,57 @@
50814813 "ext4_ext_map_blocks returned %d",
50824814 inode->i_ino, map.m_lblk,
50834815 map.m_len, ret);
5084
- ext4_mark_inode_dirty(handle, inode);
5085
- if (credits)
5086
- ret2 = ext4_journal_stop(handle);
4816
+ ret2 = ext4_mark_inode_dirty(handle, inode);
4817
+ if (credits) {
4818
+ ret3 = ext4_journal_stop(handle);
4819
+ if (unlikely(ret3))
4820
+ ret2 = ret3;
4821
+ }
4822
+
50874823 if (ret <= 0 || ret2)
50884824 break;
50894825 }
5090
- if (!credits)
5091
- ret2 = ext4_journal_stop(handle);
50924826 return ret > 0 ? ret2 : ret;
50934827 }
50944828
5095
-/*
5096
- * If newes is not existing extent (newes->ec_pblk equals zero) find
5097
- * delayed extent at start of newes and update newes accordingly and
5098
- * return start of the next delayed extent.
5099
- *
5100
- * If newes is existing extent (newes->ec_pblk is not equal zero)
5101
- * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed
5102
- * extent found. Leave newes unmodified.
5103
- */
5104
-static int ext4_find_delayed_extent(struct inode *inode,
5105
- struct extent_status *newes)
4829
+int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
51064830 {
5107
- struct extent_status es;
5108
- ext4_lblk_t block, next_del;
4831
+ int ret = 0, err = 0;
4832
+ struct ext4_io_end_vec *io_end_vec;
51094833
5110
- if (newes->es_pblk == 0) {
5111
- ext4_es_find_delayed_extent_range(inode, newes->es_lblk,
5112
- newes->es_lblk + newes->es_len - 1, &es);
5113
-
5114
- /*
5115
- * No extent in extent-tree contains block @newes->es_pblk,
5116
- * then the block may stay in 1)a hole or 2)delayed-extent.
5117
- */
5118
- if (es.es_len == 0)
5119
- /* A hole found. */
5120
- return 0;
5121
-
5122
- if (es.es_lblk > newes->es_lblk) {
5123
- /* A hole found. */
5124
- newes->es_len = min(es.es_lblk - newes->es_lblk,
5125
- newes->es_len);
5126
- return 0;
5127
- }
5128
-
5129
- newes->es_len = es.es_lblk + es.es_len - newes->es_lblk;
4834
+ /*
4835
+ * This is somewhat ugly but the idea is clear: When transaction is
4836
+ * reserved, everything goes into it. Otherwise we rather start several
4837
+ * smaller transactions for conversion of each extent separately.
4838
+ */
4839
+ if (handle) {
4840
+ handle = ext4_journal_start_reserved(handle,
4841
+ EXT4_HT_EXT_CONVERT);
4842
+ if (IS_ERR(handle))
4843
+ return PTR_ERR(handle);
51304844 }
51314845
5132
- block = newes->es_lblk + newes->es_len;
5133
- ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es);
5134
- if (es.es_len == 0)
5135
- next_del = EXT_MAX_BLOCKS;
5136
- else
5137
- next_del = es.es_lblk;
4846
+ list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
4847
+ ret = ext4_convert_unwritten_extents(handle, io_end->inode,
4848
+ io_end_vec->offset,
4849
+ io_end_vec->size);
4850
+ if (ret)
4851
+ break;
4852
+ }
51384853
5139
- return next_del;
4854
+ if (handle)
4855
+ err = ext4_journal_stop(handle);
4856
+
4857
+ return ret < 0 ? ret : err;
51404858 }
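
ext4_convert_unwritten_io_end_vec() walks every (offset, size) vector attached to the io_end, converting each unwritten range; the first failure stops the walk, but the reserved handle is still closed. For reference, the list element it consumes is defined by this same series in ext4.h, roughly as:

struct ext4_io_end_vec {
	struct list_head list;	/* linked into io_end->list_vec */
	loff_t offset;		/* range start in the file, bytes */
	ssize_t size;		/* range length, bytes */
};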
5141
-/* fiemap flags we can handle specified here */
5142
-#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
51434859
5144
-static int ext4_xattr_fiemap(struct inode *inode,
5145
- struct fiemap_extent_info *fieinfo)
4860
+static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap)
51464861 {
51474862 __u64 physical = 0;
5148
- __u64 length;
5149
- __u32 flags = FIEMAP_EXTENT_LAST;
4863
+ __u64 length = 0;
51504864 int blockbits = inode->i_sb->s_blocksize_bits;
51514865 int error = 0;
4866
+ u16 iomap_type;
51524867
51534868 /* in-inode? */
51544869 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
....@@ -5163,102 +4878,138 @@
51634878 EXT4_I(inode)->i_extra_isize;
51644879 physical += offset;
51654880 length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
5166
- flags |= FIEMAP_EXTENT_DATA_INLINE;
51674881 brelse(iloc.bh);
5168
- } else { /* external block */
4882
+ iomap_type = IOMAP_INLINE;
4883
+ } else if (EXT4_I(inode)->i_file_acl) { /* external block */
51694884 physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
51704885 length = inode->i_sb->s_blocksize;
4886
+ iomap_type = IOMAP_MAPPED;
4887
+ } else {
4888
+ /* no in-inode or external block for xattr, so return -ENOENT */
4889
+ error = -ENOENT;
4890
+ goto out;
51714891 }
51724892
5173
- if (physical)
5174
- error = fiemap_fill_next_extent(fieinfo, 0, physical,
5175
- length, flags);
5176
- return (error < 0 ? error : 0);
4893
+ iomap->addr = physical;
4894
+ iomap->offset = 0;
4895
+ iomap->length = length;
4896
+ iomap->type = iomap_type;
4897
+ iomap->flags = 0;
4898
+out:
4899
+ return error;
4900
+}
4901
+
4902
+static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset,
4903
+ loff_t length, unsigned flags,
4904
+ struct iomap *iomap, struct iomap *srcmap)
4905
+{
4906
+ int error;
4907
+
4908
+ error = ext4_iomap_xattr_fiemap(inode, iomap);
4909
+ if (error == 0 && (offset >= iomap->length))
4910
+ error = -ENOENT;
4911
+ return error;
4912
+}
4913
+
4914
+static const struct iomap_ops ext4_iomap_xattr_ops = {
4915
+ .iomap_begin = ext4_iomap_xattr_begin,
4916
+};
4917
+
4918
+static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len)
4919
+{
4920
+ u64 maxbytes;
4921
+
4922
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
4923
+ maxbytes = inode->i_sb->s_maxbytes;
4924
+ else
4925
+ maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
4926
+
4927
+ if (*len == 0)
4928
+ return -EINVAL;
4929
+ if (start > maxbytes)
4930
+ return -EFBIG;
4931
+
4932
+ /*
4933
+ * Shrink request scope to what the fs can actually handle.
4934
+ */
4935
+ if (*len > maxbytes || (maxbytes - *len) < start)
4936
+ *len = maxbytes - start;
4937
+ return 0;
51774938 }
51784939
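
The clamp in ext4_fiemap_check_ranges() is written to avoid unsigned overflow: testing start + *len > maxbytes directly could wrap for large *len, so the condition is rearranged into a form that cannot overflow. A runnable demonstration with hypothetical values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t maxbytes = 1ULL << 53;
	uint64_t start = maxbytes - 10;
	uint64_t len = UINT64_MAX - 5;	/* start + len wraps around */

	/* naive test wraps and wrongly passes */
	printf("naive: %s\n", (start + len > maxbytes) ? "clamp" : "pass");
	/* rearranged test from the patch cannot overflow */
	printf("safe:  %s\n",
	       (len > maxbytes || maxbytes - len < start) ? "clamp" : "pass");
	return 0;			/* prints "pass" then "clamp" */
}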
51794940 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
5180
- __u64 start, __u64 len)
4941
+ u64 start, u64 len)
51814942 {
5182
- ext4_lblk_t start_blk;
4943
+ int error = 0;
4944
+
4945
+ if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4946
+ error = ext4_ext_precache(inode);
4947
+ if (error)
4948
+ return error;
4949
+ fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
4950
+ }
4951
+
4952
+ /*
4953
+ * For bitmap files the maximum size limit could be smaller than
4954
+ * s_maxbytes, so check len here manually instead of just relying on the
4955
+ * generic check.
4956
+ */
4957
+ error = ext4_fiemap_check_ranges(inode, start, &len);
4958
+ if (error)
4959
+ return error;
4960
+
4961
+ if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
4962
+ fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
4963
+ return iomap_fiemap(inode, fieinfo, start, len,
4964
+ &ext4_iomap_xattr_ops);
4965
+ }
4966
+
4967
+ return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops);
4968
+}
4969
+
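
With both branches of ext4_fiemap() now routed through iomap_fiemap(), the behavior is observable from userspace via the FIEMAP ioctl. A minimal caller requesting the xattr mapping (error handling abbreviated):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	struct fiemap *fm;
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;
	fm = calloc(1, sizeof(*fm) + sizeof(struct fiemap_extent));
	fm->fm_length = ~0ULL;			/* whole range */
	fm->fm_flags = FIEMAP_FLAG_XATTR;	/* report the xattr mapping */
	fm->fm_extent_count = 1;
	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0 && fm->fm_mapped_extents)
		printf("xattr at %llu, %llu bytes\n",
		       (unsigned long long)fm->fm_extents[0].fe_physical,
		       (unsigned long long)fm->fm_extents[0].fe_length);
	return 0;
}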
4970
+int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
4971
+ __u64 start, __u64 len)
4972
+{
4973
+ ext4_lblk_t start_blk, len_blks;
4974
+ __u64 last_blk;
51834975 int error = 0;
51844976
51854977 if (ext4_has_inline_data(inode)) {
5186
- int has_inline = 1;
4978
+ int has_inline;
51874979
5188
- error = ext4_inline_data_fiemap(inode, fieinfo, &has_inline,
5189
- start, len);
5190
-
4980
+ down_read(&EXT4_I(inode)->xattr_sem);
4981
+ has_inline = ext4_has_inline_data(inode);
4982
+ up_read(&EXT4_I(inode)->xattr_sem);
51914983 if (has_inline)
5192
- return error;
4984
+ return 0;
51934985 }
51944986
51954987 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
51964988 error = ext4_ext_precache(inode);
51974989 if (error)
51984990 return error;
4991
+ fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
51994992 }
52004993
5201
- /* fallback to generic here if not in extents fmt */
5202
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
5203
- return generic_block_fiemap(inode, fieinfo, start, len,
5204
- ext4_get_block);
4994
+ error = fiemap_prep(inode, fieinfo, start, &len, 0);
4995
+ if (error)
4996
+ return error;
52054997
5206
- if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
5207
- return -EBADR;
4998
+ error = ext4_fiemap_check_ranges(inode, start, &len);
4999
+ if (error)
5000
+ return error;
52085001
5209
- if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
5210
- error = ext4_xattr_fiemap(inode, fieinfo);
5211
- } else {
5212
- ext4_lblk_t len_blks;
5213
- __u64 last_blk;
5214
-
5215
- start_blk = start >> inode->i_sb->s_blocksize_bits;
5216
- last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
5217
- if (last_blk >= EXT_MAX_BLOCKS)
5218
- last_blk = EXT_MAX_BLOCKS-1;
5219
- len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
5220
-
5221
- /*
5222
- * Walk the extent tree gathering extent information
5223
- * and pushing extents back to the user.
5224
- */
5225
- error = ext4_fill_fiemap_extents(inode, start_blk,
5226
- len_blks, fieinfo);
5227
- }
5228
- return error;
5229
-}
5230
-
5231
-/*
5232
- * ext4_access_path:
5233
- * Function to access the path buffer for marking it dirty.
5234
- * It also checks if there are sufficient credits left in the journal handle
5235
- * to update path.
5236
- */
5237
-static int
5238
-ext4_access_path(handle_t *handle, struct inode *inode,
5239
- struct ext4_ext_path *path)
5240
-{
5241
- int credits, err;
5242
-
5243
- if (!ext4_handle_valid(handle))
5244
- return 0;
5002
+ start_blk = start >> inode->i_sb->s_blocksize_bits;
5003
+ last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
5004
+ if (last_blk >= EXT_MAX_BLOCKS)
5005
+ last_blk = EXT_MAX_BLOCKS-1;
5006
+ len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
52455007
52465008 /*
5247
- * Check if need to extend journal credits
5248
- * 3 for leaf, sb, and inode plus 2 (bmap and group
5249
- * descriptor) for each block group; assume two block
5250
- * groups
5009
+ * Walk the extent tree gathering extent information
5010
+ * and pushing extents back to the user.
52515011 */
5252
- if (handle->h_buffer_credits < 7) {
5253
- credits = ext4_writepage_trans_blocks(inode);
5254
- err = ext4_ext_truncate_extend_restart(handle, inode, credits);
5255
- /* EAGAIN is success */
5256
- if (err && err != -EAGAIN)
5257
- return err;
5258
- }
5259
-
5260
- err = ext4_ext_get_access(handle, inode, path);
5261
- return err;
5012
+ return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
52625013 }
52635014
52645015 /*
....@@ -5274,7 +5025,8 @@
52745025 {
52755026 int depth, err = 0;
52765027 struct ext4_extent *ex_start, *ex_last;
5277
- bool update = 0;
5028
+ bool update = false;
5029
+ int credits, restart_credits;
52785030 depth = path->p_depth;
52795031
52805032 while (depth >= 0) {
....@@ -5284,13 +5036,26 @@
52845036 return -EFSCORRUPTED;
52855037
52865038 ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
5039
+ /* leaf + sb + inode */
5040
+ credits = 3;
5041
+ if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
5042
+ update = true;
5043
+ /* extent tree + sb + inode */
5044
+ credits = depth + 2;
5045
+ }
52875046
5288
- err = ext4_access_path(handle, inode, path + depth);
5047
+ restart_credits = ext4_writepage_trans_blocks(inode);
5048
+ err = ext4_datasem_ensure_credits(handle, inode, credits,
5049
+ restart_credits, 0);
5050
+ if (err) {
5051
+ if (err > 0)
5052
+ err = -EAGAIN;
5053
+ goto out;
5054
+ }
5055
+
5056
+ err = ext4_ext_get_access(handle, inode, path + depth);
52895057 if (err)
52905058 goto out;
5291
-
5292
- if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
5293
- update = 1;
52945059
52955060 while (ex_start <= ex_last) {
52965061 if (SHIFT == SHIFT_LEFT) {
....@@ -5321,7 +5086,7 @@
53215086 }
53225087
53235088 /* Update index too */
5324
- err = ext4_access_path(handle, inode, path + depth);
5089
+ err = ext4_ext_get_access(handle, inode, path + depth);
53255090 if (err)
53265091 goto out;
53275092
....@@ -5360,6 +5125,7 @@
53605125 int ret = 0, depth;
53615126 struct ext4_extent *extent;
53625127 ext4_lblk_t stop, *iterator, ex_start, ex_end;
5128
+ ext4_lblk_t tmp = EXT_MAX_BLOCKS;
53635129
53645130 /* Let path point to the last extent */
53655131 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
....@@ -5413,10 +5179,15 @@
54135179 * till we reach stop. In case of right shift, iterator points to stop
54145180 * and it is decreased till we reach start.
54155181 */
5182
+again:
5183
+ ret = 0;
54165184 if (SHIFT == SHIFT_LEFT)
54175185 iterator = &start;
54185186 else
54195187 iterator = &stop;
5188
+
5189
+ if (tmp != EXT_MAX_BLOCKS)
5190
+ *iterator = tmp;
54205191
54215192 /*
54225193 * It's safe to start updating extents. Start and stop are unsigned, so
@@ -5446,24 +5217,35 @@
 			}
 		}
 
+		tmp = *iterator;
 		if (SHIFT == SHIFT_LEFT) {
 			extent = EXT_LAST_EXTENT(path[depth].p_hdr);
 			*iterator = le32_to_cpu(extent->ee_block) +
 					ext4_ext_get_actual_len(extent);
 		} else {
 			extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
-			if (le32_to_cpu(extent->ee_block) > 0)
+			if (le32_to_cpu(extent->ee_block) > start)
 				*iterator = le32_to_cpu(extent->ee_block) - 1;
-			else
-				/* Beginning is reached, end of the loop */
+			else if (le32_to_cpu(extent->ee_block) == start)
 				iterator = NULL;
-			/* Update path extent in case we need to stop */
-			while (le32_to_cpu(extent->ee_block) < start)
+			else {
+				extent = EXT_LAST_EXTENT(path[depth].p_hdr);
+				while (le32_to_cpu(extent->ee_block) >= start)
+					extent--;
+
+				if (extent == EXT_LAST_EXTENT(path[depth].p_hdr))
+					break;
+
 				extent++;
+				iterator = NULL;
+			}
 			path[depth].p_ext = extent;
 		}
 		ret = ext4_ext_shift_path_extents(path, shift, inode,
 				handle, SHIFT);
+		/* iterator can be NULL which means we should break */
+		if (ret == -EAGAIN)
+			goto again;
 		if (ret)
 			break;
 	}
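The new tmp/again: pair turns a transaction restart into a resumable iteration: the loop records the iterator position before each pass, and -EAGAIN from the shift helper rewinds to that saved position instead of to the beginning. A minimal userspace sketch of the same pattern, where process_chunk() is a made-up stand-in that fails once:

#include <errno.h>
#include <stdio.h>

static int restarts;

/* Pretend the chunk at position 30 hits a "transaction restart" once. */
static int process_chunk(unsigned int pos)
{
    if (pos == 30 && restarts++ == 0)
        return -EAGAIN;
    return 0;
}

int main(void)
{
    unsigned int iterator = 0, tmp = (unsigned int)-1;
    int ret;

again:
    ret = 0;
    if (tmp != (unsigned int)-1)
        iterator = tmp;     /* resume where the last pass stopped */

    while (iterator < 50) {
        tmp = iterator;     /* remember the position before working */
        iterator += 10;
        ret = process_chunk(tmp);
        if (ret == -EAGAIN)
            goto again;     /* redo only the interrupted chunk */
        if (ret)
            break;
    }
    printf("done, ret=%d, restarts=%d\n", ret, restarts);
    return 0;
}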
@@ -5478,8 +5260,9 @@
 * This implements the fallocate's collapse range functionality for ext4
 * Returns: 0 and non-zero on error.
 */
-int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
 {
+	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	ext4_lblk_t punch_start, punch_stop;
 	handle_t *handle;
@@ -5495,12 +5278,8 @@
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		return -EOPNOTSUPP;
 
-	/* Collapse range works only on fs block size aligned offsets. */
-	if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
-	    len & (EXT4_CLUSTER_SIZE(sb) - 1))
-		return -EINVAL;
-
-	if (!S_ISREG(inode->i_mode))
+	/* Collapse range works only on fs cluster size aligned regions. */
+	if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
 		return -EINVAL;
 
 	trace_ext4_collapse_range(inode, offset, len);
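The rewritten check relies on the cluster size being a power of two, so OR-ing offset and len lets a single mask test cover both values: the OR is aligned exactly when each operand is. A small sketch, assuming 4 KiB clusters:

#include <assert.h>
#include <stdint.h>

#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

int main(void)
{
    uint64_t clu = 4096;    /* assumed cluster size */

    /* both 8192 and 4096 are cluster-aligned, so their OR is too */
    assert(IS_ALIGNED(8192 | 4096, clu));
    /* 512 is not aligned, so the combined test fails */
    assert(!IS_ALIGNED(8192 | 512, clu));
    return 0;
}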
@@ -5520,7 +5299,7 @@
 	 * There is no need to overlap collapse range with EOF, in which case
 	 * it is effectively a truncate operation
 	 */
-	if (offset + len >= i_size_read(inode)) {
+	if (offset + len >= inode->i_size) {
 		ret = -EINVAL;
 		goto out_mutex;
 	}
@@ -5533,6 +5312,10 @@
 
 	/* Wait for existing dio to complete */
 	inode_dio_wait(inode);
+
+	ret = file_modified(file);
+	if (ret)
+		goto out_mutex;
 
 	/*
 	 * Prevent page faults from reinstantiating pages we have released from
@@ -5573,9 +5356,10 @@
 		ret = PTR_ERR(handle);
 		goto out_mmap;
 	}
+	ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
 
 	down_write(&EXT4_I(inode)->i_data_sem);
-	ext4_discard_preallocations(inode);
+	ext4_discard_preallocations(inode, 0);
 
 	ret = ext4_es_remove_extent(inode, punch_start,
 				    EXT_MAX_BLOCKS - punch_start);
@@ -5589,7 +5373,7 @@
 		up_write(&EXT4_I(inode)->i_data_sem);
 		goto out_stop;
 	}
-	ext4_discard_preallocations(inode);
+	ext4_discard_preallocations(inode, 0);
 
 	ret = ext4_ext_shift_extents(inode, handle, punch_stop,
 				     punch_stop - punch_start, SHIFT_LEFT);
@@ -5598,7 +5382,7 @@
 		goto out_stop;
 	}
 
-	new_size = i_size_read(inode) - len;
+	new_size = inode->i_size - len;
 	i_size_write(inode, new_size);
 	EXT4_I(inode)->i_disksize = new_size;
 
@@ -5606,11 +5390,12 @@
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
 	inode->i_mtime = inode->i_ctime = current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
+	ret = ext4_mark_inode_dirty(handle, inode);
 	ext4_update_inode_fsync_trans(handle, inode, 1);
 
 out_stop:
 	ext4_journal_stop(handle);
+	ext4_fc_stop_ineligible(sb);
 out_mmap:
 	up_write(&EXT4_I(inode)->i_mmap_sem);
 out_mutex:
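From userspace this path is reached through fallocate(2) with FALLOC_FL_COLLAPSE_RANGE; the patch additionally brackets the whole operation with fast-commit ineligibility marks. A minimal caller, assuming a 4 KiB cluster size for the offsets:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    int fd;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return 1;
    }
    fd = open(argv[1], O_RDWR);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    /* offset and length must both be cluster-aligned, or the
     * kernel-side check above rejects the call with EINVAL */
    if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 4096) < 0)
        perror("FALLOC_FL_COLLAPSE_RANGE");
    close(fd);
    return 0;
}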
@@ -5626,8 +5411,9 @@
 * by len bytes.
 * Returns 0 on success, error otherwise.
 */
-int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
 {
+	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	handle_t *handle;
 	struct ext4_ext_path *path;
@@ -5645,13 +5431,9 @@
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		return -EOPNOTSUPP;
 
-	/* Insert range works only on fs block size aligned offsets. */
-	if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
-	    len & (EXT4_CLUSTER_SIZE(sb) - 1))
+	/* Insert range works only on fs cluster size aligned regions. */
+	if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
 		return -EINVAL;
-
-	if (!S_ISREG(inode->i_mode))
-		return -EOPNOTSUPP;
 
 	trace_ext4_insert_range(inode, offset, len);
 
@@ -5672,20 +5454,24 @@
 		goto out_mutex;
 	}
 
-	/* Check for wrap through zero */
-	if (inode->i_size + len > inode->i_sb->s_maxbytes) {
+	/* Check whether the maximum file size would be exceeded */
+	if (len > inode->i_sb->s_maxbytes - inode->i_size) {
 		ret = -EFBIG;
 		goto out_mutex;
 	}
 
-	/* Offset should be less than i_size */
-	if (offset >= i_size_read(inode)) {
+	/* Offset must be less than i_size */
+	if (offset >= inode->i_size) {
 		ret = -EINVAL;
 		goto out_mutex;
 	}
 
 	/* Wait for existing dio to complete */
 	inode_dio_wait(inode);
+
+	ret = file_modified(file);
+	if (ret)
+		goto out_mutex;
 
 	/*
 	 * Prevent page faults from reinstantiating pages we have released from
@@ -5715,6 +5501,7 @@
 		ret = PTR_ERR(handle);
 		goto out_mmap;
 	}
+	ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
 
 	/* Expand file to avoid data loss if there is error while shifting */
 	inode->i_size += len;
@@ -5725,7 +5512,7 @@
 		goto out_stop;
 
 	down_write(&EXT4_I(inode)->i_data_sem);
-	ext4_discard_preallocations(inode);
+	ext4_discard_preallocations(inode, 0);
 
 	path = ext4_find_extent(inode, offset_lblk, NULL, 0);
 	if (IS_ERR(path)) {
@@ -5789,6 +5576,7 @@
 
 out_stop:
 	ext4_journal_stop(handle);
+	ext4_fc_stop_ineligible(sb);
 out_mmap:
 	up_write(&EXT4_I(inode)->i_mmap_sem);
 out_mutex:
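The reworded size check above is not just cosmetic: computing len > s_maxbytes - i_size cannot wrap (i_size never exceeds s_maxbytes), while the old i_size + len could overflow and slip past the limit. A small sketch of the difference:

#include <stdint.h>
#include <stdio.h>

/* Overflow-safe form of "i_size + len > maxbytes". */
static int would_exceed(uint64_t i_size, uint64_t len, uint64_t maxbytes)
{
    return len > maxbytes - i_size;
}

int main(void)
{
    /* the naive sum 100 + (UINT64_MAX - 50) wraps to 49 and would
     * falsely pass a "> 200" test; the subtraction form catches it */
    printf("%d\n", would_exceed(100, UINT64_MAX - 50, 200)); /* 1 */
    printf("%d\n", would_exceed(100, 50, 200));              /* 0 */
    return 0;
}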
@@ -5797,8 +5585,8 @@
 }
 
 /**
- * ext4_swap_extents - Swap extents between two inodes
- *
+ * ext4_swap_extents() - Swap extents between two inodes
+ * @handle: handle for this transaction
 * @inode1:	First inode
 * @inode2:	Second inode
 * @lblk1:	Start block for first inode
@@ -5859,7 +5647,7 @@
 	}
 	ex1 = path1[path1->p_depth].p_ext;
 	ex2 = path2[path2->p_depth].p_ext;
-	/* Do we have somthing to swap ? */
+	/* Do we have something to swap ? */
 	if (unlikely(!ex2 || !ex1))
 		goto finish;
 
@@ -5991,3 +5779,365 @@
 	}
 	return replaced_count;
 }
+
+/*
+ * ext4_clu_mapped - determine whether any block in a logical cluster has
+ *                   been mapped to a physical cluster
+ *
+ * @inode - file containing the logical cluster
+ * @lclu - logical cluster of interest
+ *
+ * Returns 1 if any block in the logical cluster is mapped, signifying
+ * that a physical cluster has been allocated for it.  Otherwise,
+ * returns 0.  Can also return negative error codes.  Derived from
+ * ext4_ext_map_blocks().
+ */
+int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_ext_path *path;
+	int depth, mapped = 0, err = 0;
+	struct ext4_extent *extent;
+	ext4_lblk_t first_lblk, first_lclu, last_lclu;
+
+	/*
+	 * if data can be stored inline, the logical cluster isn't
+	 * mapped - no physical clusters have been allocated, and the
+	 * file has no extents
+	 */
+	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) ||
+	    ext4_has_inline_data(inode))
+		return 0;
+
+	/* search for the extent closest to the first block in the cluster */
+	path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
+	if (IS_ERR(path)) {
+		err = PTR_ERR(path);
+		path = NULL;
+		goto out;
+	}
+
+	depth = ext_depth(inode);
+
+	/*
+	 * A consistent leaf must not be empty.  This situation is possible,
+	 * though, _during_ tree modification, and it's why an assert can't
+	 * be put in ext4_find_extent().
+	 */
+	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
+		EXT4_ERROR_INODE(inode,
+		    "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
+				 (unsigned long) EXT4_C2B(sbi, lclu),
+				 depth, path[depth].p_block);
+		err = -EFSCORRUPTED;
+		goto out;
+	}
+
+	extent = path[depth].p_ext;
+
+	/* can't be mapped if the extent tree is empty */
+	if (extent == NULL)
+		goto out;
+
+	first_lblk = le32_to_cpu(extent->ee_block);
+	first_lclu = EXT4_B2C(sbi, first_lblk);
+
+	/*
+	 * Three possible outcomes at this point - found extent spanning
+	 * the target cluster, to the left of the target cluster, or to the
+	 * right of the target cluster.  The first two cases are handled here.
+	 * The last case indicates the target cluster is not mapped.
+	 */
+	if (lclu >= first_lclu) {
+		last_lclu = EXT4_B2C(sbi, first_lblk +
+				     ext4_ext_get_actual_len(extent) - 1);
+		if (lclu <= last_lclu) {
+			mapped = 1;
+		} else {
+			first_lblk = ext4_ext_next_allocated_block(path);
+			first_lclu = EXT4_B2C(sbi, first_lblk);
+			if (lclu == first_lclu)
+				mapped = 1;
+		}
+	}
+
+out:
+	ext4_ext_drop_refs(path);
+	kfree(path);
+
+	return err ? err : mapped;
+}
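The cluster arithmetic above (EXT4_B2C()/EXT4_C2B()) reduces to plain shifting when, as with ext4 bigalloc, the cluster size is a power-of-two multiple of the block size and a single shift count converts in both directions. A minimal sketch under that assumption:

#include <stdio.h>

static unsigned int b2c(unsigned int lblk, unsigned int cluster_bits)
{
    return lblk >> cluster_bits;    /* block -> containing cluster */
}

static unsigned int c2b(unsigned int lclu, unsigned int cluster_bits)
{
    return lclu << cluster_bits;    /* cluster -> its first block */
}

int main(void)
{
    /* with 16 blocks per cluster (cluster_bits == 4), block 37 lives
     * in cluster 2, and cluster 2 starts at block 32 */
    printf("%u %u\n", b2c(37, 4), c2b(2, 4));   /* prints "2 32" */
    return 0;
}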
+
+/*
+ * Updates physical block address and unwritten status of extent
+ * starting at lblk start and of len. If such an extent doesn't exist,
+ * this function splits the extent tree appropriately to create an
+ * extent like this.  This function is called in the fast commit
+ * replay path.  Returns 0 on success and error on failure.
+ */
+int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
+			      int len, int unwritten, ext4_fsblk_t pblk)
+{
+	struct ext4_ext_path *path = NULL, *ppath;
+	struct ext4_extent *ex;
+	int ret;
+
+	path = ext4_find_extent(inode, start, NULL, 0);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+	ex = path[path->p_depth].p_ext;
+	if (!ex) {
+		ret = -EFSCORRUPTED;
+		goto out;
+	}
+
+	if (le32_to_cpu(ex->ee_block) != start ||
+	    ext4_ext_get_actual_len(ex) != len) {
+		/* We need to split this extent to match our extent first */
+		ppath = path;
+		down_write(&EXT4_I(inode)->i_data_sem);
+		ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1);
+		up_write(&EXT4_I(inode)->i_data_sem);
+		if (ret)
+			goto out;
+		kfree(path);
+		path = ext4_find_extent(inode, start, NULL, 0);
+		if (IS_ERR(path))
+			return -1;
+		ppath = path;
+		ex = path[path->p_depth].p_ext;
+		WARN_ON(le32_to_cpu(ex->ee_block) != start);
+		if (ext4_ext_get_actual_len(ex) != len) {
+			down_write(&EXT4_I(inode)->i_data_sem);
+			ret = ext4_force_split_extent_at(NULL, inode, &ppath,
+							 start + len, 1);
+			up_write(&EXT4_I(inode)->i_data_sem);
+			if (ret)
+				goto out;
+			kfree(path);
+			path = ext4_find_extent(inode, start, NULL, 0);
+			if (IS_ERR(path))
+				return -EINVAL;
+			ex = path[path->p_depth].p_ext;
+		}
+	}
+	if (unwritten)
+		ext4_ext_mark_unwritten(ex);
+	else
+		ext4_ext_mark_initialized(ex);
+	ext4_ext_store_pblock(ex, pblk);
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
+	up_write(&EXT4_I(inode)->i_data_sem);
+out:
+	ext4_ext_drop_refs(path);
+	kfree(path);
+	ext4_mark_inode_dirty(NULL, inode);
+	return ret;
+}
+
+/* Try to shrink the extent tree */
+void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
+{
+	struct ext4_ext_path *path = NULL;
+	struct ext4_extent *ex;
+	ext4_lblk_t old_cur, cur = 0;
+
+	while (cur < end) {
+		path = ext4_find_extent(inode, cur, NULL, 0);
+		if (IS_ERR(path))
+			return;
+		ex = path[path->p_depth].p_ext;
+		if (!ex) {
+			ext4_ext_drop_refs(path);
+			kfree(path);
+			ext4_mark_inode_dirty(NULL, inode);
+			return;
+		}
+		old_cur = cur;
+		cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
+		if (cur <= old_cur)
+			cur = old_cur + 1;
+		ext4_ext_try_to_merge(NULL, inode, path, ex);
+		down_write(&EXT4_I(inode)->i_data_sem);
+		ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
+		up_write(&EXT4_I(inode)->i_data_sem);
+		ext4_mark_inode_dirty(NULL, inode);
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+}
+
+/* Check if *cur is a hole and if it is, skip it */
+static int skip_hole(struct inode *inode, ext4_lblk_t *cur)
+{
+	int ret;
+	struct ext4_map_blocks map;
+
+	map.m_lblk = *cur;
+	map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;
+
+	ret = ext4_map_blocks(NULL, inode, &map, 0);
+	if (ret < 0)
+		return ret;
+	if (ret != 0)
+		return 0;
+	*cur = *cur + map.m_len;
+	return 0;
+}
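skip_hole() leans on ext4_map_blocks() reporting the length of an unmapped range in map.m_len even when it returns 0, so the cursor can jump an entire hole in one step. A userspace sketch of that contract, where map_blocks() is a toy stand-in:

#include <stdio.h>

struct map { unsigned int lblk, len; };

/* returns number of mapped blocks, 0 if the range starts in a hole;
 * in the hole case, len is trimmed to the hole's extent */
static int map_blocks(struct map *m)
{
    if (m->lblk < 8) {          /* pretend blocks 0..7 are a hole */
        m->len = 8 - m->lblk;
        return 0;
    }
    return m->len;
}

static void skip_hole(unsigned int *cur, unsigned int end)
{
    struct map m = { .lblk = *cur, .len = end - *cur };

    if (map_blocks(&m) == 0)
        *cur += m.len;          /* jump over the whole hole at once */
}

int main(void)
{
    unsigned int cur = 3;

    skip_hole(&cur, 100);
    printf("%u\n", cur);        /* prints 8 */
    return 0;
}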
+
+/* Count number of blocks used by this inode and update i_blocks */
+int ext4_ext_replay_set_iblocks(struct inode *inode)
+{
+	struct ext4_ext_path *path = NULL, *path2 = NULL;
+	struct ext4_extent *ex;
+	ext4_lblk_t cur = 0, end;
+	int numblks = 0, i, ret = 0;
+	ext4_fsblk_t cmp1, cmp2;
+	struct ext4_map_blocks map;
+
+	/* Determine the size of the file first */
+	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
+					EXT4_EX_NOCACHE);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+	ex = path[path->p_depth].p_ext;
+	if (!ex) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+		goto out;
+	}
+	end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
+	ext4_ext_drop_refs(path);
+	kfree(path);
+
+	/* Count the number of data blocks */
+	cur = 0;
+	while (cur < end) {
+		map.m_lblk = cur;
+		map.m_len = end - cur;
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+		if (ret < 0)
+			break;
+		if (ret > 0)
+			numblks += ret;
+		cur = cur + map.m_len;
+	}
+
+	/*
+	 * Count the number of extent tree blocks. We do it by looking up
+	 * two successive extents and determining the difference between
+	 * their paths. When path is different for 2 successive extents
+	 * we compare the blocks in the path at each level and increment
+	 * iblocks by total number of differences found.
+	 */
+	cur = 0;
+	ret = skip_hole(inode, &cur);
+	if (ret < 0)
+		goto out;
+	path = ext4_find_extent(inode, cur, NULL, 0);
+	if (IS_ERR(path))
+		goto out;
+	numblks += path->p_depth;
+	ext4_ext_drop_refs(path);
+	kfree(path);
+	while (cur < end) {
+		path = ext4_find_extent(inode, cur, NULL, 0);
+		if (IS_ERR(path))
+			break;
+		ex = path[path->p_depth].p_ext;
+		if (!ex) {
+			ext4_ext_drop_refs(path);
+			kfree(path);
+			return 0;
+		}
+		cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
+					ext4_ext_get_actual_len(ex));
+		ret = skip_hole(inode, &cur);
+		if (ret < 0) {
+			ext4_ext_drop_refs(path);
+			kfree(path);
+			break;
+		}
+		path2 = ext4_find_extent(inode, cur, NULL, 0);
+		if (IS_ERR(path2)) {
+			ext4_ext_drop_refs(path);
+			kfree(path);
+			break;
+		}
+		ex = path2[path2->p_depth].p_ext;
+		for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
+			cmp1 = cmp2 = 0;
+			if (i <= path->p_depth)
+				cmp1 = path[i].p_bh ?
+					path[i].p_bh->b_blocknr : 0;
+			if (i <= path2->p_depth)
+				cmp2 = path2[i].p_bh ?
+					path2[i].p_bh->b_blocknr : 0;
+			if (cmp1 != cmp2 && cmp2 != 0)
+				numblks++;
+		}
+		ext4_ext_drop_refs(path);
+		ext4_ext_drop_refs(path2);
+		kfree(path);
+		kfree(path2);
+	}
+
+out:
+	inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
+	ext4_mark_inode_dirty(NULL, inode);
+	return 0;
+}
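The final i_blocks assignment converts filesystem blocks into the 512-byte units i_blocks is traditionally kept in, hence the shift by (s_blocksize_bits - 9). For example:

#include <stdio.h>

int main(void)
{
    unsigned long long numblks = 10;
    unsigned int blocksize_bits = 12;   /* 4 KiB fs blocks */

    /* 10 blocks of 4096 bytes == 80 sectors of 512 bytes */
    printf("%llu\n", numblks << (blocksize_bits - 9));  /* prints 80 */
    return 0;
}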
+
+int ext4_ext_clear_bb(struct inode *inode)
+{
+	struct ext4_ext_path *path = NULL;
+	struct ext4_extent *ex;
+	ext4_lblk_t cur = 0, end;
+	int j, ret = 0;
+	struct ext4_map_blocks map;
+
+	/* Determine the size of the file first */
+	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
+					EXT4_EX_NOCACHE);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+	ex = path[path->p_depth].p_ext;
+	if (!ex) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+		return 0;
+	}
+	end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
+	ext4_ext_drop_refs(path);
+	kfree(path);
+
+	cur = 0;
+	while (cur < end) {
+		map.m_lblk = cur;
+		map.m_len = end - cur;
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+		if (ret < 0)
+			break;
+		if (ret > 0) {
+			path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
+			if (!IS_ERR_OR_NULL(path)) {
+				for (j = 0; j < path->p_depth; j++) {
+					ext4_mb_mark_bb(inode->i_sb,
+							path[j].p_block, 1, 0);
+					ext4_fc_record_regions(inode->i_sb,
+							inode->i_ino, 0,
+							path[j].p_block, 1, 1);
+				}
+				ext4_ext_drop_refs(path);
+				kfree(path);
+			}
+			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
+			ext4_fc_record_regions(inode->i_sb, inode->i_ino,
+					map.m_lblk, map.m_pblk, map.m_len, 1);
+		}
+		cur = cur + map.m_len;
+	}
+
+	return 0;
+}