hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/ext4/xattr.c
....@@ -123,7 +123,11 @@
123123 #ifdef CONFIG_LOCKDEP
/*
 * Assign lockdep subclasses to the two locks of an EA inode: subclass 1
 * for the VFS i_rwsem and I_DATA_SEM_EA for the ext4 i_data_sem.
 * NOTE(review): presumably this lets lockdep tell EA-inode locks apart
 * from the parent inode's locks when they nest -- confirm with callers.
 */
124124 void ext4_xattr_inode_set_class(struct inode *ea_inode)
125125 {
126
+ struct ext4_inode_info *ei = EXT4_I(ea_inode);
127
+
126128 lockdep_set_subclass(&ea_inode->i_rwsem, 1);
/*
 * NOTE(review): this definition sits inside #ifdef CONFIG_LOCKDEP, so the
 * "!CONFIG_LOCKDEP" wording in the comment below may be stale -- confirm
 * whether the (void) cast is still needed.
 */
129
+ (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
130
+ lockdep_set_subclass(&ei->i_data_sem, I_DATA_SEM_EA);
127131 }
128132 #endif
129133
....@@ -386,7 +390,18 @@
386390 struct inode *inode;
387391 int err;
388392
389
- inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
393
+ /*
394
+ * We have to check for this corruption early as otherwise
395
+ * iget_locked() could wait indefinitely for the state of our
396
+ * parent inode.
397
+ */
398
+ if (parent->i_ino == ea_ino) {
399
+ ext4_error(parent->i_sb,
400
+ "Parent and EA inode have the same ino %lu", ea_ino);
401
+ return -EFSCORRUPTED;
402
+ }
403
+
404
+ inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_EA_INODE);
390405 if (IS_ERR(inode)) {
391406 err = PTR_ERR(inode);
392407 ext4_error(parent->i_sb,
....@@ -394,23 +409,6 @@
394409 err);
395410 return err;
396411 }
397
-
398
- if (is_bad_inode(inode)) {
399
- ext4_error(parent->i_sb,
400
- "error while reading EA inode %lu is_bad_inode",
401
- ea_ino);
402
- err = -EIO;
403
- goto error;
404
- }
405
-
406
- if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
407
- ext4_error(parent->i_sb,
408
- "EA inode %lu does not have EXT4_EA_INODE_FL flag",
409
- ea_ino);
410
- err = -EINVAL;
411
- goto error;
412
- }
413
-
414412 ext4_xattr_inode_set_class(inode);
415413
416414 /*
....@@ -431,9 +429,21 @@
431429
432430 *ea_inode = inode;
433431 return 0;
434
-error:
435
- iput(inode);
436
- return err;
432
+}
433
+
434
+/*
 * Remove entry from mbcache when EA inode is getting evicted.
 *
 * Returns immediately when EA_INODE_CACHE(inode) is NULL. Otherwise it
 * repeatedly asks the cache to delete the entry keyed by the inode's
 * xattr hash and inode number; whenever an entry is handed back instead,
 * it waits for that entry to become unused, calls mb_cache_entry_put()
 * on it and retries.
 * NOTE(review): presumably mb_cache_entry_delete_or_get() returns NULL
 * once the entry has been deleted or was never cached -- confirm against
 * the mbcache implementation.
 */
435
+void ext4_evict_ea_inode(struct inode *inode)
436
+{
437
+ struct mb_cache_entry *oe;
438
+
439
+ if (!EA_INODE_CACHE(inode))
440
+ return;
441
+ /* Wait for entry to get unused so that we can remove it */
442
+ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
443
+ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
444
+ mb_cache_entry_wait_unused(oe);
445
+ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
446
+ }
437447 }
438448
439449 static int
....@@ -972,10 +982,8 @@
972982 static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
973983 int ref_change)
974984 {
975
- struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
976985 struct ext4_iloc iloc;
977986 s64 ref_count;
978
- u32 hash;
979987 int ret;
980988
981989 inode_lock(ea_inode);
....@@ -998,14 +1006,6 @@
9981006
9991007 set_nlink(ea_inode, 1);
10001008 ext4_orphan_del(handle, ea_inode);
1001
-
1002
- if (ea_inode_cache) {
1003
- hash = ext4_xattr_inode_get_hash(ea_inode);
1004
- mb_cache_entry_create(ea_inode_cache,
1005
- GFP_NOFS, hash,
1006
- ea_inode->i_ino,
1007
- true /* reusable */);
1008
- }
10091009 }
10101010 } else {
10111011 WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
....@@ -1018,12 +1018,6 @@
10181018
10191019 clear_nlink(ea_inode);
10201020 ext4_orphan_add(handle, ea_inode);
1021
-
1022
- if (ea_inode_cache) {
1023
- hash = ext4_xattr_inode_get_hash(ea_inode);
1024
- mb_cache_entry_delete(ea_inode_cache, hash,
1025
- ea_inode->i_ino);
1026
- }
10271021 }
10281022 }
10291023
....@@ -1231,6 +1225,7 @@
12311225 if (error)
12321226 goto out;
12331227
1228
+retry_ref:
12341229 lock_buffer(bh);
12351230 hash = le32_to_cpu(BHDR(bh)->h_hash);
12361231 ref = le32_to_cpu(BHDR(bh)->h_refcount);
....@@ -1240,9 +1235,18 @@
12401235 * This must happen under buffer lock for
12411236 * ext4_xattr_block_set() to reliably detect freed block
12421237 */
1243
- if (ea_block_cache)
1244
- mb_cache_entry_delete(ea_block_cache, hash,
1245
- bh->b_blocknr);
1238
+ if (ea_block_cache) {
1239
+ struct mb_cache_entry *oe;
1240
+
1241
+ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
1242
+ bh->b_blocknr);
1243
+ if (oe) {
1244
+ unlock_buffer(bh);
1245
+ mb_cache_entry_wait_unused(oe);
1246
+ mb_cache_entry_put(ea_block_cache, oe);
1247
+ goto retry_ref;
1248
+ }
1249
+ }
12461250 get_bh(bh);
12471251 unlock_buffer(bh);
12481252
....@@ -1266,7 +1270,7 @@
12661270 ce = mb_cache_entry_get(ea_block_cache, hash,
12671271 bh->b_blocknr);
12681272 if (ce) {
1269
- ce->e_reusable = 1;
1273
+ set_bit(MBE_REUSABLE_B, &ce->e_flags);
12701274 mb_cache_entry_put(ea_block_cache, ce);
12711275 }
12721276 }
....@@ -1406,6 +1410,13 @@
14061410 uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
14071411 int err;
14081412
1413
+ if (inode->i_sb->s_root == NULL) {
1414
+ ext4_warning(inode->i_sb,
1415
+ "refuse to create EA inode when umounting");
1416
+ WARN_ON(1);
1417
+ return ERR_PTR(-EINVAL);
1418
+ }
1419
+
14091420 /*
14101421 * Let the next inode be the goal, so we try and allocate the EA inode
14111422 * in the same group, or nearby one.
....@@ -1425,6 +1436,9 @@
14251436 if (!err)
14261437 err = ext4_inode_attach_jinode(ea_inode);
14271438 if (err) {
1439
+ if (ext4_xattr_inode_dec_ref(handle, ea_inode))
1440
+ ext4_warning_inode(ea_inode,
1441
+ "cleanup dec ref error %d", err);
14281442 iput(ea_inode);
14291443 return ERR_PTR(err);
14301444 }
....@@ -1470,11 +1484,11 @@
14701484
14711485 while (ce) {
14721486 ea_inode = ext4_iget(inode->i_sb, ce->e_value,
1473
- EXT4_IGET_NORMAL);
1474
- if (!IS_ERR(ea_inode) &&
1475
- !is_bad_inode(ea_inode) &&
1476
- (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) &&
1477
- i_size_read(ea_inode) == value_len &&
1487
+ EXT4_IGET_EA_INODE);
1488
+ if (IS_ERR(ea_inode))
1489
+ goto next_entry;
1490
+ ext4_xattr_inode_set_class(ea_inode);
1491
+ if (i_size_read(ea_inode) == value_len &&
14781492 !ext4_xattr_inode_read(ea_inode, ea_data, value_len) &&
14791493 !ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data,
14801494 value_len) &&
....@@ -1484,9 +1498,8 @@
14841498 kvfree(ea_data);
14851499 return ea_inode;
14861500 }
1487
-
1488
- if (!IS_ERR(ea_inode))
1489
- iput(ea_inode);
1501
+ iput(ea_inode);
1502
+ next_entry:
14901503 ce = mb_cache_entry_find_next(ea_inode_cache, ce);
14911504 }
14921505 kvfree(ea_data);
....@@ -1614,7 +1627,7 @@
16141627 * If storing the value in an external inode is an option,
16151628 * reserve space for xattr entries/names in the external
16161629 * attribute block so that a long value does not occupy the
1617
- * whole space and prevent futher entries being added.
1630
+ * whole space and prevent further entries being added.
16181631 */
16191632 if (ext4_has_feature_ea_inode(inode->i_sb) &&
16201633 new_size && is_block &&
....@@ -1712,6 +1725,20 @@
17121725 memmove(here, (void *)here + size,
17131726 (void *)last - (void *)here + sizeof(__u32));
17141727 memset(last, 0, size);
1728
+
1729
+ /*
1730
+ * Update i_inline_off - moved ibody region might contain
1731
+ * system.data attribute. Handling a failure here won't
1732
+ * cause other complications for setting an xattr.
1733
+ */
1734
+ if (!is_block && ext4_has_inline_data(inode)) {
1735
+ ret = ext4_find_inline_data_nolock(inode);
1736
+ if (ret) {
1737
+ ext4_warning_inode(inode,
1738
+ "unable to update i_inline_off");
1739
+ goto out;
1740
+ }
1741
+ }
17151742 } else if (s->not_found) {
17161743 /* Insert new name. */
17171744 size_t size = EXT4_XATTR_LEN(name_len);
....@@ -1851,6 +1878,8 @@
18511878 #define header(x) ((struct ext4_xattr_header *)(x))
18521879
18531880 if (s->base) {
1881
+ int offset = (char *)s->here - bs->bh->b_data;
1882
+
18541883 BUFFER_TRACE(bs->bh, "get_write_access");
18551884 error = ext4_journal_get_write_access(handle, bs->bh);
18561885 if (error)
....@@ -1865,9 +1894,20 @@
18651894 * ext4_xattr_block_set() to reliably detect modified
18661895 * block
18671896 */
1868
- if (ea_block_cache)
1869
- mb_cache_entry_delete(ea_block_cache, hash,
1870
- bs->bh->b_blocknr);
1897
+ if (ea_block_cache) {
1898
+ struct mb_cache_entry *oe;
1899
+
1900
+ oe = mb_cache_entry_delete_or_get(ea_block_cache,
1901
+ hash, bs->bh->b_blocknr);
1902
+ if (oe) {
1903
+ /*
1904
+ * Xattr block is getting reused. Leave
1905
+ * it alone.
1906
+ */
1907
+ mb_cache_entry_put(ea_block_cache, oe);
1908
+ goto clone_block;
1909
+ }
1910
+ }
18711911 ea_bdebug(bs->bh, "modifying in-place");
18721912 error = ext4_xattr_set_entry(i, s, handle, inode,
18731913 true /* is_block */);
....@@ -1882,50 +1922,47 @@
18821922 if (error)
18831923 goto cleanup;
18841924 goto inserted;
1885
- } else {
1886
- int offset = (char *)s->here - bs->bh->b_data;
1925
+ }
1926
+clone_block:
1927
+ unlock_buffer(bs->bh);
1928
+ ea_bdebug(bs->bh, "cloning");
1929
+ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
1930
+ error = -ENOMEM;
1931
+ if (s->base == NULL)
1932
+ goto cleanup;
1933
+ s->first = ENTRY(header(s->base)+1);
1934
+ header(s->base)->h_refcount = cpu_to_le32(1);
1935
+ s->here = ENTRY(s->base + offset);
1936
+ s->end = s->base + bs->bh->b_size;
18871937
1888
- unlock_buffer(bs->bh);
1889
- ea_bdebug(bs->bh, "cloning");
1890
- s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
1891
- error = -ENOMEM;
1892
- if (s->base == NULL)
1938
+ /*
1939
+ * If existing entry points to an xattr inode, we need
1940
+ * to prevent ext4_xattr_set_entry() from decrementing
1941
+ * ref count on it because the reference belongs to the
1942
+ * original block. In this case, make the entry look
1943
+ * like it has an empty value.
1944
+ */
1945
+ if (!s->not_found && s->here->e_value_inum) {
1946
+ ea_ino = le32_to_cpu(s->here->e_value_inum);
1947
+ error = ext4_xattr_inode_iget(inode, ea_ino,
1948
+ le32_to_cpu(s->here->e_hash),
1949
+ &tmp_inode);
1950
+ if (error)
18931951 goto cleanup;
1894
- memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
1895
- s->first = ENTRY(header(s->base)+1);
1896
- header(s->base)->h_refcount = cpu_to_le32(1);
1897
- s->here = ENTRY(s->base + offset);
1898
- s->end = s->base + bs->bh->b_size;
18991952
1900
- /*
1901
- * If existing entry points to an xattr inode, we need
1902
- * to prevent ext4_xattr_set_entry() from decrementing
1903
- * ref count on it because the reference belongs to the
1904
- * original block. In this case, make the entry look
1905
- * like it has an empty value.
1906
- */
1907
- if (!s->not_found && s->here->e_value_inum) {
1908
- ea_ino = le32_to_cpu(s->here->e_value_inum);
1909
- error = ext4_xattr_inode_iget(inode, ea_ino,
1910
- le32_to_cpu(s->here->e_hash),
1911
- &tmp_inode);
1912
- if (error)
1913
- goto cleanup;
1914
-
1915
- if (!ext4_test_inode_state(tmp_inode,
1916
- EXT4_STATE_LUSTRE_EA_INODE)) {
1917
- /*
1918
- * Defer quota free call for previous
1919
- * inode until success is guaranteed.
1920
- */
1921
- old_ea_inode_quota = le32_to_cpu(
1922
- s->here->e_value_size);
1923
- }
1924
- iput(tmp_inode);
1925
-
1926
- s->here->e_value_inum = 0;
1927
- s->here->e_value_size = 0;
1953
+ if (!ext4_test_inode_state(tmp_inode,
1954
+ EXT4_STATE_LUSTRE_EA_INODE)) {
1955
+ /*
1956
+ * Defer quota free call for previous
1957
+ * inode until success is guaranteed.
1958
+ */
1959
+ old_ea_inode_quota = le32_to_cpu(
1960
+ s->here->e_value_size);
19281961 }
1962
+ iput(tmp_inode);
1963
+
1964
+ s->here->e_value_inum = 0;
1965
+ s->here->e_value_size = 0;
19291966 }
19301967 } else {
19311968 /* Allocate a buffer where we construct the new block. */
....@@ -1976,8 +2013,9 @@
19762013 else {
19772014 u32 ref;
19782015
2016
+#ifdef EXT4_XATTR_DEBUG
19792017 WARN_ON_ONCE(dquot_initialize_needed(inode));
1980
-
2018
+#endif
19812019 /* The old block is released after updating
19822020 the inode. */
19832021 error = dquot_alloc_block(inode,
....@@ -1992,18 +2030,13 @@
19922030 lock_buffer(new_bh);
19932031 /*
19942032 * We have to be careful about races with
1995
- * freeing, rehashing or adding references to
1996
- * xattr block. Once we hold buffer lock xattr
1997
- * block's state is stable so we can check
1998
- * whether the block got freed / rehashed or
1999
- * not. Since we unhash mbcache entry under
2000
- * buffer lock when freeing / rehashing xattr
2001
- * block, checking whether entry is still
2002
- * hashed is reliable. Same rules hold for
2003
- * e_reusable handling.
2033
+ * adding references to xattr block. Once we
2034
+ * hold buffer lock xattr block's state is
2035
+ * stable so we can check the additional
2036
+ * reference fits.
20042037 */
2005
- if (hlist_bl_unhashed(&ce->e_hash_list) ||
2006
- !ce->e_reusable) {
2038
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
2039
+ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
20072040 /*
20082041 * Undo everything and check mbcache
20092042 * again.
....@@ -2018,10 +2051,9 @@
20182051 new_bh = NULL;
20192052 goto inserted;
20202053 }
2021
- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
20222054 BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
2023
- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
2024
- ce->e_reusable = 0;
2055
+ if (ref == EXT4_XATTR_REFCOUNT_MAX)
2056
+ clear_bit(MBE_REUSABLE_B, &ce->e_flags);
20252057 ea_bdebug(new_bh, "reusing; refcount now=%d",
20262058 ref);
20272059 ext4_xattr_block_csum_set(inode, new_bh);
....@@ -2045,22 +2077,15 @@
20452077 /* We need to allocate a new block */
20462078 ext4_fsblk_t goal, block;
20472079
2080
+#ifdef EXT4_XATTR_DEBUG
20482081 WARN_ON_ONCE(dquot_initialize_needed(inode));
2049
-
2082
+#endif
20502083 goal = ext4_group_first_block_no(sb,
20512084 EXT4_I(inode)->i_block_group);
2052
-
2053
- /* non-extent files can't have physical blocks past 2^32 */
2054
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
2055
- goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
2056
-
20572085 block = ext4_new_meta_blocks(handle, inode, goal, 0,
20582086 NULL, &error);
20592087 if (error)
20602088 goto cleanup;
2061
-
2062
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
2063
- BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
20642089
20652090 ea_idebug(inode, "creating block %llu",
20662091 (unsigned long long)block);
....@@ -2189,7 +2214,7 @@
21892214 return 0;
21902215 }
21912216
2192
-int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
2217
+int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
21932218 struct ext4_xattr_info *i,
21942219 struct ext4_xattr_ibody_find *is)
21952220 {
....@@ -2200,30 +2225,6 @@
22002225 if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
22012226 return -ENOSPC;
22022227
2203
- error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
2204
- if (error)
2205
- return error;
2206
- header = IHDR(inode, ext4_raw_inode(&is->iloc));
2207
- if (!IS_LAST_ENTRY(s->first)) {
2208
- header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
2209
- ext4_set_inode_state(inode, EXT4_STATE_XATTR);
2210
- } else {
2211
- header->h_magic = cpu_to_le32(0);
2212
- ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
2213
- }
2214
- return 0;
2215
-}
2216
-
2217
-static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
2218
- struct ext4_xattr_info *i,
2219
- struct ext4_xattr_ibody_find *is)
2220
-{
2221
- struct ext4_xattr_ibody_header *header;
2222
- struct ext4_xattr_search *s = &is->s;
2223
- int error;
2224
-
2225
- if (EXT4_I(inode)->i_extra_isize == 0)
2226
- return -ENOSPC;
22272228 error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
22282229 if (error)
22292230 return error;
....@@ -2552,13 +2553,13 @@
25522553 .in_inode = !!entry->e_value_inum,
25532554 };
25542555 struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
2556
+ int needs_kvfree = 0;
25552557 int error;
25562558
25572559 is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
25582560 bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
2559
- buffer = kmalloc(value_size, GFP_NOFS);
25602561 b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
2561
- if (!is || !bs || !buffer || !b_entry_name) {
2562
+ if (!is || !bs || !b_entry_name) {
25622563 error = -ENOMEM;
25632564 goto out;
25642565 }
....@@ -2570,12 +2571,18 @@
25702571
25712572 /* Save the entry name and the entry value */
25722573 if (entry->e_value_inum) {
2574
+ buffer = kvmalloc(value_size, GFP_NOFS);
2575
+ if (!buffer) {
2576
+ error = -ENOMEM;
2577
+ goto out;
2578
+ }
2579
+ needs_kvfree = 1;
25732580 error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
25742581 if (error)
25752582 goto out;
25762583 } else {
25772584 size_t value_offs = le16_to_cpu(entry->e_value_offs);
2578
- memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
2585
+ buffer = (void *)IFIRST(header) + value_offs;
25792586 }
25802587
25812588 memcpy(b_entry_name, entry->e_name, entry->e_name_len);
....@@ -2590,25 +2597,26 @@
25902597 if (error)
25912598 goto out;
25922599
2593
- /* Remove the chosen entry from the inode */
2594
- error = ext4_xattr_ibody_set(handle, inode, &i, is);
2595
- if (error)
2596
- goto out;
2597
-
25982600 i.value = buffer;
25992601 i.value_len = value_size;
26002602 error = ext4_xattr_block_find(inode, &i, bs);
26012603 if (error)
26022604 goto out;
26032605
2604
- /* Add entry which was removed from the inode into the block */
2606
+ /* Move ea entry from the inode into the block */
26052607 error = ext4_xattr_block_set(handle, inode, &i, bs);
26062608 if (error)
26072609 goto out;
2608
- error = 0;
2610
+
2611
+ /* Remove the chosen entry from the inode */
2612
+ i.value = NULL;
2613
+ i.value_len = 0;
2614
+ error = ext4_xattr_ibody_set(handle, inode, &i, is);
2615
+
26092616 out:
26102617 kfree(b_entry_name);
2611
- kfree(buffer);
2618
+ if (needs_kvfree && buffer)
2619
+ kvfree(buffer);
26122620 if (is)
26132621 brelse(is->iloc.bh);
26142622 if (bs)
....@@ -2783,6 +2791,9 @@
27832791 (void *)header, total_ino);
27842792 EXT4_I(inode)->i_extra_isize = new_extra_isize;
27852793
2794
+ if (ext4_has_inline_data(inode))
2795
+ error = ext4_find_inline_data_nolock(inode);
2796
+
27862797 cleanup:
27872798 if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
27882799 ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",