hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/inode.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * (C) 1997 Linus Torvalds
34 * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
....@@ -10,7 +11,7 @@
1011 #include <linux/swap.h>
1112 #include <linux/security.h>
1213 #include <linux/cdev.h>
13
-#include <linux/bootmem.h>
14
+#include <linux/memblock.h>
1415 #include <linux/fscrypt.h>
1516 #include <linux/fsnotify.h>
1617 #include <linux/mount.h>
....@@ -107,7 +108,7 @@
107108 */
108109 #ifdef CONFIG_SYSCTL
109110 int proc_nr_inodes(struct ctl_table *table, int write,
110
- void __user *buffer, size_t *lenp, loff_t *ppos)
111
+ void *buffer, size_t *lenp, loff_t *ppos)
111112 {
112113 inodes_stat.nr_inodes = get_nr_inodes();
113114 inodes_stat.nr_unused = get_nr_inodes_unused();
....@@ -167,8 +168,6 @@
167168 inode->i_wb_frn_history = 0;
168169 #endif
169170
170
- if (security_inode_alloc(inode))
171
- goto out;
172171 spin_lock_init(&inode->i_lock);
173172 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
174173
....@@ -180,8 +179,13 @@
180179 mapping->a_ops = &empty_aops;
181180 mapping->host = inode;
182181 mapping->flags = 0;
182
+ if (sb->s_type->fs_flags & FS_THP_SUPPORT)
183
+ __set_bit(AS_THP_SUPPORT, &mapping->flags);
183184 mapping->wb_err = 0;
184185 atomic_set(&mapping->i_mmap_writable, 0);
186
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
187
+ atomic_set(&mapping->nr_thps, 0);
188
+#endif
185189 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
186190 mapping->private_data = NULL;
187191 mapping->writeback_index = 0;
....@@ -196,20 +200,37 @@
196200 inode->i_fsnotify_mask = 0;
197201 #endif
198202 inode->i_flctx = NULL;
203
+
204
+ if (unlikely(security_inode_alloc(inode)))
205
+ return -ENOMEM;
199206 this_cpu_inc(nr_inodes);
200207
201208 return 0;
202
-out:
203
- return -ENOMEM;
204209 }
205210 EXPORT_SYMBOL(inode_init_always);
206211
212
+void free_inode_nonrcu(struct inode *inode)
213
+{
214
+ kmem_cache_free(inode_cachep, inode);
215
+}
216
+EXPORT_SYMBOL(free_inode_nonrcu);
217
+
218
+static void i_callback(struct rcu_head *head)
219
+{
220
+ struct inode *inode = container_of(head, struct inode, i_rcu);
221
+ if (inode->free_inode)
222
+ inode->free_inode(inode);
223
+ else
224
+ free_inode_nonrcu(inode);
225
+}
226
+
207227 static struct inode *alloc_inode(struct super_block *sb)
208228 {
229
+ const struct super_operations *ops = sb->s_op;
209230 struct inode *inode;
210231
211
- if (sb->s_op->alloc_inode)
212
- inode = sb->s_op->alloc_inode(sb);
232
+ if (ops->alloc_inode)
233
+ inode = ops->alloc_inode(sb);
213234 else
214235 inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
215236
....@@ -217,21 +238,18 @@
217238 return NULL;
218239
219240 if (unlikely(inode_init_always(sb, inode))) {
220
- if (inode->i_sb->s_op->destroy_inode)
221
- inode->i_sb->s_op->destroy_inode(inode);
222
- else
223
- kmem_cache_free(inode_cachep, inode);
241
+ if (ops->destroy_inode) {
242
+ ops->destroy_inode(inode);
243
+ if (!ops->free_inode)
244
+ return NULL;
245
+ }
246
+ inode->free_inode = ops->free_inode;
247
+ i_callback(&inode->i_rcu);
224248 return NULL;
225249 }
226250
227251 return inode;
228252 }
229
-
230
-void free_inode_nonrcu(struct inode *inode)
231
-{
232
- kmem_cache_free(inode_cachep, inode);
233
-}
234
-EXPORT_SYMBOL(free_inode_nonrcu);
235253
236254 void __destroy_inode(struct inode *inode)
237255 {
....@@ -255,20 +273,19 @@
255273 }
256274 EXPORT_SYMBOL(__destroy_inode);
257275
258
-static void i_callback(struct rcu_head *head)
259
-{
260
- struct inode *inode = container_of(head, struct inode, i_rcu);
261
- kmem_cache_free(inode_cachep, inode);
262
-}
263
-
264276 static void destroy_inode(struct inode *inode)
265277 {
278
+ const struct super_operations *ops = inode->i_sb->s_op;
279
+
266280 BUG_ON(!list_empty(&inode->i_lru));
267281 __destroy_inode(inode);
268
- if (inode->i_sb->s_op->destroy_inode)
269
- inode->i_sb->s_op->destroy_inode(inode);
270
- else
271
- call_rcu(&inode->i_rcu, i_callback);
282
+ if (ops->destroy_inode) {
283
+ ops->destroy_inode(inode);
284
+ if (!ops->free_inode)
285
+ return;
286
+ }
287
+ inode->free_inode = ops->free_inode;
288
+ call_rcu(&inode->i_rcu, i_callback);
272289 }
273290
274291 /**
....@@ -289,7 +306,7 @@
289306 if (!inode->i_nlink)
290307 atomic_long_inc(&inode->i_sb->s_remove_count);
291308 }
292
-EXPORT_SYMBOL(drop_nlink);
309
+EXPORT_SYMBOL_NS(drop_nlink, ANDROID_GKI_VFS_EXPORT_ONLY);
293310
294311 /**
295312 * clear_nlink - directly zero an inode's link count
....@@ -328,7 +345,7 @@
328345 inode->__i_nlink = nlink;
329346 }
330347 }
331
-EXPORT_SYMBOL(set_nlink);
348
+EXPORT_SYMBOL_NS(set_nlink, ANDROID_GKI_VFS_EXPORT_ONLY);
332349
333350 /**
334351 * inc_nlink - directly increment an inode's link count
....@@ -351,7 +368,7 @@
351368
352369 static void __address_space_init_once(struct address_space *mapping)
353370 {
354
- INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT);
371
+ xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
355372 init_rwsem(&mapping->i_mmap_rwsem);
356373 INIT_LIST_HEAD(&mapping->private_list);
357374 spin_lock_init(&mapping->private_lock);
....@@ -381,7 +398,7 @@
381398 __address_space_init_once(&inode->i_data);
382399 i_size_ordered_init(inode);
383400 }
384
-EXPORT_SYMBOL(inode_init_once);
401
+EXPORT_SYMBOL_NS(inode_init_once, ANDROID_GKI_VFS_EXPORT_ONLY);
385402
386403 static void init_once(void *foo)
387404 {
....@@ -405,7 +422,7 @@
405422 {
406423 WARN_ON(atomic_inc_return(&inode->i_count) < 2);
407424 }
408
-EXPORT_SYMBOL(ihold);
425
+EXPORT_SYMBOL_NS(ihold, ANDROID_GKI_VFS_EXPORT_ONLY);
409426
410427 static void inode_lru_list_add(struct inode *inode)
411428 {
....@@ -481,11 +498,11 @@
481498
482499 spin_lock(&inode_hash_lock);
483500 spin_lock(&inode->i_lock);
484
- hlist_add_head(&inode->i_hash, b);
501
+ hlist_add_head_rcu(&inode->i_hash, b);
485502 spin_unlock(&inode->i_lock);
486503 spin_unlock(&inode_hash_lock);
487504 }
488
-EXPORT_SYMBOL(__insert_inode_hash);
505
+EXPORT_SYMBOL_NS(__insert_inode_hash, ANDROID_GKI_VFS_EXPORT_ONLY);
489506
490507 /**
491508 * __remove_inode_hash - remove an inode from the hash
....@@ -497,11 +514,11 @@
497514 {
498515 spin_lock(&inode_hash_lock);
499516 spin_lock(&inode->i_lock);
500
- hlist_del_init(&inode->i_hash);
517
+ hlist_del_init_rcu(&inode->i_hash);
501518 spin_unlock(&inode->i_lock);
502519 spin_unlock(&inode_hash_lock);
503520 }
504
-EXPORT_SYMBOL(__remove_inode_hash);
521
+EXPORT_SYMBOL_NS(__remove_inode_hash, ANDROID_GKI_VFS_EXPORT_ONLY);
505522
506523 void clear_inode(struct inode *inode)
507524 {
....@@ -521,7 +538,7 @@
521538 /* don't need i_lock here, no concurrent mods to i_state */
522539 inode->i_state = I_FREEING | I_CLEAR;
523540 }
524
-EXPORT_SYMBOL(clear_inode);
541
+EXPORT_SYMBOL_NS(clear_inode, ANDROID_GKI_VFS_EXPORT_ONLY);
525542
526543 /*
527544 * Free the inode passed in, removing it from the lists it is still connected
....@@ -983,7 +1000,7 @@
9831000 wake_up_bit(&inode->i_state, __I_NEW);
9841001 spin_unlock(&inode->i_lock);
9851002 }
986
-EXPORT_SYMBOL(unlock_new_inode);
1003
+EXPORT_SYMBOL_NS(unlock_new_inode, ANDROID_GKI_VFS_EXPORT_ONLY);
9871004
9881005 void discard_new_inode(struct inode *inode)
9891006 {
....@@ -997,6 +1014,48 @@
9971014 iput(inode);
9981015 }
9991016 EXPORT_SYMBOL(discard_new_inode);
1017
+
1018
+/**
1019
+ * lock_two_inodes - lock two inodes (may be regular files but also dirs)
1020
+ *
1021
+ * Lock any non-NULL argument. The caller must make sure that if he is passing
1022
+ * in two directories, one is not an ancestor of the other. Zero, one or two
1023
+ * objects may be locked by this function.
1024
+ *
1025
+ * @inode1: first inode to lock
1026
+ * @inode2: second inode to lock
1027
+ * @subclass1: inode lock subclass for the first lock obtained
1028
+ * @subclass2: inode lock subclass for the second lock obtained
1029
+ */
1030
+void lock_two_inodes(struct inode *inode1, struct inode *inode2,
1031
+ unsigned subclass1, unsigned subclass2)
1032
+{
1033
+ if (!inode1 || !inode2) {
1034
+ /*
1035
+ * Make sure @subclass1 will be used for the acquired lock.
1036
+ * This is not strictly necessary (no current caller cares) but
1037
+ * let's keep things consistent.
1038
+ */
1039
+ if (!inode1)
1040
+ swap(inode1, inode2);
1041
+ goto lock;
1042
+ }
1043
+
1044
+ /*
1045
+ * If one object is directory and the other is not, we must make sure
1046
+ * to lock directory first as the other object may be its child.
1047
+ */
1048
+ if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
1049
+ if (inode1 > inode2)
1050
+ swap(inode1, inode2);
1051
+ } else if (!S_ISDIR(inode1->i_mode))
1052
+ swap(inode1, inode2);
1053
+lock:
1054
+ if (inode1)
1055
+ inode_lock_nested(inode1, subclass1);
1056
+ if (inode2 && inode2 != inode1)
1057
+ inode_lock_nested(inode2, subclass2);
1058
+}
10001059
10011060 /**
10021061 * lock_two_nondirectories - take two i_mutexes on non-directory objects
....@@ -1091,7 +1150,7 @@
10911150 */
10921151 spin_lock(&inode->i_lock);
10931152 inode->i_state |= I_NEW;
1094
- hlist_add_head(&inode->i_hash, head);
1153
+ hlist_add_head_rcu(&inode->i_hash, head);
10951154 spin_unlock(&inode->i_lock);
10961155 if (!creating)
10971156 inode_sb_list_add(inode);
....@@ -1140,7 +1199,7 @@
11401199 }
11411200 return inode;
11421201 }
1143
-EXPORT_SYMBOL(iget5_locked);
1202
+EXPORT_SYMBOL_NS(iget5_locked, ANDROID_GKI_VFS_EXPORT_ONLY);
11441203
11451204 /**
11461205 * iget_locked - obtain an inode from a mounted file system
....@@ -1185,7 +1244,7 @@
11851244 inode->i_ino = ino;
11861245 spin_lock(&inode->i_lock);
11871246 inode->i_state = I_NEW;
1188
- hlist_add_head(&inode->i_hash, head);
1247
+ hlist_add_head_rcu(&inode->i_hash, head);
11891248 spin_unlock(&inode->i_lock);
11901249 inode_sb_list_add(inode);
11911250 spin_unlock(&inode_hash_lock);
....@@ -1228,15 +1287,10 @@
12281287 struct hlist_head *b = inode_hashtable + hash(sb, ino);
12291288 struct inode *inode;
12301289
1231
- spin_lock(&inode_hash_lock);
1232
- hlist_for_each_entry(inode, b, i_hash) {
1233
- if (inode->i_ino == ino && inode->i_sb == sb) {
1234
- spin_unlock(&inode_hash_lock);
1290
+ hlist_for_each_entry_rcu(inode, b, i_hash) {
1291
+ if (inode->i_ino == ino && inode->i_sb == sb)
12351292 return 0;
1236
- }
12371293 }
1238
- spin_unlock(&inode_hash_lock);
1239
-
12401294 return 1;
12411295 }
12421296
....@@ -1265,6 +1319,7 @@
12651319 static unsigned int counter;
12661320 ino_t res;
12671321
1322
+ rcu_read_lock();
12681323 spin_lock(&iunique_lock);
12691324 do {
12701325 if (counter <= max_reserved)
....@@ -1272,10 +1327,11 @@
12721327 res = counter++;
12731328 } while (!test_inode_iunique(sb, res));
12741329 spin_unlock(&iunique_lock);
1330
+ rcu_read_unlock();
12751331
12761332 return res;
12771333 }
1278
-EXPORT_SYMBOL(iunique);
1334
+EXPORT_SYMBOL_NS(iunique, ANDROID_GKI_VFS_EXPORT_ONLY);
12791335
12801336 struct inode *igrab(struct inode *inode)
12811337 {
....@@ -1358,7 +1414,7 @@
13581414 }
13591415 return inode;
13601416 }
1361
-EXPORT_SYMBOL(ilookup5);
1417
+EXPORT_SYMBOL_NS(ilookup5, ANDROID_GKI_VFS_EXPORT_ONLY);
13621418
13631419 /**
13641420 * ilookup - search for an inode in the inode cache
....@@ -1440,6 +1496,84 @@
14401496 }
14411497 EXPORT_SYMBOL(find_inode_nowait);
14421498
1499
+/**
1500
+ * find_inode_rcu - find an inode in the inode cache
1501
+ * @sb: Super block of file system to search
1502
+ * @hashval: Key to hash
1503
+ * @test: Function to test match on an inode
1504
+ * @data: Data for test function
1505
+ *
1506
+ * Search for the inode specified by @hashval and @data in the inode cache,
1507
+ * where the helper function @test will return 0 if the inode does not match
1508
+ * and 1 if it does. The @test function must be responsible for taking the
1509
+ * i_lock spin_lock and checking i_state for an inode being freed or being
1510
+ * initialized.
1511
+ *
1512
+ * If successful, this will return the inode for which the @test function
1513
+ * returned 1 and NULL otherwise.
1514
+ *
1515
+ * The @test function is not permitted to take a ref on any inode presented.
1516
+ * It is also not permitted to sleep.
1517
+ *
1518
+ * The caller must hold the RCU read lock.
1519
+ */
1520
+struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
1521
+ int (*test)(struct inode *, void *), void *data)
1522
+{
1523
+ struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1524
+ struct inode *inode;
1525
+
1526
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
1527
+ "suspicious find_inode_rcu() usage");
1528
+
1529
+ hlist_for_each_entry_rcu(inode, head, i_hash) {
1530
+ if (inode->i_sb == sb &&
1531
+ !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
1532
+ test(inode, data))
1533
+ return inode;
1534
+ }
1535
+ return NULL;
1536
+}
1537
+EXPORT_SYMBOL(find_inode_rcu);
1538
+
1539
+/**
1540
+ * find_inode_by_ino_rcu - Find an inode in the inode cache
1541
+ * @sb: Super block of file system to search
1542
+ * @ino: The inode number to match
1543
+ *
1544
+ * Search the inode cache for the inode on superblock @sb whose inode number
1545
+ * is @ino. Inodes with I_FREEING or I_WILL_FREE set in i_state are skipped.
1546
+ * The i_state check is done with READ_ONCE() and without taking the i_lock
1547
+ * spin_lock; the caller must recheck under i_lock if it needs a stable
1548
+ * i_state.
1549
+ *
1550
+ * If successful, this will return the first matching inode on the hash chain
1551
+ * and NULL otherwise.
1552
+ *
1553
+ * No ref is taken on the returned inode.
1554
+ * This function does not sleep.
1555
+ *
1556
+ * The caller must hold the RCU read lock.
1557
+ */
1558
+struct inode *find_inode_by_ino_rcu(struct super_block *sb,
1559
+ unsigned long ino)
1560
+{
1561
+ struct hlist_head *head = inode_hashtable + hash(sb, ino);
1562
+ struct inode *inode;
1563
+
1564
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
1565
+ "suspicious find_inode_by_ino_rcu() usage");
1566
+
1567
+ hlist_for_each_entry_rcu(inode, head, i_hash) {
1568
+ if (inode->i_ino == ino &&
1569
+ inode->i_sb == sb &&
1570
+ !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
1571
+ return inode;
1572
+ }
1573
+ return NULL;
1574
+}
1575
+EXPORT_SYMBOL(find_inode_by_ino_rcu);
1576
+
14431577 int insert_inode_locked(struct inode *inode)
14441578 {
14451579 struct super_block *sb = inode->i_sb;
....@@ -1464,7 +1598,7 @@
14641598 if (likely(!old)) {
14651599 spin_lock(&inode->i_lock);
14661600 inode->i_state |= I_NEW | I_CREATING;
1467
- hlist_add_head(&inode->i_hash, head);
1601
+ hlist_add_head_rcu(&inode->i_hash, head);
14681602 spin_unlock(&inode->i_lock);
14691603 spin_unlock(&inode_hash_lock);
14701604 return 0;
....@@ -1524,6 +1658,7 @@
15241658 {
15251659 struct super_block *sb = inode->i_sb;
15261660 const struct super_operations *op = inode->i_sb->s_op;
1661
+ unsigned long state;
15271662 int drop;
15281663
15291664 WARN_ON(inode->i_state & I_NEW);
....@@ -1533,22 +1668,28 @@
15331668 else
15341669 drop = generic_drop_inode(inode);
15351670
1536
- if (!drop && (sb->s_flags & SB_ACTIVE)) {
1671
+ if (!drop &&
1672
+ !(inode->i_state & I_DONTCACHE) &&
1673
+ (sb->s_flags & SB_ACTIVE)) {
15371674 inode_add_lru(inode);
15381675 spin_unlock(&inode->i_lock);
15391676 return;
15401677 }
15411678
1679
+ state = inode->i_state;
15421680 if (!drop) {
1543
- inode->i_state |= I_WILL_FREE;
1681
+ WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
15441682 spin_unlock(&inode->i_lock);
1683
+
15451684 write_inode_now(inode, 1);
1685
+
15461686 spin_lock(&inode->i_lock);
1547
- WARN_ON(inode->i_state & I_NEW);
1548
- inode->i_state &= ~I_WILL_FREE;
1687
+ state = inode->i_state;
1688
+ WARN_ON(state & I_NEW);
1689
+ state &= ~I_WILL_FREE;
15491690 }
15501691
1551
- inode->i_state |= I_FREEING;
1692
+ WRITE_ONCE(inode->i_state, state | I_FREEING);
15521693 if (!list_empty(&inode->i_lru))
15531694 inode_lru_list_del(inode);
15541695 spin_unlock(&inode->i_lock);
....@@ -1584,25 +1725,31 @@
15841725 }
15851726 EXPORT_SYMBOL(iput);
15861727
1728
+#ifdef CONFIG_BLOCK
15871729 /**
15881730 * bmap - find a block number in a file
1589
- * @inode: inode of file
1590
- * @block: block to find
1731
+ * @inode: inode owning the block number being requested
1732
+ * @block: pointer containing the block to find
15911733 *
1592
- * Returns the block number on the device holding the inode that
1593
- * is the disk block number for the block of the file requested.
1594
- * That is, asked for block 4 of inode 1 the function will return the
1595
- * disk block relative to the disk start that holds that block of the
1596
- * file.
1734
+ * Replaces the value in ``*block`` with the block number on the device holding
1735
+ * the inode, corresponding to the requested block number in the file.
1736
+ * That is, asked for block 4 of inode 1 the function will replace the
1737
+ * 4 in ``*block``, with disk block relative to the disk start that holds that
1738
+ * block of the file.
1739
+ *
1740
+ * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
1741
+ * hole, returns 0 and ``*block`` is also set to 0.
15971742 */
1598
-sector_t bmap(struct inode *inode, sector_t block)
1743
+int bmap(struct inode *inode, sector_t *block)
15991744 {
1600
- sector_t res = 0;
1601
- if (inode->i_mapping->a_ops->bmap)
1602
- res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
1603
- return res;
1745
+ if (!inode->i_mapping->a_ops->bmap)
1746
+ return -EINVAL;
1747
+
1748
+ *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
1749
+ return 0;
16041750 }
16051751 EXPORT_SYMBOL(bmap);
1752
+#endif
16061753
16071754 /*
16081755 * With relative atime, only update atime if the previous atime is
....@@ -1610,7 +1757,7 @@
16101757 * passed since the last atime update.
16111758 */
16121759 static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
1613
- struct timespec now)
1760
+ struct timespec64 now)
16141761 {
16151762
16161763 if (!(mnt->mnt_flags & MNT_RELATIME))
....@@ -1666,15 +1813,13 @@
16661813 * This does the actual work of updating an inodes time or version. Must have
16671814 * had called mnt_want_write() before calling this.
16681815 */
1669
-static int update_time(struct inode *inode, struct timespec64 *time, int flags)
1816
+int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
16701817 {
1671
- int (*update_time)(struct inode *, struct timespec64 *, int);
1672
-
1673
- update_time = inode->i_op->update_time ? inode->i_op->update_time :
1674
- generic_update_time;
1675
-
1676
- return update_time(inode, time, flags);
1818
+ if (inode->i_op->update_time)
1819
+ return inode->i_op->update_time(inode, time, flags);
1820
+ return generic_update_time(inode, time, flags);
16771821 }
1822
+EXPORT_SYMBOL(inode_update_time);
16781823
16791824 /**
16801825 * touch_atime - update the access time
....@@ -1711,7 +1856,7 @@
17111856
17121857 now = current_time(inode);
17131858
1714
- if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
1859
+ if (!relatime_need_update(mnt, inode, now))
17151860 return false;
17161861
17171862 if (timespec64_equal(&inode->i_atime, &now))
....@@ -1744,41 +1889,12 @@
17441889 * of the fs read only, e.g. subvolumes in Btrfs.
17451890 */
17461891 now = current_time(inode);
1747
- update_time(inode, &now, S_ATIME);
1892
+ inode_update_time(inode, &now, S_ATIME);
17481893 __mnt_drop_write(mnt);
17491894 skip_update:
17501895 sb_end_write(inode->i_sb);
17511896 }
1752
-EXPORT_SYMBOL(touch_atime);
1753
-
1754
-/*
1755
- * The logic we want is
1756
- *
1757
- * if suid or (sgid and xgrp)
1758
- * remove privs
1759
- */
1760
-int should_remove_suid(struct dentry *dentry)
1761
-{
1762
- umode_t mode = d_inode(dentry)->i_mode;
1763
- int kill = 0;
1764
-
1765
- /* suid always must be killed */
1766
- if (unlikely(mode & S_ISUID))
1767
- kill = ATTR_KILL_SUID;
1768
-
1769
- /*
1770
- * sgid without any exec bits is just a mandatory locking mark; leave
1771
- * it alone. If some exec bits are set, it's a real sgid; kill it.
1772
- */
1773
- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
1774
- kill |= ATTR_KILL_SGID;
1775
-
1776
- if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
1777
- return kill;
1778
-
1779
- return 0;
1780
-}
1781
-EXPORT_SYMBOL(should_remove_suid);
1897
+EXPORT_SYMBOL_NS(touch_atime, ANDROID_GKI_VFS_EXPORT_ONLY);
17821898
17831899 /*
17841900 * Return mask of changes for notify_change() that need to be done as a
....@@ -1794,7 +1910,7 @@
17941910 if (IS_NOSEC(inode))
17951911 return 0;
17961912
1797
- mask = should_remove_suid(dentry);
1913
+ mask = setattr_should_drop_suidgid(inode);
17981914 ret = security_inode_need_killpriv(dentry);
17991915 if (ret < 0)
18001916 return ret;
....@@ -1803,7 +1919,7 @@
18031919 return mask;
18041920 }
18051921
1806
-static int __remove_privs(struct vfsmount *mnt, struct dentry *dentry, int kill)
1922
+static int __remove_privs(struct dentry *dentry, int kill)
18071923 {
18081924 struct iattr newattrs;
18091925
....@@ -1812,7 +1928,7 @@
18121928 * Note we call this on write, so notify_change will not
18131929 * encounter any conflicting delegations:
18141930 */
1815
- return notify_change2(mnt, dentry, &newattrs, NULL);
1931
+ return notify_change(dentry, &newattrs, NULL);
18161932 }
18171933
18181934 /*
....@@ -1839,13 +1955,13 @@
18391955 if (kill < 0)
18401956 return kill;
18411957 if (kill)
1842
- error = __remove_privs(file->f_path.mnt, dentry, kill);
1958
+ error = __remove_privs(dentry, kill);
18431959 if (!error)
18441960 inode_has_no_xattr(inode);
18451961
18461962 return error;
18471963 }
1848
-EXPORT_SYMBOL(file_remove_privs);
1964
+EXPORT_SYMBOL_NS(file_remove_privs, ANDROID_GKI_VFS_EXPORT_ONLY);
18491965
18501966 /**
18511967 * file_update_time - update mtime and ctime time
....@@ -1888,12 +2004,32 @@
18882004 if (__mnt_want_write_file(file))
18892005 return 0;
18902006
1891
- ret = update_time(inode, &now, sync_it);
2007
+ ret = inode_update_time(inode, &now, sync_it);
18922008 __mnt_drop_write_file(file);
18932009
18942010 return ret;
18952011 }
18962012 EXPORT_SYMBOL(file_update_time);
2013
+
2014
+/* Caller must hold the file's inode lock */
2015
+int file_modified(struct file *file)
2016
+{
2017
+ int err;
2018
+
2019
+ /*
2020
+ * Clear the security bits if the process is not being run by root.
2021
+ * This keeps people from modifying setuid and setgid binaries.
2022
+ */
2023
+ err = file_remove_privs(file);
2024
+ if (err)
2025
+ return err;
2026
+
2027
+ if (unlikely(file->f_mode & FMODE_NOCMTIME))
2028
+ return 0;
2029
+
2030
+ return file_update_time(file);
2031
+}
2032
+EXPORT_SYMBOL(file_modified);
18972033
18982034 int inode_needs_sync(struct inode *inode)
18992035 {
....@@ -2006,7 +2142,7 @@
20062142 " inode %s:%lu\n", mode, inode->i_sb->s_id,
20072143 inode->i_ino);
20082144 }
2009
-EXPORT_SYMBOL(init_special_inode);
2145
+EXPORT_SYMBOL_NS(init_special_inode, ANDROID_GKI_VFS_EXPORT_ONLY);
20102146
20112147 /**
20122148 * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
....@@ -2024,15 +2160,11 @@
20242160 /* Directories are special, and always inherit S_ISGID */
20252161 if (S_ISDIR(mode))
20262162 mode |= S_ISGID;
2027
- else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
2028
- !in_group_p(inode->i_gid) &&
2029
- !capable_wrt_inode_uidgid(dir, CAP_FSETID))
2030
- mode &= ~S_ISGID;
20312163 } else
20322164 inode->i_gid = current_fsgid();
20332165 inode->i_mode = mode;
20342166 }
2035
-EXPORT_SYMBOL(inode_init_owner);
2167
+EXPORT_SYMBOL_NS(inode_init_owner, ANDROID_GKI_VFS_EXPORT_ONLY);
20362168
20372169 /**
20382170 * inode_owner_or_capable - check current task permissions to inode
....@@ -2086,7 +2218,7 @@
20862218 if (atomic_read(&inode->i_dio_count))
20872219 __inode_dio_wait(inode);
20882220 }
2089
-EXPORT_SYMBOL(inode_dio_wait);
2221
+EXPORT_SYMBOL_NS(inode_dio_wait, ANDROID_GKI_VFS_EXPORT_ONLY);
20902222
20912223 /*
20922224 * inode_set_flags - atomically set some inode flags
....@@ -2107,16 +2239,10 @@
21072239 void inode_set_flags(struct inode *inode, unsigned int flags,
21082240 unsigned int mask)
21092241 {
2110
- unsigned int old_flags, new_flags;
2111
-
21122242 WARN_ON_ONCE(flags & ~mask);
2113
- do {
2114
- old_flags = READ_ONCE(inode->i_flags);
2115
- new_flags = (old_flags & ~mask) | flags;
2116
- } while (unlikely(cmpxchg(&inode->i_flags, old_flags,
2117
- new_flags) != old_flags));
2243
+ set_mask_bits(&inode->i_flags, mask, flags);
21182244 }
2119
-EXPORT_SYMBOL(inode_set_flags);
2245
+EXPORT_SYMBOL_NS(inode_set_flags, ANDROID_GKI_VFS_EXPORT_ONLY);
21202246
21212247 void inode_nohighmem(struct inode *inode)
21222248 {
....@@ -2125,28 +2251,35 @@
21252251 EXPORT_SYMBOL(inode_nohighmem);
21262252
21272253 /**
2128
- * timespec64_trunc - Truncate timespec64 to a granularity
2129
- * @t: Timespec64
2130
- * @gran: Granularity in ns.
2254
+ * timestamp_truncate - Truncate timespec to a granularity
2255
+ * @t: Timespec
2256
+ * @inode: inode being updated
21312257 *
2132
- * Truncate a timespec64 to a granularity. Always rounds down. gran must
2258
+ * Truncate a timespec to the granularity supported by the fs
2259
+ * containing the inode. Always rounds down. The fs's s_time_gran must
21332260 * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
21342261 */
2135
-struct timespec64 timespec64_trunc(struct timespec64 t, unsigned gran)
2262
+struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
21362263 {
2137
- /* Avoid division in the common cases 1 ns and 1 s. */
2138
- if (gran == 1) {
2139
- /* nothing */
2140
- } else if (gran == NSEC_PER_SEC) {
2264
+ struct super_block *sb = inode->i_sb;
2265
+ unsigned int gran = sb->s_time_gran;
2266
+
2267
+ t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
2268
+ if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
21412269 t.tv_nsec = 0;
2142
- } else if (gran > 1 && gran < NSEC_PER_SEC) {
2270
+
2271
+ /* Avoid division in the common cases 1 ns and 1 s. */
2272
+ if (gran == 1)
2273
+ ; /* nothing */
2274
+ else if (gran == NSEC_PER_SEC)
2275
+ t.tv_nsec = 0;
2276
+ else if (gran > 1 && gran < NSEC_PER_SEC)
21432277 t.tv_nsec -= t.tv_nsec % gran;
2144
- } else {
2145
- WARN(1, "illegal file time granularity: %u", gran);
2146
- }
2278
+ else
2279
+ WARN(1, "invalid file time granularity: %u", gran);
21472280 return t;
21482281 }
2149
-EXPORT_SYMBOL(timespec64_trunc);
2282
+EXPORT_SYMBOL_NS(timestamp_truncate, ANDROID_GKI_VFS_EXPORT_ONLY);
21502283
21512284 /**
21522285 * current_time - Return FS time
....@@ -2160,14 +2293,16 @@
21602293 */
21612294 struct timespec64 current_time(struct inode *inode)
21622295 {
2163
- struct timespec64 now = current_kernel_time64();
2296
+ struct timespec64 now;
2297
+
2298
+ ktime_get_coarse_real_ts64(&now);
21642299
21652300 if (unlikely(!inode->i_sb)) {
21662301 WARN(1, "current_time() called with uninitialized super_block in the inode");
21672302 return now;
21682303 }
21692304
2170
- return timespec64_trunc(now, inode->i_sb->s_time_gran);
2305
+ return timestamp_truncate(now, inode);
21712306 }
21722307 EXPORT_SYMBOL(current_time);
21732308
....@@ -2256,3 +2391,48 @@
22562391 return 0;
22572392 }
22582393 EXPORT_SYMBOL(vfs_ioc_fssetxattr_check);
2394
+
2395
+/**
2396
+ * in_group_or_capable - check whether caller is CAP_FSETID privileged
2397
+ * @inode: inode to check
2398
+ * @gid: the new/current gid of @inode
2399
+ *
2400
+ * Check whether @gid is in the caller's group list or if the caller is
2401
+ * privileged with CAP_FSETID over @inode. This can be used to determine
2402
+ * whether the setgid bit can be kept or must be dropped.
2403
+ *
2404
+ * Return: true if the caller is sufficiently privileged, false if not.
2405
+ */
2406
+bool in_group_or_capable(const struct inode *inode, kgid_t gid)
2407
+{
2408
+ if (in_group_p(gid))
2409
+ return true;
2410
+ if (capable_wrt_inode_uidgid(inode, CAP_FSETID))
2411
+ return true;
2412
+ return false;
2413
+}
2414
+
2415
+/**
2416
+ * mode_strip_sgid - handle the sgid bit for non-directories
2417
+ * @dir: parent directory inode
2418
+ * @mode: mode of the file to be created in @dir
2419
+ *
2420
+ * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
2421
+ * raised and @dir has the S_ISGID bit raised ensure that the caller is
2422
+ * either in the group of the parent directory or they have CAP_FSETID
2423
+ * in their user namespace and are privileged over the parent directory.
2424
+ * In all other cases, strip the S_ISGID bit from @mode.
2425
+ *
2426
+ * Return: the new mode to use for the file
2427
+ */
2428
+umode_t mode_strip_sgid(const struct inode *dir, umode_t mode)
2429
+{
2430
+ if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
2431
+ return mode;
2432
+ if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
2433
+ return mode;
2434
+ if (in_group_or_capable(dir, dir->i_gid))
2435
+ return mode;
2436
+ return mode & ~S_ISGID;
2437
+}
2438
+EXPORT_SYMBOL(mode_strip_sgid);