From ea08eeccae9297f7aabd2ef7f0c2517ac4549acc Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:18:26 +0000
Subject: [PATCH] write in 30M
---
kernel/fs/inode.c | 472 ++++++++++++++++++++++++++++++++++++++++------------------
1 files changed, 326 insertions(+), 146 deletions(-)
diff --git a/kernel/fs/inode.c b/kernel/fs/inode.c
index 9e46350..4102cfe 100644
--- a/kernel/fs/inode.c
+++ b/kernel/fs/inode.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* (C) 1997 Linus Torvalds
* (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
@@ -10,7 +11,7 @@
#include <linux/swap.h>
#include <linux/security.h>
#include <linux/cdev.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/fscrypt.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
@@ -107,7 +108,7 @@
*/
#ifdef CONFIG_SYSCTL
int proc_nr_inodes(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
inodes_stat.nr_unused = get_nr_inodes_unused();
@@ -167,8 +168,6 @@
inode->i_wb_frn_history = 0;
#endif
- if (security_inode_alloc(inode))
- goto out;
spin_lock_init(&inode->i_lock);
lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
@@ -180,8 +179,13 @@
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
+ if (sb->s_type->fs_flags & FS_THP_SUPPORT)
+ __set_bit(AS_THP_SUPPORT, &mapping->flags);
mapping->wb_err = 0;
atomic_set(&mapping->i_mmap_writable, 0);
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+ atomic_set(&mapping->nr_thps, 0);
+#endif
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
mapping->writeback_index = 0;
@@ -196,20 +200,37 @@
inode->i_fsnotify_mask = 0;
#endif
inode->i_flctx = NULL;
+
+ if (unlikely(security_inode_alloc(inode)))
+ return -ENOMEM;
this_cpu_inc(nr_inodes);
return 0;
-out:
- return -ENOMEM;
}
EXPORT_SYMBOL(inode_init_always);
+void free_inode_nonrcu(struct inode *inode)
+{
+ kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
+static void i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ if (inode->free_inode)
+ inode->free_inode(inode);
+ else
+ free_inode_nonrcu(inode);
+}
+
static struct inode *alloc_inode(struct super_block *sb)
{
+ const struct super_operations *ops = sb->s_op;
struct inode *inode;
- if (sb->s_op->alloc_inode)
- inode = sb->s_op->alloc_inode(sb);
+ if (ops->alloc_inode)
+ inode = ops->alloc_inode(sb);
else
inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
@@ -217,21 +238,18 @@
return NULL;
if (unlikely(inode_init_always(sb, inode))) {
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- kmem_cache_free(inode_cachep, inode);
+ if (ops->destroy_inode) {
+ ops->destroy_inode(inode);
+ if (!ops->free_inode)
+ return NULL;
+ }
+ inode->free_inode = ops->free_inode;
+ i_callback(&inode->i_rcu);
return NULL;
}
return inode;
}
-
-void free_inode_nonrcu(struct inode *inode)
-{
- kmem_cache_free(inode_cachep, inode);
-}
-EXPORT_SYMBOL(free_inode_nonrcu);
void __destroy_inode(struct inode *inode)
{
@@ -255,20 +273,19 @@
}
EXPORT_SYMBOL(__destroy_inode);
-static void i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(inode_cachep, inode);
-}
-
static void destroy_inode(struct inode *inode)
{
+ const struct super_operations *ops = inode->i_sb->s_op;
+
BUG_ON(!list_empty(&inode->i_lru));
__destroy_inode(inode);
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- call_rcu(&inode->i_rcu, i_callback);
+ if (ops->destroy_inode) {
+ ops->destroy_inode(inode);
+ if (!ops->free_inode)
+ return;
+ }
+ inode->free_inode = ops->free_inode;
+ call_rcu(&inode->i_rcu, i_callback);
}
/**
@@ -289,7 +306,7 @@
if (!inode->i_nlink)
atomic_long_inc(&inode->i_sb->s_remove_count);
}
-EXPORT_SYMBOL(drop_nlink);
+EXPORT_SYMBOL_NS(drop_nlink, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* clear_nlink - directly zero an inode's link count
@@ -328,7 +345,7 @@
inode->__i_nlink = nlink;
}
}
-EXPORT_SYMBOL(set_nlink);
+EXPORT_SYMBOL_NS(set_nlink, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* inc_nlink - directly increment an inode's link count
@@ -351,7 +368,7 @@
static void __address_space_init_once(struct address_space *mapping)
{
- INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT);
+ xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
@@ -381,7 +398,7 @@
__address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
}
-EXPORT_SYMBOL(inode_init_once);
+EXPORT_SYMBOL_NS(inode_init_once, ANDROID_GKI_VFS_EXPORT_ONLY);
static void init_once(void *foo)
{
@@ -405,7 +422,7 @@
{
WARN_ON(atomic_inc_return(&inode->i_count) < 2);
}
-EXPORT_SYMBOL(ihold);
+EXPORT_SYMBOL_NS(ihold, ANDROID_GKI_VFS_EXPORT_ONLY);
static void inode_lru_list_add(struct inode *inode)
{
@@ -481,11 +498,11 @@
spin_lock(&inode_hash_lock);
spin_lock(&inode->i_lock);
- hlist_add_head(&inode->i_hash, b);
+ hlist_add_head_rcu(&inode->i_hash, b);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock);
}
-EXPORT_SYMBOL(__insert_inode_hash);
+EXPORT_SYMBOL_NS(__insert_inode_hash, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* __remove_inode_hash - remove an inode from the hash
@@ -497,11 +514,11 @@
{
spin_lock(&inode_hash_lock);
spin_lock(&inode->i_lock);
- hlist_del_init(&inode->i_hash);
+ hlist_del_init_rcu(&inode->i_hash);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock);
}
-EXPORT_SYMBOL(__remove_inode_hash);
+EXPORT_SYMBOL_NS(__remove_inode_hash, ANDROID_GKI_VFS_EXPORT_ONLY);
void clear_inode(struct inode *inode)
{
@@ -521,7 +538,7 @@
/* don't need i_lock here, no concurrent mods to i_state */
inode->i_state = I_FREEING | I_CLEAR;
}
-EXPORT_SYMBOL(clear_inode);
+EXPORT_SYMBOL_NS(clear_inode, ANDROID_GKI_VFS_EXPORT_ONLY);
/*
* Free the inode passed in, removing it from the lists it is still connected
@@ -983,7 +1000,7 @@
wake_up_bit(&inode->i_state, __I_NEW);
spin_unlock(&inode->i_lock);
}
-EXPORT_SYMBOL(unlock_new_inode);
+EXPORT_SYMBOL_NS(unlock_new_inode, ANDROID_GKI_VFS_EXPORT_ONLY);
void discard_new_inode(struct inode *inode)
{
@@ -997,6 +1014,48 @@
iput(inode);
}
EXPORT_SYMBOL(discard_new_inode);
+
+/**
+ * lock_two_inodes - lock two inodes (may be regular files but also dirs)
+ *
+ * Lock any non-NULL argument. The caller must make sure that if he is passing
+ * in two directories, one is not ancestor of the other. Zero, one or two
+ * objects may be locked by this function.
+ *
+ * @inode1: first inode to lock
+ * @inode2: second inode to lock
+ * @subclass1: inode lock subclass for the first lock obtained
+ * @subclass2: inode lock subclass for the second lock obtained
+ */
+void lock_two_inodes(struct inode *inode1, struct inode *inode2,
+ unsigned subclass1, unsigned subclass2)
+{
+ if (!inode1 || !inode2) {
+ /*
+ * Make sure @subclass1 will be used for the acquired lock.
+ * This is not strictly necessary (no current caller cares) but
+ * let's keep things consistent.
+ */
+ if (!inode1)
+ swap(inode1, inode2);
+ goto lock;
+ }
+
+ /*
+ * If one object is directory and the other is not, we must make sure
+ * to lock directory first as the other object may be its child.
+ */
+ if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
+ if (inode1 > inode2)
+ swap(inode1, inode2);
+ } else if (!S_ISDIR(inode1->i_mode))
+ swap(inode1, inode2);
+lock:
+ if (inode1)
+ inode_lock_nested(inode1, subclass1);
+ if (inode2 && inode2 != inode1)
+ inode_lock_nested(inode2, subclass2);
+}
/**
* lock_two_nondirectories - take two i_mutexes on non-directory objects
@@ -1091,7 +1150,7 @@
*/
spin_lock(&inode->i_lock);
inode->i_state |= I_NEW;
- hlist_add_head(&inode->i_hash, head);
+ hlist_add_head_rcu(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
if (!creating)
inode_sb_list_add(inode);
@@ -1140,7 +1199,7 @@
}
return inode;
}
-EXPORT_SYMBOL(iget5_locked);
+EXPORT_SYMBOL_NS(iget5_locked, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* iget_locked - obtain an inode from a mounted file system
@@ -1185,7 +1244,7 @@
inode->i_ino = ino;
spin_lock(&inode->i_lock);
inode->i_state = I_NEW;
- hlist_add_head(&inode->i_hash, head);
+ hlist_add_head_rcu(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
inode_sb_list_add(inode);
spin_unlock(&inode_hash_lock);
@@ -1228,15 +1287,10 @@
struct hlist_head *b = inode_hashtable + hash(sb, ino);
struct inode *inode;
- spin_lock(&inode_hash_lock);
- hlist_for_each_entry(inode, b, i_hash) {
- if (inode->i_ino == ino && inode->i_sb == sb) {
- spin_unlock(&inode_hash_lock);
+ hlist_for_each_entry_rcu(inode, b, i_hash) {
+ if (inode->i_ino == ino && inode->i_sb == sb)
return 0;
- }
}
- spin_unlock(&inode_hash_lock);
-
return 1;
}
@@ -1265,6 +1319,7 @@
static unsigned int counter;
ino_t res;
+ rcu_read_lock();
spin_lock(&iunique_lock);
do {
if (counter <= max_reserved)
@@ -1272,10 +1327,11 @@
res = counter++;
} while (!test_inode_iunique(sb, res));
spin_unlock(&iunique_lock);
+ rcu_read_unlock();
return res;
}
-EXPORT_SYMBOL(iunique);
+EXPORT_SYMBOL_NS(iunique, ANDROID_GKI_VFS_EXPORT_ONLY);
struct inode *igrab(struct inode *inode)
{
@@ -1358,7 +1414,7 @@
}
return inode;
}
-EXPORT_SYMBOL(ilookup5);
+EXPORT_SYMBOL_NS(ilookup5, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* ilookup - search for an inode in the inode cache
@@ -1440,6 +1496,84 @@
}
EXPORT_SYMBOL(find_inode_nowait);
+/**
+ * find_inode_rcu - find an inode in the inode cache
+ * @sb: Super block of file system to search
+ * @hashval: Key to hash
+ * @test: Function to test match on an inode
+ * @data: Data for test function
+ *
+ * Search for the inode specified by @hashval and @data in the inode cache,
+ * where the helper function @test will return 0 if the inode does not match
+ * and 1 if it does. The @test function must be responsible for taking the
+ * i_lock spin_lock and checking i_state for an inode being freed or being
+ * initialized.
+ *
+ * If successful, this will return the inode for which the @test function
+ * returned 1 and NULL otherwise.
+ *
+ * The @test function is not permitted to take a ref on any inode presented.
+ * It is also not permitted to sleep.
+ *
+ * The caller must hold the RCU read lock.
+ */
+struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
+ int (*test)(struct inode *, void *), void *data)
+{
+ struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+ struct inode *inode;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
+ "suspicious find_inode_rcu() usage");
+
+ hlist_for_each_entry_rcu(inode, head, i_hash) {
+ if (inode->i_sb == sb &&
+ !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
+ test(inode, data))
+ return inode;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(find_inode_rcu);
+
+/**
+ * find_inode_by_rcu - Find an inode in the inode cache
+ * @sb: Super block of file system to search
+ * @ino: The inode number to match
+ *
+ * Search for the inode specified by @hashval and @data in the inode cache,
+ * where the helper function @test will return 0 if the inode does not match
+ * and 1 if it does. The @test function must be responsible for taking the
+ * i_lock spin_lock and checking i_state for an inode being freed or being
+ * initialized.
+ *
+ * If successful, this will return the inode for which the @test function
+ * returned 1 and NULL otherwise.
+ *
+ * The @test function is not permitted to take a ref on any inode presented.
+ * It is also not permitted to sleep.
+ *
+ * The caller must hold the RCU read lock.
+ */
+struct inode *find_inode_by_ino_rcu(struct super_block *sb,
+ unsigned long ino)
+{
+ struct hlist_head *head = inode_hashtable + hash(sb, ino);
+ struct inode *inode;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
+ "suspicious find_inode_by_ino_rcu() usage");
+
+ hlist_for_each_entry_rcu(inode, head, i_hash) {
+ if (inode->i_ino == ino &&
+ inode->i_sb == sb &&
+ !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
+ return inode;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(find_inode_by_ino_rcu);
+
int insert_inode_locked(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
@@ -1464,7 +1598,7 @@
if (likely(!old)) {
spin_lock(&inode->i_lock);
inode->i_state |= I_NEW | I_CREATING;
- hlist_add_head(&inode->i_hash, head);
+ hlist_add_head_rcu(&inode->i_hash, head);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock);
return 0;
@@ -1524,6 +1658,7 @@
{
struct super_block *sb = inode->i_sb;
const struct super_operations *op = inode->i_sb->s_op;
+ unsigned long state;
int drop;
WARN_ON(inode->i_state & I_NEW);
@@ -1533,22 +1668,28 @@
else
drop = generic_drop_inode(inode);
- if (!drop && (sb->s_flags & SB_ACTIVE)) {
+ if (!drop &&
+ !(inode->i_state & I_DONTCACHE) &&
+ (sb->s_flags & SB_ACTIVE)) {
inode_add_lru(inode);
spin_unlock(&inode->i_lock);
return;
}
+ state = inode->i_state;
if (!drop) {
- inode->i_state |= I_WILL_FREE;
+ WRITE_ONCE(inode->i_state, state | I_WILL_FREE);
spin_unlock(&inode->i_lock);
+
write_inode_now(inode, 1);
+
spin_lock(&inode->i_lock);
- WARN_ON(inode->i_state & I_NEW);
- inode->i_state &= ~I_WILL_FREE;
+ state = inode->i_state;
+ WARN_ON(state & I_NEW);
+ state &= ~I_WILL_FREE;
}
- inode->i_state |= I_FREEING;
+ WRITE_ONCE(inode->i_state, state | I_FREEING);
if (!list_empty(&inode->i_lru))
inode_lru_list_del(inode);
spin_unlock(&inode->i_lock);
@@ -1584,25 +1725,31 @@
}
EXPORT_SYMBOL(iput);
+#ifdef CONFIG_BLOCK
/**
* bmap - find a block number in a file
- * @inode: inode of file
- * @block: block to find
+ * @inode: inode owning the block number being requested
+ * @block: pointer containing the block to find
*
- * Returns the block number on the device holding the inode that
- * is the disk block number for the block of the file requested.
- * That is, asked for block 4 of inode 1 the function will return the
- * disk block relative to the disk start that holds that block of the
- * file.
+ * Replaces the value in ``*block`` with the block number on the device holding
+ * corresponding to the requested block number in the file.
+ * That is, asked for block 4 of inode 1 the function will replace the
+ * 4 in ``*block``, with disk block relative to the disk start that holds that
+ * block of the file.
+ *
+ * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
+ * hole, returns 0 and ``*block`` is also set to 0.
*/
-sector_t bmap(struct inode *inode, sector_t block)
+int bmap(struct inode *inode, sector_t *block)
{
- sector_t res = 0;
- if (inode->i_mapping->a_ops->bmap)
- res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
- return res;
+ if (!inode->i_mapping->a_ops->bmap)
+ return -EINVAL;
+
+ *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
+ return 0;
}
EXPORT_SYMBOL(bmap);
+#endif
/*
* With relative atime, only update atime if the previous atime is
@@ -1610,7 +1757,7 @@
* passed since the last atime update.
*/
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
- struct timespec now)
+ struct timespec64 now)
{
if (!(mnt->mnt_flags & MNT_RELATIME))
@@ -1666,15 +1813,13 @@
* This does the actual work of updating an inodes time or version. Must have
* had called mnt_want_write() before calling this.
*/
-static int update_time(struct inode *inode, struct timespec64 *time, int flags)
+int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
{
- int (*update_time)(struct inode *, struct timespec64 *, int);
-
- update_time = inode->i_op->update_time ? inode->i_op->update_time :
- generic_update_time;
-
- return update_time(inode, time, flags);
+ if (inode->i_op->update_time)
+ return inode->i_op->update_time(inode, time, flags);
+ return generic_update_time(inode, time, flags);
}
+EXPORT_SYMBOL(inode_update_time);
/**
* touch_atime - update the access time
@@ -1711,7 +1856,7 @@
now = current_time(inode);
- if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
+ if (!relatime_need_update(mnt, inode, now))
return false;
if (timespec64_equal(&inode->i_atime, &now))
@@ -1744,41 +1889,12 @@
* of the fs read only, e.g. subvolumes in Btrfs.
*/
now = current_time(inode);
- update_time(inode, &now, S_ATIME);
+ inode_update_time(inode, &now, S_ATIME);
__mnt_drop_write(mnt);
skip_update:
sb_end_write(inode->i_sb);
}
-EXPORT_SYMBOL(touch_atime);
-
-/*
- * The logic we want is
- *
- * if suid or (sgid and xgrp)
- * remove privs
- */
-int should_remove_suid(struct dentry *dentry)
-{
- umode_t mode = d_inode(dentry)->i_mode;
- int kill = 0;
-
- /* suid always must be killed */
- if (unlikely(mode & S_ISUID))
- kill = ATTR_KILL_SUID;
-
- /*
- * sgid without any exec bits is just a mandatory locking mark; leave
- * it alone. If some exec bits are set, it's a real sgid; kill it.
- */
- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
- kill |= ATTR_KILL_SGID;
-
- if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
- return kill;
-
- return 0;
-}
-EXPORT_SYMBOL(should_remove_suid);
+EXPORT_SYMBOL_NS(touch_atime, ANDROID_GKI_VFS_EXPORT_ONLY);
/*
* Return mask of changes for notify_change() that need to be done as a
@@ -1794,7 +1910,7 @@
if (IS_NOSEC(inode))
return 0;
- mask = should_remove_suid(dentry);
+ mask = setattr_should_drop_suidgid(inode);
ret = security_inode_need_killpriv(dentry);
if (ret < 0)
return ret;
@@ -1803,7 +1919,7 @@
return mask;
}
-static int __remove_privs(struct vfsmount *mnt, struct dentry *dentry, int kill)
+static int __remove_privs(struct dentry *dentry, int kill)
{
struct iattr newattrs;
@@ -1812,7 +1928,7 @@
* Note we call this on write, so notify_change will not
* encounter any conflicting delegations:
*/
- return notify_change2(mnt, dentry, &newattrs, NULL);
+ return notify_change(dentry, &newattrs, NULL);
}
/*
@@ -1839,13 +1955,13 @@
if (kill < 0)
return kill;
if (kill)
- error = __remove_privs(file->f_path.mnt, dentry, kill);
+ error = __remove_privs(dentry, kill);
if (!error)
inode_has_no_xattr(inode);
return error;
}
-EXPORT_SYMBOL(file_remove_privs);
+EXPORT_SYMBOL_NS(file_remove_privs, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* file_update_time - update mtime and ctime time
@@ -1888,12 +2004,32 @@
if (__mnt_want_write_file(file))
return 0;
- ret = update_time(inode, &now, sync_it);
+ ret = inode_update_time(inode, &now, sync_it);
__mnt_drop_write_file(file);
return ret;
}
EXPORT_SYMBOL(file_update_time);
+
+/* Caller must hold the file's inode lock */
+int file_modified(struct file *file)
+{
+ int err;
+
+ /*
+ * Clear the security bits if the process is not being run by root.
+ * This keeps people from modifying setuid and setgid binaries.
+ */
+ err = file_remove_privs(file);
+ if (err)
+ return err;
+
+ if (unlikely(file->f_mode & FMODE_NOCMTIME))
+ return 0;
+
+ return file_update_time(file);
+}
+EXPORT_SYMBOL(file_modified);
int inode_needs_sync(struct inode *inode)
{
@@ -2006,7 +2142,7 @@
" inode %s:%lu\n", mode, inode->i_sb->s_id,
inode->i_ino);
}
-EXPORT_SYMBOL(init_special_inode);
+EXPORT_SYMBOL_NS(init_special_inode, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* inode_init_owner - Init uid,gid,mode for new inode according to posix standards
@@ -2024,15 +2160,11 @@
/* Directories are special, and always inherit S_ISGID */
if (S_ISDIR(mode))
mode |= S_ISGID;
- else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
- !in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(dir, CAP_FSETID))
- mode &= ~S_ISGID;
} else
inode->i_gid = current_fsgid();
inode->i_mode = mode;
}
-EXPORT_SYMBOL(inode_init_owner);
+EXPORT_SYMBOL_NS(inode_init_owner, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* inode_owner_or_capable - check current task permissions to inode
@@ -2086,7 +2218,7 @@
if (atomic_read(&inode->i_dio_count))
__inode_dio_wait(inode);
}
-EXPORT_SYMBOL(inode_dio_wait);
+EXPORT_SYMBOL_NS(inode_dio_wait, ANDROID_GKI_VFS_EXPORT_ONLY);
/*
* inode_set_flags - atomically set some inode flags
@@ -2107,16 +2239,10 @@
void inode_set_flags(struct inode *inode, unsigned int flags,
unsigned int mask)
{
- unsigned int old_flags, new_flags;
-
WARN_ON_ONCE(flags & ~mask);
- do {
- old_flags = READ_ONCE(inode->i_flags);
- new_flags = (old_flags & ~mask) | flags;
- } while (unlikely(cmpxchg(&inode->i_flags, old_flags,
- new_flags) != old_flags));
+ set_mask_bits(&inode->i_flags, mask, flags);
}
-EXPORT_SYMBOL(inode_set_flags);
+EXPORT_SYMBOL_NS(inode_set_flags, ANDROID_GKI_VFS_EXPORT_ONLY);
void inode_nohighmem(struct inode *inode)
{
@@ -2125,28 +2251,35 @@
EXPORT_SYMBOL(inode_nohighmem);
/**
- * timespec64_trunc - Truncate timespec64 to a granularity
- * @t: Timespec64
- * @gran: Granularity in ns.
+ * timestamp_truncate - Truncate timespec to a granularity
+ * @t: Timespec
+ * @inode: inode being updated
*
- * Truncate a timespec64 to a granularity. Always rounds down. gran must
+ * Truncate a timespec to the granularity supported by the fs
+ * containing the inode. Always rounds down. gran must
* not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
*/
-struct timespec64 timespec64_trunc(struct timespec64 t, unsigned gran)
+struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
{
- /* Avoid division in the common cases 1 ns and 1 s. */
- if (gran == 1) {
- /* nothing */
- } else if (gran == NSEC_PER_SEC) {
+ struct super_block *sb = inode->i_sb;
+ unsigned int gran = sb->s_time_gran;
+
+ t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
+ if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
t.tv_nsec = 0;
- } else if (gran > 1 && gran < NSEC_PER_SEC) {
+
+ /* Avoid division in the common cases 1 ns and 1 s. */
+ if (gran == 1)
+ ; /* nothing */
+ else if (gran == NSEC_PER_SEC)
+ t.tv_nsec = 0;
+ else if (gran > 1 && gran < NSEC_PER_SEC)
t.tv_nsec -= t.tv_nsec % gran;
- } else {
- WARN(1, "illegal file time granularity: %u", gran);
- }
+ else
+ WARN(1, "invalid file time granularity: %u", gran);
return t;
}
-EXPORT_SYMBOL(timespec64_trunc);
+EXPORT_SYMBOL_NS(timestamp_truncate, ANDROID_GKI_VFS_EXPORT_ONLY);
/**
* current_time - Return FS time
@@ -2160,14 +2293,16 @@
*/
struct timespec64 current_time(struct inode *inode)
{
- struct timespec64 now = current_kernel_time64();
+ struct timespec64 now;
+
+ ktime_get_coarse_real_ts64(&now);
if (unlikely(!inode->i_sb)) {
WARN(1, "current_time() called with uninitialized super_block in the inode");
return now;
}
- return timespec64_trunc(now, inode->i_sb->s_time_gran);
+ return timestamp_truncate(now, inode);
}
EXPORT_SYMBOL(current_time);
@@ -2256,3 +2391,48 @@
return 0;
}
EXPORT_SYMBOL(vfs_ioc_fssetxattr_check);
+
+/**
+ * in_group_or_capable - check whether caller is CAP_FSETID privileged
+ * @inode: inode to check
+ * @gid: the new/current gid of @inode
+ *
+ * Check wether @gid is in the caller's group list or if the caller is
+ * privileged with CAP_FSETID over @inode. This can be used to determine
+ * whether the setgid bit can be kept or must be dropped.
+ *
+ * Return: true if the caller is sufficiently privileged, false if not.
+ */
+bool in_group_or_capable(const struct inode *inode, kgid_t gid)
+{
+ if (in_group_p(gid))
+ return true;
+ if (capable_wrt_inode_uidgid(inode, CAP_FSETID))
+ return true;
+ return false;
+}
+
+/**
+ * mode_strip_sgid - handle the sgid bit for non-directories
+ * @dir: parent directory inode
+ * @mode: mode of the file to be created in @dir
+ *
+ * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
+ * raised and @dir has the S_ISGID bit raised ensure that the caller is
+ * either in the group of the parent directory or they have CAP_FSETID
+ * in their user namespace and are privileged over the parent directory.
+ * In all other cases, strip the S_ISGID bit from @mode.
+ *
+ * Return: the new mode to use for the file
+ */
+umode_t mode_strip_sgid(const struct inode *dir, umode_t mode)
+{
+ if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
+ return mode;
+ if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
+ return mode;
+ if (in_group_or_capable(dir, dir->i_gid))
+ return mode;
+ return mode & ~S_ISGID;
+}
+EXPORT_SYMBOL(mode_strip_sgid);
--
Gitblit v1.6.2