From 2f7c68cb55ecb7331f2381deb497c27155f32faf Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 03 Jan 2024 09:43:39 +0000 Subject: [PATCH] update kernel to 5.10.198 --- kernel/fs/kernfs/dir.c | 182 +++++++++++++++++++++++++-------------------- 1 files changed, 100 insertions(+), 82 deletions(-) diff --git a/kernel/fs/kernfs/dir.c b/kernel/fs/kernfs/dir.c index a4a538a..c91ee05 100644 --- a/kernel/fs/kernfs/dir.c +++ b/kernel/fs/kernfs/dir.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/dir.c - kernfs directory implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> - * - * This file is released under the GPLv2. */ #include <linux/sched.h> @@ -20,7 +19,15 @@ DEFINE_MUTEX(kernfs_mutex); static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ -static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ +/* + * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to + * call pr_cont() while holding rename_lock. Because sometimes pr_cont() + * will perform wakeups when releasing console_sem. Holding rename_lock + * will introduce deadlock if the scheduler reads the kernfs_name in the + * wakeup path. + */ +static DEFINE_SPINLOCK(kernfs_pr_cont_lock); +static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) @@ -138,6 +145,9 @@ if (kn_from == kn_to) return strlcpy(buf, "/", buflen); + if (!buf) + return -EINVAL; + common = kernfs_common_ancestor(kn_from, kn_to); if (WARN_ON(!common)) return -EINVAL; @@ -145,8 +155,7 @@ depth_to = kernfs_depth(common, kn_to); depth_from = kernfs_depth(common, kn_from); - if (buf) - buf[0] = '\0'; + buf[0] = '\0'; for (i = 0; i < depth_from; i++) len += strlcpy(buf + len, parent_str, @@ -229,12 +238,12 @@ { unsigned long flags; - spin_lock_irqsave(&kernfs_rename_lock, flags); + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); + kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); pr_cont("%s", kernfs_pr_cont_buf); - spin_unlock_irqrestore(&kernfs_rename_lock, flags); + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** @@ -248,10 +257,10 @@ unsigned long flags; int sz; - spin_lock_irqsave(&kernfs_rename_lock, flags); + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); - sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf, - sizeof(kernfs_pr_cont_buf)); + sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, + sizeof(kernfs_pr_cont_buf)); if (sz < 0) { pr_cont("(error)"); goto out; @@ -265,7 +274,7 @@ pr_cont("%s", kernfs_pr_cont_buf); out: - spin_unlock_irqrestore(&kernfs_rename_lock, flags); + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); } /** @@ -431,19 +440,18 @@ */ void kernfs_put_active(struct kernfs_node *kn) { - struct kernfs_root *root = kernfs_root(kn); int v; if (unlikely(!kn)) return; if (kernfs_lockdep(kn)) - rwsem_release(&kn->dep_map, 1, _RET_IP_); + rwsem_release(&kn->dep_map, _RET_IP_); v = atomic_dec_return(&kn->active); if (likely(v != KN_DEACTIVATED_BIAS)) return; - wake_up_all(&root->deactivate_waitq); + wake_up_all(&kernfs_root(kn)->deactivate_waitq); } /** @@ -476,7 +484,7 @@ if (kernfs_lockdep(kn)) { lock_acquired(&kn->dep_map, _RET_IP_); - rwsem_release(&kn->dep_map, 1, _RET_IP_); + rwsem_release(&kn->dep_map, 
_RET_IP_); } kernfs_drain_open_files(kn); @@ -508,10 +516,6 @@ struct kernfs_node *parent; struct kernfs_root *root; - /* - * kernfs_node is freed with ->count 0, kernfs_find_and_get_node_by_ino - * depends on this to filter reused stale node - */ if (!kn || !atomic_dec_and_test(&kn->count)) return; root = kernfs_root(kn); @@ -532,14 +536,11 @@ kfree_const(kn->name); if (kn->iattr) { - if (kn->iattr->ia_secdata) - security_release_secctx(kn->iattr->ia_secdata, - kn->iattr->ia_secdata_len); simple_xattrs_free(&kn->iattr->xattrs); + kmem_cache_free(kernfs_iattrs_cache, kn->iattr); } - kfree(kn->iattr); spin_lock(&kernfs_idr_lock); - idr_remove(&root->ino_idr, kn->id.ino); + idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); spin_unlock(&kernfs_idr_lock); kmem_cache_free(kernfs_node_cache, kn); @@ -618,12 +619,13 @@ } static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, + struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, unsigned flags) { struct kernfs_node *kn; - u32 gen; + u32 id_highbits; int ret; name = kstrdup_const(name, GFP_KERNEL); @@ -637,22 +639,18 @@ idr_preload(GFP_KERNEL); spin_lock(&kernfs_idr_lock); ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC); - if (ret >= 0 && ret < root->last_ino) - root->next_generation++; - gen = root->next_generation; - root->last_ino = ret; + if (ret >= 0 && ret < root->last_id_lowbits) + root->id_highbits++; + id_highbits = root->id_highbits; + root->last_id_lowbits = ret; spin_unlock(&kernfs_idr_lock); idr_preload_end(); if (ret < 0) goto err_out2; - kn->id.ino = ret; - kn->id.generation = gen; - /* - * set ino first. This RELEASE is paired with atomic_inc_not_zero in - * kernfs_find_and_get_node_by_ino - */ - atomic_set_release(&kn->count, 1); + kn->id = (u64)id_highbits << 32 | ret; + + atomic_set(&kn->count, 1); atomic_set(&kn->active, KN_DEACTIVATED_BIAS); RB_CLEAR_NODE(&kn->rb); @@ -672,10 +670,18 @@ goto err_out3; } + if (parent) { + ret = security_kernfs_init_security(parent, kn); + if (ret) + goto err_out3; + } + return kn; err_out3: - idr_remove(&root->ino_idr, kn->id.ino); + spin_lock(&kernfs_idr_lock); + idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); + spin_unlock(&kernfs_idr_lock); err_out2: kmem_cache_free(kernfs_node_cache, kn); err_out1: @@ -690,7 +696,7 @@ { struct kernfs_node *kn; - kn = __kernfs_new_node(kernfs_root(parent), + kn = __kernfs_new_node(kernfs_root(parent), parent, name, mode, uid, gid, flags); if (kn) { kernfs_get(parent); @@ -700,50 +706,52 @@ } /* - * kernfs_find_and_get_node_by_ino - get kernfs_node from inode number + * kernfs_find_and_get_node_by_id - get kernfs_node from node id * @root: the kernfs root - * @ino: inode number + * @id: the target node id + * + * @id's lower 32bits encode ino and upper gen. If the gen portion is + * zero, all generations are matched. * * RETURNS: * NULL on failure. Return a kernfs node with reference counter incremented */ -struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root, - unsigned int ino) +struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, + u64 id) { struct kernfs_node *kn; + ino_t ino = kernfs_id_ino(id); + u32 gen = kernfs_id_gen(id); - rcu_read_lock(); - kn = idr_find(&root->ino_idr, ino); + spin_lock(&kernfs_idr_lock); + + kn = idr_find(&root->ino_idr, (u32)ino); if (!kn) - goto out; + goto err_unlock; - /* - * Since kernfs_node is freed in RCU, it's possible an old node for ino - * is freed, but reused before RCU grace period. 
But a freed node (see - * kernfs_put) or an incompletedly initialized node (see - * __kernfs_new_node) should have 'count' 0. We can use this fact to - * filter out such node. - */ - if (!atomic_inc_not_zero(&kn->count)) { - kn = NULL; - goto out; + if (sizeof(ino_t) >= sizeof(u64)) { + /* we looked up with the low 32bits, compare the whole */ + if (kernfs_ino(kn) != ino) + goto err_unlock; + } else { + /* 0 matches all generations */ + if (unlikely(gen && kernfs_gen(kn) != gen)) + goto err_unlock; } /* - * The node could be a new node or a reused node. If it's a new node, - * we are ok. If it's reused because of RCU (because of - * SLAB_TYPESAFE_BY_RCU), the __kernfs_new_node always sets its 'ino' - * before 'count'. So if 'count' is uptodate, 'ino' should be uptodate, - * hence we can use 'ino' to filter stale node. + * ACTIVATED is protected with kernfs_mutex but it was clear when + * @kn was added to idr and we just wanna see it set. No need to + * grab kernfs_mutex. */ - if (kn->id.ino != ino) - goto out; - rcu_read_unlock(); + if (unlikely(!(kn->flags & KERNFS_ACTIVATED) || + !atomic_inc_not_zero(&kn->count))) + goto err_unlock; + spin_unlock(&kernfs_idr_lock); return kn; -out: - rcu_read_unlock(); - kernfs_put(kn); +err_unlock: + spin_unlock(&kernfs_idr_lock); return NULL; } @@ -793,9 +801,8 @@ /* Update timestamps on the parent */ ps_iattr = parent->iattr; if (ps_iattr) { - struct iattr *ps_iattrs = &ps_iattr->ia_iattr; - ktime_get_real_ts64(&ps_iattrs->ia_ctime); - ps_iattrs->ia_mtime = ps_iattrs->ia_ctime; + ktime_get_real_ts64(&ps_iattr->ia_ctime); + ps_iattr->ia_mtime = ps_iattr->ia_ctime; } mutex_unlock(&kernfs_mutex); @@ -867,13 +874,12 @@ lockdep_assert_held(&kernfs_mutex); - /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */ - spin_lock_irq(&kernfs_rename_lock); + spin_lock_irq(&kernfs_pr_cont_lock); len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); if (len >= sizeof(kernfs_pr_cont_buf)) { - spin_unlock_irq(&kernfs_rename_lock); + spin_unlock_irq(&kernfs_pr_cont_lock); return NULL; } @@ -885,7 +891,7 @@ parent = kernfs_find_ns(parent, name, ns); } - spin_unlock_irq(&kernfs_rename_lock); + spin_unlock_irq(&kernfs_pr_cont_lock); return parent; } @@ -958,9 +964,19 @@ idr_init(&root->ino_idr); INIT_LIST_HEAD(&root->supers); - root->next_generation = 1; - kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, + /* + * On 64bit ino setups, id is ino. On 32bit, low 32bits are ino. + * High bits generation. The starting value for both ino and + * genenration is 1. Initialize upper 32bit allocation + * accordingly. 
+ */ + if (sizeof(ino_t) >= sizeof(u64)) + root->id_highbits = 0; + else + root->id_highbits = 1; + + kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); if (!kn) { @@ -1259,7 +1275,7 @@ pos = NULL; while ((pos = kernfs_next_descendant_post(pos, kn))) { - if (!pos || (pos->flags & KERNFS_ACTIVATED)) + if (pos->flags & KERNFS_ACTIVATED) continue; WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb)); @@ -1327,9 +1343,8 @@ /* update timestamps on the parent */ if (ps_iattr) { - ktime_get_real_ts64(&ps_iattr->ia_iattr.ia_ctime); - ps_iattr->ia_iattr.ia_mtime = - ps_iattr->ia_iattr.ia_ctime; + ktime_get_real_ts64(&ps_iattr->ia_ctime); + ps_iattr->ia_mtime = ps_iattr->ia_ctime; } kernfs_put(pos); @@ -1506,8 +1521,11 @@ mutex_lock(&kernfs_mutex); kn = kernfs_find_ns(parent, name, ns); - if (kn) + if (kn) { + kernfs_get(kn); __kernfs_remove(kn); + kernfs_put(kn); + } mutex_unlock(&kernfs_mutex); @@ -1675,7 +1693,7 @@ const char *name = pos->name; unsigned int type = dt_type(pos); int len = strlen(name); - ino_t ino = pos->id.ino; + ino_t ino = kernfs_ino(pos); ctx->pos = pos->hash; file->private_data = pos; -- Gitblit v1.6.2
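
Editor's note (not part of the patch above): the core of this backport is the move from a split {ino, generation} pair to a single 64-bit node id, with the ino in the low 32 bits and the generation in the high 32 bits; the patch's comment in kernfs_find_and_get_node_by_id() states that a generation of zero matches any generation. The standalone C sketch below illustrates that packing and the lookup rule. The helper names only mimic the kernel's kernfs_id_ino()/kernfs_id_gen(); this is a user-space approximation for the 32-bit ino_t layout, not the in-tree implementation.

/*
 * Hedged sketch: 64-bit kernfs-style node id = (generation << 32) | ino,
 * plus the "gen == 0 matches any generation" lookup rule. User-space
 * approximation only; not taken from the kernel tree.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t make_id(uint32_t gen, uint32_t ino)
{
	/* mirrors kn->id = (u64)id_highbits << 32 | ret in the patch */
	return (uint64_t)gen << 32 | ino;
}

static uint32_t id_ino(uint64_t id) { return (uint32_t)id; }
static uint32_t id_gen(uint64_t id) { return (uint32_t)(id >> 32); }

/* lookup check: a requested gen of 0 acts as a wildcard for the ino */
static bool id_matches(uint64_t requested, uint32_t node_gen, uint32_t node_ino)
{
	if (id_ino(requested) != node_ino)
		return false;
	return id_gen(requested) == 0 || id_gen(requested) == node_gen;
}

int main(void)
{
	uint64_t id = make_id(3, 42);	/* generation 3, inode 42 */

	printf("id=%#llx ino=%u gen=%u\n",
	       (unsigned long long)id, id_ino(id), id_gen(id));
	printf("exact=%d wildcard=%d stale=%d\n",
	       id_matches(id, 3, 42),
	       id_matches(make_id(0, 42), 3, 42),
	       id_matches(make_id(2, 42), 3, 42));
	return 0;
}

On configurations where ino_t is 64-bit the patch keeps root->id_highbits at 0, so the id is effectively the ino itself; the sketch above only covers the 32-bit ino_t case where the generation occupies the high half.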