hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/fs/kernfs/dir.c
....@@ -1,11 +1,10 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * fs/kernfs/dir.c - kernfs directory implementation
34 *
45 * Copyright (c) 2001-3 Patrick Mochel
56 * Copyright (c) 2007 SUSE Linux Products GmbH
67 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
7
- *
8
- * This file is released under the GPLv2.
98 */
109
1110 #include <linux/sched.h>
....@@ -20,7 +19,15 @@
2019
2120 DEFINE_MUTEX(kernfs_mutex);
2221 static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
23
-static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
22
+/*
23
+ * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to
24
+ * call pr_cont() while holding rename_lock, because sometimes pr_cont()
25
+ * will perform wakeups when releasing console_sem. Holding rename_lock
26
+ * will introduce deadlock if the scheduler reads the kernfs_name in the
27
+ * wakeup path.
28
+ */
29
+static DEFINE_SPINLOCK(kernfs_pr_cont_lock);
30
+static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */
2431 static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
2532
2633 #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
....@@ -138,6 +145,9 @@
138145 if (kn_from == kn_to)
139146 return strlcpy(buf, "/", buflen);
140147
148
+ if (!buf)
149
+ return -EINVAL;
150
+
141151 common = kernfs_common_ancestor(kn_from, kn_to);
142152 if (WARN_ON(!common))
143153 return -EINVAL;
....@@ -145,8 +155,7 @@
145155 depth_to = kernfs_depth(common, kn_to);
146156 depth_from = kernfs_depth(common, kn_from);
147157
148
- if (buf)
149
- buf[0] = '\0';
158
+ buf[0] = '\0';
150159
151160 for (i = 0; i < depth_from; i++)
152161 len += strlcpy(buf + len, parent_str,
....@@ -229,12 +238,12 @@
229238 {
230239 unsigned long flags;
231240
232
- spin_lock_irqsave(&kernfs_rename_lock, flags);
241
+ spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
233242
234
- kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
243
+ kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
235244 pr_cont("%s", kernfs_pr_cont_buf);
236245
237
- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
246
+ spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
238247 }
239248
240249 /**
....@@ -248,10 +257,10 @@
248257 unsigned long flags;
249258 int sz;
250259
251
- spin_lock_irqsave(&kernfs_rename_lock, flags);
260
+ spin_lock_irqsave(&kernfs_pr_cont_lock, flags);
252261
253
- sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
254
- sizeof(kernfs_pr_cont_buf));
262
+ sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf,
263
+ sizeof(kernfs_pr_cont_buf));
255264 if (sz < 0) {
256265 pr_cont("(error)");
257266 goto out;
....@@ -265,7 +274,7 @@
265274 pr_cont("%s", kernfs_pr_cont_buf);
266275
267276 out:
268
- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
277
+ spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
269278 }
270279
271280 /**
....@@ -431,19 +440,18 @@
431440 */
432441 void kernfs_put_active(struct kernfs_node *kn)
433442 {
434
- struct kernfs_root *root = kernfs_root(kn);
435443 int v;
436444
437445 if (unlikely(!kn))
438446 return;
439447
440448 if (kernfs_lockdep(kn))
441
- rwsem_release(&kn->dep_map, 1, _RET_IP_);
449
+ rwsem_release(&kn->dep_map, _RET_IP_);
442450 v = atomic_dec_return(&kn->active);
443451 if (likely(v != KN_DEACTIVATED_BIAS))
444452 return;
445453
446
- wake_up_all(&root->deactivate_waitq);
454
+ wake_up_all(&kernfs_root(kn)->deactivate_waitq);
447455 }
448456
449457 /**
....@@ -476,7 +484,7 @@
476484
477485 if (kernfs_lockdep(kn)) {
478486 lock_acquired(&kn->dep_map, _RET_IP_);
479
- rwsem_release(&kn->dep_map, 1, _RET_IP_);
487
+ rwsem_release(&kn->dep_map, _RET_IP_);
480488 }
481489
482490 kernfs_drain_open_files(kn);
....@@ -508,10 +516,6 @@
508516 struct kernfs_node *parent;
509517 struct kernfs_root *root;
510518
511
- /*
512
- * kernfs_node is freed with ->count 0, kernfs_find_and_get_node_by_ino
513
- * depends on this to filter reused stale node
514
- */
515519 if (!kn || !atomic_dec_and_test(&kn->count))
516520 return;
517521 root = kernfs_root(kn);
....@@ -532,14 +536,11 @@
532536 kfree_const(kn->name);
533537
534538 if (kn->iattr) {
535
- if (kn->iattr->ia_secdata)
536
- security_release_secctx(kn->iattr->ia_secdata,
537
- kn->iattr->ia_secdata_len);
538539 simple_xattrs_free(&kn->iattr->xattrs);
540
+ kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
539541 }
540
- kfree(kn->iattr);
541542 spin_lock(&kernfs_idr_lock);
542
- idr_remove(&root->ino_idr, kn->id.ino);
543
+ idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
543544 spin_unlock(&kernfs_idr_lock);
544545 kmem_cache_free(kernfs_node_cache, kn);
545546
....@@ -618,12 +619,13 @@
618619 }
619620
620621 static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
622
+ struct kernfs_node *parent,
621623 const char *name, umode_t mode,
622624 kuid_t uid, kgid_t gid,
623625 unsigned flags)
624626 {
625627 struct kernfs_node *kn;
626
- u32 gen;
628
+ u32 id_highbits;
627629 int ret;
628630
629631 name = kstrdup_const(name, GFP_KERNEL);
....@@ -637,22 +639,18 @@
637639 idr_preload(GFP_KERNEL);
638640 spin_lock(&kernfs_idr_lock);
639641 ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
640
- if (ret >= 0 && ret < root->last_ino)
641
- root->next_generation++;
642
- gen = root->next_generation;
643
- root->last_ino = ret;
642
+ if (ret >= 0 && ret < root->last_id_lowbits)
643
+ root->id_highbits++;
644
+ id_highbits = root->id_highbits;
645
+ root->last_id_lowbits = ret;
644646 spin_unlock(&kernfs_idr_lock);
645647 idr_preload_end();
646648 if (ret < 0)
647649 goto err_out2;
648
- kn->id.ino = ret;
649
- kn->id.generation = gen;
650650
651
- /*
652
- * set ino first. This RELEASE is paired with atomic_inc_not_zero in
653
- * kernfs_find_and_get_node_by_ino
654
- */
655
- atomic_set_release(&kn->count, 1);
651
+ kn->id = (u64)id_highbits << 32 | ret;
652
+
653
+ atomic_set(&kn->count, 1);
656654 atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
657655 RB_CLEAR_NODE(&kn->rb);
658656
....@@ -672,10 +670,18 @@
672670 goto err_out3;
673671 }
674672
673
+ if (parent) {
674
+ ret = security_kernfs_init_security(parent, kn);
675
+ if (ret)
676
+ goto err_out3;
677
+ }
678
+
675679 return kn;
676680
677681 err_out3:
678
- idr_remove(&root->ino_idr, kn->id.ino);
682
+ spin_lock(&kernfs_idr_lock);
683
+ idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
684
+ spin_unlock(&kernfs_idr_lock);
679685 err_out2:
680686 kmem_cache_free(kernfs_node_cache, kn);
681687 err_out1:
....@@ -690,7 +696,7 @@
690696 {
691697 struct kernfs_node *kn;
692698
693
- kn = __kernfs_new_node(kernfs_root(parent),
699
+ kn = __kernfs_new_node(kernfs_root(parent), parent,
694700 name, mode, uid, gid, flags);
695701 if (kn) {
696702 kernfs_get(parent);
....@@ -700,50 +706,52 @@
700706 }
701707
702708 /*
703
- * kernfs_find_and_get_node_by_ino - get kernfs_node from inode number
709
+ * kernfs_find_and_get_node_by_id - get kernfs_node from node id
704710 * @root: the kernfs root
705
- * @ino: inode number
711
+ * @id: the target node id
712
+ *
713
+ * @id's lower 32 bits encode ino and the upper 32 bits gen. If the gen portion is
714
+ * zero, all generations are matched.
706715 *
707716 * RETURNS:
708717 * NULL on failure. Return a kernfs node with reference counter incremented
709718 */
710
-struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
711
- unsigned int ino)
719
+struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
720
+ u64 id)
712721 {
713722 struct kernfs_node *kn;
723
+ ino_t ino = kernfs_id_ino(id);
724
+ u32 gen = kernfs_id_gen(id);
714725
715
- rcu_read_lock();
716
- kn = idr_find(&root->ino_idr, ino);
726
+ spin_lock(&kernfs_idr_lock);
727
+
728
+ kn = idr_find(&root->ino_idr, (u32)ino);
717729 if (!kn)
718
- goto out;
730
+ goto err_unlock;
719731
720
- /*
721
- * Since kernfs_node is freed in RCU, it's possible an old node for ino
722
- * is freed, but reused before RCU grace period. But a freed node (see
723
- * kernfs_put) or an incompletedly initialized node (see
724
- * __kernfs_new_node) should have 'count' 0. We can use this fact to
725
- * filter out such node.
726
- */
727
- if (!atomic_inc_not_zero(&kn->count)) {
728
- kn = NULL;
729
- goto out;
732
+ if (sizeof(ino_t) >= sizeof(u64)) {
733
+ /* we looked up with the low 32bits, compare the whole */
734
+ if (kernfs_ino(kn) != ino)
735
+ goto err_unlock;
736
+ } else {
737
+ /* 0 matches all generations */
738
+ if (unlikely(gen && kernfs_gen(kn) != gen))
739
+ goto err_unlock;
730740 }
731741
732742 /*
733
- * The node could be a new node or a reused node. If it's a new node,
734
- * we are ok. If it's reused because of RCU (because of
735
- * SLAB_TYPESAFE_BY_RCU), the __kernfs_new_node always sets its 'ino'
736
- * before 'count'. So if 'count' is uptodate, 'ino' should be uptodate,
737
- * hence we can use 'ino' to filter stale node.
743
+ * ACTIVATED is protected with kernfs_mutex but it was clear when
744
+ * @kn was added to idr and we just want to see it set. No need to
745
+ * grab kernfs_mutex.
738746 */
739
- if (kn->id.ino != ino)
740
- goto out;
741
- rcu_read_unlock();
747
+ if (unlikely(!(kn->flags & KERNFS_ACTIVATED) ||
748
+ !atomic_inc_not_zero(&kn->count)))
749
+ goto err_unlock;
742750
751
+ spin_unlock(&kernfs_idr_lock);
743752 return kn;
744
-out:
745
- rcu_read_unlock();
746
- kernfs_put(kn);
753
+err_unlock:
754
+ spin_unlock(&kernfs_idr_lock);
747755 return NULL;
748756 }
749757
....@@ -793,9 +801,8 @@
793801 /* Update timestamps on the parent */
794802 ps_iattr = parent->iattr;
795803 if (ps_iattr) {
796
- struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
797
- ktime_get_real_ts64(&ps_iattrs->ia_ctime);
798
- ps_iattrs->ia_mtime = ps_iattrs->ia_ctime;
804
+ ktime_get_real_ts64(&ps_iattr->ia_ctime);
805
+ ps_iattr->ia_mtime = ps_iattr->ia_ctime;
799806 }
800807
801808 mutex_unlock(&kernfs_mutex);
....@@ -867,13 +874,12 @@
867874
868875 lockdep_assert_held(&kernfs_mutex);
869876
870
- /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
871
- spin_lock_irq(&kernfs_rename_lock);
877
+ spin_lock_irq(&kernfs_pr_cont_lock);
872878
873879 len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
874880
875881 if (len >= sizeof(kernfs_pr_cont_buf)) {
876
- spin_unlock_irq(&kernfs_rename_lock);
882
+ spin_unlock_irq(&kernfs_pr_cont_lock);
877883 return NULL;
878884 }
879885
....@@ -885,7 +891,7 @@
885891 parent = kernfs_find_ns(parent, name, ns);
886892 }
887893
888
- spin_unlock_irq(&kernfs_rename_lock);
894
+ spin_unlock_irq(&kernfs_pr_cont_lock);
889895
890896 return parent;
891897 }
....@@ -958,9 +964,19 @@
958964
959965 idr_init(&root->ino_idr);
960966 INIT_LIST_HEAD(&root->supers);
961
- root->next_generation = 1;
962967
963
- kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
968
+ /*
969
+ * On 64bit ino setups, id is ino. On 32bit, low 32bits are ino.
970
+ * High bits are the generation. The starting value for both ino and
971
+ * generation is 1. Initialize upper 32bit allocation
972
+ * accordingly.
973
+ */
974
+ if (sizeof(ino_t) >= sizeof(u64))
975
+ root->id_highbits = 0;
976
+ else
977
+ root->id_highbits = 1;
978
+
979
+ kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO,
964980 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
965981 KERNFS_DIR);
966982 if (!kn) {
....@@ -1259,7 +1275,7 @@
12591275
12601276 pos = NULL;
12611277 while ((pos = kernfs_next_descendant_post(pos, kn))) {
1262
- if (!pos || (pos->flags & KERNFS_ACTIVATED))
1278
+ if (pos->flags & KERNFS_ACTIVATED)
12631279 continue;
12641280
12651281 WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
....@@ -1327,9 +1343,8 @@
13271343
13281344 /* update timestamps on the parent */
13291345 if (ps_iattr) {
1330
- ktime_get_real_ts64(&ps_iattr->ia_iattr.ia_ctime);
1331
- ps_iattr->ia_iattr.ia_mtime =
1332
- ps_iattr->ia_iattr.ia_ctime;
1346
+ ktime_get_real_ts64(&ps_iattr->ia_ctime);
1347
+ ps_iattr->ia_mtime = ps_iattr->ia_ctime;
13331348 }
13341349
13351350 kernfs_put(pos);
....@@ -1506,8 +1521,11 @@
15061521 mutex_lock(&kernfs_mutex);
15071522
15081523 kn = kernfs_find_ns(parent, name, ns);
1509
- if (kn)
1524
+ if (kn) {
1525
+ kernfs_get(kn);
15101526 __kernfs_remove(kn);
1527
+ kernfs_put(kn);
1528
+ }
15111529
15121530 mutex_unlock(&kernfs_mutex);
15131531
....@@ -1675,7 +1693,7 @@
16751693 const char *name = pos->name;
16761694 unsigned int type = dt_type(pos);
16771695 int len = strlen(name);
1678
- ino_t ino = pos->id.ino;
1696
+ ino_t ino = kernfs_ino(pos);
16791697
16801698 ctx->pos = pos->hash;
16811699 file->private_data = pos;