.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * fs/kernfs/dir.c - kernfs directory implementation |
---|
3 | 4 | * |
---|
4 | 5 | * Copyright (c) 2001-3 Patrick Mochel |
---|
5 | 6 | * Copyright (c) 2007 SUSE Linux Products GmbH |
---|
6 | 7 | * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> |
---|
7 | | - * |
---|
8 | | - * This file is released under the GPLv2. |
---|
9 | 8 | */ |
---|
10 | 9 | |
---|
11 | 10 | #include <linux/sched.h> |
---|
.. | .. |
---|
20 | 19 | |
---|
21 | 20 | DEFINE_MUTEX(kernfs_mutex); |
---|
22 | 21 | static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ |
---|
23 | | -static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ |
---|
| 22 | +/* |
---|
| 23 | + * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to |
---|
| 24 | + * call pr_cont() while holding rename_lock, because pr_cont() sometimes |
---|
| 25 | + * will perform wakeups when releasing console_sem. Holding rename_lock |
---|
| 26 | + * will introduce deadlock if the scheduler reads the kernfs_name in the |
---|
| 27 | + * wakeup path. |
---|
| 28 | + */ |
---|
| 29 | +static DEFINE_SPINLOCK(kernfs_pr_cont_lock); |
---|
| 30 | +static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ |
---|
24 | 31 | static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ |
---|
25 | 32 | |
---|
26 | 33 | #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) |
---|
.. | .. |
---|
138 | 145 | if (kn_from == kn_to) |
---|
139 | 146 | return strlcpy(buf, "/", buflen); |
---|
140 | 147 | |
---|
| 148 | + if (!buf) |
---|
| 149 | + return -EINVAL; |
---|
| 150 | + |
---|
141 | 151 | common = kernfs_common_ancestor(kn_from, kn_to); |
---|
142 | 152 | if (WARN_ON(!common)) |
---|
143 | 153 | return -EINVAL; |
---|
.. | .. |
---|
145 | 155 | depth_to = kernfs_depth(common, kn_to); |
---|
146 | 156 | depth_from = kernfs_depth(common, kn_from); |
---|
147 | 157 | |
---|
148 | | - if (buf) |
---|
149 | | - buf[0] = '\0'; |
---|
| 158 | + buf[0] = '\0'; |
---|
150 | 159 | |
---|
151 | 160 | for (i = 0; i < depth_from; i++) |
---|
152 | 161 | len += strlcpy(buf + len, parent_str, |
---|
.. | .. |
---|
229 | 238 | { |
---|
230 | 239 | unsigned long flags; |
---|
231 | 240 | |
---|
232 | | - spin_lock_irqsave(&kernfs_rename_lock, flags); |
---|
| 241 | + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); |
---|
233 | 242 | |
---|
234 | | - kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); |
---|
| 243 | + kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); |
---|
235 | 244 | pr_cont("%s", kernfs_pr_cont_buf); |
---|
236 | 245 | |
---|
237 | | - spin_unlock_irqrestore(&kernfs_rename_lock, flags); |
---|
| 246 | + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); |
---|
238 | 247 | } |
---|
239 | 248 | |
---|
240 | 249 | /** |
---|
.. | .. |
---|
248 | 257 | unsigned long flags; |
---|
249 | 258 | int sz; |
---|
250 | 259 | |
---|
251 | | - spin_lock_irqsave(&kernfs_rename_lock, flags); |
---|
| 260 | + spin_lock_irqsave(&kernfs_pr_cont_lock, flags); |
---|
252 | 261 | |
---|
253 | | - sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf, |
---|
254 | | - sizeof(kernfs_pr_cont_buf)); |
---|
| 262 | + sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, |
---|
| 263 | + sizeof(kernfs_pr_cont_buf)); |
---|
255 | 264 | if (sz < 0) { |
---|
256 | 265 | pr_cont("(error)"); |
---|
257 | 266 | goto out; |
---|
.. | .. |
---|
265 | 274 | pr_cont("%s", kernfs_pr_cont_buf); |
---|
266 | 275 | |
---|
267 | 276 | out: |
---|
268 | | - spin_unlock_irqrestore(&kernfs_rename_lock, flags); |
---|
| 277 | + spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags); |
---|
269 | 278 | } |
---|
270 | 279 | |
---|
271 | 280 | /** |
---|
.. | .. |
---|
431 | 440 | */ |
---|
432 | 441 | void kernfs_put_active(struct kernfs_node *kn) |
---|
433 | 442 | { |
---|
434 | | - struct kernfs_root *root = kernfs_root(kn); |
---|
435 | 443 | int v; |
---|
436 | 444 | |
---|
437 | 445 | if (unlikely(!kn)) |
---|
438 | 446 | return; |
---|
439 | 447 | |
---|
440 | 448 | if (kernfs_lockdep(kn)) |
---|
441 | | - rwsem_release(&kn->dep_map, 1, _RET_IP_); |
---|
| 449 | + rwsem_release(&kn->dep_map, _RET_IP_); |
---|
442 | 450 | v = atomic_dec_return(&kn->active); |
---|
443 | 451 | if (likely(v != KN_DEACTIVATED_BIAS)) |
---|
444 | 452 | return; |
---|
445 | 453 | |
---|
446 | | - wake_up_all(&root->deactivate_waitq); |
---|
| 454 | + wake_up_all(&kernfs_root(kn)->deactivate_waitq); |
---|
447 | 455 | } |
---|
448 | 456 | |
---|
449 | 457 | /** |
---|
.. | .. |
---|
476 | 484 | |
---|
477 | 485 | if (kernfs_lockdep(kn)) { |
---|
478 | 486 | lock_acquired(&kn->dep_map, _RET_IP_); |
---|
479 | | - rwsem_release(&kn->dep_map, 1, _RET_IP_); |
---|
| 487 | + rwsem_release(&kn->dep_map, _RET_IP_); |
---|
480 | 488 | } |
---|
481 | 489 | |
---|
482 | 490 | kernfs_drain_open_files(kn); |
---|
.. | .. |
---|
508 | 516 | struct kernfs_node *parent; |
---|
509 | 517 | struct kernfs_root *root; |
---|
510 | 518 | |
---|
511 | | - /* |
---|
512 | | - * kernfs_node is freed with ->count 0, kernfs_find_and_get_node_by_ino |
---|
513 | | - * depends on this to filter reused stale node |
---|
514 | | - */ |
---|
515 | 519 | if (!kn || !atomic_dec_and_test(&kn->count)) |
---|
516 | 520 | return; |
---|
517 | 521 | root = kernfs_root(kn); |
---|
.. | .. |
---|
532 | 536 | kfree_const(kn->name); |
---|
533 | 537 | |
---|
534 | 538 | if (kn->iattr) { |
---|
535 | | - if (kn->iattr->ia_secdata) |
---|
536 | | - security_release_secctx(kn->iattr->ia_secdata, |
---|
537 | | - kn->iattr->ia_secdata_len); |
---|
538 | 539 | simple_xattrs_free(&kn->iattr->xattrs); |
---|
| 540 | + kmem_cache_free(kernfs_iattrs_cache, kn->iattr); |
---|
539 | 541 | } |
---|
540 | | - kfree(kn->iattr); |
---|
541 | 542 | spin_lock(&kernfs_idr_lock); |
---|
542 | | - idr_remove(&root->ino_idr, kn->id.ino); |
---|
| 543 | + idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); |
---|
543 | 544 | spin_unlock(&kernfs_idr_lock); |
---|
544 | 545 | kmem_cache_free(kernfs_node_cache, kn); |
---|
545 | 546 | |
---|
.. | .. |
---|
618 | 619 | } |
---|
619 | 620 | |
---|
620 | 621 | static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, |
---|
| 622 | + struct kernfs_node *parent, |
---|
621 | 623 | const char *name, umode_t mode, |
---|
622 | 624 | kuid_t uid, kgid_t gid, |
---|
623 | 625 | unsigned flags) |
---|
624 | 626 | { |
---|
625 | 627 | struct kernfs_node *kn; |
---|
626 | | - u32 gen; |
---|
| 628 | + u32 id_highbits; |
---|
627 | 629 | int ret; |
---|
628 | 630 | |
---|
629 | 631 | name = kstrdup_const(name, GFP_KERNEL); |
---|
.. | .. |
---|
637 | 639 | idr_preload(GFP_KERNEL); |
---|
638 | 640 | spin_lock(&kernfs_idr_lock); |
---|
639 | 641 | ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC); |
---|
640 | | - if (ret >= 0 && ret < root->last_ino) |
---|
641 | | - root->next_generation++; |
---|
642 | | - gen = root->next_generation; |
---|
643 | | - root->last_ino = ret; |
---|
| 642 | + if (ret >= 0 && ret < root->last_id_lowbits) |
---|
| 643 | + root->id_highbits++; |
---|
| 644 | + id_highbits = root->id_highbits; |
---|
| 645 | + root->last_id_lowbits = ret; |
---|
644 | 646 | spin_unlock(&kernfs_idr_lock); |
---|
645 | 647 | idr_preload_end(); |
---|
646 | 648 | if (ret < 0) |
---|
647 | 649 | goto err_out2; |
---|
648 | | - kn->id.ino = ret; |
---|
649 | | - kn->id.generation = gen; |
---|
650 | 650 | |
---|
651 | | - /* |
---|
652 | | - * set ino first. This RELEASE is paired with atomic_inc_not_zero in |
---|
653 | | - * kernfs_find_and_get_node_by_ino |
---|
654 | | - */ |
---|
655 | | - atomic_set_release(&kn->count, 1); |
---|
| 651 | + kn->id = (u64)id_highbits << 32 | ret; |
---|
| 652 | + |
---|
| 653 | + atomic_set(&kn->count, 1); |
---|
656 | 654 | atomic_set(&kn->active, KN_DEACTIVATED_BIAS); |
---|
657 | 655 | RB_CLEAR_NODE(&kn->rb); |
---|
658 | 656 | |
---|
.. | .. |
---|
672 | 670 | goto err_out3; |
---|
673 | 671 | } |
---|
674 | 672 | |
---|
| 673 | + if (parent) { |
---|
| 674 | + ret = security_kernfs_init_security(parent, kn); |
---|
| 675 | + if (ret) |
---|
| 676 | + goto err_out3; |
---|
| 677 | + } |
---|
| 678 | + |
---|
675 | 679 | return kn; |
---|
676 | 680 | |
---|
677 | 681 | err_out3: |
---|
678 | | - idr_remove(&root->ino_idr, kn->id.ino); |
---|
| 682 | + spin_lock(&kernfs_idr_lock); |
---|
| 683 | + idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); |
---|
| 684 | + spin_unlock(&kernfs_idr_lock); |
---|
679 | 685 | err_out2: |
---|
680 | 686 | kmem_cache_free(kernfs_node_cache, kn); |
---|
681 | 687 | err_out1: |
---|
.. | .. |
---|
690 | 696 | { |
---|
691 | 697 | struct kernfs_node *kn; |
---|
692 | 698 | |
---|
693 | | - kn = __kernfs_new_node(kernfs_root(parent), |
---|
| 699 | + kn = __kernfs_new_node(kernfs_root(parent), parent, |
---|
694 | 700 | name, mode, uid, gid, flags); |
---|
695 | 701 | if (kn) { |
---|
696 | 702 | kernfs_get(parent); |
---|
.. | .. |
---|
700 | 706 | } |
---|
701 | 707 | |
---|
702 | 708 | /* |
---|
703 | | - * kernfs_find_and_get_node_by_ino - get kernfs_node from inode number |
---|
| 709 | + * kernfs_find_and_get_node_by_id - get kernfs_node from node id |
---|
704 | 710 | * @root: the kernfs root |
---|
705 | | - * @ino: inode number |
---|
| 711 | + * @id: the target node id |
---|
| 712 | + * |
---|
| 713 | + * @id's lower 32bits encode ino and upper gen. If the gen portion is |
---|
| 714 | + * zero, all generations are matched. |
---|
706 | 715 | * |
---|
707 | 716 | * RETURNS: |
---|
708 | 717 | * NULL on failure. Return a kernfs node with reference counter incremented |
---|
709 | 718 | */ |
---|
710 | | -struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root, |
---|
711 | | - unsigned int ino) |
---|
| 719 | +struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, |
---|
| 720 | + u64 id) |
---|
712 | 721 | { |
---|
713 | 722 | struct kernfs_node *kn; |
---|
| 723 | + ino_t ino = kernfs_id_ino(id); |
---|
| 724 | + u32 gen = kernfs_id_gen(id); |
---|
714 | 725 | |
---|
715 | | - rcu_read_lock(); |
---|
716 | | - kn = idr_find(&root->ino_idr, ino); |
---|
| 726 | + spin_lock(&kernfs_idr_lock); |
---|
| 727 | + |
---|
| 728 | + kn = idr_find(&root->ino_idr, (u32)ino); |
---|
717 | 729 | if (!kn) |
---|
718 | | - goto out; |
---|
| 730 | + goto err_unlock; |
---|
719 | 731 | |
---|
720 | | - /* |
---|
721 | | - * Since kernfs_node is freed in RCU, it's possible an old node for ino |
---|
722 | | - * is freed, but reused before RCU grace period. But a freed node (see |
---|
723 | | - * kernfs_put) or an incompletedly initialized node (see |
---|
724 | | - * __kernfs_new_node) should have 'count' 0. We can use this fact to |
---|
725 | | - * filter out such node. |
---|
726 | | - */ |
---|
727 | | - if (!atomic_inc_not_zero(&kn->count)) { |
---|
728 | | - kn = NULL; |
---|
729 | | - goto out; |
---|
| 732 | + if (sizeof(ino_t) >= sizeof(u64)) { |
---|
| 733 | + /* we looked up with the low 32bits, compare the whole */ |
---|
| 734 | + if (kernfs_ino(kn) != ino) |
---|
| 735 | + goto err_unlock; |
---|
| 736 | + } else { |
---|
| 737 | + /* 0 matches all generations */ |
---|
| 738 | + if (unlikely(gen && kernfs_gen(kn) != gen)) |
---|
| 739 | + goto err_unlock; |
---|
730 | 740 | } |
---|
731 | 741 | |
---|
732 | 742 | /* |
---|
733 | | - * The node could be a new node or a reused node. If it's a new node, |
---|
734 | | - * we are ok. If it's reused because of RCU (because of |
---|
735 | | - * SLAB_TYPESAFE_BY_RCU), the __kernfs_new_node always sets its 'ino' |
---|
736 | | - * before 'count'. So if 'count' is uptodate, 'ino' should be uptodate, |
---|
737 | | - * hence we can use 'ino' to filter stale node. |
---|
| 743 | + * ACTIVATED is protected with kernfs_mutex but it was clear when |
---|
| 744 | + * @kn was added to idr and we just wanna see it set. No need to |
---|
| 745 | + * grab kernfs_mutex. |
---|
738 | 746 | */ |
---|
739 | | - if (kn->id.ino != ino) |
---|
740 | | - goto out; |
---|
741 | | - rcu_read_unlock(); |
---|
| 747 | + if (unlikely(!(kn->flags & KERNFS_ACTIVATED) || |
---|
| 748 | + !atomic_inc_not_zero(&kn->count))) |
---|
| 749 | + goto err_unlock; |
---|
742 | 750 | |
---|
| 751 | + spin_unlock(&kernfs_idr_lock); |
---|
743 | 752 | return kn; |
---|
744 | | -out: |
---|
745 | | - rcu_read_unlock(); |
---|
746 | | - kernfs_put(kn); |
---|
| 753 | +err_unlock: |
---|
| 754 | + spin_unlock(&kernfs_idr_lock); |
---|
747 | 755 | return NULL; |
---|
748 | 756 | } |
---|
749 | 757 | |
---|
.. | .. |
---|
793 | 801 | /* Update timestamps on the parent */ |
---|
794 | 802 | ps_iattr = parent->iattr; |
---|
795 | 803 | if (ps_iattr) { |
---|
796 | | - struct iattr *ps_iattrs = &ps_iattr->ia_iattr; |
---|
797 | | - ktime_get_real_ts64(&ps_iattrs->ia_ctime); |
---|
798 | | - ps_iattrs->ia_mtime = ps_iattrs->ia_ctime; |
---|
| 804 | + ktime_get_real_ts64(&ps_iattr->ia_ctime); |
---|
| 805 | + ps_iattr->ia_mtime = ps_iattr->ia_ctime; |
---|
799 | 806 | } |
---|
800 | 807 | |
---|
801 | 808 | mutex_unlock(&kernfs_mutex); |
---|
.. | .. |
---|
867 | 874 | |
---|
868 | 875 | lockdep_assert_held(&kernfs_mutex); |
---|
869 | 876 | |
---|
870 | | - /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */ |
---|
871 | | - spin_lock_irq(&kernfs_rename_lock); |
---|
| 877 | + spin_lock_irq(&kernfs_pr_cont_lock); |
---|
872 | 878 | |
---|
873 | 879 | len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); |
---|
874 | 880 | |
---|
875 | 881 | if (len >= sizeof(kernfs_pr_cont_buf)) { |
---|
876 | | - spin_unlock_irq(&kernfs_rename_lock); |
---|
| 882 | + spin_unlock_irq(&kernfs_pr_cont_lock); |
---|
877 | 883 | return NULL; |
---|
878 | 884 | } |
---|
879 | 885 | |
---|
.. | .. |
---|
885 | 891 | parent = kernfs_find_ns(parent, name, ns); |
---|
886 | 892 | } |
---|
887 | 893 | |
---|
888 | | - spin_unlock_irq(&kernfs_rename_lock); |
---|
| 894 | + spin_unlock_irq(&kernfs_pr_cont_lock); |
---|
889 | 895 | |
---|
890 | 896 | return parent; |
---|
891 | 897 | } |
---|
.. | .. |
---|
958 | 964 | |
---|
959 | 965 | idr_init(&root->ino_idr); |
---|
960 | 966 | INIT_LIST_HEAD(&root->supers); |
---|
961 | | - root->next_generation = 1; |
---|
962 | 967 | |
---|
963 | | - kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, |
---|
| 968 | + /* |
---|
| 969 | + * On 64bit ino setups, id is ino. On 32bit, low 32bits are ino. |
---|
| 970 | + * High bits are generation. The starting value for both ino and |
---|
| 971 | + * generation is 1. Initialize upper 32bit allocation |
---|
| 972 | + * accordingly. |
---|
| 973 | + */ |
---|
| 974 | + if (sizeof(ino_t) >= sizeof(u64)) |
---|
| 975 | + root->id_highbits = 0; |
---|
| 976 | + else |
---|
| 977 | + root->id_highbits = 1; |
---|
| 978 | + |
---|
| 979 | + kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO, |
---|
964 | 980 | GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, |
---|
965 | 981 | KERNFS_DIR); |
---|
966 | 982 | if (!kn) { |
---|
.. | .. |
---|
1259 | 1275 | |
---|
1260 | 1276 | pos = NULL; |
---|
1261 | 1277 | while ((pos = kernfs_next_descendant_post(pos, kn))) { |
---|
1262 | | - if (!pos || (pos->flags & KERNFS_ACTIVATED)) |
---|
| 1278 | + if (pos->flags & KERNFS_ACTIVATED) |
---|
1263 | 1279 | continue; |
---|
1264 | 1280 | |
---|
1265 | 1281 | WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb)); |
---|
.. | .. |
---|
1327 | 1343 | |
---|
1328 | 1344 | /* update timestamps on the parent */ |
---|
1329 | 1345 | if (ps_iattr) { |
---|
1330 | | - ktime_get_real_ts64(&ps_iattr->ia_iattr.ia_ctime); |
---|
1331 | | - ps_iattr->ia_iattr.ia_mtime = |
---|
1332 | | - ps_iattr->ia_iattr.ia_ctime; |
---|
| 1346 | + ktime_get_real_ts64(&ps_iattr->ia_ctime); |
---|
| 1347 | + ps_iattr->ia_mtime = ps_iattr->ia_ctime; |
---|
1333 | 1348 | } |
---|
1334 | 1349 | |
---|
1335 | 1350 | kernfs_put(pos); |
---|
.. | .. |
---|
1506 | 1521 | mutex_lock(&kernfs_mutex); |
---|
1507 | 1522 | |
---|
1508 | 1523 | kn = kernfs_find_ns(parent, name, ns); |
---|
1509 | | - if (kn) |
---|
| 1524 | + if (kn) { |
---|
| 1525 | + kernfs_get(kn); |
---|
1510 | 1526 | __kernfs_remove(kn); |
---|
| 1527 | + kernfs_put(kn); |
---|
| 1528 | + } |
---|
1511 | 1529 | |
---|
1512 | 1530 | mutex_unlock(&kernfs_mutex); |
---|
1513 | 1531 | |
---|
.. | .. |
---|
1675 | 1693 | const char *name = pos->name; |
---|
1676 | 1694 | unsigned int type = dt_type(pos); |
---|
1677 | 1695 | int len = strlen(name); |
---|
1678 | | - ino_t ino = pos->id.ino; |
---|
| 1696 | + ino_t ino = kernfs_ino(pos); |
---|
1679 | 1697 | |
---|
1680 | 1698 | ctx->pos = pos->hash; |
---|
1681 | 1699 | file->private_data = pos; |
---|