hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/proc/base.c
....@@ -59,6 +59,7 @@
5959 #include <linux/capability.h>
6060 #include <linux/file.h>
6161 #include <linux/fdtable.h>
62
+#include <linux/generic-radix-tree.h>
6263 #include <linux/string.h>
6364 #include <linux/seq_file.h>
6465 #include <linux/namei.h>
....@@ -92,11 +93,11 @@
9293 #include <linux/sched/coredump.h>
9394 #include <linux/sched/debug.h>
9495 #include <linux/sched/stat.h>
95
-#include <linux/flex_array.h>
9696 #include <linux/posix-timers.h>
97
+#include <linux/time_namespace.h>
98
+#include <linux/resctrl.h>
9799 #include <linux/cpufreq_times.h>
98100 #include <trace/events/oom.h>
99
-#include <linux/swait.h>
100101 #include "internal.h"
101102 #include "fd.h"
102103
....@@ -142,9 +143,13 @@
142143 #define REG(NAME, MODE, fops) \
143144 NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
144145 #define ONE(NAME, MODE, show) \
145
- NOD(NAME, (S_IFREG|(MODE)), \
146
+ NOD(NAME, (S_IFREG|(MODE)), \
146147 NULL, &proc_single_file_operations, \
147148 { .proc_show = show } )
149
+#define ATTR(LSM, NAME, MODE) \
150
+ NOD(NAME, (S_IFREG|(MODE)), \
151
+ NULL, &proc_pid_attr_operations, \
152
+ { .lsm = LSM })
148153
149154 /*
150155 * Count the number of hardlinks for the pid_entry table, excluding the .
....@@ -401,11 +406,11 @@
401406
402407 static int lock_trace(struct task_struct *task)
403408 {
404
- int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
409
+ int err = down_read_killable(&task->signal->exec_update_lock);
405410 if (err)
406411 return err;
407412 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
408
- mutex_unlock(&task->signal->cred_guard_mutex);
413
+ up_read(&task->signal->exec_update_lock);
409414 return -EPERM;
410415 }
411416 return 0;
....@@ -413,7 +418,7 @@
413418
414419 static void unlock_trace(struct task_struct *task)
415420 {
416
- mutex_unlock(&task->signal->cred_guard_mutex);
421
+ up_read(&task->signal->exec_update_lock);
417422 }
418423
419424 #ifdef CONFIG_STACKTRACE
....@@ -423,7 +428,6 @@
423428 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
424429 struct pid *pid, struct task_struct *task)
425430 {
426
- struct stack_trace trace;
427431 unsigned long *entries;
428432 int err;
429433
....@@ -446,20 +450,17 @@
446450 if (!entries)
447451 return -ENOMEM;
448452
449
- trace.nr_entries = 0;
450
- trace.max_entries = MAX_STACK_TRACE_DEPTH;
451
- trace.entries = entries;
452
- trace.skip = 0;
453
-
454453 err = lock_trace(task);
455454 if (!err) {
456
- unsigned int i;
455
+ unsigned int i, nr_entries;
457456
458
- save_stack_trace_tsk(task, &trace);
457
+ nr_entries = stack_trace_save_tsk(task, entries,
458
+ MAX_STACK_TRACE_DEPTH, 0);
459459
460
- for (i = 0; i < trace.nr_entries; i++) {
460
+ for (i = 0; i < nr_entries; i++) {
461461 seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
462462 }
463
+
463464 unlock_trace(task);
464465 }
465466 kfree(entries);
....@@ -476,7 +477,7 @@
476477 struct pid *pid, struct task_struct *task)
477478 {
478479 if (unlikely(!sched_info_on()))
479
- seq_printf(m, "0 0 0\n");
480
+ seq_puts(m, "0 0 0\n");
480481 else
481482 seq_printf(m, "%llu %llu %lu\n",
482483 (unsigned long long)task->se.sum_exec_runtime,
....@@ -505,9 +506,8 @@
505506 lr->count, lr->time, lr->max);
506507 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
507508 unsigned long bt = lr->backtrace[q];
509
+
508510 if (!bt)
509
- break;
510
- if (bt == ULONG_MAX)
511511 break;
512512 seq_printf(m, " %ps", (void *)bt);
513513 }
....@@ -531,7 +531,7 @@
531531
532532 if (!task)
533533 return -ESRCH;
534
- clear_all_latency_tracing(task);
534
+ clear_tsk_latency_tracing(task);
535535 put_task_struct(task);
536536
537537 return count;
....@@ -550,11 +550,19 @@
550550 static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
551551 struct pid *pid, struct task_struct *task)
552552 {
553
- unsigned long totalpages = totalram_pages + total_swap_pages;
553
+ unsigned long totalpages = totalram_pages() + total_swap_pages;
554554 unsigned long points = 0;
555
+ long badness;
555556
556
- points = oom_badness(task, NULL, NULL, totalpages) *
557
- 1000 / totalpages;
557
+ badness = oom_badness(task, totalpages);
558
+ /*
559
+ * Special case OOM_SCORE_ADJ_MIN; for all others, scale the
560
+ * badness value into [0, 2000] range which we have been
561
+ * exporting for a long time so userspace might depend on it.
562
+ */
563
+ if (badness != LONG_MIN)
564
+ points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3;
565
+
558566 seq_printf(m, "%lu\n", points);
559567
560568 return 0;
....@@ -601,8 +609,10 @@
601609 /*
602610 * print the file header
603611 */
604
- seq_printf(m, "%-25s %-20s %-20s %-10s\n",
605
- "Limit", "Soft Limit", "Hard Limit", "Units");
612
+ seq_puts(m, "Limit "
613
+ "Soft Limit "
614
+ "Hard Limit "
615
+ "Units \n");
606616
607617 for (i = 0; i < RLIM_NLIMITS; i++) {
608618 if (rlim[i].rlim_cur == RLIM_INFINITY)
....@@ -630,24 +640,25 @@
630640 static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
631641 struct pid *pid, struct task_struct *task)
632642 {
633
- long nr;
634
- unsigned long args[6], sp, pc;
643
+ struct syscall_info info;
644
+ u64 *args = &info.data.args[0];
635645 int res;
636646
637647 res = lock_trace(task);
638648 if (res)
639649 return res;
640650
641
- if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
651
+ if (task_current_syscall(task, &info))
642652 seq_puts(m, "running\n");
643
- else if (nr < 0)
644
- seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
653
+ else if (info.data.nr < 0)
654
+ seq_printf(m, "%d 0x%llx 0x%llx\n",
655
+ info.data.nr, info.sp, info.data.instruction_pointer);
645656 else
646657 seq_printf(m,
647
- "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
648
- nr,
658
+ "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
659
+ info.data.nr,
649660 args[0], args[1], args[2], args[3], args[4], args[5],
650
- sp, pc);
661
+ info.sp, info.data.instruction_pointer);
651662 unlock_trace(task);
652663
653664 return 0;
....@@ -696,13 +707,21 @@
696707 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
697708 * or euid/egid (for hide_pid_min=2)?
698709 */
699
-static bool has_pid_permissions(struct pid_namespace *pid,
710
+static bool has_pid_permissions(struct proc_fs_info *fs_info,
700711 struct task_struct *task,
701
- int hide_pid_min)
712
+ enum proc_hidepid hide_pid_min)
702713 {
703
- if (pid->hide_pid < hide_pid_min)
714
+ /*
715
+ * If 'hidepid' mount option is set, force a ptrace check,
716
+ * we indicate that we are using a filesystem syscall
717
+ * by passing PTRACE_MODE_READ_FSCREDS
718
+ */
719
+ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
720
+ return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
721
+
722
+ if (fs_info->hide_pid < hide_pid_min)
704723 return true;
705
- if (in_group_p(pid->pid_gid))
724
+ if (in_group_p(fs_info->pid_gid))
706725 return true;
707726 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
708727 }
....@@ -710,18 +729,18 @@
710729
711730 static int proc_pid_permission(struct inode *inode, int mask)
712731 {
713
- struct pid_namespace *pid = proc_pid_ns(inode);
732
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
714733 struct task_struct *task;
715734 bool has_perms;
716735
717736 task = get_proc_task(inode);
718737 if (!task)
719738 return -ESRCH;
720
- has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
739
+ has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
721740 put_task_struct(task);
722741
723742 if (!has_perms) {
724
- if (pid->hide_pid == HIDEPID_INVISIBLE) {
743
+ if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
725744 /*
726745 * Let's make getdents(), stat(), and open()
727746 * consistent with each other. If a process
....@@ -745,7 +764,7 @@
745764 static int proc_single_show(struct seq_file *m, void *v)
746765 {
747766 struct inode *inode = m->private;
748
- struct pid_namespace *ns = proc_pid_ns(inode);
767
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
749768 struct pid *pid = proc_pid(inode);
750769 struct task_struct *task;
751770 int ret;
....@@ -1031,6 +1050,8 @@
10311050 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
10321051 OOM_SCORE_ADJ_MAX;
10331052 put_task_struct(task);
1053
+ if (oom_adj > OOM_ADJUST_MAX)
1054
+ oom_adj = OOM_ADJUST_MAX;
10341055 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
10351056 return simple_read_from_buffer(buf, count, ppos, buffer, len);
10361057 }
....@@ -1223,7 +1244,7 @@
12231244 .llseek = default_llseek,
12241245 };
12251246
1226
-#ifdef CONFIG_AUDITSYSCALL
1247
+#ifdef CONFIG_AUDIT
12271248 #define TMPBUFLEN 11
12281249 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
12291250 size_t count, loff_t *ppos)
....@@ -1249,6 +1270,10 @@
12491270 uid_t loginuid;
12501271 kuid_t kloginuid;
12511272 int rv;
1273
+
1274
+ /* Don't let kthreads write their own loginuid */
1275
+ if (current->flags & PF_KTHREAD)
1276
+ return -EPERM;
12521277
12531278 rcu_read_lock();
12541279 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
....@@ -1413,7 +1438,7 @@
14131438 static int sched_show(struct seq_file *m, void *v)
14141439 {
14151440 struct inode *inode = m->private;
1416
- struct pid_namespace *ns = proc_pid_ns(inode);
1441
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
14171442 struct task_struct *p;
14181443
14191444 p = get_proc_task(inode);
....@@ -1533,6 +1558,108 @@
15331558
15341559 #endif /* CONFIG_SCHED_AUTOGROUP */
15351560
1561
+#ifdef CONFIG_TIME_NS
1562
+static int timens_offsets_show(struct seq_file *m, void *v)
1563
+{
1564
+ struct task_struct *p;
1565
+
1566
+ p = get_proc_task(file_inode(m->file));
1567
+ if (!p)
1568
+ return -ESRCH;
1569
+ proc_timens_show_offsets(p, m);
1570
+
1571
+ put_task_struct(p);
1572
+
1573
+ return 0;
1574
+}
1575
+
1576
+static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
1577
+ size_t count, loff_t *ppos)
1578
+{
1579
+ struct inode *inode = file_inode(file);
1580
+ struct proc_timens_offset offsets[2];
1581
+ char *kbuf = NULL, *pos, *next_line;
1582
+ struct task_struct *p;
1583
+ int ret, noffsets;
1584
+
1585
+ /* Only allow < page size writes at the beginning of the file */
1586
+ if ((*ppos != 0) || (count >= PAGE_SIZE))
1587
+ return -EINVAL;
1588
+
1589
+ /* Slurp in the user data */
1590
+ kbuf = memdup_user_nul(buf, count);
1591
+ if (IS_ERR(kbuf))
1592
+ return PTR_ERR(kbuf);
1593
+
1594
+ /* Parse the user data */
1595
+ ret = -EINVAL;
1596
+ noffsets = 0;
1597
+ for (pos = kbuf; pos; pos = next_line) {
1598
+ struct proc_timens_offset *off = &offsets[noffsets];
1599
+ char clock[10];
1600
+ int err;
1601
+
1602
+ /* Find the end of line and ensure we don't look past it */
1603
+ next_line = strchr(pos, '\n');
1604
+ if (next_line) {
1605
+ *next_line = '\0';
1606
+ next_line++;
1607
+ if (*next_line == '\0')
1608
+ next_line = NULL;
1609
+ }
1610
+
1611
+ err = sscanf(pos, "%9s %lld %lu", clock,
1612
+ &off->val.tv_sec, &off->val.tv_nsec);
1613
+ if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
1614
+ goto out;
1615
+
1616
+ clock[sizeof(clock) - 1] = 0;
1617
+ if (strcmp(clock, "monotonic") == 0 ||
1618
+ strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0)
1619
+ off->clockid = CLOCK_MONOTONIC;
1620
+ else if (strcmp(clock, "boottime") == 0 ||
1621
+ strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0)
1622
+ off->clockid = CLOCK_BOOTTIME;
1623
+ else
1624
+ goto out;
1625
+
1626
+ noffsets++;
1627
+ if (noffsets == ARRAY_SIZE(offsets)) {
1628
+ if (next_line)
1629
+ count = next_line - kbuf;
1630
+ break;
1631
+ }
1632
+ }
1633
+
1634
+ ret = -ESRCH;
1635
+ p = get_proc_task(inode);
1636
+ if (!p)
1637
+ goto out;
1638
+ ret = proc_timens_set_offset(file, p, offsets, noffsets);
1639
+ put_task_struct(p);
1640
+ if (ret)
1641
+ goto out;
1642
+
1643
+ ret = count;
1644
+out:
1645
+ kfree(kbuf);
1646
+ return ret;
1647
+}
1648
+
1649
+static int timens_offsets_open(struct inode *inode, struct file *filp)
1650
+{
1651
+ return single_open(filp, timens_offsets_show, inode);
1652
+}
1653
+
1654
+static const struct file_operations proc_timens_offsets_operations = {
1655
+ .open = timens_offsets_open,
1656
+ .read = seq_read,
1657
+ .write = timens_offsets_write,
1658
+ .llseek = seq_lseek,
1659
+ .release = single_release,
1660
+};
1661
+#endif /* CONFIG_TIME_NS */
1662
+
15361663 static ssize_t comm_write(struct file *file, const char __user *buf,
15371664 size_t count, loff_t *offset)
15381665 {
....@@ -1626,8 +1753,7 @@
16261753 if (error)
16271754 goto out;
16281755
1629
- nd_jump_link(&path);
1630
- return NULL;
1756
+ error = nd_jump_link(&path);
16311757 out:
16321758 return ERR_PTR(error);
16331759 }
....@@ -1743,11 +1869,25 @@
17431869 *rgid = gid;
17441870 }
17451871
1746
-struct inode *proc_pid_make_inode(struct super_block * sb,
1872
+void proc_pid_evict_inode(struct proc_inode *ei)
1873
+{
1874
+ struct pid *pid = ei->pid;
1875
+
1876
+ if (S_ISDIR(ei->vfs_inode.i_mode)) {
1877
+ spin_lock(&pid->lock);
1878
+ hlist_del_init_rcu(&ei->sibling_inodes);
1879
+ spin_unlock(&pid->lock);
1880
+ }
1881
+
1882
+ put_pid(pid);
1883
+}
1884
+
1885
+struct inode *proc_pid_make_inode(struct super_block *sb,
17471886 struct task_struct *task, umode_t mode)
17481887 {
17491888 struct inode * inode;
17501889 struct proc_inode *ei;
1890
+ struct pid *pid;
17511891
17521892 /* We need a new inode */
17531893
....@@ -1765,9 +1905,12 @@
17651905 /*
17661906 * grab the reference to task.
17671907 */
1768
- ei->pid = get_task_pid(task, PIDTYPE_PID);
1769
- if (!ei->pid)
1908
+ pid = get_task_pid(task, PIDTYPE_PID);
1909
+ if (!pid)
17701910 goto out_unlock;
1911
+
1912
+ /* Let the pid remember us for quick removal */
1913
+ ei->pid = pid;
17711914
17721915 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
17731916 security_task_to_inode(task, inode);
....@@ -1780,11 +1923,44 @@
17801923 return NULL;
17811924 }
17821925
1926
+/*
1927
+ * Generate an inode and add it into @pid->inodes, so that task will
1928
+ * invalidate inode's dentry before being released.
1929
+ *
1930
+ * This helper is used for creating dir-type entries under '/proc' and
1931
+ * '/proc/<tgid>/task'. Other entries (e.g. fd, stat) under '/proc/<tgid>'
1932
+ * can be released by invalidating '/proc/<tgid>' dentry.
1933
+ * In theory, dentries under '/proc/<tgid>/task' can also be released by
1934
+ * invalidating '/proc/<tgid>' dentry, we reserve it to handle single
1935
+ * thread exiting situation: Any one of the threads should invalidate its
1936
+ * '/proc/<tgid>/task/<pid>' dentry before being released.
1937
+ */
1938
+static struct inode *proc_pid_make_base_inode(struct super_block *sb,
1939
+ struct task_struct *task, umode_t mode)
1940
+{
1941
+ struct inode *inode;
1942
+ struct proc_inode *ei;
1943
+ struct pid *pid;
1944
+
1945
+ inode = proc_pid_make_inode(sb, task, mode);
1946
+ if (!inode)
1947
+ return NULL;
1948
+
1949
+ /* Let proc_flush_pid find this directory inode */
1950
+ ei = PROC_I(inode);
1951
+ pid = ei->pid;
1952
+ spin_lock(&pid->lock);
1953
+ hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
1954
+ spin_unlock(&pid->lock);
1955
+
1956
+ return inode;
1957
+}
1958
+
17831959 int pid_getattr(const struct path *path, struct kstat *stat,
17841960 u32 request_mask, unsigned int query_flags)
17851961 {
17861962 struct inode *inode = d_inode(path->dentry);
1787
- struct pid_namespace *pid = proc_pid_ns(inode);
1963
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
17881964 struct task_struct *task;
17891965
17901966 generic_fillattr(inode, stat);
....@@ -1794,7 +1970,7 @@
17941970 rcu_read_lock();
17951971 task = pid_task(proc_pid(inode), PIDTYPE_PID);
17961972 if (task) {
1797
- if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
1973
+ if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
17981974 rcu_read_unlock();
17991975 /*
18001976 * This doesn't prevent learning whether PID exists,
....@@ -1891,7 +2067,7 @@
18912067
18922068 child = d_hash_and_lookup(dir, &qname);
18932069 if (!child) {
1894
- DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
2070
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
18952071 child = d_alloc_parallel(dir, &qname, &wq);
18962072 if (IS_ERR(child))
18972073 goto end_instantiate;
....@@ -1979,11 +2155,11 @@
19792155 goto out;
19802156
19812157 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
1982
- status = down_read_killable(&mm->mmap_sem);
2158
+ status = mmap_read_lock_killable(mm);
19832159 if (!status) {
19842160 exact_vma_exists = !!find_exact_vma(mm, vm_start,
19852161 vm_end);
1986
- up_read(&mm->mmap_sem);
2162
+ mmap_read_unlock(mm);
19872163 }
19882164 }
19892165
....@@ -2030,7 +2206,7 @@
20302206 if (rc)
20312207 goto out_mmput;
20322208
2033
- rc = down_read_killable(&mm->mmap_sem);
2209
+ rc = mmap_read_lock_killable(mm);
20342210 if (rc)
20352211 goto out_mmput;
20362212
....@@ -2041,7 +2217,7 @@
20412217 path_get(path);
20422218 rc = 0;
20432219 }
2044
- up_read(&mm->mmap_sem);
2220
+ mmap_read_unlock(mm);
20452221
20462222 out_mmput:
20472223 mmput(mm);
....@@ -2056,16 +2232,16 @@
20562232 };
20572233
20582234 /*
2059
- * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
2060
- * symlinks may be used to bypass permissions on ancestor directories in the
2061
- * path to the file in question.
2235
+ * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due
2236
+ * to concerns about how the symlinks may be used to bypass permissions on
2237
+ * ancestor directories in the path to the file in question.
20622238 */
20632239 static const char *
20642240 proc_map_files_get_link(struct dentry *dentry,
20652241 struct inode *inode,
20662242 struct delayed_call *done)
20672243 {
2068
- if (!capable(CAP_SYS_ADMIN))
2244
+ if (!checkpoint_restore_ns_capable(&init_user_ns))
20692245 return ERR_PTR(-EPERM);
20702246
20712247 return proc_pid_get_link(dentry, inode, done);
....@@ -2131,7 +2307,7 @@
21312307 goto out_put_task;
21322308
21332309 result = ERR_PTR(-EINTR);
2134
- if (down_read_killable(&mm->mmap_sem))
2310
+ if (mmap_read_lock_killable(mm))
21352311 goto out_put_mm;
21362312
21372313 result = ERR_PTR(-ENOENT);
....@@ -2144,7 +2320,7 @@
21442320 (void *)(unsigned long)vma->vm_file->f_mode);
21452321
21462322 out_no_vma:
2147
- up_read(&mm->mmap_sem);
2323
+ mmap_read_unlock(mm);
21482324 out_put_mm:
21492325 mmput(mm);
21502326 out_put_task:
....@@ -2166,10 +2342,11 @@
21662342 struct task_struct *task;
21672343 struct mm_struct *mm;
21682344 unsigned long nr_files, pos, i;
2169
- struct flex_array *fa = NULL;
2170
- struct map_files_info info;
2345
+ GENRADIX(struct map_files_info) fa;
21712346 struct map_files_info *p;
21722347 int ret;
2348
+
2349
+ genradix_init(&fa);
21732350
21742351 ret = -ENOENT;
21752352 task = get_proc_task(file_inode(file));
....@@ -2188,7 +2365,7 @@
21882365 if (!mm)
21892366 goto out_put_task;
21902367
2191
- ret = down_read_killable(&mm->mmap_sem);
2368
+ ret = mmap_read_lock_killable(mm);
21922369 if (ret) {
21932370 mmput(mm);
21942371 goto out_put_task;
....@@ -2199,52 +2376,39 @@
21992376 /*
22002377 * We need two passes here:
22012378 *
2202
- * 1) Collect vmas of mapped files with mmap_sem taken
2203
- * 2) Release mmap_sem and instantiate entries
2379
+ * 1) Collect vmas of mapped files with mmap_lock taken
2380
+ * 2) Release mmap_lock and instantiate entries
22042381 *
22052382 * otherwise we get lockdep complained, since filldir()
2206
- * routine might require mmap_sem taken in might_fault().
2383
+ * routine might require mmap_lock taken in might_fault().
22072384 */
22082385
22092386 for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
2210
- if (vma->vm_file && ++pos > ctx->pos)
2211
- nr_files++;
2212
- }
2387
+ if (!vma->vm_file)
2388
+ continue;
2389
+ if (++pos <= ctx->pos)
2390
+ continue;
22132391
2214
- if (nr_files) {
2215
- fa = flex_array_alloc(sizeof(info), nr_files,
2216
- GFP_KERNEL);
2217
- if (!fa || flex_array_prealloc(fa, 0, nr_files,
2218
- GFP_KERNEL)) {
2392
+ p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
2393
+ if (!p) {
22192394 ret = -ENOMEM;
2220
- if (fa)
2221
- flex_array_free(fa);
2222
- up_read(&mm->mmap_sem);
2395
+ mmap_read_unlock(mm);
22232396 mmput(mm);
22242397 goto out_put_task;
22252398 }
2226
- for (i = 0, vma = mm->mmap, pos = 2; vma;
2227
- vma = vma->vm_next) {
2228
- if (!vma->vm_file)
2229
- continue;
2230
- if (++pos <= ctx->pos)
2231
- continue;
22322399
2233
- info.start = vma->vm_start;
2234
- info.end = vma->vm_end;
2235
- info.mode = vma->vm_file->f_mode;
2236
- if (flex_array_put(fa, i++, &info, GFP_KERNEL))
2237
- BUG();
2238
- }
2400
+ p->start = vma->vm_start;
2401
+ p->end = vma->vm_end;
2402
+ p->mode = vma->vm_file->f_mode;
22392403 }
2240
- up_read(&mm->mmap_sem);
2404
+ mmap_read_unlock(mm);
22412405 mmput(mm);
22422406
22432407 for (i = 0; i < nr_files; i++) {
22442408 char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
22452409 unsigned int len;
22462410
2247
- p = flex_array_get(fa, i);
2411
+ p = genradix_ptr(&fa, i);
22482412 len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
22492413 if (!proc_fill_cache(file, ctx,
22502414 buf, len,
....@@ -2254,12 +2418,11 @@
22542418 break;
22552419 ctx->pos++;
22562420 }
2257
- if (fa)
2258
- flex_array_free(fa);
22592421
22602422 out_put_task:
22612423 put_task_struct(task);
22622424 out:
2425
+ genradix_free(&fa);
22632426 return ret;
22642427 }
22652428
....@@ -2358,7 +2521,7 @@
23582521 return -ENOMEM;
23592522
23602523 tp->pid = proc_pid(inode);
2361
- tp->ns = proc_pid_ns(inode);
2524
+ tp->ns = proc_pid_ns(inode->i_sb);
23622525 return 0;
23632526 }
23642527
....@@ -2387,10 +2550,13 @@
23872550 return -ESRCH;
23882551
23892552 if (p != current) {
2390
- if (!capable(CAP_SYS_NICE)) {
2553
+ rcu_read_lock();
2554
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
2555
+ rcu_read_unlock();
23912556 count = -EPERM;
23922557 goto out;
23932558 }
2559
+ rcu_read_unlock();
23942560
23952561 err = security_task_setscheduler(p);
23962562 if (err) {
....@@ -2423,11 +2589,14 @@
24232589 return -ESRCH;
24242590
24252591 if (p != current) {
2426
-
2427
- if (!capable(CAP_SYS_NICE)) {
2592
+ rcu_read_lock();
2593
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
2594
+ rcu_read_unlock();
24282595 err = -EPERM;
24292596 goto out;
24302597 }
2598
+ rcu_read_unlock();
2599
+
24312600 err = security_task_getscheduler(p);
24322601 if (err)
24332602 goto out;
....@@ -2482,11 +2651,10 @@
24822651
24832652 static struct dentry *proc_pident_lookup(struct inode *dir,
24842653 struct dentry *dentry,
2485
- const struct pid_entry *ents,
2486
- unsigned int nents)
2654
+ const struct pid_entry *p,
2655
+ const struct pid_entry *end)
24872656 {
24882657 struct task_struct *task = get_proc_task(dir);
2489
- const struct pid_entry *p, *last;
24902658 struct dentry *res = ERR_PTR(-ENOENT);
24912659
24922660 if (!task)
....@@ -2496,8 +2664,7 @@
24962664 * Yes, it does not scale. And it should not. Don't add
24972665 * new entries into /proc/<tgid>/ without very good reasons.
24982666 */
2499
- last = &ents[nents];
2500
- for (p = ents; p < last; p++) {
2667
+ for (; p < end; p++) {
25012668 if (p->len != dentry->d_name.len)
25022669 continue;
25032670 if (!memcmp(dentry->d_name.name, p->name, p->len)) {
....@@ -2555,7 +2722,7 @@
25552722 if (!task)
25562723 return -ESRCH;
25572724
2558
- length = security_getprocattr(task,
2725
+ length = security_getprocattr(task, PROC_I(inode)->op.lsm,
25592726 (char*)file->f_path.dentry->d_name.name,
25602727 &p);
25612728 put_task_struct(task);
....@@ -2613,7 +2780,9 @@
26132780 if (rv < 0)
26142781 goto out_free;
26152782
2616
- rv = security_setprocattr(file->f_path.dentry->d_name.name, page, count);
2783
+ rv = security_setprocattr(PROC_I(inode)->op.lsm,
2784
+ file->f_path.dentry->d_name.name, page,
2785
+ count);
26172786 mutex_unlock(&current->signal->cred_guard_mutex);
26182787 out_free:
26192788 kfree(page);
....@@ -2629,13 +2798,66 @@
26292798 .release = mem_release,
26302799 };
26312800
2801
+#define LSM_DIR_OPS(LSM) \
2802
+static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
2803
+ struct dir_context *ctx) \
2804
+{ \
2805
+ return proc_pident_readdir(filp, ctx, \
2806
+ LSM##_attr_dir_stuff, \
2807
+ ARRAY_SIZE(LSM##_attr_dir_stuff)); \
2808
+} \
2809
+\
2810
+static const struct file_operations proc_##LSM##_attr_dir_ops = { \
2811
+ .read = generic_read_dir, \
2812
+ .iterate = proc_##LSM##_attr_dir_iterate, \
2813
+ .llseek = default_llseek, \
2814
+}; \
2815
+\
2816
+static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
2817
+ struct dentry *dentry, unsigned int flags) \
2818
+{ \
2819
+ return proc_pident_lookup(dir, dentry, \
2820
+ LSM##_attr_dir_stuff, \
2821
+ LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \
2822
+} \
2823
+\
2824
+static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
2825
+ .lookup = proc_##LSM##_attr_dir_lookup, \
2826
+ .getattr = pid_getattr, \
2827
+ .setattr = proc_setattr, \
2828
+}
2829
+
2830
+#ifdef CONFIG_SECURITY_SMACK
2831
+static const struct pid_entry smack_attr_dir_stuff[] = {
2832
+ ATTR("smack", "current", 0666),
2833
+};
2834
+LSM_DIR_OPS(smack);
2835
+#endif
2836
+
2837
+#ifdef CONFIG_SECURITY_APPARMOR
2838
+static const struct pid_entry apparmor_attr_dir_stuff[] = {
2839
+ ATTR("apparmor", "current", 0666),
2840
+ ATTR("apparmor", "prev", 0444),
2841
+ ATTR("apparmor", "exec", 0666),
2842
+};
2843
+LSM_DIR_OPS(apparmor);
2844
+#endif
2845
+
26322846 static const struct pid_entry attr_dir_stuff[] = {
2633
- REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2634
- REG("prev", S_IRUGO, proc_pid_attr_operations),
2635
- REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2636
- REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2637
- REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2638
- REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2847
+ ATTR(NULL, "current", 0666),
2848
+ ATTR(NULL, "prev", 0444),
2849
+ ATTR(NULL, "exec", 0666),
2850
+ ATTR(NULL, "fscreate", 0666),
2851
+ ATTR(NULL, "keycreate", 0666),
2852
+ ATTR(NULL, "sockcreate", 0666),
2853
+#ifdef CONFIG_SECURITY_SMACK
2854
+ DIR("smack", 0555,
2855
+ proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
2856
+#endif
2857
+#ifdef CONFIG_SECURITY_APPARMOR
2858
+ DIR("apparmor", 0555,
2859
+ proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops),
2860
+#endif
26392861 };
26402862
26412863 static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
....@@ -2654,7 +2876,8 @@
26542876 struct dentry *dentry, unsigned int flags)
26552877 {
26562878 return proc_pident_lookup(dir, dentry,
2657
- attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
2879
+ attr_dir_stuff,
2880
+ attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff));
26582881 }
26592882
26602883 static const struct inode_operations proc_attr_dir_inode_operations = {
....@@ -2749,7 +2972,7 @@
27492972 unsigned long flags;
27502973 int result;
27512974
2752
- result = mutex_lock_killable(&task->signal->cred_guard_mutex);
2975
+ result = down_read_killable(&task->signal->exec_update_lock);
27532976 if (result)
27542977 return result;
27552978
....@@ -2785,7 +3008,7 @@
27853008 result = 0;
27863009
27873010 out_unlock:
2788
- mutex_unlock(&task->signal->cred_guard_mutex);
3011
+ up_read(&task->signal->exec_update_lock);
27893012 return result;
27903013 }
27913014
....@@ -2954,6 +3177,21 @@
29543177 }
29553178 #endif /* CONFIG_LIVEPATCH */
29563179
3180
+#ifdef CONFIG_STACKLEAK_METRICS
3181
+static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
3182
+ struct pid *pid, struct task_struct *task)
3183
+{
3184
+ unsigned long prev_depth = THREAD_SIZE -
3185
+ (task->prev_lowest_stack & (THREAD_SIZE - 1));
3186
+ unsigned long depth = THREAD_SIZE -
3187
+ (task->lowest_stack & (THREAD_SIZE - 1));
3188
+
3189
+ seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
3190
+ prev_depth, depth);
3191
+ return 0;
3192
+}
3193
+#endif /* CONFIG_STACKLEAK_METRICS */
3194
+
29573195 /*
29583196 * Thread groups
29593197 */
....@@ -2964,7 +3202,7 @@
29643202 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
29653203 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
29663204 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
2967
- DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3205
+ DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
29683206 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
29693207 #ifdef CONFIG_NET
29703208 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
....@@ -2979,6 +3217,9 @@
29793217 #endif
29803218 #ifdef CONFIG_SCHED_AUTOGROUP
29813219 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
3220
+#endif
3221
+#ifdef CONFIG_TIME_NS
3222
+ REG("timens_offsets", S_IRUGO|S_IWUSR, proc_timens_offsets_operations),
29823223 #endif
29833224 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
29843225 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
....@@ -3025,10 +3266,13 @@
30253266 #ifdef CONFIG_CGROUPS
30263267 ONE("cgroup", S_IRUGO, proc_cgroup_show),
30273268 #endif
3269
+#ifdef CONFIG_PROC_CPU_RESCTRL
3270
+ ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
3271
+#endif
30283272 ONE("oom_score", S_IRUGO, proc_oom_score),
30293273 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
30303274 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3031
-#ifdef CONFIG_AUDITSYSCALL
3275
+#ifdef CONFIG_AUDIT
30323276 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
30333277 REG("sessionid", S_IRUGO, proc_sessionid_operations),
30343278 #endif
....@@ -3058,6 +3302,12 @@
30583302 #ifdef CONFIG_CPU_FREQ_TIMES
30593303 ONE("time_in_state", 0444, proc_time_in_state_show),
30603304 #endif
3305
+#ifdef CONFIG_STACKLEAK_METRICS
3306
+ ONE("stack_depth", S_IRUGO, proc_stack_depth),
3307
+#endif
3308
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
3309
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
3310
+#endif
30613311 };
30623312
30633313 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
....@@ -3074,8 +3324,7 @@
30743324
30753325 struct pid *tgid_pidfd_to_pid(const struct file *file)
30763326 {
3077
- if (!d_is_dir(file->f_path.dentry) ||
3078
- (file->f_op != &proc_tgid_base_operations))
3327
+ if (file->f_op != &proc_tgid_base_operations)
30793328 return ERR_PTR(-EBADF);
30803329
30813330 return proc_pid(file_inode(file));
....@@ -3084,7 +3333,8 @@
30843333 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
30853334 {
30863335 return proc_pident_lookup(dir, dentry,
3087
- tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
3336
+ tgid_base_stuff,
3337
+ tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff));
30883338 }
30893339
30903340 static const struct inode_operations proc_tgid_base_inode_operations = {
....@@ -3094,90 +3344,28 @@
30943344 .permission = proc_pid_permission,
30953345 };
30963346
3097
-static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
3098
-{
3099
- struct dentry *dentry, *leader, *dir;
3100
- char buf[10 + 1];
3101
- struct qstr name;
3102
-
3103
- name.name = buf;
3104
- name.len = snprintf(buf, sizeof(buf), "%u", pid);
3105
- /* no ->d_hash() rejects on procfs */
3106
- dentry = d_hash_and_lookup(mnt->mnt_root, &name);
3107
- if (dentry) {
3108
- d_invalidate(dentry);
3109
- dput(dentry);
3110
- }
3111
-
3112
- if (pid == tgid)
3113
- return;
3114
-
3115
- name.name = buf;
3116
- name.len = snprintf(buf, sizeof(buf), "%u", tgid);
3117
- leader = d_hash_and_lookup(mnt->mnt_root, &name);
3118
- if (!leader)
3119
- goto out;
3120
-
3121
- name.name = "task";
3122
- name.len = strlen(name.name);
3123
- dir = d_hash_and_lookup(leader, &name);
3124
- if (!dir)
3125
- goto out_put_leader;
3126
-
3127
- name.name = buf;
3128
- name.len = snprintf(buf, sizeof(buf), "%u", pid);
3129
- dentry = d_hash_and_lookup(dir, &name);
3130
- if (dentry) {
3131
- d_invalidate(dentry);
3132
- dput(dentry);
3133
- }
3134
-
3135
- dput(dir);
3136
-out_put_leader:
3137
- dput(leader);
3138
-out:
3139
- return;
3140
-}
3141
-
31423347 /**
3143
- * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
3144
- * @task: task that should be flushed.
3348
+ * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache.
3349
+ * @pid: pid that should be flushed.
31453350 *
3146
- * When flushing dentries from proc, one needs to flush them from global
3147
- * proc (proc_mnt) and from all the namespaces' procs this task was seen
3148
- * in. This call is supposed to do all of this job.
3149
- *
3150
- * Looks in the dcache for
3151
- * /proc/@pid
3152
- * /proc/@tgid/task/@pid
3153
- * if either directory is present flushes it and all of it'ts children
3154
- * from the dcache.
3351
+ * This function walks a list of inodes (that belong to any proc
3352
+ * filesystem) that are attached to the pid and flushes them from
3353
+ * the dentry cache.
31553354 *
31563355 * It is safe and reasonable to cache /proc entries for a task until
31573356 * that task exits. After that they just clog up the dcache with
31583357 * useless entries, possibly causing useful dcache entries to be
3159
- * flushed instead. This routine is proved to flush those useless
3160
- * dcache entries at process exit time.
3358
+ * flushed instead. This routine is provided to flush those useless
3359
+ * dcache entries when a process is reaped.
31613360 *
31623361 * NOTE: This routine is just an optimization so it does not guarantee
3163
- * that no dcache entries will exist at process exit time it
3164
- * just makes it very unlikely that any will persist.
3362
+ * that no dcache entries will exist after a process is reaped
3363
+ * it just makes it very unlikely that any will persist.
31653364 */
31663365
3167
-void proc_flush_task(struct task_struct *task)
3366
+void proc_flush_pid(struct pid *pid)
31683367 {
3169
- int i;
3170
- struct pid *pid, *tgid;
3171
- struct upid *upid;
3172
-
3173
- pid = task_pid(task);
3174
- tgid = task_tgid(task);
3175
-
3176
- for (i = 0; i <= pid->level; i++) {
3177
- upid = &pid->numbers[i];
3178
- proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
3179
- tgid->numbers[i].nr);
3180
- }
3368
+ proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock);
31813369 }
31823370
31833371 static struct dentry *proc_pid_instantiate(struct dentry * dentry,
....@@ -3185,7 +3373,8 @@
31853373 {
31863374 struct inode *inode;
31873375
3188
- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
3376
+ inode = proc_pid_make_base_inode(dentry->d_sb, task,
3377
+ S_IFDIR | S_IRUGO | S_IXUGO);
31893378 if (!inode)
31903379 return ERR_PTR(-ENOENT);
31913380
....@@ -3200,10 +3389,11 @@
32003389 return d_splice_alias(inode, dentry);
32013390 }
32023391
3203
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
3392
+struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
32043393 {
32053394 struct task_struct *task;
32063395 unsigned tgid;
3396
+ struct proc_fs_info *fs_info;
32073397 struct pid_namespace *ns;
32083398 struct dentry *result = ERR_PTR(-ENOENT);
32093399
....@@ -3211,7 +3401,8 @@
32113401 if (tgid == ~0U)
32123402 goto out;
32133403
3214
- ns = dentry->d_sb->s_fs_info;
3404
+ fs_info = proc_sb_info(dentry->d_sb);
3405
+ ns = fs_info->pid_ns;
32153406 rcu_read_lock();
32163407 task = find_task_by_pid_ns(tgid, ns);
32173408 if (task)
....@@ -3220,7 +3411,14 @@
32203411 if (!task)
32213412 goto out;
32223413
3414
+ /* Limit procfs to only ptraceable tasks */
3415
+ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
3416
+ if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
3417
+ goto out_put_task;
3418
+ }
3419
+
32233420 result = proc_pid_instantiate(dentry, task, NULL);
3421
+out_put_task:
32243422 put_task_struct(task);
32253423 out:
32263424 return result;
....@@ -3246,20 +3444,8 @@
32463444 pid = find_ge_pid(iter.tgid, ns);
32473445 if (pid) {
32483446 iter.tgid = pid_nr_ns(pid, ns);
3249
- iter.task = pid_task(pid, PIDTYPE_PID);
3250
- /* What we to know is if the pid we have find is the
3251
- * pid of a thread_group_leader. Testing for task
3252
- * being a thread_group_leader is the obvious thing
3253
- * todo but there is a window when it fails, due to
3254
- * the pid transfer logic in de_thread.
3255
- *
3256
- * So we perform the straight forward test of seeing
3257
- * if the pid we have found is the pid of a thread
3258
- * group leader, and don't worry if the task we have
3259
- * found doesn't happen to be a thread group leader.
3260
- * As we don't care in the case of readdir.
3261
- */
3262
- if (!iter.task || !has_group_leader_pid(iter.task)) {
3447
+ iter.task = pid_task(pid, PIDTYPE_TGID);
3448
+ if (!iter.task) {
32633449 iter.tgid += 1;
32643450 goto retry;
32653451 }
....@@ -3275,20 +3461,21 @@
32753461 int proc_pid_readdir(struct file *file, struct dir_context *ctx)
32763462 {
32773463 struct tgid_iter iter;
3278
- struct pid_namespace *ns = proc_pid_ns(file_inode(file));
3464
+ struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
3465
+ struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb);
32793466 loff_t pos = ctx->pos;
32803467
32813468 if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
32823469 return 0;
32833470
32843471 if (pos == TGID_OFFSET - 2) {
3285
- struct inode *inode = d_inode(ns->proc_self);
3472
+ struct inode *inode = d_inode(fs_info->proc_self);
32863473 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
32873474 return 0;
32883475 ctx->pos = pos = pos + 1;
32893476 }
32903477 if (pos == TGID_OFFSET - 1) {
3291
- struct inode *inode = d_inode(ns->proc_thread_self);
3478
+ struct inode *inode = d_inode(fs_info->proc_thread_self);
32923479 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
32933480 return 0;
32943481 ctx->pos = pos = pos + 1;
....@@ -3302,7 +3489,7 @@
33023489 unsigned int len;
33033490
33043491 cond_resched();
3305
- if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
3492
+ if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
33063493 continue;
33073494
33083495 len = snprintf(name, sizeof(name), "%u", iter.tgid);
....@@ -3352,7 +3539,8 @@
33523539 }
33533540
33543541 static const struct inode_operations proc_tid_comm_inode_operations = {
3355
- .permission = proc_tid_comm_permission,
3542
+ .setattr = proc_setattr,
3543
+ .permission = proc_tid_comm_permission,
33563544 };
33573545
33583546 /*
....@@ -3360,7 +3548,7 @@
33603548 */
33613549 static const struct pid_entry tid_base_stuff[] = {
33623550 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3363
- DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3551
+ DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
33643552 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
33653553 #ifdef CONFIG_NET
33663554 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
....@@ -3422,10 +3610,13 @@
34223610 #ifdef CONFIG_CGROUPS
34233611 ONE("cgroup", S_IRUGO, proc_cgroup_show),
34243612 #endif
3613
+#ifdef CONFIG_PROC_CPU_RESCTRL
3614
+ ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
3615
+#endif
34253616 ONE("oom_score", S_IRUGO, proc_oom_score),
34263617 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
34273618 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3428
-#ifdef CONFIG_AUDITSYSCALL
3619
+#ifdef CONFIG_AUDIT
34293620 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
34303621 REG("sessionid", S_IRUGO, proc_sessionid_operations),
34313622 #endif
....@@ -3445,6 +3636,9 @@
34453636 #ifdef CONFIG_LIVEPATCH
34463637 ONE("patch_state", S_IRUSR, proc_pid_patch_state),
34473638 #endif
3639
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
3640
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
3641
+#endif
34483642 #ifdef CONFIG_CPU_FREQ_TIMES
34493643 ONE("time_in_state", 0444, proc_time_in_state_show),
34503644 #endif
....@@ -3459,7 +3653,8 @@
34593653 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
34603654 {
34613655 return proc_pident_lookup(dir, dentry,
3462
- tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
3656
+ tid_base_stuff,
3657
+ tid_base_stuff + ARRAY_SIZE(tid_base_stuff));
34633658 }
34643659
34653660 static const struct file_operations proc_tid_base_operations = {
....@@ -3478,7 +3673,8 @@
34783673 struct task_struct *task, const void *ptr)
34793674 {
34803675 struct inode *inode;
3481
- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
3676
+ inode = proc_pid_make_base_inode(dentry->d_sb, task,
3677
+ S_IFDIR | S_IRUGO | S_IXUGO);
34823678 if (!inode)
34833679 return ERR_PTR(-ENOENT);
34843680
....@@ -3498,6 +3694,7 @@
34983694 struct task_struct *task;
34993695 struct task_struct *leader = get_proc_task(dir);
35003696 unsigned tid;
3697
+ struct proc_fs_info *fs_info;
35013698 struct pid_namespace *ns;
35023699 struct dentry *result = ERR_PTR(-ENOENT);
35033700
....@@ -3508,7 +3705,8 @@
35083705 if (tid == ~0U)
35093706 goto out;
35103707
3511
- ns = dentry->d_sb->s_fs_info;
3708
+ fs_info = proc_sb_info(dentry->d_sb);
3709
+ ns = fs_info->pid_ns;
35123710 rcu_read_lock();
35133711 task = find_task_by_pid_ns(tid, ns);
35143712 if (task)
....@@ -3622,7 +3820,7 @@
36223820 /* f_version caches the tgid value that the last readdir call couldn't
36233821 * return. lseek aka telldir automagically resets f_version to 0.
36243822 */
3625
- ns = proc_pid_ns(inode);
3823
+ ns = proc_pid_ns(inode->i_sb);
36263824 tid = (int)file->f_version;
36273825 file->f_version = 0;
36283826 for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);