hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/proc/base.c
....@@ -59,6 +59,7 @@
5959 #include <linux/capability.h>
6060 #include <linux/file.h>
6161 #include <linux/fdtable.h>
62
+#include <linux/generic-radix-tree.h>
6263 #include <linux/string.h>
6364 #include <linux/seq_file.h>
6465 #include <linux/namei.h>
....@@ -92,8 +93,9 @@
9293 #include <linux/sched/coredump.h>
9394 #include <linux/sched/debug.h>
9495 #include <linux/sched/stat.h>
95
-#include <linux/flex_array.h>
9696 #include <linux/posix-timers.h>
97
+#include <linux/time_namespace.h>
98
+#include <linux/resctrl.h>
9799 #include <linux/cpufreq_times.h>
98100 #include <trace/events/oom.h>
99101 #include "internal.h"
....@@ -141,9 +143,13 @@
141143 #define REG(NAME, MODE, fops) \
142144 NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
143145 #define ONE(NAME, MODE, show) \
144
- NOD(NAME, (S_IFREG|(MODE)), \
146
+ NOD(NAME, (S_IFREG|(MODE)), \
145147 NULL, &proc_single_file_operations, \
146148 { .proc_show = show } )
149
+#define ATTR(LSM, NAME, MODE) \
150
+ NOD(NAME, (S_IFREG|(MODE)), \
151
+ NULL, &proc_pid_attr_operations, \
152
+ { .lsm = LSM })
147153
148154 /*
149155 * Count the number of hardlinks for the pid_entry table, excluding the .
....@@ -400,11 +406,11 @@
400406
401407 static int lock_trace(struct task_struct *task)
402408 {
403
- int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
409
+ int err = down_read_killable(&task->signal->exec_update_lock);
404410 if (err)
405411 return err;
406412 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
407
- mutex_unlock(&task->signal->cred_guard_mutex);
413
+ up_read(&task->signal->exec_update_lock);
408414 return -EPERM;
409415 }
410416 return 0;
....@@ -412,7 +418,7 @@
412418
413419 static void unlock_trace(struct task_struct *task)
414420 {
415
- mutex_unlock(&task->signal->cred_guard_mutex);
421
+ up_read(&task->signal->exec_update_lock);
416422 }
417423
418424 #ifdef CONFIG_STACKTRACE
....@@ -422,7 +428,6 @@
422428 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
423429 struct pid *pid, struct task_struct *task)
424430 {
425
- struct stack_trace trace;
426431 unsigned long *entries;
427432 int err;
428433
....@@ -445,20 +450,17 @@
445450 if (!entries)
446451 return -ENOMEM;
447452
448
- trace.nr_entries = 0;
449
- trace.max_entries = MAX_STACK_TRACE_DEPTH;
450
- trace.entries = entries;
451
- trace.skip = 0;
452
-
453453 err = lock_trace(task);
454454 if (!err) {
455
- unsigned int i;
455
+ unsigned int i, nr_entries;
456456
457
- save_stack_trace_tsk(task, &trace);
457
+ nr_entries = stack_trace_save_tsk(task, entries,
458
+ MAX_STACK_TRACE_DEPTH, 0);
458459
459
- for (i = 0; i < trace.nr_entries; i++) {
460
+ for (i = 0; i < nr_entries; i++) {
460461 seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
461462 }
463
+
462464 unlock_trace(task);
463465 }
464466 kfree(entries);
....@@ -475,7 +477,7 @@
475477 struct pid *pid, struct task_struct *task)
476478 {
477479 if (unlikely(!sched_info_on()))
478
- seq_printf(m, "0 0 0\n");
480
+ seq_puts(m, "0 0 0\n");
479481 else
480482 seq_printf(m, "%llu %llu %lu\n",
481483 (unsigned long long)task->se.sum_exec_runtime,
....@@ -504,9 +506,8 @@
504506 lr->count, lr->time, lr->max);
505507 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
506508 unsigned long bt = lr->backtrace[q];
509
+
507510 if (!bt)
508
- break;
509
- if (bt == ULONG_MAX)
510511 break;
511512 seq_printf(m, " %ps", (void *)bt);
512513 }
....@@ -530,7 +531,7 @@
530531
531532 if (!task)
532533 return -ESRCH;
533
- clear_all_latency_tracing(task);
534
+ clear_tsk_latency_tracing(task);
534535 put_task_struct(task);
535536
536537 return count;
....@@ -549,11 +550,19 @@
549550 static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
550551 struct pid *pid, struct task_struct *task)
551552 {
552
- unsigned long totalpages = totalram_pages + total_swap_pages;
553
+ unsigned long totalpages = totalram_pages() + total_swap_pages;
553554 unsigned long points = 0;
555
+ long badness;
554556
555
- points = oom_badness(task, NULL, NULL, totalpages) *
556
- 1000 / totalpages;
557
+ badness = oom_badness(task, totalpages);
558
+ /*
559
+ * Special case OOM_SCORE_ADJ_MIN; for all others, scale the
560
+ * badness value into the [0, 2000] range, which we have been
561
+ * exporting for a long time, so userspace might depend on it.
562
+ */
563
+ if (badness != LONG_MIN)
564
+ points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3;
565
+
557566 seq_printf(m, "%lu\n", points);
558567
559568 return 0;
....@@ -600,8 +609,10 @@
600609 /*
601610 * print the file header
602611 */
603
- seq_printf(m, "%-25s %-20s %-20s %-10s\n",
604
- "Limit", "Soft Limit", "Hard Limit", "Units");
612
+ seq_puts(m, "Limit "
613
+ "Soft Limit "
614
+ "Hard Limit "
615
+ "Units \n");
605616
606617 for (i = 0; i < RLIM_NLIMITS; i++) {
607618 if (rlim[i].rlim_cur == RLIM_INFINITY)
....@@ -629,24 +640,25 @@
629640 static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
630641 struct pid *pid, struct task_struct *task)
631642 {
632
- long nr;
633
- unsigned long args[6], sp, pc;
643
+ struct syscall_info info;
644
+ u64 *args = &info.data.args[0];
634645 int res;
635646
636647 res = lock_trace(task);
637648 if (res)
638649 return res;
639650
640
- if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
651
+ if (task_current_syscall(task, &info))
641652 seq_puts(m, "running\n");
642
- else if (nr < 0)
643
- seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
653
+ else if (info.data.nr < 0)
654
+ seq_printf(m, "%d 0x%llx 0x%llx\n",
655
+ info.data.nr, info.sp, info.data.instruction_pointer);
644656 else
645657 seq_printf(m,
646
- "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
647
- nr,
658
+ "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
659
+ info.data.nr,
648660 args[0], args[1], args[2], args[3], args[4], args[5],
649
- sp, pc);
661
+ info.sp, info.data.instruction_pointer);
650662 unlock_trace(task);
651663
652664 return 0;
....@@ -695,13 +707,21 @@
695707 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
696708 * or euid/egid (for hide_pid_min=2)?
697709 */
698
-static bool has_pid_permissions(struct pid_namespace *pid,
710
+static bool has_pid_permissions(struct proc_fs_info *fs_info,
699711 struct task_struct *task,
700
- int hide_pid_min)
712
+ enum proc_hidepid hide_pid_min)
701713 {
702
- if (pid->hide_pid < hide_pid_min)
714
+ /*
715
+ * If the 'hidepid' mount option is HIDEPID_NOT_PTRACEABLE, force a ptrace check;
716
+ * we indicate that we are using a filesystem syscall
717
+ * by passing PTRACE_MODE_READ_FSCREDS
718
+ */
719
+ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
720
+ return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
721
+
722
+ if (fs_info->hide_pid < hide_pid_min)
703723 return true;
704
- if (in_group_p(pid->pid_gid))
724
+ if (in_group_p(fs_info->pid_gid))
705725 return true;
706726 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
707727 }
....@@ -709,18 +729,18 @@
709729
710730 static int proc_pid_permission(struct inode *inode, int mask)
711731 {
712
- struct pid_namespace *pid = proc_pid_ns(inode);
732
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
713733 struct task_struct *task;
714734 bool has_perms;
715735
716736 task = get_proc_task(inode);
717737 if (!task)
718738 return -ESRCH;
719
- has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
739
+ has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
720740 put_task_struct(task);
721741
722742 if (!has_perms) {
723
- if (pid->hide_pid == HIDEPID_INVISIBLE) {
743
+ if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
724744 /*
725745 * Let's make getdents(), stat(), and open()
726746 * consistent with each other. If a process
....@@ -744,7 +764,7 @@
744764 static int proc_single_show(struct seq_file *m, void *v)
745765 {
746766 struct inode *inode = m->private;
747
- struct pid_namespace *ns = proc_pid_ns(inode);
767
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
748768 struct pid *pid = proc_pid(inode);
749769 struct task_struct *task;
750770 int ret;
....@@ -1030,6 +1050,8 @@
10301050 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
10311051 OOM_SCORE_ADJ_MAX;
10321052 put_task_struct(task);
1053
+ if (oom_adj > OOM_ADJUST_MAX)
1054
+ oom_adj = OOM_ADJUST_MAX;
10331055 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
10341056 return simple_read_from_buffer(buf, count, ppos, buffer, len);
10351057 }
....@@ -1222,7 +1244,7 @@
12221244 .llseek = default_llseek,
12231245 };
12241246
1225
-#ifdef CONFIG_AUDITSYSCALL
1247
+#ifdef CONFIG_AUDIT
12261248 #define TMPBUFLEN 11
12271249 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
12281250 size_t count, loff_t *ppos)
....@@ -1248,6 +1270,10 @@
12481270 uid_t loginuid;
12491271 kuid_t kloginuid;
12501272 int rv;
1273
+
1274
+ /* Don't let kthreads write their own loginuid */
1275
+ if (current->flags & PF_KTHREAD)
1276
+ return -EPERM;
12511277
12521278 rcu_read_lock();
12531279 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
....@@ -1412,7 +1438,7 @@
14121438 static int sched_show(struct seq_file *m, void *v)
14131439 {
14141440 struct inode *inode = m->private;
1415
- struct pid_namespace *ns = proc_pid_ns(inode);
1441
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
14161442 struct task_struct *p;
14171443
14181444 p = get_proc_task(inode);
....@@ -1532,6 +1558,108 @@
15321558
15331559 #endif /* CONFIG_SCHED_AUTOGROUP */
15341560
1561
+#ifdef CONFIG_TIME_NS
1562
+static int timens_offsets_show(struct seq_file *m, void *v)
1563
+{
1564
+ struct task_struct *p;
1565
+
1566
+ p = get_proc_task(file_inode(m->file));
1567
+ if (!p)
1568
+ return -ESRCH;
1569
+ proc_timens_show_offsets(p, m);
1570
+
1571
+ put_task_struct(p);
1572
+
1573
+ return 0;
1574
+}
1575
+
1576
+static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
1577
+ size_t count, loff_t *ppos)
1578
+{
1579
+ struct inode *inode = file_inode(file);
1580
+ struct proc_timens_offset offsets[2];
1581
+ char *kbuf = NULL, *pos, *next_line;
1582
+ struct task_struct *p;
1583
+ int ret, noffsets;
1584
+
1585
+ /* Only allow < page size writes at the beginning of the file */
1586
+ if ((*ppos != 0) || (count >= PAGE_SIZE))
1587
+ return -EINVAL;
1588
+
1589
+ /* Slurp in the user data */
1590
+ kbuf = memdup_user_nul(buf, count);
1591
+ if (IS_ERR(kbuf))
1592
+ return PTR_ERR(kbuf);
1593
+
1594
+ /* Parse the user data */
1595
+ ret = -EINVAL;
1596
+ noffsets = 0;
1597
+ for (pos = kbuf; pos; pos = next_line) {
1598
+ struct proc_timens_offset *off = &offsets[noffsets];
1599
+ char clock[10];
1600
+ int err;
1601
+
1602
+ /* Find the end of line and ensure we don't look past it */
1603
+ next_line = strchr(pos, '\n');
1604
+ if (next_line) {
1605
+ *next_line = '\0';
1606
+ next_line++;
1607
+ if (*next_line == '\0')
1608
+ next_line = NULL;
1609
+ }
1610
+
1611
+ err = sscanf(pos, "%9s %lld %lu", clock,
1612
+ &off->val.tv_sec, &off->val.tv_nsec);
1613
+ if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
1614
+ goto out;
1615
+
1616
+ clock[sizeof(clock) - 1] = 0;
1617
+ if (strcmp(clock, "monotonic") == 0 ||
1618
+ strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0)
1619
+ off->clockid = CLOCK_MONOTONIC;
1620
+ else if (strcmp(clock, "boottime") == 0 ||
1621
+ strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0)
1622
+ off->clockid = CLOCK_BOOTTIME;
1623
+ else
1624
+ goto out;
1625
+
1626
+ noffsets++;
1627
+ if (noffsets == ARRAY_SIZE(offsets)) {
1628
+ if (next_line)
1629
+ count = next_line - kbuf;
1630
+ break;
1631
+ }
1632
+ }
1633
+
1634
+ ret = -ESRCH;
1635
+ p = get_proc_task(inode);
1636
+ if (!p)
1637
+ goto out;
1638
+ ret = proc_timens_set_offset(file, p, offsets, noffsets);
1639
+ put_task_struct(p);
1640
+ if (ret)
1641
+ goto out;
1642
+
1643
+ ret = count;
1644
+out:
1645
+ kfree(kbuf);
1646
+ return ret;
1647
+}
1648
+
1649
+static int timens_offsets_open(struct inode *inode, struct file *filp)
1650
+{
1651
+ return single_open(filp, timens_offsets_show, inode);
1652
+}
1653
+
1654
+static const struct file_operations proc_timens_offsets_operations = {
1655
+ .open = timens_offsets_open,
1656
+ .read = seq_read,
1657
+ .write = timens_offsets_write,
1658
+ .llseek = seq_lseek,
1659
+ .release = single_release,
1660
+};
1661
+#endif /* CONFIG_TIME_NS */
1662
+
15351663 static ssize_t comm_write(struct file *file, const char __user *buf,
15361664 size_t count, loff_t *offset)
15371665 {
....@@ -1625,8 +1753,7 @@
16251753 if (error)
16261754 goto out;
16271755
1628
- nd_jump_link(&path);
1629
- return NULL;
1756
+ error = nd_jump_link(&path);
16301757 out:
16311758 return ERR_PTR(error);
16321759 }
....@@ -1742,11 +1869,25 @@
17421869 *rgid = gid;
17431870 }
17441871
1745
-struct inode *proc_pid_make_inode(struct super_block * sb,
1872
+void proc_pid_evict_inode(struct proc_inode *ei)
1873
+{
1874
+ struct pid *pid = ei->pid;
1875
+
1876
+ if (S_ISDIR(ei->vfs_inode.i_mode)) {
1877
+ spin_lock(&pid->lock);
1878
+ hlist_del_init_rcu(&ei->sibling_inodes);
1879
+ spin_unlock(&pid->lock);
1880
+ }
1881
+
1882
+ put_pid(pid);
1883
+}
1884
+
1885
+struct inode *proc_pid_make_inode(struct super_block *sb,
17461886 struct task_struct *task, umode_t mode)
17471887 {
17481888 struct inode * inode;
17491889 struct proc_inode *ei;
1890
+ struct pid *pid;
17501891
17511892 /* We need a new inode */
17521893
....@@ -1764,9 +1905,12 @@
17641905 /*
17651906 * grab the reference to task.
17661907 */
1767
- ei->pid = get_task_pid(task, PIDTYPE_PID);
1768
- if (!ei->pid)
1908
+ pid = get_task_pid(task, PIDTYPE_PID);
1909
+ if (!pid)
17691910 goto out_unlock;
1911
+
1912
+ /* Let the pid remember us for quick removal */
1913
+ ei->pid = pid;
17701914
17711915 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
17721916 security_task_to_inode(task, inode);
....@@ -1779,11 +1923,44 @@
17791923 return NULL;
17801924 }
17811925
1926
+/*
1927
+ * Generate an inode and add it to @pid->inodes, so that the task will
1928
+ * invalidate the inode's dentry before being released.
1929
+ *
1930
+ * This helper is used for creating dir-type entries under '/proc' and
1931
+ * '/proc/<tgid>/task'. Other entries (e.g. fd, stat) under '/proc/<tgid>'
1932
+ * can be released by invalidating the '/proc/<tgid>' dentry.
1933
+ * In theory, dentries under '/proc/<tgid>/task' can also be released by
1934
+ * invalidating the '/proc/<tgid>' dentry, but we keep it to handle the
1935
+ * case of a single thread exiting: each thread should invalidate its own
1936
+ * '/proc/<tgid>/task/<pid>' dentry before it is released.
1937
+ */
1938
+static struct inode *proc_pid_make_base_inode(struct super_block *sb,
1939
+ struct task_struct *task, umode_t mode)
1940
+{
1941
+ struct inode *inode;
1942
+ struct proc_inode *ei;
1943
+ struct pid *pid;
1944
+
1945
+ inode = proc_pid_make_inode(sb, task, mode);
1946
+ if (!inode)
1947
+ return NULL;
1948
+
1949
+ /* Let proc_flush_pid find this directory inode */
1950
+ ei = PROC_I(inode);
1951
+ pid = ei->pid;
1952
+ spin_lock(&pid->lock);
1953
+ hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
1954
+ spin_unlock(&pid->lock);
1955
+
1956
+ return inode;
1957
+}
1958
+
17821959 int pid_getattr(const struct path *path, struct kstat *stat,
17831960 u32 request_mask, unsigned int query_flags)
17841961 {
17851962 struct inode *inode = d_inode(path->dentry);
1786
- struct pid_namespace *pid = proc_pid_ns(inode);
1963
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
17871964 struct task_struct *task;
17881965
17891966 generic_fillattr(inode, stat);
....@@ -1793,7 +1970,7 @@
17931970 rcu_read_lock();
17941971 task = pid_task(proc_pid(inode), PIDTYPE_PID);
17951972 if (task) {
1796
- if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
1973
+ if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
17971974 rcu_read_unlock();
17981975 /*
17991976 * This doesn't prevent learning whether PID exists,
....@@ -1978,11 +2155,11 @@
19782155 goto out;
19792156
19802157 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
1981
- status = down_read_killable(&mm->mmap_sem);
2158
+ status = mmap_read_lock_killable(mm);
19822159 if (!status) {
19832160 exact_vma_exists = !!find_exact_vma(mm, vm_start,
19842161 vm_end);
1985
- up_read(&mm->mmap_sem);
2162
+ mmap_read_unlock(mm);
19862163 }
19872164 }
19882165
....@@ -2029,7 +2206,7 @@
20292206 if (rc)
20302207 goto out_mmput;
20312208
2032
- rc = down_read_killable(&mm->mmap_sem);
2209
+ rc = mmap_read_lock_killable(mm);
20332210 if (rc)
20342211 goto out_mmput;
20352212
....@@ -2040,7 +2217,7 @@
20402217 path_get(path);
20412218 rc = 0;
20422219 }
2043
- up_read(&mm->mmap_sem);
2220
+ mmap_read_unlock(mm);
20442221
20452222 out_mmput:
20462223 mmput(mm);
....@@ -2055,16 +2232,16 @@
20552232 };
20562233
20572234 /*
2058
- * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
2059
- * symlinks may be used to bypass permissions on ancestor directories in the
2060
- * path to the file in question.
2235
+ * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due
2236
+ * to concerns about how the symlinks may be used to bypass permissions on
2237
+ * ancestor directories in the path to the file in question.
20612238 */
20622239 static const char *
20632240 proc_map_files_get_link(struct dentry *dentry,
20642241 struct inode *inode,
20652242 struct delayed_call *done)
20662243 {
2067
- if (!capable(CAP_SYS_ADMIN))
2244
+ if (!checkpoint_restore_ns_capable(&init_user_ns))
20682245 return ERR_PTR(-EPERM);
20692246
20702247 return proc_pid_get_link(dentry, inode, done);
....@@ -2130,7 +2307,7 @@
21302307 goto out_put_task;
21312308
21322309 result = ERR_PTR(-EINTR);
2133
- if (down_read_killable(&mm->mmap_sem))
2310
+ if (mmap_read_lock_killable(mm))
21342311 goto out_put_mm;
21352312
21362313 result = ERR_PTR(-ENOENT);
....@@ -2143,7 +2320,7 @@
21432320 (void *)(unsigned long)vma->vm_file->f_mode);
21442321
21452322 out_no_vma:
2146
- up_read(&mm->mmap_sem);
2323
+ mmap_read_unlock(mm);
21472324 out_put_mm:
21482325 mmput(mm);
21492326 out_put_task:
....@@ -2165,10 +2342,11 @@
21652342 struct task_struct *task;
21662343 struct mm_struct *mm;
21672344 unsigned long nr_files, pos, i;
2168
- struct flex_array *fa = NULL;
2169
- struct map_files_info info;
2345
+ GENRADIX(struct map_files_info) fa;
21702346 struct map_files_info *p;
21712347 int ret;
2348
+
2349
+ genradix_init(&fa);
21722350
21732351 ret = -ENOENT;
21742352 task = get_proc_task(file_inode(file));
....@@ -2187,7 +2365,7 @@
21872365 if (!mm)
21882366 goto out_put_task;
21892367
2190
- ret = down_read_killable(&mm->mmap_sem);
2368
+ ret = mmap_read_lock_killable(mm);
21912369 if (ret) {
21922370 mmput(mm);
21932371 goto out_put_task;
....@@ -2198,52 +2376,39 @@
21982376 /*
21992377 * We need two passes here:
22002378 *
2201
- * 1) Collect vmas of mapped files with mmap_sem taken
2202
- * 2) Release mmap_sem and instantiate entries
2379
+ * 1) Collect vmas of mapped files with mmap_lock taken
2380
+ * 2) Release mmap_lock and instantiate entries
22032381 *
22042382 * otherwise lockdep complains, since the filldir()
2205
- * routine might require mmap_sem taken in might_fault().
2383
+ * routine might require mmap_lock taken in might_fault().
22062384 */
22072385
22082386 for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
2209
- if (vma->vm_file && ++pos > ctx->pos)
2210
- nr_files++;
2211
- }
2387
+ if (!vma->vm_file)
2388
+ continue;
2389
+ if (++pos <= ctx->pos)
2390
+ continue;
22122391
2213
- if (nr_files) {
2214
- fa = flex_array_alloc(sizeof(info), nr_files,
2215
- GFP_KERNEL);
2216
- if (!fa || flex_array_prealloc(fa, 0, nr_files,
2217
- GFP_KERNEL)) {
2392
+ p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
2393
+ if (!p) {
22182394 ret = -ENOMEM;
2219
- if (fa)
2220
- flex_array_free(fa);
2221
- up_read(&mm->mmap_sem);
2395
+ mmap_read_unlock(mm);
22222396 mmput(mm);
22232397 goto out_put_task;
22242398 }
2225
- for (i = 0, vma = mm->mmap, pos = 2; vma;
2226
- vma = vma->vm_next) {
2227
- if (!vma->vm_file)
2228
- continue;
2229
- if (++pos <= ctx->pos)
2230
- continue;
22312399
2232
- info.start = vma->vm_start;
2233
- info.end = vma->vm_end;
2234
- info.mode = vma->vm_file->f_mode;
2235
- if (flex_array_put(fa, i++, &info, GFP_KERNEL))
2236
- BUG();
2237
- }
2400
+ p->start = vma->vm_start;
2401
+ p->end = vma->vm_end;
2402
+ p->mode = vma->vm_file->f_mode;
22382403 }
2239
- up_read(&mm->mmap_sem);
2404
+ mmap_read_unlock(mm);
22402405 mmput(mm);
22412406
22422407 for (i = 0; i < nr_files; i++) {
22432408 char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
22442409 unsigned int len;
22452410
2246
- p = flex_array_get(fa, i);
2411
+ p = genradix_ptr(&fa, i);
22472412 len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
22482413 if (!proc_fill_cache(file, ctx,
22492414 buf, len,
....@@ -2253,12 +2418,11 @@
22532418 break;
22542419 ctx->pos++;
22552420 }
2256
- if (fa)
2257
- flex_array_free(fa);
22582421
22592422 out_put_task:
22602423 put_task_struct(task);
22612424 out:
2425
+ genradix_free(&fa);
22622426 return ret;
22632427 }
22642428
....@@ -2357,7 +2521,7 @@
23572521 return -ENOMEM;
23582522
23592523 tp->pid = proc_pid(inode);
2360
- tp->ns = proc_pid_ns(inode);
2524
+ tp->ns = proc_pid_ns(inode->i_sb);
23612525 return 0;
23622526 }
23632527
....@@ -2386,10 +2550,13 @@
23862550 return -ESRCH;
23872551
23882552 if (p != current) {
2389
- if (!capable(CAP_SYS_NICE)) {
2553
+ rcu_read_lock();
2554
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
2555
+ rcu_read_unlock();
23902556 count = -EPERM;
23912557 goto out;
23922558 }
2559
+ rcu_read_unlock();
23932560
23942561 err = security_task_setscheduler(p);
23952562 if (err) {
....@@ -2422,11 +2589,14 @@
24222589 return -ESRCH;
24232590
24242591 if (p != current) {
2425
-
2426
- if (!capable(CAP_SYS_NICE)) {
2592
+ rcu_read_lock();
2593
+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
2594
+ rcu_read_unlock();
24272595 err = -EPERM;
24282596 goto out;
24292597 }
2598
+ rcu_read_unlock();
2599
+
24302600 err = security_task_getscheduler(p);
24312601 if (err)
24322602 goto out;
....@@ -2481,11 +2651,10 @@
24812651
24822652 static struct dentry *proc_pident_lookup(struct inode *dir,
24832653 struct dentry *dentry,
2484
- const struct pid_entry *ents,
2485
- unsigned int nents)
2654
+ const struct pid_entry *p,
2655
+ const struct pid_entry *end)
24862656 {
24872657 struct task_struct *task = get_proc_task(dir);
2488
- const struct pid_entry *p, *last;
24892658 struct dentry *res = ERR_PTR(-ENOENT);
24902659
24912660 if (!task)
....@@ -2495,8 +2664,7 @@
24952664 * Yes, it does not scale. And it should not. Don't add
24962665 * new entries into /proc/<tgid>/ without very good reasons.
24972666 */
2498
- last = &ents[nents];
2499
- for (p = ents; p < last; p++) {
2667
+ for (; p < end; p++) {
25002668 if (p->len != dentry->d_name.len)
25012669 continue;
25022670 if (!memcmp(dentry->d_name.name, p->name, p->len)) {
....@@ -2554,7 +2722,7 @@
25542722 if (!task)
25552723 return -ESRCH;
25562724
2557
- length = security_getprocattr(task,
2725
+ length = security_getprocattr(task, PROC_I(inode)->op.lsm,
25582726 (char*)file->f_path.dentry->d_name.name,
25592727 &p);
25602728 put_task_struct(task);
....@@ -2612,7 +2780,9 @@
26122780 if (rv < 0)
26132781 goto out_free;
26142782
2615
- rv = security_setprocattr(file->f_path.dentry->d_name.name, page, count);
2783
+ rv = security_setprocattr(PROC_I(inode)->op.lsm,
2784
+ file->f_path.dentry->d_name.name, page,
2785
+ count);
26162786 mutex_unlock(&current->signal->cred_guard_mutex);
26172787 out_free:
26182788 kfree(page);
....@@ -2628,13 +2798,66 @@
26282798 .release = mem_release,
26292799 };
26302800
2801
+#define LSM_DIR_OPS(LSM) \
2802
+static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
2803
+ struct dir_context *ctx) \
2804
+{ \
2805
+ return proc_pident_readdir(filp, ctx, \
2806
+ LSM##_attr_dir_stuff, \
2807
+ ARRAY_SIZE(LSM##_attr_dir_stuff)); \
2808
+} \
2809
+\
2810
+static const struct file_operations proc_##LSM##_attr_dir_ops = { \
2811
+ .read = generic_read_dir, \
2812
+ .iterate = proc_##LSM##_attr_dir_iterate, \
2813
+ .llseek = default_llseek, \
2814
+}; \
2815
+\
2816
+static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
2817
+ struct dentry *dentry, unsigned int flags) \
2818
+{ \
2819
+ return proc_pident_lookup(dir, dentry, \
2820
+ LSM##_attr_dir_stuff, \
2821
+ LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \
2822
+} \
2823
+\
2824
+static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
2825
+ .lookup = proc_##LSM##_attr_dir_lookup, \
2826
+ .getattr = pid_getattr, \
2827
+ .setattr = proc_setattr, \
2828
+}
2829
+
2830
+#ifdef CONFIG_SECURITY_SMACK
2831
+static const struct pid_entry smack_attr_dir_stuff[] = {
2832
+ ATTR("smack", "current", 0666),
2833
+};
2834
+LSM_DIR_OPS(smack);
2835
+#endif
2836
+
2837
+#ifdef CONFIG_SECURITY_APPARMOR
2838
+static const struct pid_entry apparmor_attr_dir_stuff[] = {
2839
+ ATTR("apparmor", "current", 0666),
2840
+ ATTR("apparmor", "prev", 0444),
2841
+ ATTR("apparmor", "exec", 0666),
2842
+};
2843
+LSM_DIR_OPS(apparmor);
2844
+#endif
2845
+
26312846 static const struct pid_entry attr_dir_stuff[] = {
2632
- REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2633
- REG("prev", S_IRUGO, proc_pid_attr_operations),
2634
- REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2635
- REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2636
- REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2637
- REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
2847
+ ATTR(NULL, "current", 0666),
2848
+ ATTR(NULL, "prev", 0444),
2849
+ ATTR(NULL, "exec", 0666),
2850
+ ATTR(NULL, "fscreate", 0666),
2851
+ ATTR(NULL, "keycreate", 0666),
2852
+ ATTR(NULL, "sockcreate", 0666),
2853
+#ifdef CONFIG_SECURITY_SMACK
2854
+ DIR("smack", 0555,
2855
+ proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
2856
+#endif
2857
+#ifdef CONFIG_SECURITY_APPARMOR
2858
+ DIR("apparmor", 0555,
2859
+ proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops),
2860
+#endif
26382861 };
26392862
26402863 static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
....@@ -2653,7 +2876,8 @@
26532876 struct dentry *dentry, unsigned int flags)
26542877 {
26552878 return proc_pident_lookup(dir, dentry,
2656
- attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
2879
+ attr_dir_stuff,
2880
+ attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff));
26572881 }
26582882
26592883 static const struct inode_operations proc_attr_dir_inode_operations = {
....@@ -2748,7 +2972,7 @@
27482972 unsigned long flags;
27492973 int result;
27502974
2751
- result = mutex_lock_killable(&task->signal->cred_guard_mutex);
2975
+ result = down_read_killable(&task->signal->exec_update_lock);
27522976 if (result)
27532977 return result;
27542978
....@@ -2784,7 +3008,7 @@
27843008 result = 0;
27853009
27863010 out_unlock:
2787
- mutex_unlock(&task->signal->cred_guard_mutex);
3011
+ up_read(&task->signal->exec_update_lock);
27883012 return result;
27893013 }
27903014
....@@ -2953,6 +3177,21 @@
29533177 }
29543178 #endif /* CONFIG_LIVEPATCH */
29553179
3180
+#ifdef CONFIG_STACKLEAK_METRICS
3181
+static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
3182
+ struct pid *pid, struct task_struct *task)
3183
+{
3184
+ unsigned long prev_depth = THREAD_SIZE -
3185
+ (task->prev_lowest_stack & (THREAD_SIZE - 1));
3186
+ unsigned long depth = THREAD_SIZE -
3187
+ (task->lowest_stack & (THREAD_SIZE - 1));
3188
+
3189
+ seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
3190
+ prev_depth, depth);
3191
+ return 0;
3192
+}
3193
+#endif /* CONFIG_STACKLEAK_METRICS */
3194
+
29563195 /*
29573196 * Thread groups
29583197 */
....@@ -2963,7 +3202,7 @@
29633202 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
29643203 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
29653204 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
2966
- DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3205
+ DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
29673206 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
29683207 #ifdef CONFIG_NET
29693208 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
....@@ -2978,6 +3217,9 @@
29783217 #endif
29793218 #ifdef CONFIG_SCHED_AUTOGROUP
29803219 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
3220
+#endif
3221
+#ifdef CONFIG_TIME_NS
3222
+ REG("timens_offsets", S_IRUGO|S_IWUSR, proc_timens_offsets_operations),
29813223 #endif
29823224 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
29833225 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
....@@ -3024,10 +3266,13 @@
30243266 #ifdef CONFIG_CGROUPS
30253267 ONE("cgroup", S_IRUGO, proc_cgroup_show),
30263268 #endif
3269
+#ifdef CONFIG_PROC_CPU_RESCTRL
3270
+ ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
3271
+#endif
30273272 ONE("oom_score", S_IRUGO, proc_oom_score),
30283273 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
30293274 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3030
-#ifdef CONFIG_AUDITSYSCALL
3275
+#ifdef CONFIG_AUDIT
30313276 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
30323277 REG("sessionid", S_IRUGO, proc_sessionid_operations),
30333278 #endif
....@@ -3057,6 +3302,12 @@
30573302 #ifdef CONFIG_CPU_FREQ_TIMES
30583303 ONE("time_in_state", 0444, proc_time_in_state_show),
30593304 #endif
3305
+#ifdef CONFIG_STACKLEAK_METRICS
3306
+ ONE("stack_depth", S_IRUGO, proc_stack_depth),
3307
+#endif
3308
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
3309
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
3310
+#endif
30603311 };
30613312
30623313 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
....@@ -3073,8 +3324,7 @@
30733324
30743325 struct pid *tgid_pidfd_to_pid(const struct file *file)
30753326 {
3076
- if (!d_is_dir(file->f_path.dentry) ||
3077
- (file->f_op != &proc_tgid_base_operations))
3327
+ if (file->f_op != &proc_tgid_base_operations)
30783328 return ERR_PTR(-EBADF);
30793329
30803330 return proc_pid(file_inode(file));
....@@ -3083,7 +3333,8 @@
30833333 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
30843334 {
30853335 return proc_pident_lookup(dir, dentry,
3086
- tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
3336
+ tgid_base_stuff,
3337
+ tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff));
30873338 }
30883339
30893340 static const struct inode_operations proc_tgid_base_inode_operations = {
....@@ -3093,90 +3344,28 @@
30933344 .permission = proc_pid_permission,
30943345 };
30953346
3096
-static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
3097
-{
3098
- struct dentry *dentry, *leader, *dir;
3099
- char buf[10 + 1];
3100
- struct qstr name;
3101
-
3102
- name.name = buf;
3103
- name.len = snprintf(buf, sizeof(buf), "%u", pid);
3104
- /* no ->d_hash() rejects on procfs */
3105
- dentry = d_hash_and_lookup(mnt->mnt_root, &name);
3106
- if (dentry) {
3107
- d_invalidate(dentry);
3108
- dput(dentry);
3109
- }
3110
-
3111
- if (pid == tgid)
3112
- return;
3113
-
3114
- name.name = buf;
3115
- name.len = snprintf(buf, sizeof(buf), "%u", tgid);
3116
- leader = d_hash_and_lookup(mnt->mnt_root, &name);
3117
- if (!leader)
3118
- goto out;
3119
-
3120
- name.name = "task";
3121
- name.len = strlen(name.name);
3122
- dir = d_hash_and_lookup(leader, &name);
3123
- if (!dir)
3124
- goto out_put_leader;
3125
-
3126
- name.name = buf;
3127
- name.len = snprintf(buf, sizeof(buf), "%u", pid);
3128
- dentry = d_hash_and_lookup(dir, &name);
3129
- if (dentry) {
3130
- d_invalidate(dentry);
3131
- dput(dentry);
3132
- }
3133
-
3134
- dput(dir);
3135
-out_put_leader:
3136
- dput(leader);
3137
-out:
3138
- return;
3139
-}
3140
-
31413347 /**
3142
- * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
3143
- * @task: task that should be flushed.
3348
+ * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache.
3349
+ * @pid: pid that should be flushed.
31443350 *
3145
- * When flushing dentries from proc, one needs to flush them from global
3146
- * proc (proc_mnt) and from all the namespaces' procs this task was seen
3147
- * in. This call is supposed to do all of this job.
3148
- *
3149
- * Looks in the dcache for
3150
- * /proc/@pid
3151
- * /proc/@tgid/task/@pid
3152
- * if either directory is present flushes it and all of it'ts children
3153
- * from the dcache.
3351
+ * This function walks a list of inodes (that belong to any proc
3352
+ * filesystem) that are attached to the pid and flushes them from
3353
+ * the dentry cache.
31543354 *
31553355 * It is safe and reasonable to cache /proc entries for a task until
31563356 * that task exits. After that they just clog up the dcache with
31573357 * useless entries, possibly causing useful dcache entries to be
3158
- * flushed instead. This routine is proved to flush those useless
3159
- * dcache entries at process exit time.
3358
+ * flushed instead. This routine is provided to flush those useless
3359
+ * dcache entries when a process is reaped.
31603360 *
31613361 * NOTE: This routine is just an optimization so it does not guarantee
3162
- * that no dcache entries will exist at process exit time it
3163
- * just makes it very unlikely that any will persist.
3362
+ * that no dcache entries will exist after a process is reaped
3363
+ * it just makes it very unlikely that any will persist.
31643364 */
31653365
3166
-void proc_flush_task(struct task_struct *task)
3366
+void proc_flush_pid(struct pid *pid)
31673367 {
3168
- int i;
3169
- struct pid *pid, *tgid;
3170
- struct upid *upid;
3171
-
3172
- pid = task_pid(task);
3173
- tgid = task_tgid(task);
3174
-
3175
- for (i = 0; i <= pid->level; i++) {
3176
- upid = &pid->numbers[i];
3177
- proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
3178
- tgid->numbers[i].nr);
3179
- }
3368
+ proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock);
31803369 }
31813370
31823371 static struct dentry *proc_pid_instantiate(struct dentry * dentry,
....@@ -3184,7 +3373,8 @@
31843373 {
31853374 struct inode *inode;
31863375
3187
- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
3376
+ inode = proc_pid_make_base_inode(dentry->d_sb, task,
3377
+ S_IFDIR | S_IRUGO | S_IXUGO);
31883378 if (!inode)
31893379 return ERR_PTR(-ENOENT);
31903380
....@@ -3199,10 +3389,11 @@
31993389 return d_splice_alias(inode, dentry);
32003390 }
32013391
3202
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
3392
+struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
32033393 {
32043394 struct task_struct *task;
32053395 unsigned tgid;
3396
+ struct proc_fs_info *fs_info;
32063397 struct pid_namespace *ns;
32073398 struct dentry *result = ERR_PTR(-ENOENT);
32083399
....@@ -3210,7 +3401,8 @@
32103401 if (tgid == ~0U)
32113402 goto out;
32123403
3213
- ns = dentry->d_sb->s_fs_info;
3404
+ fs_info = proc_sb_info(dentry->d_sb);
3405
+ ns = fs_info->pid_ns;
32143406 rcu_read_lock();
32153407 task = find_task_by_pid_ns(tgid, ns);
32163408 if (task)
....@@ -3219,7 +3411,14 @@
32193411 if (!task)
32203412 goto out;
32213413
3414
+ /* Limit procfs to only ptraceable tasks */
3415
+ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
3416
+ if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
3417
+ goto out_put_task;
3418
+ }
3419
+
32223420 result = proc_pid_instantiate(dentry, task, NULL);
3421
+out_put_task:
32233422 put_task_struct(task);
32243423 out:
32253424 return result;
....@@ -3245,20 +3444,8 @@
32453444 pid = find_ge_pid(iter.tgid, ns);
32463445 if (pid) {
32473446 iter.tgid = pid_nr_ns(pid, ns);
3248
- iter.task = pid_task(pid, PIDTYPE_PID);
3249
- /* What we to know is if the pid we have find is the
3250
- * pid of a thread_group_leader. Testing for task
3251
- * being a thread_group_leader is the obvious thing
3252
- * todo but there is a window when it fails, due to
3253
- * the pid transfer logic in de_thread.
3254
- *
3255
- * So we perform the straight forward test of seeing
3256
- * if the pid we have found is the pid of a thread
3257
- * group leader, and don't worry if the task we have
3258
- * found doesn't happen to be a thread group leader.
3259
- * As we don't care in the case of readdir.
3260
- */
3261
- if (!iter.task || !has_group_leader_pid(iter.task)) {
3447
+ iter.task = pid_task(pid, PIDTYPE_TGID);
3448
+ if (!iter.task) {
32623449 iter.tgid += 1;
32633450 goto retry;
32643451 }
....@@ -3274,20 +3461,21 @@
32743461 int proc_pid_readdir(struct file *file, struct dir_context *ctx)
32753462 {
32763463 struct tgid_iter iter;
3277
- struct pid_namespace *ns = proc_pid_ns(file_inode(file));
3464
+ struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
3465
+ struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb);
32783466 loff_t pos = ctx->pos;
32793467
32803468 if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
32813469 return 0;
32823470
32833471 if (pos == TGID_OFFSET - 2) {
3284
- struct inode *inode = d_inode(ns->proc_self);
3472
+ struct inode *inode = d_inode(fs_info->proc_self);
32853473 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
32863474 return 0;
32873475 ctx->pos = pos = pos + 1;
32883476 }
32893477 if (pos == TGID_OFFSET - 1) {
3290
- struct inode *inode = d_inode(ns->proc_thread_self);
3478
+ struct inode *inode = d_inode(fs_info->proc_thread_self);
32913479 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
32923480 return 0;
32933481 ctx->pos = pos = pos + 1;
....@@ -3301,7 +3489,7 @@
33013489 unsigned int len;
33023490
33033491 cond_resched();
3304
- if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
3492
+ if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
33053493 continue;
33063494
33073495 len = snprintf(name, sizeof(name), "%u", iter.tgid);
....@@ -3351,7 +3539,8 @@
33513539 }
33523540
33533541 static const struct inode_operations proc_tid_comm_inode_operations = {
3354
- .permission = proc_tid_comm_permission,
3542
+ .setattr = proc_setattr,
3543
+ .permission = proc_tid_comm_permission,
33553544 };
33563545
33573546 /*
....@@ -3359,7 +3548,7 @@
33593548 */
33603549 static const struct pid_entry tid_base_stuff[] = {
33613550 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3362
- DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3551
+ DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
33633552 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
33643553 #ifdef CONFIG_NET
33653554 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
....@@ -3421,10 +3610,13 @@
34213610 #ifdef CONFIG_CGROUPS
34223611 ONE("cgroup", S_IRUGO, proc_cgroup_show),
34233612 #endif
3613
+#ifdef CONFIG_PROC_CPU_RESCTRL
3614
+ ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
3615
+#endif
34243616 ONE("oom_score", S_IRUGO, proc_oom_score),
34253617 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
34263618 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3427
-#ifdef CONFIG_AUDITSYSCALL
3619
+#ifdef CONFIG_AUDIT
34283620 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
34293621 REG("sessionid", S_IRUGO, proc_sessionid_operations),
34303622 #endif
....@@ -3444,6 +3636,9 @@
34443636 #ifdef CONFIG_LIVEPATCH
34453637 ONE("patch_state", S_IRUSR, proc_pid_patch_state),
34463638 #endif
3639
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
3640
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
3641
+#endif
34473642 #ifdef CONFIG_CPU_FREQ_TIMES
34483643 ONE("time_in_state", 0444, proc_time_in_state_show),
34493644 #endif
....@@ -3458,7 +3653,8 @@
34583653 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
34593654 {
34603655 return proc_pident_lookup(dir, dentry,
3461
- tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
3656
+ tid_base_stuff,
3657
+ tid_base_stuff + ARRAY_SIZE(tid_base_stuff));
34623658 }
34633659
34643660 static const struct file_operations proc_tid_base_operations = {
....@@ -3477,7 +3673,8 @@
34773673 struct task_struct *task, const void *ptr)
34783674 {
34793675 struct inode *inode;
3480
- inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
3676
+ inode = proc_pid_make_base_inode(dentry->d_sb, task,
3677
+ S_IFDIR | S_IRUGO | S_IXUGO);
34813678 if (!inode)
34823679 return ERR_PTR(-ENOENT);
34833680
....@@ -3497,6 +3694,7 @@
34973694 struct task_struct *task;
34983695 struct task_struct *leader = get_proc_task(dir);
34993696 unsigned tid;
3697
+ struct proc_fs_info *fs_info;
35003698 struct pid_namespace *ns;
35013699 struct dentry *result = ERR_PTR(-ENOENT);
35023700
....@@ -3507,7 +3705,8 @@
35073705 if (tid == ~0U)
35083706 goto out;
35093707
3510
- ns = dentry->d_sb->s_fs_info;
3708
+ fs_info = proc_sb_info(dentry->d_sb);
3709
+ ns = fs_info->pid_ns;
35113710 rcu_read_lock();
35123711 task = find_task_by_pid_ns(tid, ns);
35133712 if (task)
....@@ -3621,7 +3820,7 @@
36213820 /* f_version caches the tgid value that the last readdir call couldn't
36223821 * return. lseek aka telldir automagically resets f_version to 0.
36233822 */
3624
- ns = proc_pid_ns(inode);
3823
+ ns = proc_pid_ns(inode->i_sb);
36253824 tid = (int)file->f_version;
36263825 file->f_version = 0;
36273826 for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);