2023-12-11 d2ccde1c8e90d38cee87a1b0309ad2827f3fd30d
kernel/arch/arm64/kernel/process.c
@@ -1,34 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Based on arch/arm/kernel/process.c
  *
  * Original Copyright (C) 1995 Linus Torvalds
  * Copyright (C) 1996-2000 Russell King - Converted to ARM.
  * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
 #include <stdarg.h>
 
 #include <linux/compat.h>
 #include <linux/efi.h>
+#include <linux/elf.h>
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/kernel.h>
+#include <linux/lockdep.h>
+#include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/nospec.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
 #include <linux/unistd.h>
@@ -51,18 +44,22 @@
 #include <linux/percpu.h>
 #include <linux/thread_info.h>
 #include <linux/prctl.h>
+#include <trace/hooks/fpsimd.h>
 
 #include <asm/alternative.h>
+#include <asm/arch_gicv3.h>
 #include <asm/compat.h>
+#include <asm/cpufeature.h>
 #include <asm/cacheflush.h>
 #include <asm/exec.h>
 #include <asm/fpsimd.h>
 #include <asm/mmu_context.h>
+#include <asm/mte.h>
 #include <asm/processor.h>
-#include <asm/scs.h>
+#include <asm/pointer_auth.h>
 #include <asm/stacktrace.h>
 
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __ro_after_init;
 EXPORT_SYMBOL(__stack_chk_guard);
@@ -74,22 +71,61 @@
 void (*pm_power_off)(void);
 EXPORT_SYMBOL_GPL(pm_power_off);
 
-void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
-EXPORT_SYMBOL_GPL(arm_pm_restart);
+static void noinstr __cpu_do_idle(void)
+{
+	dsb(sy);
+	wfi();
+}
+
+static void noinstr __cpu_do_idle_irqprio(void)
+{
+	unsigned long pmr;
+	unsigned long daif_bits;
+
+	daif_bits = read_sysreg(daif);
+	write_sysreg(daif_bits | PSR_I_BIT, daif);
+
+	/*
+	 * Unmask PMR before going idle to make sure interrupts can
+	 * be raised.
+	 */
+	pmr = gic_read_pmr();
+	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+	__cpu_do_idle();
+
+	gic_write_pmr(pmr);
+	write_sysreg(daif_bits, daif);
+}
+
+/*
+ * cpu_do_idle()
+ *
+ *	Idle the processor (wait for interrupt).
+ *
+ *	If the CPU supports priority masking we must do additional work to
+ *	ensure that interrupts are not masked at the PMR (because the core will
+ *	not wake up if we block the wake up signal in the interrupt controller).
+ */
+void noinstr cpu_do_idle(void)
+{
+	if (system_uses_irq_prio_masking())
+		__cpu_do_idle_irqprio();
+	else
+		__cpu_do_idle();
+}
 
 /*
  * This is our default idle handler.
  */
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
 {
 	/*
 	 * This should do all the clock switching and wait for interrupt
 	 * tricks
 	 */
-	trace_cpu_idle_rcuidle(1, smp_processor_id());
 	cpu_do_idle();
-	local_irq_enable();
-	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_enter(void)
@@ -116,11 +152,11 @@
  * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
  * kexec'd kernel to use any and all RAM as it sees fit, without having to
  * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
- * functionality embodied in disable_nonboot_cpus() to achieve this.
+ * functionality embodied in smp_shutdown_nonboot_cpus() to achieve this.
  */
 void machine_shutdown(void)
 {
-	disable_nonboot_cpus();
+	smp_shutdown_nonboot_cpus(reboot_cpu);
 }
 
 /*
@@ -174,10 +210,7 @@
 	efi_reboot(reboot_mode, NULL);
 
 	/* Now call the architecture specific reboot code. */
-	if (arm_pm_restart)
-		arm_pm_restart(reboot_mode, cmd);
-	else
-		do_kernel_restart(cmd);
+	do_kernel_restart(cmd);
 
 	/*
 	 * Whoops - the architecture was unable to reboot.
@@ -185,6 +218,15 @@
 	printk("Reboot failed -- System halted\n");
 	while (1);
 }
+
+#define bstr(suffix, str) [PSR_BTYPE_ ## suffix >> PSR_BTYPE_SHIFT] = str
+static const char *const btypes[] = {
+	bstr(NONE, "--"),
+	bstr(  JC, "jc"),
+	bstr(   C, "-c"),
+	bstr(   J, "j-")
+};
+#undef bstr
 
 static void print_pstate(struct pt_regs *regs)
 {
@@ -204,7 +246,10 @@
 		       pstate & PSR_AA32_I_BIT ? 'I' : 'i',
 		       pstate & PSR_AA32_F_BIT ? 'F' : 'f');
 	} else {
-		printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n",
+		const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >>
+					       PSR_BTYPE_SHIFT];
+
+		printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n",
 		       pstate,
 		       pstate & PSR_N_BIT ? 'N' : 'n',
 		       pstate & PSR_Z_BIT ? 'Z' : 'z',
@@ -215,7 +260,9 @@
 		       pstate & PSR_I_BIT ? 'I' : 'i',
 		       pstate & PSR_F_BIT ? 'F' : 'f',
 		       pstate & PSR_PAN_BIT ? '+' : '-',
-		       pstate & PSR_UAO_BIT ? '+' : '-');
+		       pstate & PSR_UAO_BIT ? '+' : '-',
+		       pstate & PSR_TCO_BIT ? '+' : '-',
+		       btype_str);
 	}
 }
 
@@ -258,7 +305,8 @@
 
 	for (j = 0; j < 8; j++) {
 		u32 data;
-		if (probe_kernel_address(p, data)) {
+
+		if (aarch64_insn_read((void *)p, &data)) {
 			pr_cont(" ********");
 		} else {
 			pr_cont(" %08x", data);
@@ -307,13 +355,16 @@
 
 	if (!user_mode(regs)) {
 		printk("pc : %pS\n", (void *)regs->pc);
-		printk("lr : %pS\n", (void *)lr);
+		printk("lr : %pS\n", (void *)ptrauth_strip_insn_pac(lr));
 	} else {
 		printk("pc : %016llx\n", regs->pc);
 		printk("lr : %016llx\n", lr);
 	}
 
 	printk("sp : %016llx\n", sp);
+
+	if (system_uses_irq_prio_masking())
+		printk("pmr_save: %08llx\n", regs->pmr_save);
 
 	i = top_reg;
 
@@ -333,11 +384,12 @@
 void show_regs(struct pt_regs * regs)
 {
 	__show_regs(regs);
-	dump_backtrace(regs, NULL);
+	dump_backtrace(regs, NULL, KERN_DEFAULT);
 
 	if (!user_mode(regs))
 		show_extra_register_data(regs, 512);
 }
+EXPORT_SYMBOL_GPL(show_regs);
 
 static void tls_thread_flush(void)
 {
@@ -400,13 +452,16 @@
 	dst->thread.sve_state = NULL;
 	clear_tsk_thread_flag(dst, TIF_SVE);
 
+	/* clear any pending asynchronous tag fault raised by the parent */
+	clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
+
 	return 0;
 }
 
 asmlinkage void ret_from_fork(void) asm("ret_from_fork");
 
 int copy_thread(unsigned long clone_flags, unsigned long stack_start,
-		unsigned long stk_sz, struct task_struct *p)
+		unsigned long stk_sz, struct task_struct *p, unsigned long tls)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 
@@ -421,7 +476,9 @@
 	 */
 	fpsimd_flush_task_state(p);
 
-	if (likely(!(p->flags & PF_KTHREAD))) {
+	ptrauth_thread_init_kernel(p);
+
+	if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
 		*childregs = *current_pt_regs();
 		childregs->regs[0] = 0;
 
@@ -439,20 +496,21 @@
 		}
 
 		/*
-		 * If a TLS pointer was passed to clone (4th argument), use it
-		 * for the new thread.
+		 * If a TLS pointer was passed to clone, use it for the new
+		 * thread.
 		 */
 		if (clone_flags & CLONE_SETTLS)
-			p->thread.uw.tp_value = childregs->regs[3];
+			p->thread.uw.tp_value = tls;
 	} else {
+		/*
+		 * A kthread has no context to ERET to, so ensure any buggy
+		 * ERET is treated as an illegal exception return.
+		 *
+		 * When a user task is created from a kthread, childregs will
+		 * be initialized by start_thread() or start_compat_thread().
+		 */
 		memset(childregs, 0, sizeof(struct pt_regs));
-		childregs->pstate = PSR_MODE_EL1h;
-		if (IS_ENABLED(CONFIG_ARM64_UAO) &&
-		    cpus_have_const_cap(ARM64_HAS_UAO))
-			childregs->pstate |= PSR_UAO_BIT;
-
-		if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
-			set_ssbs_bit(childregs);
+		childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
 
 		p->thread.cpu_context.x19 = stack_start;
 		p->thread.cpu_context.x20 = stk_sz;
@@ -499,8 +557,6 @@
  */
 static void ssbs_thread_switch(struct task_struct *next)
 {
-	struct pt_regs *regs = task_pt_regs(next);
-
 	/*
 	 * Nothing to do for kernel threads, but 'regs' may be junk
 	 * (e.g. idle task) so check the flags and bail early.
@@ -512,18 +568,10 @@
 	 * If all CPUs implement the SSBS extension, then we just need to
 	 * context-switch the PSTATE field.
 	 */
-	if (cpu_have_feature(cpu_feature(SSBS)))
+	if (cpus_have_const_cap(ARM64_SSBS))
 		return;
 
-	/* If the mitigation is enabled, then we leave SSBS clear. */
-	if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
-	    test_tsk_thread_flag(next, TIF_SSBD))
-		return;
-
-	if (compat_user_mode(regs))
-		set_compat_ssbs_bit(regs);
-	else if (user_mode(regs))
-		set_ssbs_bit(regs);
+	spectre_v4_enable_task_mitigation(next);
 }
 
 /*
@@ -541,6 +589,48 @@
 }
 
 /*
+ * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
+ * Ensure access is disabled when switching to a 32bit task, ensure
+ * access is enabled when switching to a 64bit task.
+ */
+static void erratum_1418040_thread_switch(struct task_struct *next)
+{
+	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
+	    !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
+		return;
+
+	if (is_compat_thread(task_thread_info(next)))
+		sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
+	else
+		sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
+}
+
+static void erratum_1418040_new_exec(void)
+{
+	preempt_disable();
+	erratum_1418040_thread_switch(current);
+	preempt_enable();
+}
+
+/*
+ * __switch_to() checks current->thread.sctlr_user as an optimisation. Therefore
+ * this function must be called with preemption disabled and the update to
+ * sctlr_user must be made in the same preemption disabled block so that
+ * __switch_to() does not see the variable update before the SCTLR_EL1 one.
+ */
+void update_sctlr_el1(u64 sctlr)
+{
+	/*
+	 * EnIA must not be cleared while in the kernel as this is necessary for
+	 * in-kernel PAC. It will be cleared on kernel exit if needed.
+	 */
+	sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr);
+
+	/* ISB required for the kernel uaccess routines when setting TCF0. */
+	isb();
+}
+
+/*
  * Thread switching.
  */
 __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
@@ -555,7 +645,8 @@
 	entry_task_switch(next);
 	uao_thread_switch(next);
 	ssbs_thread_switch(next);
-	scs_overflow_check(next);
+	erratum_1418040_thread_switch(next);
+	ptrauth_thread_switch_user(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
@@ -564,6 +655,18 @@
 	 * call.
 	 */
 	dsb(ish);
+
+	/*
+	 * MTE thread switching must happen after the DSB above to ensure that
+	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
+	 * registers.
+	 */
+	mte_thread_switch(next);
+	/* avoid expensive SCTLR_EL1 accesses if no change */
+	if (prev->thread.sctlr_user != next->thread.sctlr_user)
+		update_sctlr_el1(next->thread.sctlr_user);
+
+	trace_android_vh_is_fpsimd_save(prev, next);
 
 	/* the actual thread switch */
 	last = cpu_switch_to(prev, next);
@@ -583,11 +686,8 @@
 	if (!stack_page)
 		return 0;
 
-	frame.fp = thread_saved_fp(p);
-	frame.pc = thread_saved_pc(p);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame.graph = p->curr_ret_stack;
-#endif
+	start_backtrace(&frame, thread_saved_fp(p), thread_saved_pc(p));
+
 	do {
 		if (unwind_frame(p, &frame))
 			goto out;
@@ -601,6 +701,7 @@
 	put_task_stack(p);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(get_wchan);
 
 unsigned long arch_align_stack(unsigned long sp)
 {
@@ -609,43 +710,39 @@
 	return sp & ~0xf;
 }
 
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	if (is_compat_task())
-		return randomize_page(mm->brk, SZ_32M);
-	else
-		return randomize_page(mm->brk, SZ_1G);
-}
-
 /*
  * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
  */
 void arch_setup_new_exec(void)
 {
-	current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
+	unsigned long mmflags = 0;
+
+	if (is_compat_task()) {
+		mmflags = MMCF_AARCH32;
+
+		/*
+		 * Restrict the CPU affinity mask for a 32-bit task so that
+		 * it contains only 32-bit-capable CPUs.
+		 *
+		 * From the perspective of the task, this looks similar to
+		 * what would happen if the 64-bit-only CPUs were hot-unplugged
+		 * at the point of execve(), although we try a bit harder to
+		 * honour the cpuset hierarchy.
+		 */
+		if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+			force_compatible_cpus_allowed_ptr(current);
+	}
+
+	current->mm->context.flags = mmflags;
+	ptrauth_thread_init_user();
+	mte_thread_init_user();
+	erratum_1418040_new_exec();
+
+	if (task_spec_ssb_noexec(current)) {
+		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
+					 PR_SPEC_ENABLE);
+	}
 }
-
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
-void __used stackleak_check_alloca(unsigned long size)
-{
-	unsigned long stack_left;
-	unsigned long current_sp = current_stack_pointer;
-	struct stack_info info;
-
-	BUG_ON(!on_accessible_stack(current, current_sp, &info));
-
-	stack_left = current_sp - info.low;
-
-	/*
-	 * There's a good chance we're almost out of stack space if this
-	 * is true. Using panic() over BUG() is more likely to give
-	 * reliable debugging output.
-	 */
-	if (size >= stack_left)
-		panic("alloca() over the kernel stack boundary\n");
-}
-EXPORT_SYMBOL(stackleak_check_alloca);
-#endif
 
 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
 /*
@@ -653,11 +750,18 @@
  */
 static unsigned int tagged_addr_disabled;
 
-long set_tagged_addr_ctrl(unsigned long arg)
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
 {
-	if (is_compat_task())
+	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
+	struct thread_info *ti = task_thread_info(task);
+
+	if (is_compat_thread(ti))
 		return -EINVAL;
-	if (arg & ~PR_TAGGED_ADDR_ENABLE)
+
+	if (system_supports_mte())
+		valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
+
+	if (arg & ~valid_mask)
 		return -EINVAL;
 
 	/*
@@ -667,20 +771,28 @@
 	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
 		return -EINVAL;
 
-	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
+	if (set_mte_ctrl(task, arg) != 0)
+		return -EINVAL;
+
+	update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
 
 	return 0;
 }
 
-long get_tagged_addr_ctrl(void)
+long get_tagged_addr_ctrl(struct task_struct *task)
 {
-	if (is_compat_task())
+	long ret = 0;
+	struct thread_info *ti = task_thread_info(task);
+
+	if (is_compat_thread(ti))
 		return -EINVAL;
 
-	if (test_thread_flag(TIF_TAGGED_ADDR))
-		return PR_TAGGED_ADDR_ENABLE;
+	if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR))
+		ret = PR_TAGGED_ADDR_ENABLE;
 
-	return 0;
+	ret |= get_mte_ctrl(task);
+
+	return ret;
 }
 
 /*
@@ -688,8 +800,6 @@
  * only prevents the tagged address ABI enabling via prctl() and does not
  * disable it for tasks that already opted in to the relaxed ABI.
  */
-static int zero;
-static int one = 1;
 
 static struct ctl_table tagged_addr_sysctl_table[] = {
 	{
@@ -698,8 +808,8 @@
 		.data		= &tagged_addr_disabled,
 		.maxlen		= sizeof(int),
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
 	},
 	{ }
 };
@@ -713,3 +823,41 @@
 
 core_initcall(tagged_addr_init);
 #endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
+
+asmlinkage void __sched arm64_preempt_schedule_irq(void)
+{
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * Preempting a task from an IRQ means we leave copies of PSTATE
+	 * on the stack. cpufeature's enable calls may modify PSTATE, but
+	 * resuming one of these preempted tasks would undo those changes.
+	 *
+	 * Only allow a task to be preempted once cpufeatures have been
+	 * enabled.
+	 */
+	if (system_capabilities_finalized())
+		preempt_schedule_irq();
+}
+
+#ifdef CONFIG_BINFMT_ELF
+int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
+			 bool has_interp, bool is_interp)
+{
+	/*
+	 * For dynamically linked executables the interpreter is
+	 * responsible for setting PROT_BTI on everything except
+	 * itself.
+	 */
+	if (is_interp != has_interp)
+		return prot;
+
+	if (!(state->flags & ARM64_ELF_BTI))
+		return prot;
+
+	if (prot & PROT_EXEC)
+		prot |= PROT_BTI;
+
+	return prot;
+}
+#endif