.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * Based on arch/arm/mm/fault.c |
---|
3 | 4 | * |
---|
4 | 5 | * Copyright (C) 1995 Linus Torvalds |
---|
5 | 6 | * Copyright (C) 1995-2004 Russell King |
---|
6 | 7 | * Copyright (C) 2012 ARM Ltd. |
---|
7 | | - * |
---|
8 | | - * This program is free software; you can redistribute it and/or modify |
---|
9 | | - * it under the terms of the GNU General Public License version 2 as |
---|
10 | | - * published by the Free Software Foundation. |
---|
11 | | - * |
---|
12 | | - * This program is distributed in the hope that it will be useful, |
---|
13 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
15 | | - * GNU General Public License for more details. |
---|
16 | | - * |
---|
17 | | - * You should have received a copy of the GNU General Public License |
---|
18 | | - * along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
19 | 8 | */ |
---|
20 | 9 | |
---|
| 10 | +#include <linux/acpi.h> |
---|
| 11 | +#include <linux/bitfield.h> |
---|
21 | 12 | #include <linux/extable.h> |
---|
| 13 | +#include <linux/kfence.h> |
---|
22 | 14 | #include <linux/signal.h> |
---|
23 | 15 | #include <linux/mm.h> |
---|
24 | 16 | #include <linux/hardirq.h> |
---|
25 | 17 | #include <linux/init.h> |
---|
| 18 | +#include <linux/kasan.h> |
---|
26 | 19 | #include <linux/kprobes.h> |
---|
27 | 20 | #include <linux/uaccess.h> |
---|
28 | 21 | #include <linux/page-flags.h> |
---|
.. | .. |
---|
33 | 26 | #include <linux/preempt.h> |
---|
34 | 27 | #include <linux/hugetlb.h> |
---|
35 | 28 | |
---|
| 29 | +#include <asm/acpi.h> |
---|
36 | 30 | #include <asm/bug.h> |
---|
37 | 31 | #include <asm/cmpxchg.h> |
---|
38 | 32 | #include <asm/cpufeature.h> |
---|
39 | 33 | #include <asm/exception.h> |
---|
| 34 | +#include <asm/daifflags.h> |
---|
40 | 35 | #include <asm/debug-monitors.h> |
---|
41 | 36 | #include <asm/esr.h> |
---|
42 | | -#include <asm/kasan.h> |
---|
| 37 | +#include <asm/kprobes.h> |
---|
| 38 | +#include <asm/mte.h> |
---|
| 39 | +#include <asm/processor.h> |
---|
43 | 40 | #include <asm/sysreg.h> |
---|
44 | 41 | #include <asm/system_misc.h> |
---|
45 | | -#include <asm/pgtable.h> |
---|
46 | 42 | #include <asm/tlbflush.h> |
---|
47 | 43 | #include <asm/traps.h> |
---|
48 | 44 | |
---|
49 | | -#include <acpi/ghes.h> |
---|
| 45 | +#include <trace/hooks/fault.h> |
---|
50 | 46 | |
---|
51 | 47 | struct fault_info { |
---|
52 | | - int (*fn)(unsigned long addr, unsigned int esr, |
---|
| 48 | + int (*fn)(unsigned long far, unsigned int esr, |
---|
53 | 49 | struct pt_regs *regs); |
---|
54 | 50 | int sig; |
---|
55 | 51 | int code; |
---|
.. | .. |
---|
57 | 53 | }; |
---|
58 | 54 | |
---|
59 | 55 | static const struct fault_info fault_info[]; |
---|
| 56 | +static struct fault_info debug_fault_info[]; |
---|
60 | 57 | |
---|
61 | 58 | static inline const struct fault_info *esr_to_fault_info(unsigned int esr) |
---|
62 | 59 | { |
---|
63 | | - return fault_info + (esr & 63); |
---|
| 60 | + return fault_info + (esr & ESR_ELx_FSC); |
---|
64 | 61 | } |
---|
65 | 62 | |
---|
66 | | -#ifdef CONFIG_KPROBES |
---|
67 | | -static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) |
---|
| 63 | +static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr) |
---|
68 | 64 | { |
---|
69 | | - int ret = 0; |
---|
70 | | - |
---|
71 | | - /* kprobe_running() needs smp_processor_id() */ |
---|
72 | | - if (!user_mode(regs)) { |
---|
73 | | - preempt_disable(); |
---|
74 | | - if (kprobe_running() && kprobe_fault_handler(regs, esr)) |
---|
75 | | - ret = 1; |
---|
76 | | - preempt_enable(); |
---|
77 | | - } |
---|
78 | | - |
---|
79 | | - return ret; |
---|
| 65 | + return debug_fault_info + DBG_ESR_EVT(esr); |
---|
80 | 66 | } |
---|
81 | | -#else |
---|
82 | | -static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) |
---|
83 | | -{ |
---|
84 | | - return 0; |
---|
85 | | -} |
---|
86 | | -#endif |
---|
87 | 67 | |
---|
88 | 68 | static void data_abort_decode(unsigned int esr) |
---|
89 | 69 | { |
---|
.. | .. |
---|
112 | 92 | pr_alert("Mem abort info:\n"); |
---|
113 | 93 | |
---|
114 | 94 | pr_alert(" ESR = 0x%08x\n", esr); |
---|
115 | | - pr_alert(" Exception class = %s, IL = %u bits\n", |
---|
116 | | - esr_get_class_string(esr), |
---|
| 95 | + pr_alert(" EC = 0x%02lx: %s, IL = %u bits\n", |
---|
| 96 | + ESR_ELx_EC(esr), esr_get_class_string(esr), |
---|
117 | 97 | (esr & ESR_ELx_IL) ? 32 : 16); |
---|
118 | 98 | pr_alert(" SET = %lu, FnV = %lu\n", |
---|
119 | 99 | (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT, |
---|
.. | .. |
---|
126 | 106 | data_abort_decode(esr); |
---|
127 | 107 | } |
---|
128 | 108 | |
---|
129 | | -static inline bool is_ttbr0_addr(unsigned long addr) |
---|
| 109 | +static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm) |
---|
130 | 110 | { |
---|
131 | | - /* entry assembly clears tags for TTBR0 addrs */ |
---|
132 | | - return addr < TASK_SIZE; |
---|
133 | | -} |
---|
| 111 | + /* Either init_pg_dir or swapper_pg_dir */ |
---|
| 112 | + if (mm == &init_mm) |
---|
| 113 | + return __pa_symbol(mm->pgd); |
---|
134 | 114 | |
---|
135 | | -static inline bool is_ttbr1_addr(unsigned long addr) |
---|
136 | | -{ |
---|
137 | | - /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ |
---|
138 | | - return arch_kasan_reset_tag(addr) >= VA_START; |
---|
| 115 | + return (unsigned long)virt_to_phys(mm->pgd); |
---|
139 | 116 | } |
---|
140 | 117 | |
---|
141 | 118 | /* |
---|
142 | 119 | * Dump out the page tables associated with 'addr' in the currently active mm. |
---|
143 | 120 | */ |
---|
144 | | -void show_pte(unsigned long addr) |
---|
| 121 | +static void show_pte(unsigned long addr) |
---|
145 | 122 | { |
---|
146 | 123 | struct mm_struct *mm; |
---|
147 | 124 | pgd_t *pgdp; |
---|
.. | .. |
---|
164 | 141 | return; |
---|
165 | 142 | } |
---|
166 | 143 | |
---|
167 | | - pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n", |
---|
| 144 | + pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n", |
---|
168 | 145 | mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, |
---|
169 | | - VA_BITS, mm->pgd); |
---|
| 146 | + vabits_actual, mm_to_pgd_phys(mm)); |
---|
170 | 147 | pgdp = pgd_offset(mm, addr); |
---|
171 | 148 | pgd = READ_ONCE(*pgdp); |
---|
172 | 149 | pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); |
---|
173 | 150 | |
---|
174 | 151 | do { |
---|
| 152 | + p4d_t *p4dp, p4d; |
---|
175 | 153 | pud_t *pudp, pud; |
---|
176 | 154 | pmd_t *pmdp, pmd; |
---|
177 | 155 | pte_t *ptep, pte; |
---|
.. | .. |
---|
179 | 157 | if (pgd_none(pgd) || pgd_bad(pgd)) |
---|
180 | 158 | break; |
---|
181 | 159 | |
---|
182 | | - pudp = pud_offset(pgdp, addr); |
---|
| 160 | + p4dp = p4d_offset(pgdp, addr); |
---|
| 161 | + p4d = READ_ONCE(*p4dp); |
---|
| 162 | + pr_cont(", p4d=%016llx", p4d_val(p4d)); |
---|
| 163 | + if (p4d_none(p4d) || p4d_bad(p4d)) |
---|
| 164 | + break; |
---|
| 165 | + |
---|
| 166 | + pudp = pud_offset(p4dp, addr); |
---|
183 | 167 | pud = READ_ONCE(*pudp); |
---|
184 | 168 | pr_cont(", pud=%016llx", pud_val(pud)); |
---|
185 | 169 | if (pud_none(pud) || pud_bad(pud)) |
---|
.. | .. |
---|
239 | 223 | pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); |
---|
240 | 224 | } while (pteval != old_pteval); |
---|
241 | 225 | |
---|
242 | | - flush_tlb_fix_spurious_fault(vma, address); |
---|
| 226 | + /* Invalidate a stale read-only entry */ |
---|
| 227 | + if (dirty) |
---|
| 228 | + flush_tlb_page(vma, address); |
---|
243 | 229 | return 1; |
---|
244 | 230 | } |
---|
245 | 231 | |
---|
.. | .. |
---|
248 | 234 | return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; |
---|
249 | 235 | } |
---|
250 | 236 | |
---|
251 | | -static inline bool is_el1_permission_fault(unsigned int esr, |
---|
252 | | - struct pt_regs *regs, |
---|
253 | | - unsigned long addr) |
---|
| 237 | +static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, |
---|
| 238 | + struct pt_regs *regs) |
---|
254 | 239 | { |
---|
255 | 240 | unsigned int ec = ESR_ELx_EC(esr); |
---|
256 | 241 | unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; |
---|
.. | .. |
---|
268 | 253 | return false; |
---|
269 | 254 | } |
---|
270 | 255 | |
---|
| 256 | +static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, |
---|
| 257 | + unsigned int esr, |
---|
| 258 | + struct pt_regs *regs) |
---|
| 259 | +{ |
---|
| 260 | + unsigned long flags; |
---|
| 261 | + u64 par, dfsc; |
---|
| 262 | + |
---|
| 263 | + if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR || |
---|
| 264 | + (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT) |
---|
| 265 | + return false; |
---|
| 266 | + |
---|
| 267 | + local_irq_save(flags); |
---|
| 268 | + asm volatile("at s1e1r, %0" :: "r" (addr)); |
---|
| 269 | + isb(); |
---|
| 270 | + par = read_sysreg_par(); |
---|
| 271 | + local_irq_restore(flags); |
---|
| 272 | + |
---|
| 273 | + /* |
---|
| 274 | + * If we now have a valid translation, treat the translation fault as |
---|
| 275 | + * spurious. |
---|
| 276 | + */ |
---|
| 277 | + if (!(par & SYS_PAR_EL1_F)) |
---|
| 278 | + return true; |
---|
| 279 | + |
---|
| 280 | + /* |
---|
| 281 | + * If we got a different type of fault from the AT instruction, |
---|
| 282 | + * treat the translation fault as spurious. |
---|
| 283 | + */ |
---|
| 284 | + dfsc = FIELD_GET(SYS_PAR_EL1_FST, par); |
---|
| 285 | + return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT; |
---|
| 286 | +} |
---|
| 287 | + |
---|
271 | 288 | static void die_kernel_fault(const char *msg, unsigned long addr, |
---|
272 | 289 | unsigned int esr, struct pt_regs *regs) |
---|
273 | 290 | { |
---|
.. | .. |
---|
276 | 293 | pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg, |
---|
277 | 294 | addr); |
---|
278 | 295 | |
---|
| 296 | + trace_android_rvh_die_kernel_fault(regs, esr, addr, msg); |
---|
279 | 297 | mem_abort_decode(esr); |
---|
280 | 298 | |
---|
281 | 299 | show_pte(addr); |
---|
282 | 300 | die("Oops", regs, esr); |
---|
283 | 301 | bust_spinlocks(0); |
---|
284 | | - do_exit(SIGKILL); |
---|
| 302 | + make_task_dead(SIGKILL); |
---|
| 303 | +} |
---|
| 304 | + |
---|
| 305 | +#ifdef CONFIG_KASAN_HW_TAGS |
---|
| 306 | +static void report_tag_fault(unsigned long addr, unsigned int esr, |
---|
| 307 | + struct pt_regs *regs) |
---|
| 308 | +{ |
---|
| 309 | + static bool reported; |
---|
| 310 | + bool is_write; |
---|
| 311 | + |
---|
| 312 | + if (READ_ONCE(reported)) |
---|
| 313 | + return; |
---|
| 314 | + |
---|
| 315 | + /* |
---|
| 316 | + * This is used for KASAN tests and assumes that no MTE faults |
---|
| 317 | + * happened before running the tests. |
---|
| 318 | + */ |
---|
| 319 | + if (mte_report_once()) |
---|
| 320 | + WRITE_ONCE(reported, true); |
---|
| 321 | + |
---|
| 322 | + /* |
---|
| 323 | + * SAS bits aren't set for all faults reported in EL1, so we can't |
---|
| 324 | + * find out access size. |
---|
| 325 | + */ |
---|
| 326 | + is_write = !!(esr & ESR_ELx_WNR); |
---|
| 327 | + kasan_report(addr, 0, is_write, regs->pc); |
---|
| 328 | +} |
---|
| 329 | +#else |
---|
| 330 | +/* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */ |
---|
| 331 | +static inline void report_tag_fault(unsigned long addr, unsigned int esr, |
---|
| 332 | + struct pt_regs *regs) { } |
---|
| 333 | +#endif |
---|
| 334 | + |
---|
| 335 | +static void do_tag_recovery(unsigned long addr, unsigned int esr, |
---|
| 336 | + struct pt_regs *regs) |
---|
| 337 | +{ |
---|
| 338 | + |
---|
| 339 | + report_tag_fault(addr, esr, regs); |
---|
| 340 | + |
---|
| 341 | + /* |
---|
| 342 | + * Disable MTE Tag Checking on the local CPU for the current EL. |
---|
| 343 | + * This is done lazily on the other CPUs, when they hit a |
---|
| 344 | + * tag fault. |
---|
| 345 | + */ |
---|
| 346 | + sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_NONE); |
---|
| 347 | + isb(); |
---|
| 348 | +} |
---|
| 349 | + |
---|
| 350 | +static bool is_el1_mte_sync_tag_check_fault(unsigned int esr) |
---|
| 351 | +{ |
---|
| 352 | + unsigned int ec = ESR_ELx_EC(esr); |
---|
| 353 | + unsigned int fsc = esr & ESR_ELx_FSC; |
---|
| 354 | + |
---|
| 355 | + if (ec != ESR_ELx_EC_DABT_CUR) |
---|
| 356 | + return false; |
---|
| 357 | + |
---|
| 358 | + if (fsc == ESR_ELx_FSC_MTE) |
---|
| 359 | + return true; |
---|
| 360 | + |
---|
| 361 | + return false; |
---|
| 362 | +} |
---|
| 363 | + |
---|
| 364 | +static bool is_translation_fault(unsigned long esr) |
---|
| 365 | +{ |
---|
| 366 | + return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT; |
---|
285 | 367 | } |
---|
286 | 368 | |
---|
287 | 369 | static void __do_kernel_fault(unsigned long addr, unsigned int esr, |
---|
.. | .. |
---|
296 | 378 | if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) |
---|
297 | 379 | return; |
---|
298 | 380 | |
---|
299 | | - if (is_el1_permission_fault(esr, regs, addr)) { |
---|
| 381 | + if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs), |
---|
| 382 | + "Ignoring spurious kernel translation fault at virtual address %016lx\n", addr)) |
---|
| 383 | + return; |
---|
| 384 | + |
---|
| 385 | + if (is_el1_mte_sync_tag_check_fault(esr)) { |
---|
| 386 | + do_tag_recovery(addr, esr, regs); |
---|
| 387 | + |
---|
| 388 | + return; |
---|
| 389 | + } |
---|
| 390 | + |
---|
| 391 | + if (is_el1_permission_fault(addr, esr, regs)) { |
---|
300 | 392 | if (esr & ESR_ELx_WNR) |
---|
301 | 393 | msg = "write to read-only memory"; |
---|
| 394 | + else if (is_el1_instruction_abort(esr)) |
---|
| 395 | + msg = "execute from non-executable memory"; |
---|
302 | 396 | else |
---|
303 | 397 | msg = "read from unreadable memory"; |
---|
304 | 398 | } else if (addr < PAGE_SIZE) { |
---|
305 | 399 | msg = "NULL pointer dereference"; |
---|
306 | 400 | } else { |
---|
| 401 | + if (is_translation_fault(esr) && |
---|
| 402 | + kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs)) |
---|
| 403 | + return; |
---|
| 404 | + |
---|
307 | 405 | msg = "paging request"; |
---|
308 | 406 | } |
---|
309 | 407 | |
---|
310 | 408 | die_kernel_fault(msg, addr, esr, regs); |
---|
311 | 409 | } |
---|
312 | 410 | |
---|
313 | | -static void __do_user_fault(struct siginfo *info, unsigned int esr) |
---|
| 411 | +static void set_thread_esr(unsigned long address, unsigned int esr) |
---|
314 | 412 | { |
---|
315 | | - current->thread.fault_address = (unsigned long)info->si_addr; |
---|
| 413 | + current->thread.fault_address = address; |
---|
316 | 414 | |
---|
317 | 415 | /* |
---|
318 | 416 | * If the faulting address is in the kernel, we must sanitize the ESR. |
---|
.. | .. |
---|
365 | 463 | } |
---|
366 | 464 | |
---|
367 | 465 | current->thread.fault_code = esr; |
---|
368 | | - arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current); |
---|
369 | 466 | } |
---|
370 | 467 | |
---|
371 | | -static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) |
---|
| 468 | +static void do_bad_area(unsigned long far, unsigned int esr, |
---|
| 469 | + struct pt_regs *regs) |
---|
372 | 470 | { |
---|
| 471 | + unsigned long addr = untagged_addr(far); |
---|
| 472 | + |
---|
373 | 473 | /* |
---|
374 | 474 | * If we are in kernel mode at this point, we have no context to |
---|
375 | 475 | * handle this fault with. |
---|
376 | 476 | */ |
---|
377 | 477 | if (user_mode(regs)) { |
---|
378 | 478 | const struct fault_info *inf = esr_to_fault_info(esr); |
---|
379 | | - struct siginfo si; |
---|
380 | 479 | |
---|
381 | | - clear_siginfo(&si); |
---|
382 | | - si.si_signo = inf->sig; |
---|
383 | | - si.si_code = inf->code; |
---|
384 | | - si.si_addr = (void __user *)addr; |
---|
385 | | - |
---|
386 | | - __do_user_fault(&si, esr); |
---|
| 480 | + set_thread_esr(addr, esr); |
---|
| 481 | + arm64_force_sig_fault(inf->sig, inf->code, far, inf->name); |
---|
387 | 482 | } else { |
---|
388 | 483 | __do_kernel_fault(addr, esr, regs); |
---|
389 | 484 | } |
---|
390 | 485 | } |
---|
391 | 486 | |
---|
392 | | -#define VM_FAULT_BADMAP 0x010000 |
---|
393 | | -#define VM_FAULT_BADACCESS 0x020000 |
---|
| 487 | +#define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000) |
---|
| 488 | +#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000) |
---|
394 | 489 | |
---|
395 | | -static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, |
---|
396 | | - unsigned int mm_flags, unsigned long vm_flags, |
---|
397 | | - struct task_struct *tsk) |
---|
| 490 | +static int __do_page_fault(struct vm_area_struct *vma, unsigned long addr, |
---|
| 491 | + unsigned int mm_flags, unsigned long vm_flags, |
---|
| 492 | + struct pt_regs *regs) |
---|
398 | 493 | { |
---|
399 | | - struct vm_area_struct *vma; |
---|
400 | | - vm_fault_t fault; |
---|
401 | 494 | |
---|
402 | | - vma = find_vma(mm, addr); |
---|
403 | | - fault = VM_FAULT_BADMAP; |
---|
404 | 495 | if (unlikely(!vma)) |
---|
405 | | - goto out; |
---|
406 | | - if (unlikely(vma->vm_start > addr)) |
---|
407 | | - goto check_stack; |
---|
| 496 | + return VM_FAULT_BADMAP; |
---|
408 | 497 | |
---|
409 | 498 | /* |
---|
410 | 499 | * Ok, we have a good vm_area for this memory access, so we can handle |
---|
411 | 500 | * it. |
---|
412 | 501 | */ |
---|
413 | | -good_area: |
---|
| 502 | + if (unlikely(vma->vm_start > addr)) { |
---|
| 503 | + if (!(vma->vm_flags & VM_GROWSDOWN)) |
---|
| 504 | + return VM_FAULT_BADMAP; |
---|
| 505 | + if (expand_stack(vma, addr)) |
---|
| 506 | + return VM_FAULT_BADMAP; |
---|
| 507 | + } |
---|
| 508 | + |
---|
414 | 509 | /* |
---|
415 | 510 | * Check that the permissions on the VMA allow for the fault which |
---|
416 | 511 | * occurred. |
---|
417 | 512 | */ |
---|
418 | | - if (!(vma->vm_flags & vm_flags)) { |
---|
419 | | - fault = VM_FAULT_BADACCESS; |
---|
420 | | - goto out; |
---|
421 | | - } |
---|
422 | | - |
---|
423 | | - return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); |
---|
424 | | - |
---|
425 | | -check_stack: |
---|
426 | | - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) |
---|
427 | | - goto good_area; |
---|
428 | | -out: |
---|
429 | | - return fault; |
---|
| 513 | + if (!(vma->vm_flags & vm_flags)) |
---|
| 514 | + return VM_FAULT_BADACCESS; |
---|
| 515 | + return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs); |
---|
430 | 516 | } |
---|
431 | 517 | |
---|
432 | 518 | static bool is_el0_instruction_abort(unsigned int esr) |
---|
.. | .. |
---|
434 | 520 | return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; |
---|
435 | 521 | } |
---|
436 | 522 | |
---|
437 | | -static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, |
---|
| 523 | +/* |
---|
| 524 | + * Note: not valid for EL1 DC IVAC, but we never use that in a way that |
---|
| 525 | + * should fault. EL0 cannot issue DC IVAC (undef). |
---|
| 526 | + */ |
---|
| 527 | +static bool is_write_abort(unsigned int esr) |
---|
| 528 | +{ |
---|
| 529 | + return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM); |
---|
| 530 | +} |
---|
| 531 | + |
---|
| 532 | +static int __kprobes do_page_fault(unsigned long far, unsigned int esr, |
---|
438 | 533 | struct pt_regs *regs) |
---|
439 | 534 | { |
---|
440 | | - struct task_struct *tsk; |
---|
441 | | - struct mm_struct *mm; |
---|
442 | | - struct siginfo si; |
---|
443 | | - vm_fault_t fault, major = 0; |
---|
444 | | - unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; |
---|
445 | | - unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; |
---|
| 535 | + const struct fault_info *inf; |
---|
| 536 | + struct mm_struct *mm = current->mm; |
---|
| 537 | + vm_fault_t fault; |
---|
| 538 | + unsigned long vm_flags = VM_ACCESS_FLAGS; |
---|
| 539 | + unsigned int mm_flags = FAULT_FLAG_DEFAULT; |
---|
| 540 | + struct vm_area_struct *vma = NULL; |
---|
| 541 | + unsigned long addr = untagged_addr(far); |
---|
446 | 542 | |
---|
447 | | - if (notify_page_fault(regs, esr)) |
---|
| 543 | + if (kprobe_page_fault(regs, esr)) |
---|
448 | 544 | return 0; |
---|
449 | | - |
---|
450 | | - tsk = current; |
---|
451 | | - mm = tsk->mm; |
---|
452 | 545 | |
---|
453 | 546 | /* |
---|
454 | 547 | * If we're in an interrupt or have no user context, we must not take |
---|
.. | .. |
---|
462 | 555 | |
---|
463 | 556 | if (is_el0_instruction_abort(esr)) { |
---|
464 | 557 | vm_flags = VM_EXEC; |
---|
465 | | - } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { |
---|
| 558 | + mm_flags |= FAULT_FLAG_INSTRUCTION; |
---|
| 559 | + } else if (is_write_abort(esr)) { |
---|
466 | 560 | vm_flags = VM_WRITE; |
---|
467 | 561 | mm_flags |= FAULT_FLAG_WRITE; |
---|
468 | 562 | } |
---|
469 | 563 | |
---|
470 | | - if (is_ttbr0_addr(addr) && is_el1_permission_fault(esr, regs, addr)) { |
---|
| 564 | + if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) { |
---|
471 | 565 | /* regs->orig_addr_limit may be 0 if we entered from EL0 */ |
---|
472 | 566 | if (regs->orig_addr_limit == KERNEL_DS) |
---|
473 | 567 | die_kernel_fault("access to user memory with fs=KERNEL_DS", |
---|
.. | .. |
---|
485 | 579 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); |
---|
486 | 580 | |
---|
487 | 581 | /* |
---|
| 582 | + * Try a speculative page fault first, without taking the |
---|
| 583 | + * mmap lock. |
---|
| 584 | + */ |
---|
| 585 | + fault = handle_speculative_fault(mm, addr, mm_flags, &vma, regs); |
---|
| 586 | + if (fault != VM_FAULT_RETRY) |
---|
| 587 | + goto done; |
---|
| 588 | + |
---|
| 589 | + /* |
---|
488 | 590 | * As per x86, we may deadlock here. However, since the kernel only |
---|
489 | 591 | * validly references user space from well defined areas of the code, |
---|
490 | 592 | * we can bug out early if this is from code which shouldn't. |
---|
491 | 593 | */ |
---|
492 | | - if (!down_read_trylock(&mm->mmap_sem)) { |
---|
| 594 | + if (!mmap_read_trylock(mm)) { |
---|
493 | 595 | if (!user_mode(regs) && !search_exception_tables(regs->pc)) |
---|
494 | 596 | goto no_context; |
---|
495 | 597 | retry: |
---|
496 | | - down_read(&mm->mmap_sem); |
---|
| 598 | + mmap_read_lock(mm); |
---|
497 | 599 | } else { |
---|
498 | 600 | /* |
---|
499 | 601 | * The above mmap_read_trylock() might have succeeded in which |
---|
.. | .. |
---|
501 | 603 | */ |
---|
502 | 604 | might_sleep(); |
---|
503 | 605 | #ifdef CONFIG_DEBUG_VM |
---|
504 | | - if (!user_mode(regs) && !search_exception_tables(regs->pc)) |
---|
| 606 | + if (!user_mode(regs) && !search_exception_tables(regs->pc)) { |
---|
| 607 | + mmap_read_unlock(mm); |
---|
505 | 608 | goto no_context; |
---|
| 609 | + } |
---|
506 | 610 | #endif |
---|
507 | 611 | } |
---|
508 | 612 | |
---|
509 | | - fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); |
---|
510 | | - major |= fault & VM_FAULT_MAJOR; |
---|
| 613 | + if (!vma || !can_reuse_spf_vma(vma, addr)) |
---|
| 614 | + vma = find_vma(mm, addr); |
---|
| 615 | + fault = __do_page_fault(vma, addr, mm_flags, vm_flags, regs); |
---|
| 616 | + |
---|
| 617 | + /* Quick path to respond to signals */ |
---|
| 618 | + if (fault_signal_pending(fault, regs)) { |
---|
| 619 | + if (!user_mode(regs)) |
---|
| 620 | + goto no_context; |
---|
| 621 | + return 0; |
---|
| 622 | + } |
---|
511 | 623 | |
---|
512 | 624 | if (fault & VM_FAULT_RETRY) { |
---|
513 | | - /* |
---|
514 | | - * If we need to retry but a fatal signal is pending, |
---|
515 | | - * handle the signal first. We do not need to release |
---|
516 | | - * the mmap_sem because it would already be released |
---|
517 | | - * in __lock_page_or_retry in mm/filemap.c. |
---|
518 | | - */ |
---|
519 | | - if (fatal_signal_pending(current)) { |
---|
520 | | - if (!user_mode(regs)) |
---|
521 | | - goto no_context; |
---|
522 | | - return 0; |
---|
523 | | - } |
---|
524 | | - |
---|
525 | | - /* |
---|
526 | | - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of |
---|
527 | | - * starvation. |
---|
528 | | - */ |
---|
529 | 625 | if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { |
---|
530 | | - mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; |
---|
531 | 626 | mm_flags |= FAULT_FLAG_TRIED; |
---|
| 627 | + |
---|
| 628 | + /* |
---|
| 629 | + * Do not try to reuse this vma; fetch it |
---|
| 630 | + * again, since we will release the mmap_sem. |
---|
| 631 | + */ |
---|
| 632 | + vma = NULL; |
---|
| 633 | + |
---|
532 | 634 | goto retry; |
---|
533 | 635 | } |
---|
534 | 636 | } |
---|
535 | | - up_read(&mm->mmap_sem); |
---|
| 637 | + mmap_read_unlock(mm); |
---|
| 638 | + |
---|
| 639 | +done: |
---|
536 | 640 | |
---|
537 | 641 | /* |
---|
538 | 642 | * Handle the "normal" (no error) case first. |
---|
539 | 643 | */ |
---|
540 | 644 | if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | |
---|
541 | | - VM_FAULT_BADACCESS)))) { |
---|
542 | | - /* |
---|
543 | | - * Major/minor page fault accounting is only done |
---|
544 | | - * once. If we go through a retry, it is extremely |
---|
545 | | - * likely that the page will be found in page cache at |
---|
546 | | - * that point. |
---|
547 | | - */ |
---|
548 | | - if (major) { |
---|
549 | | - tsk->maj_flt++; |
---|
550 | | - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, |
---|
551 | | - addr); |
---|
552 | | - } else { |
---|
553 | | - tsk->min_flt++; |
---|
554 | | - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, |
---|
555 | | - addr); |
---|
556 | | - } |
---|
557 | | - |
---|
| 645 | + VM_FAULT_BADACCESS)))) |
---|
558 | 646 | return 0; |
---|
559 | | - } |
---|
560 | 647 | |
---|
561 | 648 | /* |
---|
562 | 649 | * If we are in kernel mode at this point, we have no context to |
---|
.. | .. |
---|
575 | 662 | return 0; |
---|
576 | 663 | } |
---|
577 | 664 | |
---|
578 | | - clear_siginfo(&si); |
---|
579 | | - si.si_addr = (void __user *)addr; |
---|
580 | | - |
---|
| 665 | + inf = esr_to_fault_info(esr); |
---|
| 666 | + set_thread_esr(addr, esr); |
---|
581 | 667 | if (fault & VM_FAULT_SIGBUS) { |
---|
582 | 668 | /* |
---|
583 | 669 | * We had some memory, but were unable to successfully fix up |
---|
584 | 670 | * this page fault. |
---|
585 | 671 | */ |
---|
586 | | - si.si_signo = SIGBUS; |
---|
587 | | - si.si_code = BUS_ADRERR; |
---|
588 | | - } else if (fault & VM_FAULT_HWPOISON_LARGE) { |
---|
589 | | - unsigned int hindex = VM_FAULT_GET_HINDEX(fault); |
---|
| 672 | + arm64_force_sig_fault(SIGBUS, BUS_ADRERR, far, inf->name); |
---|
| 673 | + } else if (fault & (VM_FAULT_HWPOISON_LARGE | VM_FAULT_HWPOISON)) { |
---|
| 674 | + unsigned int lsb; |
---|
590 | 675 | |
---|
591 | | - si.si_signo = SIGBUS; |
---|
592 | | - si.si_code = BUS_MCEERR_AR; |
---|
593 | | - si.si_addr_lsb = hstate_index_to_shift(hindex); |
---|
594 | | - } else if (fault & VM_FAULT_HWPOISON) { |
---|
595 | | - si.si_signo = SIGBUS; |
---|
596 | | - si.si_code = BUS_MCEERR_AR; |
---|
597 | | - si.si_addr_lsb = PAGE_SHIFT; |
---|
| 676 | + lsb = PAGE_SHIFT; |
---|
| 677 | + if (fault & VM_FAULT_HWPOISON_LARGE) |
---|
| 678 | + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); |
---|
| 679 | + |
---|
| 680 | + arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name); |
---|
598 | 681 | } else { |
---|
599 | 682 | /* |
---|
600 | 683 | * Something tried to access memory that isn't in our memory |
---|
601 | 684 | * map. |
---|
602 | 685 | */ |
---|
603 | | - si.si_signo = SIGSEGV; |
---|
604 | | - si.si_code = fault == VM_FAULT_BADACCESS ? |
---|
605 | | - SEGV_ACCERR : SEGV_MAPERR; |
---|
| 686 | + arm64_force_sig_fault(SIGSEGV, |
---|
| 687 | + fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR, |
---|
| 688 | + far, inf->name); |
---|
606 | 689 | } |
---|
607 | 690 | |
---|
608 | | - __do_user_fault(&si, esr); |
---|
609 | 691 | return 0; |
---|
610 | 692 | |
---|
611 | 693 | no_context: |
---|
.. | .. |
---|
613 | 695 | return 0; |
---|
614 | 696 | } |
---|
615 | 697 | |
---|
616 | | -int __weak do_tlb_conf_fault(unsigned long addr, |
---|
617 | | - unsigned int esr, |
---|
618 | | - struct pt_regs *regs) |
---|
619 | | -{ |
---|
620 | | - return 1; /* do_bad default */ |
---|
621 | | -} |
---|
622 | | - |
---|
623 | | -int (*do_tlb_conf_fault_cb)(unsigned long addr, |
---|
624 | | - unsigned int esr, |
---|
625 | | - struct pt_regs *regs) |
---|
626 | | - = do_tlb_conf_fault; /* initialization saves us a branch */ |
---|
627 | | -EXPORT_SYMBOL_GPL(do_tlb_conf_fault_cb); |
---|
628 | | - |
---|
629 | | -static int _do_tlb_conf_fault(unsigned long addr, |
---|
630 | | - unsigned int esr, |
---|
631 | | - struct pt_regs *regs) |
---|
632 | | -{ |
---|
633 | | - return (*do_tlb_conf_fault_cb)(addr, esr, regs); |
---|
634 | | -} |
---|
635 | | - |
---|
636 | | -static int __kprobes do_translation_fault(unsigned long addr, |
---|
| 698 | +static int __kprobes do_translation_fault(unsigned long far, |
---|
637 | 699 | unsigned int esr, |
---|
638 | 700 | struct pt_regs *regs) |
---|
639 | 701 | { |
---|
640 | | - if (is_ttbr0_addr(addr)) |
---|
641 | | - return do_page_fault(addr, esr, regs); |
---|
| 702 | + unsigned long addr = untagged_addr(far); |
---|
642 | 703 | |
---|
643 | | - do_bad_area(addr, esr, regs); |
---|
| 704 | + if (is_ttbr0_addr(addr)) |
---|
| 705 | + return do_page_fault(far, esr, regs); |
---|
| 706 | + |
---|
| 707 | + do_bad_area(far, esr, regs); |
---|
644 | 708 | return 0; |
---|
645 | 709 | } |
---|
646 | 710 | |
---|
647 | | -static int do_alignment_fault(unsigned long addr, unsigned int esr, |
---|
| 711 | +#ifdef CONFIG_ROCKCHIP_ARM64_ALIGN_FAULT_FIX |
---|
| 712 | +extern int alignment_fixup_helper(unsigned long addr, unsigned int esr, |
---|
| 713 | + struct pt_regs *regs); |
---|
| 714 | +#endif |
---|
| 715 | +static int do_alignment_fault(unsigned long far, unsigned int esr, |
---|
648 | 716 | struct pt_regs *regs) |
---|
649 | 717 | { |
---|
650 | | - do_bad_area(addr, esr, regs); |
---|
| 718 | +#ifdef CONFIG_ROCKCHIP_ARM64_ALIGN_FAULT_FIX |
---|
| 719 | + if (!alignment_fixup_helper(far, esr, regs)) |
---|
| 720 | + return 0; |
---|
| 721 | +#endif |
---|
| 722 | + do_bad_area(far, esr, regs); |
---|
651 | 723 | return 0; |
---|
652 | 724 | } |
---|
653 | 725 | |
---|
654 | | -static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) |
---|
| 726 | +static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs) |
---|
655 | 727 | { |
---|
656 | | - return 1; /* "fault" */ |
---|
| 728 | + unsigned long addr = untagged_addr(far); |
---|
| 729 | + int ret = 1; |
---|
| 730 | + |
---|
| 731 | + trace_android_vh_handle_tlb_conf(addr, esr, &ret); |
---|
| 732 | + return ret; |
---|
657 | 733 | } |
---|
658 | 734 | |
---|
659 | | -static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) |
---|
| 735 | +static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs) |
---|
660 | 736 | { |
---|
661 | | - struct siginfo info; |
---|
662 | 737 | const struct fault_info *inf; |
---|
| 738 | + unsigned long siaddr; |
---|
663 | 739 | |
---|
664 | 740 | inf = esr_to_fault_info(esr); |
---|
665 | 741 | |
---|
666 | | - /* |
---|
667 | | - * Synchronous aborts may interrupt code which had interrupts masked. |
---|
668 | | - * Before calling out into the wider kernel tell the interested |
---|
669 | | - * subsystems. |
---|
670 | | - */ |
---|
671 | | - if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { |
---|
672 | | - if (interrupts_enabled(regs)) |
---|
673 | | - nmi_enter(); |
---|
674 | | - |
---|
675 | | - ghes_notify_sea(); |
---|
676 | | - |
---|
677 | | - if (interrupts_enabled(regs)) |
---|
678 | | - nmi_exit(); |
---|
| 742 | + if (user_mode(regs) && apei_claim_sea(regs) == 0) { |
---|
| 743 | + /* |
---|
| 744 | + * APEI claimed this as a firmware-first notification. |
---|
| 745 | + * Some processing deferred to task_work before ret_to_user(). |
---|
| 746 | + */ |
---|
| 747 | + return 0; |
---|
679 | 748 | } |
---|
680 | 749 | |
---|
681 | | - clear_siginfo(&info); |
---|
682 | | - info.si_signo = inf->sig; |
---|
683 | | - info.si_errno = 0; |
---|
684 | | - info.si_code = inf->code; |
---|
685 | | - if (esr & ESR_ELx_FnV) |
---|
686 | | - info.si_addr = NULL; |
---|
687 | | - else |
---|
688 | | - info.si_addr = (void __user *)addr; |
---|
689 | | - arm64_notify_die(inf->name, regs, &info, esr); |
---|
| 750 | + if (esr & ESR_ELx_FnV) { |
---|
| 751 | + siaddr = 0; |
---|
| 752 | + } else { |
---|
| 753 | + /* |
---|
| 754 | + * The architecture specifies that the tag bits of FAR_EL1 are |
---|
| 755 | + * UNKNOWN for synchronous external aborts. Mask them out now |
---|
| 756 | + * so that userspace doesn't see them. |
---|
| 757 | + */ |
---|
| 758 | + siaddr = untagged_addr(far); |
---|
| 759 | + } |
---|
| 760 | + trace_android_rvh_do_sea(regs, esr, siaddr, inf->name); |
---|
| 761 | + arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr); |
---|
690 | 762 | |
---|
| 763 | + return 0; |
---|
| 764 | +} |
---|
| 765 | + |
---|
| 766 | +static int do_tag_check_fault(unsigned long far, unsigned int esr, |
---|
| 767 | + struct pt_regs *regs) |
---|
| 768 | +{ |
---|
| 769 | + /* |
---|
| 770 | + * The architecture specifies that bits 63:60 of FAR_EL1 are UNKNOWN |
---|
| 771 | + * for tag check faults. Set them to corresponding bits in the untagged |
---|
| 772 | + * address. |
---|
| 773 | + */ |
---|
| 774 | + far = (__untagged_addr(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK); |
---|
| 775 | + do_bad_area(far, esr, regs); |
---|
691 | 776 | return 0; |
---|
692 | 777 | } |
---|
693 | 778 | |
---|
.. | .. |
---|
709 | 794 | { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, |
---|
710 | 795 | { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, |
---|
711 | 796 | { do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" }, |
---|
712 | | - { do_bad, SIGKILL, SI_KERNEL, "unknown 17" }, |
---|
| 797 | + { do_tag_check_fault, SIGSEGV, SEGV_MTESERR, "synchronous tag check fault" }, |
---|
713 | 798 | { do_bad, SIGKILL, SI_KERNEL, "unknown 18" }, |
---|
714 | 799 | { do_bad, SIGKILL, SI_KERNEL, "unknown 19" }, |
---|
715 | 800 | { do_sea, SIGKILL, SI_KERNEL, "level 0 (translation table walk)" }, |
---|
.. | .. |
---|
740 | 825 | { do_bad, SIGKILL, SI_KERNEL, "unknown 45" }, |
---|
741 | 826 | { do_bad, SIGKILL, SI_KERNEL, "unknown 46" }, |
---|
742 | 827 | { do_bad, SIGKILL, SI_KERNEL, "unknown 47" }, |
---|
743 | | - { _do_tlb_conf_fault, SIGKILL, SI_KERNEL, "TLB conflict abort" }, |
---|
| 828 | + { do_bad, SIGKILL, SI_KERNEL, "TLB conflict abort" }, |
---|
744 | 829 | { do_bad, SIGKILL, SI_KERNEL, "Unsupported atomic hardware update fault" }, |
---|
745 | 830 | { do_bad, SIGKILL, SI_KERNEL, "unknown 50" }, |
---|
746 | 831 | { do_bad, SIGKILL, SI_KERNEL, "unknown 51" }, |
---|
.. | .. |
---|
758 | 843 | { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, |
---|
759 | 844 | }; |
---|
760 | 845 | |
---|
761 | | -int handle_guest_sea(phys_addr_t addr, unsigned int esr) |
---|
762 | | -{ |
---|
763 | | - return ghes_notify_sea(); |
---|
764 | | -} |
---|
765 | | - |
---|
766 | | -asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, |
---|
767 | | - struct pt_regs *regs) |
---|
| 846 | +void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs) |
---|
768 | 847 | { |
---|
769 | 848 | const struct fault_info *inf = esr_to_fault_info(esr); |
---|
770 | | - struct siginfo info; |
---|
| 849 | + unsigned long addr = untagged_addr(far); |
---|
771 | 850 | |
---|
772 | | - if (!inf->fn(addr, esr, regs)) |
---|
| 851 | + if (!inf->fn(far, esr, regs)) |
---|
773 | 852 | return; |
---|
774 | 853 | |
---|
775 | 854 | if (!user_mode(regs)) { |
---|
776 | 855 | pr_alert("Unhandled fault at 0x%016lx\n", addr); |
---|
| 856 | + trace_android_rvh_do_mem_abort(regs, esr, addr, inf->name); |
---|
777 | 857 | mem_abort_decode(esr); |
---|
778 | 858 | show_pte(addr); |
---|
779 | 859 | } |
---|
780 | 860 | |
---|
781 | | - clear_siginfo(&info); |
---|
782 | | - info.si_signo = inf->sig; |
---|
783 | | - info.si_errno = 0; |
---|
784 | | - info.si_code = inf->code; |
---|
785 | | - info.si_addr = (void __user *)addr; |
---|
786 | | - arm64_notify_die(inf->name, regs, &info, esr); |
---|
| 861 | + /* |
---|
| 862 | + * At this point we have an unrecognized fault type whose tag bits may |
---|
| 863 | + * have been defined as UNKNOWN. Therefore we only expose the untagged |
---|
| 864 | + * address to the signal handler. |
---|
| 865 | + */ |
---|
| 866 | + arm64_notify_die(inf->name, regs, inf->sig, inf->code, addr, esr); |
---|
787 | 867 | } |
---|
| 868 | +NOKPROBE_SYMBOL(do_mem_abort); |
---|
788 | 869 | |
---|
789 | | -asmlinkage void __exception do_el0_irq_bp_hardening(void) |
---|
| 870 | +void do_el0_irq_bp_hardening(void) |
---|
790 | 871 | { |
---|
791 | 872 | /* PC has already been checked in entry.S */ |
---|
792 | 873 | arm64_apply_bp_hardening(); |
---|
793 | 874 | } |
---|
| 875 | +NOKPROBE_SYMBOL(do_el0_irq_bp_hardening); |
---|
794 | 876 | |
---|
795 | | -asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, |
---|
796 | | - unsigned int esr, |
---|
797 | | - struct pt_regs *regs) |
---|
| 877 | +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) |
---|
798 | 878 | { |
---|
799 | | - /* |
---|
800 | | - * We've taken an instruction abort from userspace and not yet |
---|
801 | | - * re-enabled IRQs. If the address is a kernel address, apply |
---|
802 | | - * BP hardening prior to enabling IRQs and pre-emption. |
---|
803 | | - */ |
---|
804 | | - if (!is_ttbr0_addr(addr)) |
---|
805 | | - arm64_apply_bp_hardening(); |
---|
| 879 | + trace_android_rvh_do_sp_pc_abort(regs, esr, addr, user_mode(regs)); |
---|
806 | 880 | |
---|
807 | | - local_irq_enable(); |
---|
808 | | - do_mem_abort(addr, esr, regs); |
---|
| 881 | + arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, |
---|
| 882 | + addr, esr); |
---|
809 | 883 | } |
---|
810 | | - |
---|
811 | | - |
---|
812 | | -asmlinkage void __exception do_sp_pc_abort(unsigned long addr, |
---|
813 | | - unsigned int esr, |
---|
814 | | - struct pt_regs *regs) |
---|
815 | | -{ |
---|
816 | | - struct siginfo info; |
---|
817 | | - |
---|
818 | | - if (user_mode(regs)) { |
---|
819 | | - if (!is_ttbr0_addr(instruction_pointer(regs))) |
---|
820 | | - arm64_apply_bp_hardening(); |
---|
821 | | - local_irq_enable(); |
---|
822 | | - } |
---|
823 | | - |
---|
824 | | - clear_siginfo(&info); |
---|
825 | | - info.si_signo = SIGBUS; |
---|
826 | | - info.si_errno = 0; |
---|
827 | | - info.si_code = BUS_ADRALN; |
---|
828 | | - info.si_addr = (void __user *)addr; |
---|
829 | | - arm64_notify_die("SP/PC alignment exception", regs, &info, esr); |
---|
830 | | -} |
---|
| 884 | +NOKPROBE_SYMBOL(do_sp_pc_abort); |
---|
831 | 885 | |
---|
832 | 886 | int __init early_brk64(unsigned long addr, unsigned int esr, |
---|
833 | 887 | struct pt_regs *regs); |
---|
.. | .. |
---|
860 | 914 | debug_fault_info[nr].name = name; |
---|
861 | 915 | } |
---|
862 | 916 | |
---|
| 917 | +/* |
---|
| 918 | + * In debug exception context, we explicitly disable preemption despite |
---|
| 919 | + * having interrupts disabled. |
---|
| 920 | + * This serves two purposes: it makes it much less likely that we would |
---|
| 921 | + * accidentally schedule in exception context and it will force a warning |
---|
| 922 | + * if we somehow manage to schedule by accident. |
---|
| 923 | + */ |
---|
| 924 | +static void debug_exception_enter(struct pt_regs *regs) |
---|
| 925 | +{ |
---|
| 926 | + preempt_disable(); |
---|
| 927 | + |
---|
| 928 | + /* This code is a bit fragile. Test it. */ |
---|
| 929 | + RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work"); |
---|
| 930 | +} |
---|
| 931 | +NOKPROBE_SYMBOL(debug_exception_enter); |
---|
| 932 | + |
---|
| 933 | +static void debug_exception_exit(struct pt_regs *regs) |
---|
| 934 | +{ |
---|
| 935 | + preempt_enable_no_resched(); |
---|
| 936 | +} |
---|
| 937 | +NOKPROBE_SYMBOL(debug_exception_exit); |
---|
| 938 | + |
---|
863 | 939 | #ifdef CONFIG_ARM64_ERRATUM_1463225 |
---|
864 | 940 | DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); |
---|
865 | 941 | |
---|
866 | | -static int __exception |
---|
867 | | -cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) |
---|
| 942 | +static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) |
---|
868 | 943 | { |
---|
869 | 944 | if (user_mode(regs)) |
---|
870 | 945 | return 0; |
---|
.. | .. |
---|
883 | 958 | return 1; |
---|
884 | 959 | } |
---|
885 | 960 | #else |
---|
886 | | -static int __exception |
---|
887 | | -cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) |
---|
| 961 | +static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) |
---|
888 | 962 | { |
---|
889 | 963 | return 0; |
---|
890 | 964 | } |
---|
891 | 965 | #endif /* CONFIG_ARM64_ERRATUM_1463225 */ |
---|
| 966 | +NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler); |
---|
892 | 967 | |
---|
893 | | -asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint, |
---|
894 | | - unsigned int esr, |
---|
895 | | - struct pt_regs *regs) |
---|
| 968 | +void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, |
---|
| 969 | + struct pt_regs *regs) |
---|
896 | 970 | { |
---|
897 | | - const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); |
---|
| 971 | + const struct fault_info *inf = esr_to_debug_fault_info(esr); |
---|
898 | 972 | unsigned long pc = instruction_pointer(regs); |
---|
899 | | - int rv; |
---|
900 | 973 | |
---|
901 | 974 | if (cortex_a76_erratum_1463225_debug_handler(regs)) |
---|
902 | | - return 0; |
---|
| 975 | + return; |
---|
903 | 976 | |
---|
904 | | - /* |
---|
905 | | - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were |
---|
906 | | - * already disabled to preserve the last enabled/disabled addresses. |
---|
907 | | - */ |
---|
908 | | - if (interrupts_enabled(regs)) |
---|
909 | | - trace_hardirqs_off(); |
---|
| 977 | + debug_exception_enter(regs); |
---|
910 | 978 | |
---|
911 | 979 | if (user_mode(regs) && !is_ttbr0_addr(pc)) |
---|
912 | 980 | arm64_apply_bp_hardening(); |
---|
913 | 981 | |
---|
914 | | - if (!inf->fn(addr_if_watchpoint, esr, regs)) { |
---|
915 | | - rv = 1; |
---|
916 | | - } else { |
---|
917 | | - struct siginfo info; |
---|
918 | | - |
---|
919 | | - clear_siginfo(&info); |
---|
920 | | - info.si_signo = inf->sig; |
---|
921 | | - info.si_errno = 0; |
---|
922 | | - info.si_code = inf->code; |
---|
923 | | - info.si_addr = (void __user *)pc; |
---|
924 | | - arm64_notify_die(inf->name, regs, &info, esr); |
---|
925 | | - rv = 0; |
---|
| 982 | + if (inf->fn(addr_if_watchpoint, esr, regs)) { |
---|
| 983 | + arm64_notify_die(inf->name, regs, inf->sig, inf->code, pc, esr); |
---|
926 | 984 | } |
---|
927 | 985 | |
---|
928 | | - if (interrupts_enabled(regs)) |
---|
929 | | - trace_hardirqs_on(); |
---|
930 | | - |
---|
931 | | - return rv; |
---|
| 986 | + debug_exception_exit(regs); |
---|
932 | 987 | } |
---|
933 | 988 | NOKPROBE_SYMBOL(do_debug_exception); |
---|
934 | 989 | |
---|
935 | | -#ifdef CONFIG_ARM64_PAN |
---|
936 | | -void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) |
---|
| 990 | +/* |
---|
| 991 | + * Used during anonymous page fault handling. |
---|
| 992 | + */ |
---|
| 993 | +struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, |
---|
| 994 | + unsigned long vaddr) |
---|
937 | 995 | { |
---|
938 | | - /* |
---|
939 | | - * We modify PSTATE. This won't work from irq context as the PSTATE |
---|
940 | | - * is discarded once we return from the exception. |
---|
941 | | - */ |
---|
942 | | - WARN_ON_ONCE(in_interrupt()); |
---|
| 996 | + gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO | __GFP_CMA; |
---|
943 | 997 | |
---|
944 | | - sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); |
---|
945 | | - asm(SET_PSTATE_PAN(1)); |
---|
| 998 | + /* |
---|
| 999 | + * If the page is mapped with PROT_MTE, initialise the tags at the |
---|
| 1000 | + * point of allocation and page zeroing as this is usually faster than |
---|
| 1001 | + * separate DC ZVA and STGM. |
---|
| 1002 | + */ |
---|
| 1003 | + if (vma->vm_flags & VM_MTE) |
---|
| 1004 | + flags |= __GFP_ZEROTAGS; |
---|
| 1005 | + |
---|
| 1006 | + return alloc_page_vma(flags, vma, vaddr); |
---|
946 | 1007 | } |
---|
947 | | -#endif /* CONFIG_ARM64_PAN */ |
---|
| 1008 | + |
---|
| 1009 | +void tag_clear_highpage(struct page *page) |
---|
| 1010 | +{ |
---|
| 1011 | + mte_zero_clear_page_tags(page_address(page)); |
---|
| 1012 | + page_kasan_tag_reset(page); |
---|
| 1013 | + set_bit(PG_mte_tagged, &page->flags); |
---|
| 1014 | +} |
---|