From b22da3d8526a935aa31e086e63f60ff3246cb61c Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Sat, 09 Dec 2023 07:24:11 +0000
Subject: [PATCH] add stmmac read mac from eeprom

---
 kernel/arch/arm64/mm/fault.c | 673 ++++++++++++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 367 insertions(+), 306 deletions(-)

diff --git a/kernel/arch/arm64/mm/fault.c b/kernel/arch/arm64/mm/fault.c
index d44543c..45e652d 100644
--- a/kernel/arch/arm64/mm/fault.c
+++ b/kernel/arch/arm64/mm/fault.c
@@ -1,28 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Based on arch/arm/mm/fault.c
  *
  * Copyright (C) 1995  Linus Torvalds
  * Copyright (C) 1995-2004 Russell King
  * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */

+#include <linux/acpi.h>
+#include <linux/bitfield.h>
 #include <linux/extable.h>
+#include <linux/kfence.h>
 #include <linux/signal.h>
 #include <linux/mm.h>
 #include <linux/hardirq.h>
 #include <linux/init.h>
+#include <linux/kasan.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/page-flags.h>
@@ -33,23 +26,26 @@
 #include <linux/preempt.h>
 #include <linux/hugetlb.h>

+#include <asm/acpi.h>
 #include <asm/bug.h>
 #include <asm/cmpxchg.h>
 #include <asm/cpufeature.h>
 #include <asm/exception.h>
+#include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
-#include <asm/kasan.h>
+#include <asm/kprobes.h>
+#include <asm/mte.h>
+#include <asm/processor.h>
 #include <asm/sysreg.h>
 #include <asm/system_misc.h>
-#include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/traps.h>

-#include <acpi/ghes.h>
+#include <trace/hooks/fault.h>

 struct fault_info {
-	int	(*fn)(unsigned long addr, unsigned int esr,
+	int	(*fn)(unsigned long far, unsigned int esr,
 		      struct pt_regs *regs);
 	int	sig;
 	int	code;
@@ -57,33 +53,17 @@
 };

 static const struct fault_info fault_info[];
+static struct fault_info debug_fault_info[];

 static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
 {
-	return fault_info + (esr & 63);
+	return fault_info + (esr & ESR_ELx_FSC);
 }

-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
+static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
 {
-	int ret = 0;
-
-	/* kprobe_running() needs smp_processor_id() */
-	if (!user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, esr))
-			ret = 1;
-		preempt_enable();
-	}
-
-	return ret;
+	return debug_fault_info + DBG_ESR_EVT(esr);
 }
-#else
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
-{
-	return 0;
-}
-#endif

 static void data_abort_decode(unsigned int esr)
 {
@@ -112,8 +92,8 @@
 	pr_alert("Mem abort info:\n");
 	pr_alert("  ESR = 0x%08x\n", esr);
-	pr_alert("  Exception class = %s, IL = %u bits\n",
-		 esr_get_class_string(esr),
+	pr_alert("  EC = 0x%02lx: %s, IL = %u bits\n",
+		 ESR_ELx_EC(esr), esr_get_class_string(esr),
 		 (esr & ESR_ELx_IL) ? 32 : 16);
 	pr_alert("  SET = %lu, FnV = %lu\n",
 		 (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
@@ -126,22 +106,19 @@
 	data_abort_decode(esr);
 }

-static inline bool is_ttbr0_addr(unsigned long addr)
+static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm)
 {
-	/* entry assembly clears tags for TTBR0 addrs */
-	return addr < TASK_SIZE;
-}
+	/* Either init_pg_dir or swapper_pg_dir */
+	if (mm == &init_mm)
+		return __pa_symbol(mm->pgd);

-static inline bool is_ttbr1_addr(unsigned long addr)
-{
-	/* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
-	return arch_kasan_reset_tag(addr) >= VA_START;
+	return (unsigned long)virt_to_phys(mm->pgd);
 }

 /*
  * Dump out the page tables associated with 'addr' in the currently active mm.
  */
-void show_pte(unsigned long addr)
+static void show_pte(unsigned long addr)
 {
 	struct mm_struct *mm;
 	pgd_t *pgdp;
@@ -164,14 +141,15 @@
 		return;
 	}

-	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
+	pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n",
 		 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
-		 VA_BITS, mm->pgd);
+		 vabits_actual, mm_to_pgd_phys(mm));
 	pgdp = pgd_offset(mm, addr);
 	pgd = READ_ONCE(*pgdp);
 	pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));

 	do {
+		p4d_t *p4dp, p4d;
 		pud_t *pudp, pud;
 		pmd_t *pmdp, pmd;
 		pte_t *ptep, pte;
@@ -179,7 +157,13 @@
 		if (pgd_none(pgd) || pgd_bad(pgd))
 			break;

-		pudp = pud_offset(pgdp, addr);
+		p4dp = p4d_offset(pgdp, addr);
+		p4d = READ_ONCE(*p4dp);
+		pr_cont(", p4d=%016llx", p4d_val(p4d));
+		if (p4d_none(p4d) || p4d_bad(p4d))
+			break;
+
+		pudp = pud_offset(p4dp, addr);
 		pud = READ_ONCE(*pudp);
 		pr_cont(", pud=%016llx", pud_val(pud));
 		if (pud_none(pud) || pud_bad(pud))
@@ -239,7 +223,9 @@
 		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
 	} while (pteval != old_pteval);

-	flush_tlb_fix_spurious_fault(vma, address);
+	/* Invalidate a stale read-only entry */
+	if (dirty)
+		flush_tlb_page(vma, address);

 	return 1;
 }

@@ -248,9 +234,8 @@
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
 }

-static inline bool is_el1_permission_fault(unsigned int esr,
-					   struct pt_regs *regs,
-					   unsigned long addr)
+static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
+					   struct pt_regs *regs)
 {
 	unsigned int ec = ESR_ELx_EC(esr);
 	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
@@ -268,6 +253,38 @@
 	return false;
 }

+static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
+							unsigned int esr,
+							struct pt_regs *regs)
+{
+	unsigned long flags;
+	u64 par, dfsc;
+
+	if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR ||
+	    (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT)
+		return false;
+
+	local_irq_save(flags);
+	asm volatile("at s1e1r, %0" :: "r" (addr));
+	isb();
+	par = read_sysreg_par();
+	local_irq_restore(flags);
+
+	/*
+	 * If we now have a valid translation, treat the translation fault as
+	 * spurious.
+	 */
+	if (!(par & SYS_PAR_EL1_F))
+		return true;
+
+	/*
+	 * If we got a different type of fault from the AT instruction,
+	 * treat the translation fault as spurious.
+	 */
+	dfsc = FIELD_GET(SYS_PAR_EL1_FST, par);
+	return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT;
+}
+
 static void die_kernel_fault(const char *msg, unsigned long addr,
 			     unsigned int esr, struct pt_regs *regs)
 {
@@ -276,12 +293,72 @@
 	pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
 		 addr);

+	trace_android_rvh_die_kernel_fault(regs, esr, addr, msg);
 	mem_abort_decode(esr);

 	show_pte(addr);
 	die("Oops", regs, esr);
 	bust_spinlocks(0);
 	do_exit(SIGKILL);
+}
+
+#ifdef CONFIG_KASAN_HW_TAGS
+static void report_tag_fault(unsigned long addr, unsigned int esr,
+			     struct pt_regs *regs)
+{
+	static bool reported;
+	bool is_write;
+
+	if (READ_ONCE(reported))
+		return;
+
+	/*
+	 * This is used for KASAN tests and assumes that no MTE faults
+	 * happened before running the tests.
+	 */
+	if (mte_report_once())
+		WRITE_ONCE(reported, true);
+
+	/*
+	 * SAS bits aren't set for all faults reported in EL1, so we can't
+	 * find out access size.
+	 */
+	is_write = !!(esr & ESR_ELx_WNR);
+	kasan_report(addr, 0, is_write, regs->pc);
+}
+#else
+/* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */
+static inline void report_tag_fault(unsigned long addr, unsigned int esr,
+				    struct pt_regs *regs) { }
+#endif
+
+static void do_tag_recovery(unsigned long addr, unsigned int esr,
+			    struct pt_regs *regs)
+{
+
+	report_tag_fault(addr, esr, regs);
+
+	/*
+	 * Disable MTE Tag Checking on the local CPU for the current EL.
+	 * It will be done lazily on the other CPUs when they will hit a
+	 * tag fault.
+	 */
+	sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_NONE);
+	isb();
+}
+
+static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
+{
+	unsigned int ec = ESR_ELx_EC(esr);
+	unsigned int fsc = esr & ESR_ELx_FSC;
+
+	if (ec != ESR_ELx_EC_DABT_CUR)
+		return false;
+
+	if (fsc == ESR_ELx_FSC_MTE)
+		return true;
+
+	return false;
 }

 static void __do_kernel_fault(unsigned long addr, unsigned int esr,
@@ -296,23 +373,38 @@
 	if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
 		return;

-	if (is_el1_permission_fault(esr, regs, addr)) {
+	if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs),
+	    "Ignoring spurious kernel translation fault at virtual address %016lx\n", addr))
+		return;
+
+	if (is_el1_mte_sync_tag_check_fault(esr)) {
+		do_tag_recovery(addr, esr, regs);
+
+		return;
+	}
+
+	if (is_el1_permission_fault(addr, esr, regs)) {
 		if (esr & ESR_ELx_WNR)
 			msg = "write to read-only memory";
+		else if (is_el1_instruction_abort(esr))
+			msg = "execute from non-executable memory";
 		else
 			msg = "read from unreadable memory";
 	} else if (addr < PAGE_SIZE) {
 		msg = "NULL pointer dereference";
 	} else {
+		if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
+			return;
+
 		msg = "paging request";
 	}

 	die_kernel_fault(msg, addr, esr, regs);
 }

-static void __do_user_fault(struct siginfo *info, unsigned int esr)
+static void set_thread_esr(unsigned long address, unsigned int esr)
 {
-	current->thread.fault_address = (unsigned long)info->si_addr;
+	current->thread.fault_address = address;

 	/*
 	 * If the faulting address is in the kernel, we must sanitize the ESR.
@@ -365,25 +457,22 @@
 	}

 	current->thread.fault_code = esr;
-	arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current);
 }

-static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static void do_bad_area(unsigned long far, unsigned int esr,
+			struct pt_regs *regs)
 {
+	unsigned long addr = untagged_addr(far);
+
 	/*
 	 * If we are in kernel mode at this point, we have no context to
 	 * handle this fault with.
 	 */
 	if (user_mode(regs)) {
 		const struct fault_info *inf = esr_to_fault_info(esr);
-		struct siginfo si;

-		clear_siginfo(&si);
-		si.si_signo = inf->sig;
-		si.si_code = inf->code;
-		si.si_addr = (void __user *)addr;
-
-		__do_user_fault(&si, esr);
+		set_thread_esr(addr, esr);
+		arm64_force_sig_fault(inf->sig, inf->code, far, inf->name);
 	} else {
 		__do_kernel_fault(addr, esr, regs);
 	}
@@ -392,41 +481,32 @@
 #define VM_FAULT_BADMAP		0x010000
 #define VM_FAULT_BADACCESS	0x020000

-static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
-			   unsigned int mm_flags, unsigned long vm_flags,
-			   struct task_struct *tsk)
+static int __do_page_fault(struct vm_area_struct *vma, unsigned long addr,
+			   unsigned int mm_flags, unsigned long vm_flags,
+			   struct pt_regs *regs)
 {
-	struct vm_area_struct *vma;
-	vm_fault_t fault;
-	vma = find_vma(mm, addr);
-	fault = VM_FAULT_BADMAP;
 	if (unlikely(!vma))
-		goto out;
-	if (unlikely(vma->vm_start > addr))
-		goto check_stack;
+		return VM_FAULT_BADMAP;

 	/*
 	 * Ok, we have a good vm_area for this memory access, so we can handle
 	 * it.
 	 */
-good_area:
+	if (unlikely(vma->vm_start > addr)) {
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			return VM_FAULT_BADMAP;
+		if (expand_stack(vma, addr))
+			return VM_FAULT_BADMAP;
+	}
+
 	/*
 	 * Check that the permissions on the VMA allow for the fault which
 	 * occurred.
 	 */
-	if (!(vma->vm_flags & vm_flags)) {
-		fault = VM_FAULT_BADACCESS;
-		goto out;
-	}
-
-	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
-
-check_stack:
-	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
-		goto good_area;
-out:
-	return fault;
+	if (!(vma->vm_flags & vm_flags))
+		return VM_FAULT_BADACCESS;
+	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs);
 }

 static bool is_el0_instruction_abort(unsigned int esr)
@@ -434,21 +514,28 @@
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
 }

-static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
+/*
+ * Note: not valid for EL1 DC IVAC, but we never use that such that it
+ * should fault. EL0 cannot issue DC IVAC (undef).
+ */
+static bool is_write_abort(unsigned int esr)
+{
+	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
+}
+
+static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
 				   struct pt_regs *regs)
 {
-	struct task_struct *tsk;
-	struct mm_struct *mm;
-	struct siginfo si;
-	vm_fault_t fault, major = 0;
-	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
-	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	const struct fault_info *inf;
+	struct mm_struct *mm = current->mm;
+	vm_fault_t fault;
+	unsigned long vm_flags = VM_ACCESS_FLAGS;
+	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
+	struct vm_area_struct *vma = NULL;
+	unsigned long addr = untagged_addr(far);

-	if (notify_page_fault(regs, esr))
+	if (kprobe_page_fault(regs, esr))
 		return 0;
-
-	tsk = current;
-	mm = tsk->mm;

 	/*
 	 * If we're in an interrupt or have no user context, we must not take
@@ -462,12 +549,13 @@
 	if (is_el0_instruction_abort(esr)) {
 		vm_flags = VM_EXEC;
-	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
+		mm_flags |= FAULT_FLAG_INSTRUCTION;
+	} else if (is_write_abort(esr)) {
 		vm_flags = VM_WRITE;
 		mm_flags |= FAULT_FLAG_WRITE;
 	}

-	if (is_ttbr0_addr(addr) && is_el1_permission_fault(esr, regs, addr)) {
+	if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
 		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
 		if (regs->orig_addr_limit == KERNEL_DS)
 			die_kernel_fault("access to user memory with fs=KERNEL_DS",
@@ -485,15 +573,23 @@
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

 	/*
+	 * let's try a speculative page fault without grabbing the
+	 * mmap_sem.
+	 */
+	fault = handle_speculative_fault(mm, addr, mm_flags, &vma, regs);
+	if (fault != VM_FAULT_RETRY)
+		goto done;
+
+	/*
 	 * As per x86, we may deadlock here. However, since the kernel only
 	 * validly references user space from well defined areas of the code,
 	 * we can bug out early if this is from code which shouldn't.
 	 */
-	if (!down_read_trylock(&mm->mmap_sem)) {
+	if (!mmap_read_trylock(mm)) {
 		if (!user_mode(regs) && !search_exception_tables(regs->pc))
 			goto no_context;
retry:
-		down_read(&mm->mmap_sem);
+		mmap_read_lock(mm);
 	} else {
 		/*
 		 * The above down_read_trylock() might have succeeded in which
@@ -501,62 +597,47 @@
 		 */
 		might_sleep();
 #ifdef CONFIG_DEBUG_VM
-		if (!user_mode(regs) && !search_exception_tables(regs->pc))
+		if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
+			mmap_read_unlock(mm);
 			goto no_context;
+		}
 #endif
 	}

-	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
-	major |= fault & VM_FAULT_MAJOR;
+	if (!vma || !can_reuse_spf_vma(vma, addr))
+		vma = find_vma(mm, addr);
+	fault = __do_page_fault(vma, addr, mm_flags, vm_flags, regs);
+
+	/* Quick path to respond to signals */
+	if (fault_signal_pending(fault, regs)) {
+		if (!user_mode(regs))
+			goto no_context;
+		return 0;
+	}

 	if (fault & VM_FAULT_RETRY) {
-		/*
-		 * If we need to retry but a fatal signal is pending,
-		 * handle the signal first. We do not need to release
-		 * the mmap_sem because it would already be released
-		 * in __lock_page_or_retry in mm/filemap.c.
-		 */
-		if (fatal_signal_pending(current)) {
-			if (!user_mode(regs))
-				goto no_context;
-			return 0;
-		}
-
-		/*
-		 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
-		 * starvation.
-		 */
 		if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
-			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
 			mm_flags |= FAULT_FLAG_TRIED;
+
+			/*
+			 * Do not try to reuse this vma and fetch it
+			 * again since we will release the mmap_sem.
+			 */
+			vma = NULL;
+
 			goto retry;
 		}
 	}
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
+
+done:

 	/*
 	 * Handle the "normal" (no error) case first.
 	 */
 	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
-			      VM_FAULT_BADACCESS)))) {
-		/*
-		 * Major/minor page fault accounting is only done
-		 * once. If we go through a retry, it is extremely
-		 * likely that the page will be found in page cache at
-		 * that point.
-		 */
-		if (major) {
-			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
-				      addr);
-		} else {
-			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
-				      addr);
-		}
-
+			      VM_FAULT_BADACCESS))))
 		return 0;
-	}

 	/*
 	 * If we are in kernel mode at this point, we have no context to
@@ -575,37 +656,32 @@
 		return 0;
 	}

-	clear_siginfo(&si);
-	si.si_addr = (void __user *)addr;
-
+	inf = esr_to_fault_info(esr);
+	set_thread_esr(addr, esr);
 	if (fault & VM_FAULT_SIGBUS) {
 		/*
 		 * We had some memory, but were unable to successfully fix up
 		 * this page fault.
 		 */
-		si.si_signo = SIGBUS;
-		si.si_code = BUS_ADRERR;
-	} else if (fault & VM_FAULT_HWPOISON_LARGE) {
-		unsigned int hindex = VM_FAULT_GET_HINDEX(fault);
+		arm64_force_sig_fault(SIGBUS, BUS_ADRERR, far, inf->name);
+	} else if (fault & (VM_FAULT_HWPOISON_LARGE | VM_FAULT_HWPOISON)) {
+		unsigned int lsb;

-		si.si_signo = SIGBUS;
-		si.si_code = BUS_MCEERR_AR;
-		si.si_addr_lsb = hstate_index_to_shift(hindex);
-	} else if (fault & VM_FAULT_HWPOISON) {
-		si.si_signo = SIGBUS;
-		si.si_code = BUS_MCEERR_AR;
-		si.si_addr_lsb = PAGE_SHIFT;
+		lsb = PAGE_SHIFT;
+		if (fault & VM_FAULT_HWPOISON_LARGE)
+			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+
+		arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
 	} else {
 		/*
 		 * Something tried to access memory that isn't in our memory
 		 * map.
 		 */
-		si.si_signo = SIGSEGV;
-		si.si_code = fault == VM_FAULT_BADACCESS ?
-			SEGV_ACCERR : SEGV_MAPERR;
+		arm64_force_sig_fault(SIGSEGV,
+				      fault == VM_FAULT_BADACCESS ?
+				      SEGV_ACCERR : SEGV_MAPERR,
+				      far, inf->name);
 	}

-	__do_user_fault(&si, esr);
 	return 0;

no_context:
@@ -613,81 +689,84 @@
 	return 0;
 }

-int __weak do_tlb_conf_fault(unsigned long addr,
-			     unsigned int esr,
-			     struct pt_regs *regs)
-{
-	return 1; /* do_bad default */
-}
-
-int (*do_tlb_conf_fault_cb)(unsigned long addr,
-			    unsigned int esr,
-			    struct pt_regs *regs)
-	= do_tlb_conf_fault; /* initialization saves us a branch */
-EXPORT_SYMBOL_GPL(do_tlb_conf_fault_cb);
-
-static int _do_tlb_conf_fault(unsigned long addr,
-			      unsigned int esr,
-			      struct pt_regs *regs)
-{
-	return (*do_tlb_conf_fault_cb)(addr, esr, regs);
-}
-
-static int __kprobes do_translation_fault(unsigned long addr,
+static int __kprobes do_translation_fault(unsigned long far,
 					  unsigned int esr,
 					  struct pt_regs *regs)
 {
-	if (is_ttbr0_addr(addr))
-		return do_page_fault(addr, esr, regs);
+	unsigned long addr = untagged_addr(far);

-	do_bad_area(addr, esr, regs);
+	if (is_ttbr0_addr(addr))
+		return do_page_fault(far, esr, regs);
+
+	do_bad_area(far, esr, regs);
 	return 0;
 }

-static int do_alignment_fault(unsigned long addr, unsigned int esr,
+#ifdef CONFIG_ROCKCHIP_ARM64_ALIGN_FAULT_FIX
+extern int alignment_fixup_helper(unsigned long addr, unsigned int esr,
+				  struct pt_regs *regs);
+#endif
+static int do_alignment_fault(unsigned long far, unsigned int esr,
 			      struct pt_regs *regs)
 {
-	do_bad_area(addr, esr, regs);
+#ifdef CONFIG_ROCKCHIP_ARM64_ALIGN_FAULT_FIX
+	if (!alignment_fixup_helper(far, esr, regs))
+		return 0;
+#endif
+	do_bad_area(far, esr, regs);
 	return 0;
 }

-static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
-	return 1; /* "fault" */
+	unsigned long addr = untagged_addr(far);
+	int ret = 1;
+
+	trace_android_vh_handle_tlb_conf(addr, esr, &ret);
+	return ret;
 }

-static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
-	struct siginfo info;
 	const struct fault_info *inf;
+	unsigned long siaddr;

 	inf = esr_to_fault_info(esr);

-	/*
-	 * Synchronous aborts may interrupt code which had interrupts masked.
-	 * Before calling out into the wider kernel tell the interested
-	 * subsystems.
-	 */
-	if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
-		if (interrupts_enabled(regs))
-			nmi_enter();
-
-		ghes_notify_sea();
-
-		if (interrupts_enabled(regs))
-			nmi_exit();
+	if (user_mode(regs) && apei_claim_sea(regs) == 0) {
+		/*
+		 * APEI claimed this as a firmware-first notification.
+		 * Some processing deferred to task_work before ret_to_user().
+		 */
+		return 0;
 	}

-	clear_siginfo(&info);
-	info.si_signo = inf->sig;
-	info.si_errno = 0;
-	info.si_code = inf->code;
-	if (esr & ESR_ELx_FnV)
-		info.si_addr = NULL;
-	else
-		info.si_addr = (void __user *)addr;
-	arm64_notify_die(inf->name, regs, &info, esr);
+	if (esr & ESR_ELx_FnV) {
+		siaddr = 0;
+	} else {
+		/*
+		 * The architecture specifies that the tag bits of FAR_EL1 are
+		 * UNKNOWN for synchronous external aborts. Mask them out now
+		 * so that userspace doesn't see them.
+		 */
+		siaddr = untagged_addr(far);
+	}
+	trace_android_rvh_do_sea(regs, esr, siaddr, inf->name);
+	arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
+	return 0;
+}
+
+static int do_tag_check_fault(unsigned long far, unsigned int esr,
+			      struct pt_regs *regs)
+{
+	/*
+	 * The architecture specifies that bits 63:60 of FAR_EL1 are UNKNOWN
+	 * for tag check faults. Set them to corresponding bits in the untagged
+	 * address.
+	 */
+	far = (__untagged_addr(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK);
+	do_bad_area(far, esr, regs);
 	return 0;
 }

@@ -709,7 +788,7 @@
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
 	{ do_sea,		SIGBUS,  BUS_OBJERR,	"synchronous external abort"	},
-	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 17"			},
+	{ do_tag_check_fault,	SIGSEGV, SEGV_MTESERR,	"synchronous tag check fault"	},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 18"			},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 19"			},
 	{ do_sea,		SIGKILL, SI_KERNEL,	"level 0 (translation table walk)"	},
@@ -740,7 +819,7 @@
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 45"			},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 46"			},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 47"			},
-	{ _do_tlb_conf_fault,	SIGKILL, SI_KERNEL,	"TLB conflict abort"		},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"TLB conflict abort"		},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"Unsupported atomic hardware update fault"	},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 50"			},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 51"			},
@@ -758,76 +837,45 @@
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 63"			},
 };

-int handle_guest_sea(phys_addr_t addr, unsigned int esr)
-{
-	return ghes_notify_sea();
-}
-
-asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
-					 struct pt_regs *regs)
+void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
 	const struct fault_info *inf = esr_to_fault_info(esr);
-	struct siginfo info;
+	unsigned long addr = untagged_addr(far);

-	if (!inf->fn(addr, esr, regs))
+	if (!inf->fn(far, esr, regs))
 		return;

 	if (!user_mode(regs)) {
 		pr_alert("Unhandled fault at 0x%016lx\n", addr);
+		trace_android_rvh_do_mem_abort(regs, esr, addr, inf->name);
 		mem_abort_decode(esr);
 		show_pte(addr);
 	}

-	clear_siginfo(&info);
-	info.si_signo = inf->sig;
-	info.si_errno = 0;
-	info.si_code = inf->code;
-	info.si_addr = (void __user *)addr;
-	arm64_notify_die(inf->name, regs, &info, esr);
+	/*
+	 * At this point we have an unrecognized fault type whose tag bits may
+	 * have been defined as UNKNOWN. Therefore we only expose the untagged
+	 * address to the signal handler.
+	 */
+	arm64_notify_die(inf->name, regs, inf->sig, inf->code, addr, esr);
 }
+NOKPROBE_SYMBOL(do_mem_abort);

-asmlinkage void __exception do_el0_irq_bp_hardening(void)
+void do_el0_irq_bp_hardening(void)
 {
 	/* PC has already been checked in entry.S */
 	arm64_apply_bp_hardening();
 }
+NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);

-asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
-						   unsigned int esr,
-						   struct pt_regs *regs)
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
-	/*
-	 * We've taken an instruction abort from userspace and not yet
-	 * re-enabled IRQs. If the address is a kernel address, apply
-	 * BP hardening prior to enabling IRQs and pre-emption.
-	 */
-	if (!is_ttbr0_addr(addr))
-		arm64_apply_bp_hardening();
+	trace_android_rvh_do_sp_pc_abort(regs, esr, addr, user_mode(regs));

-	local_irq_enable();
-	do_mem_abort(addr, esr, regs);
+	arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
+			 addr, esr);
 }
-
-
-asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
-					   unsigned int esr,
-					   struct pt_regs *regs)
-{
-	struct siginfo info;
-
-	if (user_mode(regs)) {
-		if (!is_ttbr0_addr(instruction_pointer(regs)))
-			arm64_apply_bp_hardening();
-		local_irq_enable();
-	}
-
-	clear_siginfo(&info);
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRALN;
-	info.si_addr = (void __user *)addr;
-	arm64_notify_die("SP/PC alignment exception", regs, &info, esr);
-}
+NOKPROBE_SYMBOL(do_sp_pc_abort);

 int __init early_brk64(unsigned long addr, unsigned int esr,
 		       struct pt_regs *regs);
@@ -860,11 +908,32 @@
 	debug_fault_info[nr].name = name;
 }

+/*
+ * In debug exception context, we explicitly disable preemption despite
+ * having interrupts disabled.
+ * This serves two purposes: it makes it much less likely that we would
+ * accidentally schedule in exception context and it will force a warning
+ * if we somehow manage to schedule by accident.
+ */
+static void debug_exception_enter(struct pt_regs *regs)
+{
+	preempt_disable();
+
+	/* This code is a bit fragile.  Test it. */
+	RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
+}
+NOKPROBE_SYMBOL(debug_exception_enter);
+
+static void debug_exception_exit(struct pt_regs *regs)
+{
+	preempt_enable_no_resched();
+}
+NOKPROBE_SYMBOL(debug_exception_exit);
+
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);

-static int __exception
-cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 {
 	if (user_mode(regs))
 		return 0;
@@ -883,65 +952,57 @@
 	return 1;
 }
 #else
-static int __exception
-cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 {
 	return 0;
 }
 #endif /* CONFIG_ARM64_ERRATUM_1463225 */
+NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler);

-asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint,
-					      unsigned int esr,
-					      struct pt_regs *regs)
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+			struct pt_regs *regs)
 {
-	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
+	const struct fault_info *inf = esr_to_debug_fault_info(esr);
 	unsigned long pc = instruction_pointer(regs);
-	int rv;

 	if (cortex_a76_erratum_1463225_debug_handler(regs))
-		return 0;
+		return;

-	/*
-	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
-	 * already disabled to preserve the last enabled/disabled addresses.
-	 */
-	if (interrupts_enabled(regs))
-		trace_hardirqs_off();
+	debug_exception_enter(regs);

 	if (user_mode(regs) && !is_ttbr0_addr(pc))
 		arm64_apply_bp_hardening();

-	if (!inf->fn(addr_if_watchpoint, esr, regs)) {
-		rv = 1;
-	} else {
-		struct siginfo info;
-
-		clear_siginfo(&info);
-		info.si_signo = inf->sig;
-		info.si_errno = 0;
-		info.si_code = inf->code;
-		info.si_addr = (void __user *)pc;
-		arm64_notify_die(inf->name, regs, &info, esr);
-		rv = 0;
+	if (inf->fn(addr_if_watchpoint, esr, regs)) {
+		arm64_notify_die(inf->name, regs, inf->sig, inf->code, pc, esr);
 	}

-	if (interrupts_enabled(regs))
-		trace_hardirqs_on();
-
-	return rv;
+	debug_exception_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug_exception);

-#ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
+/*
+ * Used during anonymous page fault handling.
+ */
+struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+						unsigned long vaddr)
 {
-	/*
-	 * We modify PSTATE. This won't work from irq context as the PSTATE
-	 * is discarded once we return from the exception.
-	 */
-	WARN_ON_ONCE(in_interrupt());
+	gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO | __GFP_CMA;

-	sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
-	asm(SET_PSTATE_PAN(1));
+	/*
+	 * If the page is mapped with PROT_MTE, initialise the tags at the
+	 * point of allocation and page zeroing as this is usually faster than
+	 * separate DC ZVA and STGM.
+	 */
+	if (vma->vm_flags & VM_MTE)
+		flags |= __GFP_ZEROTAGS;
+
+	return alloc_page_vma(flags, vma, vaddr);
 }
-#endif /* CONFIG_ARM64_PAN */
+
+void tag_clear_highpage(struct page *page)
+{
+	mte_zero_clear_page_tags(page_address(page));
+	page_kasan_tag_reset(page);
+	set_bit(PG_mte_tagged, &page->flags);
+}
-- 
Gitblit v1.6.2
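
A note for readers tracing the hunks above: every dispatch decision in this file keys off fields of ESR_EL1 — esr_to_fault_info() indexes the fault table by the fault status code, and mem_abort_decode() prints the exception class and instruction length. The stand-alone C sketch below decodes the same architected fields (EC in bits [31:26], IL in bit [25], WnR in bit [6], DFSC in bits [5:0], per the Arm ARM). The macro names and the sample ESR value are local to this illustration, not kernel definitions.

/*
 * Minimal user-space sketch of the ESR_EL1 decoding done by
 * mem_abort_decode() in the patch.  Only the architected bit
 * positions are assumed; nothing here comes from kernel headers.
 */
#include <stdint.h>
#include <stdio.h>

#define ESR_EC(esr)   (((esr) >> 26) & 0x3fu)	/* exception class */
#define ESR_IL(esr)   (((esr) >> 25) & 0x1u)	/* 1 = 32-bit instruction */
#define ESR_WNR(esr)  (((esr) >> 6) & 0x1u)	/* write, not read */
#define ESR_DFSC(esr) ((esr) & 0x3fu)		/* data fault status code */

int main(void)
{
	/* Example value: write to an unmapped user address, taken from EL0. */
	uint32_t esr = 0x96000045u;

	printf("EC = 0x%02x, IL = %u bits, WnR = %u, DFSC = 0x%02x\n",
	       ESR_EC(esr), ESR_IL(esr) ? 32u : 16u,
	       ESR_WNR(esr), ESR_DFSC(esr));
	return 0;
}

Compiled with any C compiler, the sample value prints EC = 0x25 (data abort from a lower exception level), IL = 32 bits, WnR = 1 and DFSC = 0x05 (level 1 translation fault) — i.e. a user-space write to an unmapped address, which the fault table above routes through do_translation_fault() into do_page_fault().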