2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
--- kernel/arch/arm64/mm/fault.c
+++ kernel/arch/arm64/mm/fault.c
@@ -1,28 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Based on arch/arm/mm/fault.c
  *
  * Copyright (C) 1995 Linus Torvalds
  * Copyright (C) 1995-2004 Russell King
  * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */

+#include <linux/acpi.h>
+#include <linux/bitfield.h>
 #include <linux/extable.h>
+#include <linux/kfence.h>
 #include <linux/signal.h>
 #include <linux/mm.h>
 #include <linux/hardirq.h>
 #include <linux/init.h>
+#include <linux/kasan.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/page-flags.h>
@@ -33,23 +26,26 @@
 #include <linux/preempt.h>
 #include <linux/hugetlb.h>

+#include <asm/acpi.h>
 #include <asm/bug.h>
 #include <asm/cmpxchg.h>
 #include <asm/cpufeature.h>
 #include <asm/exception.h>
+#include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
-#include <asm/kasan.h>
+#include <asm/kprobes.h>
+#include <asm/mte.h>
+#include <asm/processor.h>
 #include <asm/sysreg.h>
 #include <asm/system_misc.h>
-#include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/traps.h>

-#include <acpi/ghes.h>
+#include <trace/hooks/fault.h>

 struct fault_info {
-	int	(*fn)(unsigned long addr, unsigned int esr,
+	int	(*fn)(unsigned long far, unsigned int esr,
 		      struct pt_regs *regs);
 	int	sig;
 	int	code;
@@ -57,33 +53,17 @@
 };

 static const struct fault_info fault_info[];
+static struct fault_info debug_fault_info[];

 static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
 {
-	return fault_info + (esr & 63);
+	return fault_info + (esr & ESR_ELx_FSC);
 }

-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
+static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
 {
-	int ret = 0;
-
-	/* kprobe_running() needs smp_processor_id() */
-	if (!user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, esr))
-			ret = 1;
-		preempt_enable();
-	}
-
-	return ret;
+	return debug_fault_info + DBG_ESR_EVT(esr);
 }
-#else
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
-{
-	return 0;
-}
-#endif

 static void data_abort_decode(unsigned int esr)
 {
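The helper above indexes fault_info[] with the low six bits of the ESR (the fault status code); the change only replaces the bare 63 mask with the named ESR_ELx_FSC constant. A minimal userspace sketch of that decoding, assuming the documented ESR_ELx layout (EC in bits [31:26], IL in bit 25, FSC in bits [5:0]); the macro names and sample value below are invented for illustration, not the kernel's definitions:

#include <stdio.h>

/* Illustrative masks only; they mirror the kernel's ESR_ELx_* layout. */
#define ESR_EC_SHIFT	26
#define ESR_EC_MASK	0x3fu
#define ESR_IL		(1u << 25)
#define ESR_FSC		0x3fu	/* low six bits index the 64-entry fault_info[] */

int main(void)
{
	unsigned int esr = 0x96000045;	/* hypothetical data-abort ESR */

	printf("EC  = 0x%02x\n", (esr >> ESR_EC_SHIFT) & ESR_EC_MASK);
	printf("IL  = %u bits\n", (esr & ESR_IL) ? 32 : 16);
	printf("FSC = 0x%02x -> fault_info[%u]\n", esr & ESR_FSC, esr & ESR_FSC);
	return 0;
}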
@@ -112,8 +92,8 @@
 	pr_alert("Mem abort info:\n");

 	pr_alert("  ESR = 0x%08x\n", esr);
-	pr_alert("  Exception class = %s, IL = %u bits\n",
-		 esr_get_class_string(esr),
+	pr_alert("  EC = 0x%02lx: %s, IL = %u bits\n",
+		 ESR_ELx_EC(esr), esr_get_class_string(esr),
 		 (esr & ESR_ELx_IL) ? 32 : 16);
 	pr_alert("  SET = %lu, FnV = %lu\n",
 		 (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT,
@@ -126,22 +106,19 @@
 	data_abort_decode(esr);
 }

-static inline bool is_ttbr0_addr(unsigned long addr)
+static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm)
 {
-	/* entry assembly clears tags for TTBR0 addrs */
-	return addr < TASK_SIZE;
-}
+	/* Either init_pg_dir or swapper_pg_dir */
+	if (mm == &init_mm)
+		return __pa_symbol(mm->pgd);

-static inline bool is_ttbr1_addr(unsigned long addr)
-{
-	/* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
-	return arch_kasan_reset_tag(addr) >= VA_START;
+	return (unsigned long)virt_to_phys(mm->pgd);
 }

 /*
  * Dump out the page tables associated with 'addr' in the currently active mm.
  */
-void show_pte(unsigned long addr)
+static void show_pte(unsigned long addr)
 {
 	struct mm_struct *mm;
 	pgd_t *pgdp;
@@ -164,14 +141,15 @@
 		return;
 	}

-	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
+	pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n",
 		 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
-		 VA_BITS, mm->pgd);
+		 vabits_actual, mm_to_pgd_phys(mm));
 	pgdp = pgd_offset(mm, addr);
 	pgd = READ_ONCE(*pgdp);
 	pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));

 	do {
+		p4d_t *p4dp, p4d;
 		pud_t *pudp, pud;
 		pmd_t *pmdp, pmd;
 		pte_t *ptep, pte;
@@ -179,7 +157,13 @@
 		if (pgd_none(pgd) || pgd_bad(pgd))
 			break;

-		pudp = pud_offset(pgdp, addr);
+		p4dp = p4d_offset(pgdp, addr);
+		p4d = READ_ONCE(*p4dp);
+		pr_cont(", p4d=%016llx", p4d_val(p4d));
+		if (p4d_none(p4d) || p4d_bad(p4d))
+			break;
+
+		pudp = pud_offset(p4dp, addr);
 		pud = READ_ONCE(*pudp);
 		pr_cont(", pud=%016llx", pud_val(pud));
 		if (pud_none(pud) || pud_bad(pud))
@@ -239,7 +223,9 @@
 		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
 	} while (pteval != old_pteval);

-	flush_tlb_fix_spurious_fault(vma, address);
+	/* Invalidate a stale read-only entry */
+	if (dirty)
+		flush_tlb_page(vma, address);
 	return 1;
 }

@@ -248,9 +234,8 @@
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
 }

-static inline bool is_el1_permission_fault(unsigned int esr,
-					   struct pt_regs *regs,
-					   unsigned long addr)
+static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
+					   struct pt_regs *regs)
 {
 	unsigned int ec = ESR_ELx_EC(esr);
 	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
@@ -268,6 +253,38 @@
 	return false;
 }

+static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
+							 unsigned int esr,
+							 struct pt_regs *regs)
+{
+	unsigned long flags;
+	u64 par, dfsc;
+
+	if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR ||
+	    (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT)
+		return false;
+
+	local_irq_save(flags);
+	asm volatile("at s1e1r, %0" :: "r" (addr));
+	isb();
+	par = read_sysreg_par();
+	local_irq_restore(flags);
+
+	/*
+	 * If we now have a valid translation, treat the translation fault as
+	 * spurious.
+	 */
+	if (!(par & SYS_PAR_EL1_F))
+		return true;
+
+	/*
+	 * If we got a different type of fault from the AT instruction,
+	 * treat the translation fault as spurious.
+	 */
+	dfsc = FIELD_GET(SYS_PAR_EL1_FST, par);
+	return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT;
+}
+
 static void die_kernel_fault(const char *msg, unsigned long addr,
 			     unsigned int esr, struct pt_regs *regs)
 {
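The new is_spurious_el1_translation_fault() helper re-walks the faulting address with AT S1E1R and then inspects PAR_EL1: if the F bit is clear the translation now succeeds, and if F is set but the walk failed for something other than a translation fault, the original abort is also treated as stale. A standalone sketch of that decision, assuming the architectural PAR_EL1 layout (F in bit 0, FST in bits [6:1]); the constants and sample values below are illustrative, not the kernel's definitions:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAR_F		(1ull << 0)	/* translation failed */
#define PAR_FST_SHIFT	1		/* fault status code, bits [6:1] */
#define PAR_FST_MASK	0x3full
#define FSC_TYPE_MASK	0x3cu
#define FSC_FAULT	0x04u		/* translation fault */

static bool fault_is_spurious(uint64_t par)
{
	unsigned int dfsc;

	/* The AT walk succeeded: the original translation fault was stale. */
	if (!(par & PAR_F))
		return true;

	/* The AT walk failed for a different reason: also treat as spurious. */
	dfsc = (par >> PAR_FST_SHIFT) & PAR_FST_MASK;
	return (dfsc & FSC_TYPE_MASK) != FSC_FAULT;
}

int main(void)
{
	printf("%d\n", fault_is_spurious(0x0));			/* valid -> spurious */
	printf("%d\n", fault_is_spurious(0x1 | (0x07 << 1)));	/* level-3 translation fault -> genuine */
	return 0;
}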
@@ -276,12 +293,77 @@
 	pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg,
 		 addr);

+	trace_android_rvh_die_kernel_fault(regs, esr, addr, msg);
 	mem_abort_decode(esr);

 	show_pte(addr);
 	die("Oops", regs, esr);
 	bust_spinlocks(0);
-	do_exit(SIGKILL);
+	make_task_dead(SIGKILL);
+}
+
+#ifdef CONFIG_KASAN_HW_TAGS
+static void report_tag_fault(unsigned long addr, unsigned int esr,
+			     struct pt_regs *regs)
+{
+	static bool reported;
+	bool is_write;
+
+	if (READ_ONCE(reported))
+		return;
+
+	/*
+	 * This is used for KASAN tests and assumes that no MTE faults
+	 * happened before running the tests.
+	 */
+	if (mte_report_once())
+		WRITE_ONCE(reported, true);
+
+	/*
+	 * SAS bits aren't set for all faults reported in EL1, so we can't
+	 * find out access size.
+	 */
+	is_write = !!(esr & ESR_ELx_WNR);
+	kasan_report(addr, 0, is_write, regs->pc);
+}
+#else
+/* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */
+static inline void report_tag_fault(unsigned long addr, unsigned int esr,
+				    struct pt_regs *regs) { }
+#endif
+
+static void do_tag_recovery(unsigned long addr, unsigned int esr,
+			   struct pt_regs *regs)
+{
+
+	report_tag_fault(addr, esr, regs);
+
+	/*
+	 * Disable MTE Tag Checking on the local CPU for the current EL.
+	 * It will be done lazily on the other CPUs when they will hit a
+	 * tag fault.
+	 */
+	sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_NONE);
+	isb();
+}
+
+static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
+{
+	unsigned int ec = ESR_ELx_EC(esr);
+	unsigned int fsc = esr & ESR_ELx_FSC;
+
+	if (ec != ESR_ELx_EC_DABT_CUR)
+		return false;
+
+	if (fsc == ESR_ELx_FSC_MTE)
+		return true;
+
+	return false;
+}
+
+static bool is_translation_fault(unsigned long esr)
+{
+	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
 }

 static void __do_kernel_fault(unsigned long addr, unsigned int esr,
@@ -296,23 +378,39 @@
 	if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
 		return;

-	if (is_el1_permission_fault(esr, regs, addr)) {
+	if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs),
+			   "Ignoring spurious kernel translation fault at virtual address %016lx\n", addr))
+		return;
+
+	if (is_el1_mte_sync_tag_check_fault(esr)) {
+		do_tag_recovery(addr, esr, regs);
+
+		return;
+	}
+
+	if (is_el1_permission_fault(addr, esr, regs)) {
 		if (esr & ESR_ELx_WNR)
 			msg = "write to read-only memory";
+		else if (is_el1_instruction_abort(esr))
+			msg = "execute from non-executable memory";
 		else
 			msg = "read from unreadable memory";
 	} else if (addr < PAGE_SIZE) {
 		msg = "NULL pointer dereference";
 	} else {
+		if (is_translation_fault(esr) &&
+		    kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
+			return;
+
 		msg = "paging request";
 	}

 	die_kernel_fault(msg, addr, esr, regs);
 }

-static void __do_user_fault(struct siginfo *info, unsigned int esr)
+static void set_thread_esr(unsigned long address, unsigned int esr)
 {
-	current->thread.fault_address = (unsigned long)info->si_addr;
+	current->thread.fault_address = address;

 	/*
 	 * If the faulting address is in the kernel, we must sanitize the ESR.
@@ -365,68 +463,56 @@
 	}

 	current->thread.fault_code = esr;
-	arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current);
 }

-static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static void do_bad_area(unsigned long far, unsigned int esr,
+			struct pt_regs *regs)
 {
+	unsigned long addr = untagged_addr(far);
+
 	/*
 	 * If we are in kernel mode at this point, we have no context to
 	 * handle this fault with.
 	 */
 	if (user_mode(regs)) {
 		const struct fault_info *inf = esr_to_fault_info(esr);
-		struct siginfo si;

-		clear_siginfo(&si);
-		si.si_signo = inf->sig;
-		si.si_code = inf->code;
-		si.si_addr = (void __user *)addr;
-
-		__do_user_fault(&si, esr);
+		set_thread_esr(addr, esr);
+		arm64_force_sig_fault(inf->sig, inf->code, far, inf->name);
 	} else {
 		__do_kernel_fault(addr, esr, regs);
 	}
 }

-#define VM_FAULT_BADMAP		0x010000
-#define VM_FAULT_BADACCESS	0x020000
+#define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
+#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)

-static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
-			   unsigned int mm_flags, unsigned long vm_flags,
-			   struct task_struct *tsk)
+static int __do_page_fault(struct vm_area_struct *vma, unsigned long addr,
+			   unsigned int mm_flags, unsigned long vm_flags,
+			   struct pt_regs *regs)
 {
-	struct vm_area_struct *vma;
-	vm_fault_t fault;

-	vma = find_vma(mm, addr);
-	fault = VM_FAULT_BADMAP;
 	if (unlikely(!vma))
-		goto out;
-	if (unlikely(vma->vm_start > addr))
-		goto check_stack;
+		return VM_FAULT_BADMAP;

 	/*
 	 * Ok, we have a good vm_area for this memory access, so we can handle
 	 * it.
 	 */
-good_area:
+	if (unlikely(vma->vm_start > addr)) {
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			return VM_FAULT_BADMAP;
+		if (expand_stack(vma, addr))
+			return VM_FAULT_BADMAP;
+	}
+
 	/*
 	 * Check that the permissions on the VMA allow for the fault which
 	 * occurred.
 	 */
-	if (!(vma->vm_flags & vm_flags)) {
-		fault = VM_FAULT_BADACCESS;
-		goto out;
-	}
-
-	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
-
-check_stack:
-	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
-		goto good_area;
-out:
-	return fault;
+	if (!(vma->vm_flags & vm_flags))
+		return VM_FAULT_BADACCESS;
+	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs);
 }

 static bool is_el0_instruction_abort(unsigned int esr)
@@ -434,21 +520,28 @@
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
 }

-static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
+/*
+ * Note: not valid for EL1 DC IVAC, but we never use that such that it
+ * should fault. EL0 cannot issue DC IVAC (undef).
+ */
+static bool is_write_abort(unsigned int esr)
+{
+	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
+}
+
+static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
 				   struct pt_regs *regs)
 {
-	struct task_struct *tsk;
-	struct mm_struct *mm;
-	struct siginfo si;
-	vm_fault_t fault, major = 0;
-	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
-	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	const struct fault_info *inf;
+	struct mm_struct *mm = current->mm;
+	vm_fault_t fault;
+	unsigned long vm_flags = VM_ACCESS_FLAGS;
+	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
+	struct vm_area_struct *vma = NULL;
+	unsigned long addr = untagged_addr(far);

-	if (notify_page_fault(regs, esr))
+	if (kprobe_page_fault(regs, esr))
 		return 0;
-
-	tsk = current;
-	mm = tsk->mm;

 	/*
 	 * If we're in an interrupt or have no user context, we must not take
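do_page_fault() now takes the raw FAR, strips the pointer tag with untagged_addr() before using it as a lookup address, and classifies write aborts with the new is_write_abort() helper, which ignores cache-maintenance data aborts even though they report WnR. A minimal userspace sketch of that classification, assuming WnR at ISS bit 6 and CM at bit 8 (mirroring the ESR_ELx layout); the macro names and sample values are invented:

#include <stdbool.h>
#include <stdio.h>

#define ESR_WNR	(1u << 6)	/* write, not read */
#define ESR_CM	(1u << 8)	/* cache maintenance operation */

static bool is_write_abort(unsigned int esr)
{
	/* DC ZVA and friends set WnR but must not be treated as writes. */
	return (esr & ESR_WNR) && !(esr & ESR_CM);
}

int main(void)
{
	printf("plain store:       %d\n", is_write_abort(ESR_WNR));
	printf("cache maintenance: %d\n", is_write_abort(ESR_WNR | ESR_CM));
	return 0;
}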
@@ -462,12 +555,13 @@

 	if (is_el0_instruction_abort(esr)) {
 		vm_flags = VM_EXEC;
-	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
+		mm_flags |= FAULT_FLAG_INSTRUCTION;
+	} else if (is_write_abort(esr)) {
 		vm_flags = VM_WRITE;
 		mm_flags |= FAULT_FLAG_WRITE;
 	}

-	if (is_ttbr0_addr(addr) && is_el1_permission_fault(esr, regs, addr)) {
+	if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
 		/* regs->orig_addr_limit may be 0 if we entered from EL0 */
 		if (regs->orig_addr_limit == KERNEL_DS)
 			die_kernel_fault("access to user memory with fs=KERNEL_DS",
@@ -485,15 +579,23 @@
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

 	/*
+	 * let's try a speculative page fault without grabbing the
+	 * mmap_sem.
+	 */
+	fault = handle_speculative_fault(mm, addr, mm_flags, &vma, regs);
+	if (fault != VM_FAULT_RETRY)
+		goto done;
+
+	/*
 	 * As per x86, we may deadlock here. However, since the kernel only
 	 * validly references user space from well defined areas of the code,
 	 * we can bug out early if this is from code which shouldn't.
 	 */
-	if (!down_read_trylock(&mm->mmap_sem)) {
+	if (!mmap_read_trylock(mm)) {
 		if (!user_mode(regs) && !search_exception_tables(regs->pc))
 			goto no_context;
 retry:
-		down_read(&mm->mmap_sem);
+		mmap_read_lock(mm);
 	} else {
 		/*
 		 * The above down_read_trylock() might have succeeded in which
@@ -501,62 +603,47 @@
 		 */
 		might_sleep();
 #ifdef CONFIG_DEBUG_VM
-		if (!user_mode(regs) && !search_exception_tables(regs->pc))
+		if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
+			mmap_read_unlock(mm);
 			goto no_context;
+		}
 #endif
 	}

-	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
-	major |= fault & VM_FAULT_MAJOR;
+	if (!vma || !can_reuse_spf_vma(vma, addr))
+		vma = find_vma(mm, addr);
+	fault = __do_page_fault(vma, addr, mm_flags, vm_flags, regs);
+
+	/* Quick path to respond to signals */
+	if (fault_signal_pending(fault, regs)) {
+		if (!user_mode(regs))
+			goto no_context;
+		return 0;
+	}

 	if (fault & VM_FAULT_RETRY) {
-		/*
-		 * If we need to retry but a fatal signal is pending,
-		 * handle the signal first. We do not need to release
-		 * the mmap_sem because it would already be released
-		 * in __lock_page_or_retry in mm/filemap.c.
-		 */
-		if (fatal_signal_pending(current)) {
-			if (!user_mode(regs))
-				goto no_context;
-			return 0;
-		}
-
-		/*
-		 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
-		 * starvation.
-		 */
 		if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
-			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
 			mm_flags |= FAULT_FLAG_TRIED;
+
+			/*
+			 * Do not try to reuse this vma and fetch it
+			 * again since we will release the mmap_sem.
+			 */
+			vma = NULL;
+
 			goto retry;
 		}
 	}
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
+
+done:

 	/*
 	 * Handle the "normal" (no error) case first.
 	 */
 	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
-			      VM_FAULT_BADACCESS)))) {
-		/*
-		 * Major/minor page fault accounting is only done
-		 * once. If we go through a retry, it is extremely
-		 * likely that the page will be found in page cache at
-		 * that point.
-		 */
-		if (major) {
-			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
-				      addr);
-		} else {
-			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
-				      addr);
-		}
-
+			      VM_FAULT_BADACCESS))))
 		return 0;
-	}

 	/*
 	 * If we are in kernel mode at this point, we have no context to
@@ -575,37 +662,32 @@
 		return 0;
 	}

-	clear_siginfo(&si);
-	si.si_addr = (void __user *)addr;
-
+	inf = esr_to_fault_info(esr);
+	set_thread_esr(addr, esr);
 	if (fault & VM_FAULT_SIGBUS) {
 		/*
 		 * We had some memory, but were unable to successfully fix up
 		 * this page fault.
 		 */
-		si.si_signo = SIGBUS;
-		si.si_code = BUS_ADRERR;
-	} else if (fault & VM_FAULT_HWPOISON_LARGE) {
-		unsigned int hindex = VM_FAULT_GET_HINDEX(fault);
+		arm64_force_sig_fault(SIGBUS, BUS_ADRERR, far, inf->name);
+	} else if (fault & (VM_FAULT_HWPOISON_LARGE | VM_FAULT_HWPOISON)) {
+		unsigned int lsb;

-		si.si_signo = SIGBUS;
-		si.si_code = BUS_MCEERR_AR;
-		si.si_addr_lsb = hstate_index_to_shift(hindex);
-	} else if (fault & VM_FAULT_HWPOISON) {
-		si.si_signo = SIGBUS;
-		si.si_code = BUS_MCEERR_AR;
-		si.si_addr_lsb = PAGE_SHIFT;
+		lsb = PAGE_SHIFT;
+		if (fault & VM_FAULT_HWPOISON_LARGE)
+			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+
+		arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
 	} else {
 		/*
 		 * Something tried to access memory that isn't in our memory
 		 * map.
 		 */
-		si.si_signo = SIGSEGV;
-		si.si_code = fault == VM_FAULT_BADACCESS ?
-			SEGV_ACCERR : SEGV_MAPERR;
+		arm64_force_sig_fault(SIGSEGV,
+				      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
+				      far, inf->name);
 	}

-	__do_user_fault(&si, esr);
 	return 0;

 no_context:
@@ -613,81 +695,84 @@
 	return 0;
 }

-int __weak do_tlb_conf_fault(unsigned long addr,
-				unsigned int esr,
-				struct pt_regs *regs)
-{
-	return 1; /* do_bad default */
-}
-
-int (*do_tlb_conf_fault_cb)(unsigned long addr,
-				unsigned int esr,
-				struct pt_regs *regs)
-				= do_tlb_conf_fault; /* initialization saves us a branch */
-EXPORT_SYMBOL_GPL(do_tlb_conf_fault_cb);
-
-static int _do_tlb_conf_fault(unsigned long addr,
-				unsigned int esr,
-				struct pt_regs *regs)
-{
-	return (*do_tlb_conf_fault_cb)(addr, esr, regs);
-}
-
-static int __kprobes do_translation_fault(unsigned long addr,
+static int __kprobes do_translation_fault(unsigned long far,
 					  unsigned int esr,
 					  struct pt_regs *regs)
 {
-	if (is_ttbr0_addr(addr))
-		return do_page_fault(addr, esr, regs);
+	unsigned long addr = untagged_addr(far);

-	do_bad_area(addr, esr, regs);
+	if (is_ttbr0_addr(addr))
+		return do_page_fault(far, esr, regs);
+
+	do_bad_area(far, esr, regs);
 	return 0;
 }

-static int do_alignment_fault(unsigned long addr, unsigned int esr,
+#ifdef CONFIG_ROCKCHIP_ARM64_ALIGN_FAULT_FIX
+extern int alignment_fixup_helper(unsigned long addr, unsigned int esr,
+				  struct pt_regs *regs);
+#endif
+static int do_alignment_fault(unsigned long far, unsigned int esr,
 			      struct pt_regs *regs)
 {
-	do_bad_area(addr, esr, regs);
+#ifdef CONFIG_ROCKCHIP_ARM64_ALIGN_FAULT_FIX
+	if (!alignment_fixup_helper(far, esr, regs))
+		return 0;
+#endif
+	do_bad_area(far, esr, regs);
 	return 0;
 }

-static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_bad(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
-	return 1; /* "fault" */
+	unsigned long addr = untagged_addr(far);
+	int ret = 1;
+
+	trace_android_vh_handle_tlb_conf(addr, esr, &ret);
+	return ret;
 }

-static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+static int do_sea(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
-	struct siginfo info;
 	const struct fault_info *inf;
+	unsigned long siaddr;

 	inf = esr_to_fault_info(esr);

-	/*
-	 * Synchronous aborts may interrupt code which had interrupts masked.
-	 * Before calling out into the wider kernel tell the interested
-	 * subsystems.
-	 */
-	if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
-		if (interrupts_enabled(regs))
-			nmi_enter();
-
-		ghes_notify_sea();
-
-		if (interrupts_enabled(regs))
-			nmi_exit();
+	if (user_mode(regs) && apei_claim_sea(regs) == 0) {
+		/*
+		 * APEI claimed this as a firmware-first notification.
+		 * Some processing deferred to task_work before ret_to_user().
+		 */
+		return 0;
 	}

-	clear_siginfo(&info);
-	info.si_signo = inf->sig;
-	info.si_errno = 0;
-	info.si_code = inf->code;
-	if (esr & ESR_ELx_FnV)
-		info.si_addr = NULL;
-	else
-		info.si_addr = (void __user *)addr;
-	arm64_notify_die(inf->name, regs, &info, esr);
+	if (esr & ESR_ELx_FnV) {
+		siaddr = 0;
+	} else {
+		/*
+		 * The architecture specifies that the tag bits of FAR_EL1 are
+		 * UNKNOWN for synchronous external aborts. Mask them out now
+		 * so that userspace doesn't see them.
+		 */
+		siaddr = untagged_addr(far);
+	}
+	trace_android_rvh_do_sea(regs, esr, siaddr, inf->name);
+	arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);

+	return 0;
+}
+
+static int do_tag_check_fault(unsigned long far, unsigned int esr,
+			      struct pt_regs *regs)
+{
+	/*
+	 * The architecture specifies that bits 63:60 of FAR_EL1 are UNKNOWN
+	 * for tag check faults. Set them to corresponding bits in the untagged
+	 * address.
+	 */
+	far = (__untagged_addr(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK);
+	do_bad_area(far, esr, regs);
 	return 0;
 }

@@ -709,7 +794,7 @@
 	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
 	{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
 	{ do_sea, SIGBUS, BUS_OBJERR, "synchronous external abort" },
-	{ do_bad, SIGKILL, SI_KERNEL, "unknown 17" },
+	{ do_tag_check_fault, SIGSEGV, SEGV_MTESERR, "synchronous tag check fault" },
 	{ do_bad, SIGKILL, SI_KERNEL, "unknown 18" },
 	{ do_bad, SIGKILL, SI_KERNEL, "unknown 19" },
 	{ do_sea, SIGKILL, SI_KERNEL, "level 0 (translation table walk)" },
@@ -740,7 +825,7 @@
 	{ do_bad, SIGKILL, SI_KERNEL, "unknown 45" },
 	{ do_bad, SIGKILL, SI_KERNEL, "unknown 46" },
 	{ do_bad, SIGKILL, SI_KERNEL, "unknown 47" },
-	{ _do_tlb_conf_fault, SIGKILL, SI_KERNEL, "TLB conflict abort" },
+	{ do_bad, SIGKILL, SI_KERNEL, "TLB conflict abort" },
 	{ do_bad, SIGKILL, SI_KERNEL, "Unsupported atomic hardware update fault" },
 	{ do_bad, SIGKILL, SI_KERNEL, "unknown 50" },
 	{ do_bad, SIGKILL, SI_KERNEL, "unknown 51" },
@@ -758,76 +843,45 @@
 	{ do_bad, SIGKILL, SI_KERNEL, "unknown 63" },
 };

-int handle_guest_sea(phys_addr_t addr, unsigned int esr)
-{
-	return ghes_notify_sea();
-}
-
-asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
-					 struct pt_regs *regs)
+void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
 {
 	const struct fault_info *inf = esr_to_fault_info(esr);
-	struct siginfo info;
+	unsigned long addr = untagged_addr(far);

-	if (!inf->fn(addr, esr, regs))
+	if (!inf->fn(far, esr, regs))
 		return;

 	if (!user_mode(regs)) {
 		pr_alert("Unhandled fault at 0x%016lx\n", addr);
+		trace_android_rvh_do_mem_abort(regs, esr, addr, inf->name);
 		mem_abort_decode(esr);
 		show_pte(addr);
 	}

-	clear_siginfo(&info);
-	info.si_signo = inf->sig;
-	info.si_errno = 0;
-	info.si_code = inf->code;
-	info.si_addr = (void __user *)addr;
-	arm64_notify_die(inf->name, regs, &info, esr);
+	/*
+	 * At this point we have an unrecognized fault type whose tag bits may
+	 * have been defined as UNKNOWN. Therefore we only expose the untagged
+	 * address to the signal handler.
+	 */
+	arm64_notify_die(inf->name, regs, inf->sig, inf->code, addr, esr);
 }
+NOKPROBE_SYMBOL(do_mem_abort);

-asmlinkage void __exception do_el0_irq_bp_hardening(void)
+void do_el0_irq_bp_hardening(void)
 {
 	/* PC has already been checked in entry.S */
 	arm64_apply_bp_hardening();
 }
+NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);

-asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
-						   unsigned int esr,
-						   struct pt_regs *regs)
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
-	/*
-	 * We've taken an instruction abort from userspace and not yet
-	 * re-enabled IRQs. If the address is a kernel address, apply
-	 * BP hardening prior to enabling IRQs and pre-emption.
-	 */
-	if (!is_ttbr0_addr(addr))
-		arm64_apply_bp_hardening();
+	trace_android_rvh_do_sp_pc_abort(regs, esr, addr, user_mode(regs));

-	local_irq_enable();
-	do_mem_abort(addr, esr, regs);
+	arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
+			 addr, esr);
 }
-
-
-asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
-					   unsigned int esr,
-					   struct pt_regs *regs)
-{
-	struct siginfo info;
-
-	if (user_mode(regs)) {
-		if (!is_ttbr0_addr(instruction_pointer(regs)))
-			arm64_apply_bp_hardening();
-		local_irq_enable();
-	}
-
-	clear_siginfo(&info);
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRALN;
-	info.si_addr = (void __user *)addr;
-	arm64_notify_die("SP/PC alignment exception", regs, &info, esr);
-}
+NOKPROBE_SYMBOL(do_sp_pc_abort);

 int __init early_brk64(unsigned long addr, unsigned int esr,
 		       struct pt_regs *regs);
@@ -860,11 +914,32 @@
 	debug_fault_info[nr].name = name;
 }

+/*
+ * In debug exception context, we explicitly disable preemption despite
+ * having interrupts disabled.
+ * This serves two purposes: it makes it much less likely that we would
+ * accidentally schedule in exception context and it will force a warning
+ * if we somehow manage to schedule by accident.
+ */
+static void debug_exception_enter(struct pt_regs *regs)
+{
+	preempt_disable();
+
+	/* This code is a bit fragile. Test it. */
+	RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
+}
+NOKPROBE_SYMBOL(debug_exception_enter);
+
+static void debug_exception_exit(struct pt_regs *regs)
+{
+	preempt_enable_no_resched();
+}
+NOKPROBE_SYMBOL(debug_exception_exit);
+
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);

-static int __exception
-cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 {
 	if (user_mode(regs))
 		return 0;
@@ -883,65 +958,57 @@
 	return 1;
 }
 #else
-static int __exception
-cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 {
 	return 0;
 }
 #endif /* CONFIG_ARM64_ERRATUM_1463225 */
+NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler);

-asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint,
-					      unsigned int esr,
-					      struct pt_regs *regs)
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+			struct pt_regs *regs)
 {
-	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
+	const struct fault_info *inf = esr_to_debug_fault_info(esr);
 	unsigned long pc = instruction_pointer(regs);
-	int rv;

 	if (cortex_a76_erratum_1463225_debug_handler(regs))
-		return 0;
+		return;

-	/*
-	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
-	 * already disabled to preserve the last enabled/disabled addresses.
-	 */
-	if (interrupts_enabled(regs))
-		trace_hardirqs_off();
+	debug_exception_enter(regs);

 	if (user_mode(regs) && !is_ttbr0_addr(pc))
 		arm64_apply_bp_hardening();

-	if (!inf->fn(addr_if_watchpoint, esr, regs)) {
-		rv = 1;
-	} else {
-		struct siginfo info;
-
-		clear_siginfo(&info);
-		info.si_signo = inf->sig;
-		info.si_errno = 0;
-		info.si_code = inf->code;
-		info.si_addr = (void __user *)pc;
-		arm64_notify_die(inf->name, regs, &info, esr);
-		rv = 0;
+	if (inf->fn(addr_if_watchpoint, esr, regs)) {
+		arm64_notify_die(inf->name, regs, inf->sig, inf->code, pc, esr);
 	}

-	if (interrupts_enabled(regs))
-		trace_hardirqs_on();
-
-	return rv;
+	debug_exception_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug_exception);

-#ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
+/*
+ * Used during anonymous page fault handling.
+ */
+struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+						unsigned long vaddr)
 {
-	/*
-	 * We modify PSTATE. This won't work from irq context as the PSTATE
-	 * is discarded once we return from the exception.
-	 */
-	WARN_ON_ONCE(in_interrupt());
+	gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO | __GFP_CMA;

-	sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
-	asm(SET_PSTATE_PAN(1));
+	/*
+	 * If the page is mapped with PROT_MTE, initialise the tags at the
+	 * point of allocation and page zeroing as this is usually faster than
+	 * separate DC ZVA and STGM.
+	 */
+	if (vma->vm_flags & VM_MTE)
+		flags |= __GFP_ZEROTAGS;
+
+	return alloc_page_vma(flags, vma, vaddr);
 }
-#endif /* CONFIG_ARM64_PAN */
+
+void tag_clear_highpage(struct page *page)
+{
+	mte_zero_clear_page_tags(page_address(page));
+	page_kasan_tag_reset(page);
+	set_bit(PG_mte_tagged, &page->flags);
+}
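For tag check faults, the do_tag_check_fault() handler added above rebuilds FAR bits 63:60 (architecturally UNKNOWN) from the untagged address while preserving the MTE logical tag in bits 59:56. A small userspace sketch of that recombination, assuming those bit positions match the kernel's MTE_TAG_MASK and a sign-extend-from-bit-55 untagging rule; the helper name and sample value are invented:

#include <stdint.h>
#include <stdio.h>

#define MTE_TAG_MASK	(0xfull << 56)	/* logical tag, bits 59:56 */

/* Sign-extend from bit 55, as untagged_addr() does for user VAs. */
static uint64_t untag(uint64_t addr)
{
	return (uint64_t)((int64_t)(addr << 8) >> 8);
}

int main(void)
{
	/* hypothetical FAR: junk 0x5 in bits 63:60, tag 0xb in bits 59:56 */
	uint64_t far = 0x5b00ffff00001000ull;
	uint64_t fixed = (untag(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK);

	printf("reported FAR = 0x%016llx\n", (unsigned long long)far);
	printf("rebuilt  FAR = 0x%016llx\n", (unsigned long long)fixed);
	return 0;
}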