2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/arch/riscv/mm/fault.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
  * Lennox Wu <lennox.wu@sunplusct.com>
  * Chen Liqin <liqin.chen@sunplusct.com>
  * Copyright (C) 2012 Regents of the University of California
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see the file COPYING, or write
- * to the Free Software Foundation, Inc.,
 */
 
 
@@ -27,166 +14,13 @@
 #include <linux/signal.h>
 #include <linux/uaccess.h>
 
-#include <asm/pgalloc.h>
 #include <asm/ptrace.h>
 #include <asm/tlbflush.h>
 
-/*
- * This routine handles page faults. It determines the address and the
- * problem, and then passes it off to one of the appropriate routines.
- */
-asmlinkage void do_page_fault(struct pt_regs *regs)
+#include "../kernel/head.h"
+
+static inline void no_context(struct pt_regs *regs, unsigned long addr)
 {
-	struct task_struct *tsk;
-	struct vm_area_struct *vma;
-	struct mm_struct *mm;
-	unsigned long addr, cause;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
-	int code = SEGV_MAPERR;
-	vm_fault_t fault;
-
-	cause = regs->scause;
-	addr = regs->sbadaddr;
-
-	tsk = current;
-	mm = tsk->mm;
-
-	/*
-	 * Fault-in kernel-space virtual memory on-demand.
-	 * The 'reference' page table is init_mm.pgd.
-	 *
-	 * NOTE! We MUST NOT take any locks for this case. We may
-	 * be in an interrupt or a critical region, and should
-	 * only copy the information from the master page table,
-	 * nothing more.
-	 */
-	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
-		goto vmalloc_fault;
-
-	/* Enable interrupts if they were enabled in the parent context. */
-	if (likely(regs->sstatus & SR_SPIE))
-		local_irq_enable();
-
-	/*
-	 * If we're in an interrupt, have no user context, or are running
-	 * in an atomic region, then we must not take the fault.
-	 */
-	if (unlikely(faulthandler_disabled() || !mm))
-		goto no_context;
-
-	if (user_mode(regs))
-		flags |= FAULT_FLAG_USER;
-
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
-
-retry:
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, addr);
-	if (unlikely(!vma))
-		goto bad_area;
-	if (likely(vma->vm_start <= addr))
-		goto good_area;
-	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
-		goto bad_area;
-	if (unlikely(expand_stack(vma, addr)))
-		goto bad_area;
-
-	/*
-	 * Ok, we have a good vm_area for this memory access, so
-	 * we can handle it.
-	 */
-good_area:
-	code = SEGV_ACCERR;
-
-	switch (cause) {
-	case EXC_INST_PAGE_FAULT:
-		if (!(vma->vm_flags & VM_EXEC))
-			goto bad_area;
-		break;
-	case EXC_LOAD_PAGE_FAULT:
-		if (!(vma->vm_flags & VM_READ))
-			goto bad_area;
-		break;
-	case EXC_STORE_PAGE_FAULT:
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-		flags |= FAULT_FLAG_WRITE;
-		break;
-	default:
-		panic("%s: unhandled cause %lu", __func__, cause);
-	}
-
-	/*
-	 * If for any reason at all we could not handle the fault,
-	 * make sure we exit gracefully rather than endlessly redo
-	 * the fault.
-	 */
-	fault = handle_mm_fault(vma, addr, flags);
-
-	/*
-	 * If we need to retry but a fatal signal is pending, handle the
-	 * signal first. We do not need to release the mmap_sem because it
-	 * would already be released in __lock_page_or_retry in mm/filemap.c.
-	 */
-	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk))
-		return;
-
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		if (fault & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (fault & VM_FAULT_SIGBUS)
-			goto do_sigbus;
-		BUG();
-	}
-
-	/*
-	 * Major/minor page fault accounting is only done on the
-	 * initial attempt. If we go through a retry, it is extremely
-	 * likely that the page will be found in page cache at that point.
-	 */
-	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR) {
-			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
-				      1, regs, addr);
-		} else {
-			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
-				      1, regs, addr);
-		}
-		if (fault & VM_FAULT_RETRY) {
-			/*
-			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
-			 * of starvation.
-			 */
-			flags &= ~(FAULT_FLAG_ALLOW_RETRY);
-			flags |= FAULT_FLAG_TRIED;
-
-			/*
-			 * No need to up_read(&mm->mmap_sem) as we would
-			 * have already released it in __lock_page_or_retry
-			 * in mm/filemap.c.
-			 */
-			goto retry;
-		}
-	}
-
-	up_read(&mm->mmap_sem);
-	return;
-
-	/*
-	 * Something tried to access memory that isn't in our memory map.
-	 * Fix it, but check if it's kernel or user first.
-	 */
-bad_area:
-	up_read(&mm->mmap_sem);
-	/* User mode accesses just cause a SIGSEGV */
-	if (user_mode(regs)) {
-		do_trap(regs, SIGSEGV, code, addr, tsk);
-		return;
-	}
-
-no_context:
 	/* Are we prepared to handle this kernel fault? */
 	if (fixup_exception(regs))
 		return;
@@ -200,100 +34,272 @@
 		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
 		"paging request", addr);
 	die(regs, "Oops");
-	do_exit(SIGKILL);
+	make_task_dead(SIGKILL);
+}
 
-	/*
-	 * We ran out of memory, call the OOM killer, and return the userspace
-	 * (which will retry the fault, or kill us if we got oom-killed).
-	 */
-out_of_memory:
-	up_read(&mm->mmap_sem);
-	if (!user_mode(regs))
-		goto no_context;
-	pagefault_out_of_memory();
-	return;
-
-do_sigbus:
-	up_read(&mm->mmap_sem);
-	/* Kernel mode? Handle exceptions or die */
-	if (!user_mode(regs))
-		goto no_context;
-	do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk);
-	return;
-
-vmalloc_fault:
-	{
-		pgd_t *pgd, *pgd_k;
-		pud_t *pud, *pud_k;
-		p4d_t *p4d, *p4d_k;
-		pmd_t *pmd, *pmd_k;
-		pte_t *pte_k;
-		int index;
-
-		if (user_mode(regs))
-			goto bad_area;
-
+static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+{
+	if (fault & VM_FAULT_OOM) {
 		/*
-		 * Synchronize this task's top level page-table
-		 * with the 'reference' page table.
-		 *
-		 * Do _not_ use "tsk->active_mm->pgd" here.
-		 * We might be inside an interrupt in the middle
-		 * of a task switch.
-		 *
-		 * Note: Use the old spbtr name instead of using the current
-		 * satp name to support binutils 2.29 which doesn't know about
-		 * the privileged ISA 1.10 yet.
+		 * We ran out of memory, call the OOM killer, and return the userspace
+		 * (which will retry the fault, or kill us if we got oom-killed).
 		 */
-		index = pgd_index(addr);
-		pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index;
-		pgd_k = init_mm.pgd + index;
-
-		if (!pgd_present(*pgd_k))
-			goto no_context;
-		set_pgd(pgd, *pgd_k);
-
-		p4d = p4d_offset(pgd, addr);
-		p4d_k = p4d_offset(pgd_k, addr);
-		if (!p4d_present(*p4d_k))
-			goto no_context;
-
-		pud = pud_offset(p4d, addr);
-		pud_k = pud_offset(p4d_k, addr);
-		if (!pud_present(*pud_k))
-			goto no_context;
-
-		/*
-		 * Since the vmalloc area is global, it is unnecessary
-		 * to copy individual PTEs
-		 */
-		pmd = pmd_offset(pud, addr);
-		pmd_k = pmd_offset(pud_k, addr);
-		if (!pmd_present(*pmd_k))
-			goto no_context;
-		set_pmd(pmd, *pmd_k);
-
-		/*
-		 * Make sure the actual PTE exists as well to
-		 * catch kernel vmalloc-area accesses to non-mapped
-		 * addresses. If we don't do this, this will just
-		 * silently loop forever.
-		 */
-		pte_k = pte_offset_kernel(pmd_k, addr);
-		if (!pte_present(*pte_k))
-			goto no_context;
-
-		/*
-		 * The kernel assumes that TLBs don't cache invalid
-		 * entries, but in RISC-V, SFENCE.VMA specifies an
-		 * ordering constraint, not a cache flush; it is
-		 * necessary even after writing invalid entries.
-		 * Relying on flush_tlb_fix_spurious_fault would
-		 * suffice, but the extra traps reduce
-		 * performance. So, eagerly SFENCE.VMA.
-		 */
-		local_flush_tlb_page(addr);
-
+		if (!user_mode(regs)) {
+			no_context(regs, addr);
+			return;
+		}
+		pagefault_out_of_memory();
+		return;
+	} else if (fault & VM_FAULT_SIGBUS) {
+		/* Kernel mode? Handle exceptions or die */
+		if (!user_mode(regs)) {
+			no_context(regs, addr);
+			return;
+		}
+		do_trap(regs, SIGBUS, BUS_ADRERR, addr);
 		return;
 	}
+	BUG();
+}
+
+static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+{
+	/*
+	 * Something tried to access memory that isn't in our memory map.
+	 * Fix it, but check if it's kernel or user first.
+	 */
+	mmap_read_unlock(mm);
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs)) {
+		do_trap(regs, SIGSEGV, code, addr);
+		return;
+	}
+
+	no_context(regs, addr);
+}
+
+static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
+{
+	pgd_t *pgd, *pgd_k;
+	pud_t *pud, *pud_k;
+	p4d_t *p4d, *p4d_k;
+	pmd_t *pmd, *pmd_k;
+	pte_t *pte_k;
+	int index;
+	unsigned long pfn;
+
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs))
+		return do_trap(regs, SIGSEGV, code, addr);
+
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "tsk->active_mm->pgd" here.
+	 * We might be inside an interrupt in the middle
+	 * of a task switch.
+	 */
+	index = pgd_index(addr);
+	pfn = csr_read(CSR_SATP) & SATP_PPN;
+	pgd = (pgd_t *)pfn_to_virt(pfn) + index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd_k)) {
+		no_context(regs, addr);
+		return;
+	}
+	set_pgd(pgd, *pgd_k);
+
+	p4d = p4d_offset(pgd, addr);
+	p4d_k = p4d_offset(pgd_k, addr);
+	if (!p4d_present(*p4d_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	pud = pud_offset(p4d, addr);
+	pud_k = pud_offset(p4d_k, addr);
+	if (!pud_present(*pud_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	/*
+	 * Since the vmalloc area is global, it is unnecessary
+	 * to copy individual PTEs
+	 */
+	pmd = pmd_offset(pud, addr);
+	pmd_k = pmd_offset(pud_k, addr);
+	if (!pmd_present(*pmd_k)) {
+		no_context(regs, addr);
+		return;
+	}
+	set_pmd(pmd, *pmd_k);
+
+	/*
+	 * Make sure the actual PTE exists as well to
+	 * catch kernel vmalloc-area accesses to non-mapped
+	 * addresses. If we don't do this, this will just
+	 * silently loop forever.
+	 */
+	pte_k = pte_offset_kernel(pmd_k, addr);
+	if (!pte_present(*pte_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	/*
+	 * The kernel assumes that TLBs don't cache invalid
+	 * entries, but in RISC-V, SFENCE.VMA specifies an
+	 * ordering constraint, not a cache flush; it is
+	 * necessary even after writing invalid entries.
+	 */
+	local_flush_tlb_page(addr);
+}
+
+static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
+{
+	switch (cause) {
+	case EXC_INST_PAGE_FAULT:
+		if (!(vma->vm_flags & VM_EXEC)) {
+			return true;
+		}
+		break;
+	case EXC_LOAD_PAGE_FAULT:
+		/* Write implies read */
+		if (!(vma->vm_flags & (VM_READ | VM_WRITE))) {
+			return true;
+		}
+		break;
+	case EXC_STORE_PAGE_FAULT:
+		if (!(vma->vm_flags & VM_WRITE)) {
+			return true;
+		}
+		break;
+	default:
+		panic("%s: unhandled cause %lu", __func__, cause);
+	}
+	return false;
+}
+
+/*
+ * This routine handles page faults. It determines the address and the
+ * problem, and then passes it off to one of the appropriate routines.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs)
+{
+	struct task_struct *tsk;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	unsigned long addr, cause;
+	unsigned int flags = FAULT_FLAG_DEFAULT;
+	int code = SEGV_MAPERR;
+	vm_fault_t fault;
+
+	cause = regs->cause;
+	addr = regs->badaddr;
+
+	tsk = current;
+	mm = tsk->mm;
+
+	/*
+	 * Fault-in kernel-space virtual memory on-demand.
+	 * The 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 */
+	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+		vmalloc_fault(regs, code, addr);
+		return;
+	}
+
+	/* Enable interrupts if they were enabled in the parent context. */
+	if (likely(regs->status & SR_PIE))
+		local_irq_enable();
+
+	/*
+	 * If we're in an interrupt, have no user context, or are running
+	 * in an atomic region, then we must not take the fault.
+	 */
+	if (unlikely(faulthandler_disabled() || !mm)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
+	if (cause == EXC_STORE_PAGE_FAULT)
+		flags |= FAULT_FLAG_WRITE;
+	else if (cause == EXC_INST_PAGE_FAULT)
+		flags |= FAULT_FLAG_INSTRUCTION;
+retry:
+	mmap_read_lock(mm);
+	vma = find_vma(mm, addr);
+	if (unlikely(!vma)) {
+		bad_area(regs, mm, code, addr);
+		return;
+	}
+	if (likely(vma->vm_start <= addr))
+		goto good_area;
+	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+		bad_area(regs, mm, code, addr);
+		return;
+	}
+	if (unlikely(expand_stack(vma, addr))) {
+		bad_area(regs, mm, code, addr);
+		return;
+	}
+
+	/*
+	 * Ok, we have a good vm_area for this memory access, so
+	 * we can handle it.
+	 */
+good_area:
+	code = SEGV_ACCERR;
+
+	if (unlikely(access_error(cause, vma))) {
+		bad_area(regs, mm, code, addr);
+		return;
+	}
+
+	/*
+	 * If for any reason at all we could not handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(vma, addr, flags, regs);
+
+	/*
+	 * If we need to retry but a fatal signal is pending, handle the
+	 * signal first. We do not need to release the mmap_lock because it
+	 * would already be released in __lock_page_or_retry in mm/filemap.c.
+	 */
+	if (fault_signal_pending(fault, regs))
+		return;
+
+	if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+		flags |= FAULT_FLAG_TRIED;
+
+		/*
+		 * No need to mmap_read_unlock(mm) as we would
+		 * have already released it in __lock_page_or_retry
+		 * in mm/filemap.c.
+		 */
+		goto retry;
+	}
+
+	mmap_read_unlock(mm);
+
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		mm_fault_error(regs, addr, fault);
+		return;
+	}
+	return;
 }
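
Two helpers that the refactored code leans on are only visible as call sites in the patch. The comment kept in vmalloc_fault() stresses that SFENCE.VMA is an ordering constraint rather than a cache flush, and the retry loop now takes the mmap lock through mmap_read_lock()/mmap_read_unlock() instead of down_read(&mm->mmap_sem). A minimal sketch of what those helpers amount to, assuming the definitions in arch/riscv/include/asm/tlbflush.h and include/linux/mmap_lock.h around this kernel version (the exact bodies vary across releases):

/* Sketch for reference only -- not part of the patch above. */

/*
 * arch/riscv/include/asm/tlbflush.h (assumed): the local flush at the end of
 * vmalloc_fault() issues a single SFENCE.VMA for the faulting address, which
 * orders the just-written page-table entry before the re-executed access.
 */
static inline void local_flush_tlb_page(unsigned long addr)
{
	__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
}

/*
 * include/linux/mmap_lock.h (assumed): mmap_read_lock()/mmap_read_unlock()
 * wrap the same reader/writer semaphore the old code took directly; the
 * mm->mmap_sem field was renamed to mm->mmap_lock when these wrappers were
 * introduced.
 */
static inline void mmap_read_lock(struct mm_struct *mm)
{
	down_read(&mm->mmap_lock);
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
	up_read(&mm->mmap_lock);
}

The sketch is only meant to make the old and new locking calls comparable; the rename does not change the semantics of the lock itself.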