@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * PowerPC version
  * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -8,11 +9,6 @@
  * Modified by Cort Dougan and Paul Mackerras.
  *
  * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/signal.h>
@@ -39,72 +35,20 @@
 
 #include <asm/firmware.h>
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
 #include <asm/siginfo.h>
 #include <asm/debug.h>
+#include <asm/kup.h>
+#include <asm/inst.h>
 
-static inline bool notify_page_fault(struct pt_regs *regs)
-{
-	bool ret = false;
 
-#ifdef CONFIG_KPROBES
-	/* kprobe_running() needs smp_processor_id() */
-	if (!user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, 11))
-			ret = true;
-		preempt_enable();
-	}
-#endif /* CONFIG_KPROBES */
-
-	if (unlikely(debugger_fault_handler(regs)))
-		ret = true;
-
-	return ret;
-}
-
-/*
- * Check whether the instruction inst is a store using
- * an update addressing form which will update r1.
- */
-static bool store_updates_sp(unsigned int inst)
-{
-	/* check for 1 in the rA field */
-	if (((inst >> 16) & 0x1f) != 1)
-		return false;
-	/* check major opcode */
-	switch (inst >> 26) {
-	case OP_STWU:
-	case OP_STBU:
-	case OP_STHU:
-	case OP_STFSU:
-	case OP_STFDU:
-		return true;
-	case OP_STD:	/* std or stdu */
-		return (inst & 3) == 1;
-	case OP_31:
-		/* check minor opcode */
-		switch ((inst >> 1) & 0x3ff) {
-		case OP_31_XOP_STDUX:
-		case OP_31_XOP_STWUX:
-		case OP_31_XOP_STBUX:
-		case OP_31_XOP_STHUX:
-		case OP_31_XOP_STFSUX:
-		case OP_31_XOP_STFDUX:
-			return true;
-		}
-	}
-	return false;
-}
 /*
  * do_page_fault error handling helpers
  */
 
 static int
-__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code,
-		       int pkey)
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
 {
 	/*
 	 * If we are in kernel mode, bail out with a SEGV, this will
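
As a worked example of the decode the deleted store_updates_sp() performed (a standalone sketch, not part of the patch; OP_STWU is major opcode 37 in the Power ISA): the classic stack-frame prologue stwu r1,-16(r1) assembles to 0x9421fff0, and the helper's two field extractions pick out rA (bits 11-15) and the major opcode (bits 0-5):

	#include <stdio.h>
	#include <stdbool.h>

	#define OP_STWU 37	/* stwu major opcode (Power ISA) */

	/* Same bit tests the removed helper applied, minus the other opcodes. */
	static bool is_stwu_updating_r1(unsigned int inst)
	{
		if (((inst >> 16) & 0x1f) != 1)	/* rA field must name r1 */
			return false;
		return (inst >> 26) == OP_STWU;	/* major opcode field */
	}

	int main(void)
	{
		unsigned int inst = 0x9421fff0;	/* stwu r1,-16(r1) */

		printf("rA=%u opcode=%u updates r1: %s\n",
		       (inst >> 16) & 0x1f, inst >> 26,
		       is_stwu_updating_r1(inst) ? "yes" : "no");
		return 0;
	}
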
@@ -114,18 +58,17 @@
 	if (!user_mode(regs))
 		return SIGSEGV;
 
-	_exception_pkey(SIGSEGV, regs, si_code, address, pkey);
+	_exception(SIGSEGV, regs, si_code, address);
 
 	return 0;
 }
 
 static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
 {
-	return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0);
+	return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
 }
 
-static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
-		      int pkey)
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
 {
 	struct mm_struct *mm = current->mm;
 
@@ -133,56 +76,88 @@
 	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
 	 */
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 
-	return __bad_area_nosemaphore(regs, address, si_code, pkey);
+	return __bad_area_nosemaphore(regs, address, si_code);
 }
 
 static noinline int bad_area(struct pt_regs *regs, unsigned long address)
 {
-	return __bad_area(regs, address, SEGV_MAPERR, 0);
+	return __bad_area(regs, address, SEGV_MAPERR);
 }
 
-static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
-				   int pkey)
+#ifdef CONFIG_PPC_MEM_KEYS
+static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
+				    struct vm_area_struct *vma)
 {
-	return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey);
+	struct mm_struct *mm = current->mm;
+	int pkey;
+
+	/*
+	 * We don't try to fetch the pkey from page table because reading
+	 * page table without locking doesn't guarantee stable pte value.
+	 * Hence the pkey value that we return to userspace can be different
+	 * from the pkey that actually caused access error.
+	 *
+	 * It does *not* guarantee that the VMA we find here
+	 * was the one that we faulted on.
+	 *
+	 * 1. T1   : mprotect_key(foo, PAGE_SIZE, pkey=4);
+	 * 2. T1   : set AMR to deny access to pkey=4, touches, page
+	 * 3. T1   : faults...
+	 * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
+	 * 5. T1   : enters fault handler, takes mmap_lock, etc...
+	 * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
+	 *	     faulted on a pte with its pkey=4.
+	 */
+	pkey = vma_pkey(vma);
+
+	mmap_read_unlock(mm);
+
+	/*
+	 * If we are in kernel mode, bail out with a SEGV, this will
+	 * be caught by the assembly which will restore the non-volatile
+	 * registers before calling bad_page_fault()
+	 */
+	if (!user_mode(regs))
+		return SIGSEGV;
+
+	_exception_pkey(regs, address, pkey);
+
+	return 0;
 }
+#endif
 
 static noinline int bad_access(struct pt_regs *regs, unsigned long address)
 {
-	return __bad_area(regs, address, SEGV_ACCERR, 0);
+	return __bad_area(regs, address, SEGV_ACCERR);
 }
 
 static int do_sigbus(struct pt_regs *regs, unsigned long address,
 		     vm_fault_t fault)
 {
-	siginfo_t info;
-	unsigned int lsb = 0;
-
 	if (!user_mode(regs))
 		return SIGBUS;
 
 	current->thread.trap_nr = BUS_ADRERR;
-	clear_siginfo(&info);
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRERR;
-	info.si_addr = (void __user *)address;
 #ifdef CONFIG_MEMORY_FAILURE
 	if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+		unsigned int lsb = 0; /* shutup gcc */
+
 		pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
 			current->comm, current->pid, address);
-		info.si_code = BUS_MCEERR_AR;
+
+		if (fault & VM_FAULT_HWPOISON_LARGE)
+			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+		if (fault & VM_FAULT_HWPOISON)
+			lsb = PAGE_SHIFT;
+
+		force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
+		return 0;
 	}
 
-	if (fault & VM_FAULT_HWPOISON_LARGE)
-		lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
-	if (fault & VM_FAULT_HWPOISON)
-		lsb = PAGE_SHIFT;
 #endif
-	info.si_addr_lsb = lsb;
-	force_sig_info(SIGBUS, &info, current);
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
 	return 0;
 }
 
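
The SEGV_PKUERR delivery above can be provoked from userspace; a minimal sketch, assuming a pkey-capable powerpc kernel and the glibc 2.27+ wrappers (not part of the patch):

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		int pkey = pkey_alloc(0, PKEY_DISABLE_ACCESS);

		if (p == MAP_FAILED || pkey < 0 ||
		    pkey_mprotect(p, 4096, PROT_READ | PROT_WRITE, pkey)) {
			perror("pkey setup");
			return 1;
		}

		/*
		 * The AMR now denies access for this key, so the load below
		 * faults with DSISR_KEYFAULT and the handler above delivers
		 * SIGSEGV with si_code = SEGV_PKUERR.
		 */
		return *p;
	}
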
@@ -218,78 +193,63 @@
 }
 
 /* Is this a bad kernel fault ? */
-static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
-			     unsigned long address)
+static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
+			     unsigned long address, bool is_write)
 {
-	/* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
-	if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
-				      DSISR_PROTFAULT))) {
-		printk_ratelimited(KERN_CRIT "kernel tried to execute"
-				   " exec-protected page (%lx) -"
-				   "exploit attempt? (uid: %d)\n",
-				   address, from_kuid(&init_user_ns,
-						      current_uid()));
-	}
-	return is_exec || (address >= TASK_SIZE);
-}
+	int is_exec = TRAP(regs) == 0x400;
 
-// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE
-#define SIGFRAME_MAX_SIZE	(4096 + 128)
+	if (is_exec) {
+		pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
+				    address >= TASK_SIZE ? "exec-protected" : "user",
+				    address,
+				    from_kuid(&init_user_ns, current_uid()));
 
-static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
-				struct vm_area_struct *vma, unsigned int flags,
-				bool *must_retry)
-{
-	/*
-	 * N.B. The POWER/Open ABI allows programs to access up to
-	 * 288 bytes below the stack pointer.
-	 * The kernel signal delivery code writes a bit over 4KB
-	 * below the stack pointer (r1) before decrementing it.
-	 * The exec code can write slightly over 640kB to the stack
-	 * before setting the user r1.  Thus we allow the stack to
-	 * expand to 1MB without further checks.
-	 */
-	if (address + 0x100000 < vma->vm_end) {
-		unsigned int __user *nip = (unsigned int __user *)regs->nip;
-		/* get user regs even if this fault is in kernel mode */
-		struct pt_regs *uregs = current->thread.regs;
-		if (uregs == NULL)
-			return true;
-
-		/*
-		 * A user-mode access to an address a long way below
-		 * the stack pointer is only valid if the instruction
-		 * is one which would update the stack pointer to the
-		 * address accessed if the instruction completed,
-		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
-		 * (or the byte, halfword, float or double forms).
-		 *
-		 * If we don't check this then any write to the area
-		 * between the last mapped region and the stack will
-		 * expand the stack rather than segfaulting.
-		 */
-		if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1])
-			return false;
-
-		if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
-		    access_ok(VERIFY_READ, nip, sizeof(*nip))) {
-			unsigned int inst;
-			int res;
-
-			pagefault_disable();
-			res = __get_user_inatomic(inst, nip);
-			pagefault_enable();
-			if (!res)
-				return !store_updates_sp(inst);
-			*must_retry = true;
-		}
+		// Kernel exec fault is always bad
 		return true;
 	}
+
+	if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) &&
+	    !search_exception_tables(regs->nip)) {
+		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
+				    address,
+				    from_kuid(&init_user_ns, current_uid()));
+	}
+
+	// Kernel fault on kernel address is bad
+	if (address >= TASK_SIZE)
+		return true;
+
+	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
+	if (!search_exception_tables(regs->nip))
+		return true;
+
+	// Read/write fault in a valid region (the exception table search passed
+	// above), but blocked by KUAP is bad, it can never succeed.
+	if (bad_kuap_fault(regs, address, is_write))
+		return true;
+
+	// What's left? Kernel fault on user in well defined regions (extable
+	// matched), and allowed by KUAP in the faulting context.
 	return false;
 }
 
-static bool access_error(bool is_write, bool is_exec,
-			 struct vm_area_struct *vma)
+#ifdef CONFIG_PPC_MEM_KEYS
+static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
			      struct vm_area_struct *vma)
+{
+	/*
+	 * Make sure to check the VMA so that we do not perform
+	 * faults just to hit a pkey fault as soon as we fill in a
+	 * page. Only called for current mm, hence foreign == 0
+	 */
+	if (!arch_vma_access_permitted(vma, is_write, is_exec, 0))
+		return true;
+
+	return false;
+}
+#endif
+
+static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma)
 {
 	/*
 	 * Allow execution from readable areas if the MMU does not
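
The "well defined regions" referred to above are code paths that both carry an exception-table entry and have explicitly opened a user-access window, so KUAP permits the access. A kernel-style sketch of that shape (illustrative only; read_user_word() is a made-up name, while user_access_begin()/unsafe_get_user()/user_access_end() are the usual primitives):

	static int read_user_word(unsigned int __user *ptr, unsigned int *val)
	{
		if (!user_access_begin(ptr, sizeof(*ptr)))	/* opens the KUAP window */
			return -EFAULT;
		unsafe_get_user(*val, ptr, Efault);	/* access with an extable entry */
		user_access_end();
		return 0;

	Efault:
		user_access_end();
		return -EFAULT;
	}

A fault taken inside unsafe_get_user() passes every test in bad_kernel_fault(): the address is below TASK_SIZE, the extable search succeeds, and bad_kuap_fault() sees an open window.
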
@@ -313,7 +273,7 @@
 		return false;
 	}
 
-	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+	if (unlikely(!vma_is_accessible(vma)))
 		return true;
 	/*
	 * We should ideally do the vma pkey access check here. But in the
@@ -341,9 +301,25 @@
 static inline void cmo_account_page_fault(void) { }
 #endif /* CONFIG_PPC_SMLPAR */
 
-#ifdef CONFIG_PPC_STD_MMU
-static void sanity_check_fault(bool is_write, unsigned long error_code)
+static void sanity_check_fault(bool is_write, bool is_user,
+			       unsigned long error_code, unsigned long address)
 {
+	/*
+	 * Userspace trying to access kernel address, we get PROTFAULT for that.
+	 */
+	if (is_user && address >= TASK_SIZE) {
+		if ((long)address == -1)
+			return;
+
+		pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n",
+				    current->comm, current->pid, address,
+				    from_kuid(&init_user_ns, current_uid()));
+		return;
+	}
+
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3S))
+		return;
+
 	/*
	 * For hash translation mode, we should never get a
	 * PROTFAULT. Any update to pte to reduce access will result in us
@@ -373,12 +349,11 @@
	 * For radix, we can get prot fault for autonuma case, because radix
	 * page table will have them marked noaccess for user.
 	 */
-	if (!radix_enabled() && !is_write)
-		WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
+	if (radix_enabled() || is_write)
+		return;
+
+	WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
 }
-#else
-static void sanity_check_fault(bool is_write, unsigned long error_code) { }
-#endif /* CONFIG_PPC_STD_MMU */
 
 /*
  * Define the correct "is_write" bit in error_code based
@@ -416,14 +391,14 @@
 {
 	struct vm_area_struct * vma;
 	struct mm_struct *mm = current->mm;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	unsigned int flags = FAULT_FLAG_DEFAULT;
 	int is_exec = TRAP(regs) == 0x400;
 	int is_user = user_mode(regs);
 	int is_write = page_fault_is_write(error_code);
 	vm_fault_t fault, major = 0;
-	bool must_retry = false;
+	bool kprobe_fault = kprobe_page_fault(regs, 11);
 
-	if (notify_page_fault(regs))
+	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
 		return 0;
 
 	if (unlikely(page_fault_is_bad(error_code))) {
@@ -435,13 +410,14 @@
 	}
 
 	/* Additional sanity check(s) */
-	sanity_check_fault(is_write, error_code);
+	sanity_check_fault(is_write, is_user, error_code, address);
 
 	/*
 	 * The kernel should never take an execute fault nor should it
-	 * take a page fault to a kernel address.
+	 * take a page fault to a kernel address or a page fault to a user
+	 * address outside of dedicated places
 	 */
-	if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address)))
+	if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write)))
 		return SIGSEGV;
 
 	/*
@@ -463,14 +439,10 @@
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
-	if (error_code & DSISR_KEYFAULT)
-		return bad_key_fault_exception(regs, address,
-					       get_mm_addr_key(mm, address));
-
 	/*
-	 * We want to do this outside mmap_sem, because reading code around nip
+	 * We want to do this outside mmap_lock, because reading code around nip
 	 * can result in fault, which will cause a deadlock when called with
-	 * mmap_sem held
+	 * mmap_lock held
 	 */
 	if (is_user)
 		flags |= FAULT_FLAG_USER;
@@ -482,7 +454,7 @@
 	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
-	 * erroneous fault occurring in a code path which already holds mmap_sem
+	 * erroneous fault occurring in a code path which already holds mmap_lock
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
@@ -494,12 +466,12 @@
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
 	 */
-	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
+	if (unlikely(!mmap_read_trylock(mm))) {
 		if (!is_user && !search_exception_tables(regs->nip))
 			return bad_area_nosemaphore(regs, address);
 
 retry:
-		down_read(&mm->mmap_sem);
+		mmap_read_lock(mm);
 	} else {
 		/*
		 * The above down_read_trylock() might have succeeded in
@@ -512,29 +484,21 @@
 	vma = find_vma(mm, address);
 	if (unlikely(!vma))
 		return bad_area(regs, address);
-	if (likely(vma->vm_start <= address))
-		goto good_area;
-	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
-		return bad_area(regs, address);
 
-	/* The stack is being expanded, check if it's valid */
-	if (unlikely(bad_stack_expansion(regs, address, vma, flags,
-					 &must_retry))) {
-		if (!must_retry)
+	if (unlikely(vma->vm_start > address)) {
+		if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
 			return bad_area(regs, address);
 
-		up_read(&mm->mmap_sem);
-		if (fault_in_pages_readable((const char __user *)regs->nip,
-					    sizeof(unsigned int)))
-			return bad_area_nosemaphore(regs, address);
-		goto retry;
+		if (unlikely(expand_stack(vma, address)))
+			return bad_area(regs, address);
 	}
 
-	/* Try to expand it */
-	if (unlikely(expand_stack(vma, address)))
-		return bad_area(regs, address);
+#ifdef CONFIG_PPC_MEM_KEYS
+	if (unlikely(access_pkey_error(is_write, is_exec,
+				       (error_code & DSISR_KEYFAULT), vma)))
+		return bad_access_pkey(regs, address, vma);
+#endif /* CONFIG_PPC_MEM_KEYS */
 
-good_area:
 	if (unlikely(access_error(is_write, is_exec, vma)))
 		return bad_access(regs, address);
 
@@ -543,50 +507,25 @@
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
-
-#ifdef CONFIG_PPC_MEM_KEYS
-	/*
-	 * we skipped checking for access error due to key earlier.
-	 * Check that using handle_mm_fault error return.
-	 */
-	if (unlikely(fault & VM_FAULT_SIGSEGV) &&
-	    !arch_vma_access_permitted(vma, is_write, is_exec, 0)) {
-
-		int pkey = vma_pkey(vma);
-
-		up_read(&mm->mmap_sem);
-		return bad_key_fault_exception(regs, address, pkey);
-	}
-#endif /* CONFIG_PPC_MEM_KEYS */
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	major |= fault & VM_FAULT_MAJOR;
 
+	if (fault_signal_pending(fault, regs))
+		return user_mode(regs) ? 0 : SIGBUS;
+
 	/*
-	 * Handle the retry right now, the mmap_sem has been released in that
+	 * Handle the retry right now, the mmap_lock has been released in that
 	 * case.
 	 */
 	if (unlikely(fault & VM_FAULT_RETRY)) {
-		/* We retry only once */
 		if (flags & FAULT_FLAG_ALLOW_RETRY) {
-			/*
-			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
-			 * of starvation.
-			 */
-			flags &= ~FAULT_FLAG_ALLOW_RETRY;
 			flags |= FAULT_FLAG_TRIED;
-			if (!fatal_signal_pending(current))
-				goto retry;
+			goto retry;
 		}
-
-		/*
-		 * User mode? Just return to handle the fatal exception otherwise
-		 * return to bad_page_fault
-		 */
-		return is_user ? 0 : SIGBUS;
 	}
 
-	up_read(&current->mm->mmap_sem);
+	mmap_read_unlock(current->mm);
 
 	if (unlikely(fault & VM_FAULT_ERROR))
 		return mm_fault_error(regs, address, fault);
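
The simplified control flow above leans on the generic fault-retry contract; a compressed sketch of the rule being relied on (illustrative, mirrors the hunk rather than adding behaviour):

	fault = handle_mm_fault(vma, address, flags, regs);

	if (fault_signal_pending(fault, regs))	/* mmap_lock already dropped */
		return user_mode(regs) ? 0 : SIGBUS;

	if (fault & VM_FAULT_RETRY) {		/* also returned without the lock */
		flags |= FAULT_FLAG_TRIED;	/* retry holds the lock to completion */
		goto retry;
	}

	mmap_read_unlock(mm);			/* only reached while we still own it */

FAULT_FLAG_DEFAULT expands to FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | FAULT_FLAG_INTERRUPTIBLE, which is why the explicit initializer could be dropped at the top of __do_page_fault(), and the old clear-ALLOW_RETRY/fatal_signal_pending() dance is now handled by the core mm code.
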
@@ -594,14 +533,9 @@
 	/*
	 * Major/minor page fault accounting.
 	 */
-	if (major) {
-		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+	if (major)
 		cmo_account_page_fault();
-	} else {
-		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-	}
+
 	return 0;
 }
 NOKPROBE_SYMBOL(__do_page_fault);
@@ -624,6 +558,7 @@
 void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 {
 	const struct exception_table_entry *entry;
+	int is_write = page_fault_is_write(regs->dsisr);
 
 	/* Are we prepared to handle this fault? */
 	if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -636,9 +571,11 @@
 	switch (TRAP(regs)) {
 	case 0x300:
 	case 0x380:
-		pr_alert("BUG: %s at 0x%08lx\n",
+	case 0xe00:
+		pr_alert("BUG: %s on %s at 0x%08lx\n",
 			 regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
-			 "Unable to handle kernel data access", regs->dar);
+			 "Unable to handle kernel data access",
+			 is_write ? "write" : "read", regs->dar);
 		break;
 	case 0x400:
 	case 0x480:
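
With the extra "%s" the first line of the oops now records the access direction, so a NULL-pointer store would report, for example (illustrative rendering of the format string above):

	BUG: Kernel NULL pointer dereference on write at 0x00000000

and the new 0xe00 case extends the same report to hypervisor data storage interrupts.
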