@@ -9,8 +9,8 @@
 #include <linux/init.h>
 #include <linux/hardirq.h>
 #include <linux/uaccess.h>
+#include <linux/perf_event.h>
 
-#include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
 extern void die(const char *str, struct pt_regs *regs, long err);
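Note: <linux/perf_event.h> is needed for the perf_sw_event() call added further down, and the direct <asm/pgtable.h> include is dropped as part of the tree-wide cleanup that routes the page-table API through <linux/mm.h>. The software-event hook used below is a static inline in <linux/perf_event.h> with this shape:

    void perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr);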
@@ -30,6 +30,8 @@
 	pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
 
 	do {
+		p4d_t *p4d;
+		pud_t *pud;
 		pmd_t *pmd;
 
 		if (pgd_none(*pgd))
@@ -40,7 +42,9 @@
 			break;
 		}
 
-		pmd = pmd_offset(pgd, addr);
+		p4d = p4d_offset(pgd, addr);
+		pud = pud_offset(p4d, addr);
+		pmd = pmd_offset(pud, addr);
 #if PTRS_PER_PMD != 1
 		pr_alert(", *pmd=%08lx", pmd_val(*pmd));
 #endif
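Note: the dump now walks pgd -> p4d -> pud -> pmd instead of jumping from pgd straight to pmd. nds32 uses two-level tables, so the intermediate levels are folded and their helpers collapse at compile time, but the walk must still be spelled out level by level to stay correct under any folding. A minimal sketch of the generic descent, assuming `mm` and `addr` are in scope:

    pgd_t *pgd = pgd_offset(mm, addr);    /* real top level             */
    p4d_t *p4d = p4d_offset(pgd, addr);   /* folded: returns pgd, cast  */
    pud_t *pud = pud_offset(p4d, addr);   /* folded: returns p4d, cast  */
    pmd_t *pmd = pmd_offset(pud, addr);   /* folded on two-level nds32  */
    pte_t *pte = pte_offset_kernel(pmd, addr);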
@@ -74,8 +78,8 @@
 	struct vm_area_struct *vma;
 	int si_code;
 	vm_fault_t fault;
-	unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
-	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	unsigned int mask = VM_ACCESS_FLAGS;
+	unsigned int flags = FAULT_FLAG_DEFAULT;
 
 	error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
 	tsk = current;
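Note: both new names are <linux/mm.h> helpers for values that were previously spelled out by hand. VM_ACCESS_FLAGS is exactly VM_READ | VM_WRITE | VM_EXEC, so `mask` is unchanged, while FAULT_FLAG_DEFAULT adds FAULT_FLAG_INTERRUPTIBLE on top of the old pair, so a user-mode fault can now also be aborted by a non-fatal signal. The mainline definitions, paraphrased:

    #define VM_ACCESS_FLAGS    (VM_READ | VM_WRITE | VM_EXEC)
    #define FAULT_FLAG_DEFAULT (FAULT_FLAG_ALLOW_RETRY | \
                                FAULT_FLAG_KILLABLE | \
                                FAULT_FLAG_INTERRUPTIBLE)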
@@ -117,17 +121,19 @@
 	if (unlikely(faulthandler_disabled() || !mm))
 		goto no_context;
 
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
 	/*
 	 * As per x86, we may deadlock here. However, since the kernel only
 	 * validly references user space from well defined areas of the code,
 	 * we can bug out early if this is from code which shouldn't.
 	 */
-	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
+	if (unlikely(!mmap_read_trylock(mm))) {
 		if (!user_mode(regs) &&
 		    !search_exception_tables(instruction_pointer(regs)))
 			goto no_context;
 retry:
-		down_read(&mm->mmap_sem);
+		mmap_read_lock(mm);
 	} else {
 		/*
 		 * The above down_read_trylock() might have succeeded in which
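Note: the new perf_sw_event() call counts every fault against PERF_COUNT_SW_PAGE_FAULTS before any handling decision is made. The locking changes are a pure rename: mm->mmap_sem became mm->mmap_lock, and the raw rwsem calls were replaced tree-wide by wrappers from <linux/mmap_lock.h> so the lock's implementation can evolve behind one API. The mapping in this hunk is one-to-one:

    mmap_read_trylock(mm);  /* was down_read_trylock(&mm->mmap_sem) */
    mmap_read_lock(mm);     /* was down_read(&mm->mmap_sem)         */
    mmap_read_unlock(mm);   /* was up_read(&mm->mmap_sem)           */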
@@ -169,8 +175,6 @@
 			mask = VM_EXEC;
 		else {
 			mask = VM_READ | VM_WRITE;
-			if (vma->vm_flags & VM_WRITE)
-				flags |= FAULT_FLAG_WRITE;
 		}
 	} else if (entry == ENTRY_TLB_MISC) {
 		switch (error_code & ITYPE_mskETYPE) {
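Note: the deleted pair set FAULT_FLAG_WRITE whenever the VMA happened to be writable, so a read fault in a writable mapping was reported to the MM core as a write access. Removing it presumably leaves the flag to be set only from the actual access type elsewhere in this function.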
@@ -204,14 +208,14 @@
 	 * the fault.
 	 */
 
-	fault = handle_mm_fault(vma, addr, flags);
+	fault = handle_mm_fault(vma, addr, flags, regs);
 
 	/*
 	 * If we need to retry but a fatal signal is pending, handle the
-	 * signal first. We do not need to release the mmap_sem because it
+	 * signal first. We do not need to release the mmap_lock because it
 	 * would already be released in __lock_page_or_retry in mm/filemap.c.
 	 */
-	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+	if (fault_signal_pending(fault, regs)) {
 		if (!user_mode(regs))
 			goto no_context;
 		return;
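Note: handle_mm_fault() gained a struct pt_regs * argument so the core MM code can do perf and maj_flt/min_flt accounting itself instead of leaving it to every architecture; the hand-rolled counters disappear in the next hunk. fault_signal_pending() folds the old retry-plus-fatal-signal test into one helper and, matching FAULT_FLAG_INTERRUPTIBLE above, also honors non-fatal signals for user-mode faults. Its mainline form in <linux/sched/signal.h> is roughly:

    static inline bool fault_signal_pending(vm_fault_t fault_flags,
                                            struct pt_regs *regs)
    {
            return unlikely((fault_flags & VM_FAULT_RETRY) &&
                            (fatal_signal_pending(current) ||
                             (user_mode(regs) && signal_pending(current))));
    }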
@@ -226,21 +230,11 @@
 		goto bad_area;
 	}
 
-	/*
-	 * Major/minor page fault accounting is only done on the initial
-	 * attempt. If we go through a retry, it is extremely likely that the
-	 * page will be found in page cache at that point.
-	 */
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR)
-			tsk->maj_flt++;
-		else
-			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
-			flags &= ~FAULT_FLAG_ALLOW_RETRY;
 			flags |= FAULT_FLAG_TRIED;
 
-			/* No need to up_read(&mm->mmap_sem) as we would
+			/* No need to mmap_read_unlock(mm) as we would
 			 * have already released it in __lock_page_or_retry
 			 * in mm/filemap.c.
 			 */
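Note: two cleanups meet in this hunk. The major/minor accounting and its comment are gone because handle_mm_fault(..., regs) now performs that accounting in the core, on the initial attempt only, exactly as the deleted comment described. And FAULT_FLAG_ALLOW_RETRY is no longer cleared before retrying because the fault path may now retry more than once; FAULT_FLAG_TRIED alone records that at least one retry happened.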
@@ -248,7 +242,7 @@
 		}
 	}
 
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 	return;
 
 	/*
@@ -256,7 +250,7 @@
 	 * Fix it, but check if it's kernel or user first..
 	 */
 bad_area:
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 
 bad_area_nosemaphore:
 
@@ -266,7 +260,7 @@
 		tsk->thread.address = addr;
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_no = entry;
-		force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk);
+		force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
 		return;
 	}
 
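Note: force_sig_fault() dropped its trailing struct task_struct * argument and now always targets current, which is the only task the fault paths ever passed. Its declaration in <linux/sched/signal.h> is now:

    int force_sig_fault(int sig, int code, void __user *addr);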
@@ -316,14 +310,14 @@
 	 */
 
 out_of_memory:
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 	if (!user_mode(regs))
 		goto no_context;
 	pagefault_out_of_memory();
 	return;
 
 do_sigbus:
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 
 	/* Kernel mode? Handle exceptions or die */
 	if (!user_mode(regs))
@@ -335,7 +329,7 @@
 	tsk->thread.address = addr;
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = entry;
-	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk);
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);
 
 	return;
 
@@ -354,6 +348,7 @@
 
 	unsigned int index = pgd_index(addr);
 	pgd_t *pgd, *pgd_k;
+	p4d_t *p4d, *p4d_k;
 	pud_t *pud, *pud_k;
 	pmd_t *pmd, *pmd_k;
 	pte_t *pte_k;
@@ -364,8 +359,13 @@
 	if (!pgd_present(*pgd_k))
 		goto no_context;
 
-	pud = pud_offset(pgd, addr);
-	pud_k = pud_offset(pgd_k, addr);
+	p4d = p4d_offset(pgd, addr);
+	p4d_k = p4d_offset(pgd_k, addr);
+	if (!p4d_present(*p4d_k))
+		goto no_context;
+
+	pud = pud_offset(p4d, addr);
+	pud_k = pud_offset(p4d_k, addr);
 	if (!pud_present(*pud_k))
 		goto no_context;
 
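Note: this is the kernel-address (vmalloc) path that copies a missing entry from init_mm's reference tables into the faulting context, and with five levels it must validate p4d before descending to pud. On nds32 the extra check is free: with the pud level folded, p4d_present() is a compile-time constant, roughly as defined in <asm-generic/pgtable-nopud.h>:

    static inline int p4d_none(p4d_t p4d)    { return 0; }
    static inline int p4d_present(p4d_t p4d) { return 1; }
    static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
    {
            return (pud_t *)p4d;  /* the pud table is the p4d entry itself */
    }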
---|