| .. | .. |
|---|
| 9 | 9 | #include <linux/init.h> |
|---|
| 10 | 10 | #include <linux/hardirq.h> |
|---|
| 11 | 11 | #include <linux/uaccess.h> |
|---|
| 12 | +#include <linux/perf_event.h> |
|---|
| 12 | 13 | |
|---|
| 13 | | -#include <asm/pgtable.h> |
|---|
| 14 | 14 | #include <asm/tlbflush.h> |
|---|
| 15 | 15 | |
|---|
| 16 | 16 | extern void die(const char *str, struct pt_regs *regs, long err); |
|---|
| .. | .. |
|---|
| 30 | 30 | pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd)); |
|---|
| 31 | 31 | |
|---|
| 32 | 32 | do { |
|---|
| 33 | + p4d_t *p4d; |
|---|
| 34 | + pud_t *pud; |
|---|
| 33 | 35 | pmd_t *pmd; |
|---|
| 34 | 36 | |
|---|
| 35 | 37 | if (pgd_none(*pgd)) |
|---|
| .. | .. |
|---|
| 40 | 42 | break; |
|---|
| 41 | 43 | } |
|---|
| 42 | 44 | |
|---|
| 43 | | - pmd = pmd_offset(pgd, addr); |
|---|
| 45 | + p4d = p4d_offset(pgd, addr); |
|---|
| 46 | + pud = pud_offset(p4d, addr); |
|---|
| 47 | + pmd = pmd_offset(pud, addr); |
|---|
| 44 | 48 | #if PTRS_PER_PMD != 1 |
|---|
| 45 | 49 | pr_alert(", *pmd=%08lx", pmd_val(*pmd)); |
|---|
| 46 | 50 | #endif |
|---|
| .. | .. |
|---|
| 74 | 78 | struct vm_area_struct *vma; |
|---|
| 75 | 79 | int si_code; |
|---|
| 76 | 80 | vm_fault_t fault; |
|---|
| 77 | | - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; |
|---|
| 78 | | - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; |
|---|
| 81 | + unsigned int mask = VM_ACCESS_FLAGS; |
|---|
| 82 | + unsigned int flags = FAULT_FLAG_DEFAULT; |
|---|
| 79 | 83 | |
|---|
| 80 | 84 | error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE); |
|---|
| 81 | 85 | tsk = current; |
|---|
| .. | .. |
|---|
| 117 | 121 | if (unlikely(faulthandler_disabled() || !mm)) |
|---|
| 118 | 122 | goto no_context; |
|---|
| 119 | 123 | |
|---|
| 124 | + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); |
|---|
| 125 | + |
|---|
| 120 | 126 | /* |
|---|
| 121 | 127 | * As per x86, we may deadlock here. However, since the kernel only |
|---|
| 122 | 128 | * validly references user space from well defined areas of the code, |
|---|
| 123 | 129 | * we can bug out early if this is from code which shouldn't. |
|---|
| 124 | 130 | */ |
|---|
| 125 | | - if (unlikely(!down_read_trylock(&mm->mmap_sem))) { |
|---|
| 131 | + if (unlikely(!mmap_read_trylock(mm))) { |
|---|
| 126 | 132 | if (!user_mode(regs) && |
|---|
| 127 | 133 | !search_exception_tables(instruction_pointer(regs))) |
|---|
| 128 | 134 | goto no_context; |
|---|
| 129 | 135 | retry: |
|---|
| 130 | | - down_read(&mm->mmap_sem); |
|---|
| 136 | + mmap_read_lock(mm); |
|---|
| 131 | 137 | } else { |
|---|
| 132 | 138 | /* |
|---|
| 133 | 139 | * The above down_read_trylock() might have succeeded in which |
|---|
| .. | .. |
|---|
| 169 | 175 | mask = VM_EXEC; |
|---|
| 170 | 176 | else { |
|---|
| 171 | 177 | mask = VM_READ | VM_WRITE; |
|---|
| 172 | | - if (vma->vm_flags & VM_WRITE) |
|---|
| 173 | | - flags |= FAULT_FLAG_WRITE; |
|---|
| 174 | 178 | } |
|---|
| 175 | 179 | } else if (entry == ENTRY_TLB_MISC) { |
|---|
| 176 | 180 | switch (error_code & ITYPE_mskETYPE) { |
|---|
| .. | .. |
|---|
| 204 | 208 | * the fault. |
|---|
| 205 | 209 | */ |
|---|
| 206 | 210 | |
|---|
| 207 | | - fault = handle_mm_fault(vma, addr, flags); |
|---|
| 211 | + fault = handle_mm_fault(vma, addr, flags, regs); |
|---|
| 208 | 212 | |
|---|
| 209 | 213 | /* |
|---|
| 210 | 214 | * If we need to retry but a fatal signal is pending, handle the |
|---|
| 211 | | - * signal first. We do not need to release the mmap_sem because it |
|---|
| 215 | + * signal first. We do not need to release the mmap_lock because it |
|---|
| 212 | 216 | * would already be released in __lock_page_or_retry in mm/filemap.c. |
|---|
| 213 | 217 | */ |
|---|
| 214 | | - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { |
|---|
| 218 | + if (fault_signal_pending(fault, regs)) { |
|---|
| 215 | 219 | if (!user_mode(regs)) |
|---|
| 216 | 220 | goto no_context; |
|---|
| 217 | 221 | return; |
|---|
| .. | .. |
|---|
| 226 | 230 | goto bad_area; |
|---|
| 227 | 231 | } |
|---|
| 228 | 232 | |
|---|
| 229 | | - /* |
|---|
| 230 | | - * Major/minor page fault accounting is only done on the initial |
|---|
| 231 | | - * attempt. If we go through a retry, it is extremely likely that the |
|---|
| 232 | | - * page will be found in page cache at that point. |
|---|
| 233 | | - */ |
|---|
| 234 | 233 | if (flags & FAULT_FLAG_ALLOW_RETRY) { |
|---|
| 235 | | - if (fault & VM_FAULT_MAJOR) |
|---|
| 236 | | - tsk->maj_flt++; |
|---|
| 237 | | - else |
|---|
| 238 | | - tsk->min_flt++; |
|---|
| 239 | 234 | if (fault & VM_FAULT_RETRY) { |
|---|
| 240 | | - flags &= ~FAULT_FLAG_ALLOW_RETRY; |
|---|
| 241 | 235 | flags |= FAULT_FLAG_TRIED; |
|---|
| 242 | 236 | |
|---|
| 243 | | - /* No need to up_read(&mm->mmap_sem) as we would |
|---|
| 237 | + /* No need to mmap_read_unlock(mm) as we would |
|---|
| 244 | 238 | * have already released it in __lock_page_or_retry |
|---|
| 245 | 239 | * in mm/filemap.c. |
|---|
| 246 | 240 | */ |
|---|
| .. | .. |
|---|
| 248 | 242 | } |
|---|
| 249 | 243 | } |
|---|
| 250 | 244 | |
|---|
| 251 | | - up_read(&mm->mmap_sem); |
|---|
| 245 | + mmap_read_unlock(mm); |
|---|
| 252 | 246 | return; |
|---|
| 253 | 247 | |
|---|
| 254 | 248 | /* |
|---|
| .. | .. |
|---|
| 256 | 250 | * Fix it, but check if it's kernel or user first.. |
|---|
| 257 | 251 | */ |
|---|
| 258 | 252 | bad_area: |
|---|
| 259 | | - up_read(&mm->mmap_sem); |
|---|
| 253 | + mmap_read_unlock(mm); |
|---|
| 260 | 254 | |
|---|
| 261 | 255 | bad_area_nosemaphore: |
|---|
| 262 | 256 | |
|---|
| .. | .. |
|---|
| 266 | 260 | tsk->thread.address = addr; |
|---|
| 267 | 261 | tsk->thread.error_code = error_code; |
|---|
| 268 | 262 | tsk->thread.trap_no = entry; |
|---|
| 269 | | - force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk); |
|---|
| 263 | + force_sig_fault(SIGSEGV, si_code, (void __user *)addr); |
|---|
| 270 | 264 | return; |
|---|
| 271 | 265 | } |
|---|
| 272 | 266 | |
|---|
| .. | .. |
|---|
| 316 | 310 | */ |
|---|
| 317 | 311 | |
|---|
| 318 | 312 | out_of_memory: |
|---|
| 319 | | - up_read(&mm->mmap_sem); |
|---|
| 313 | + mmap_read_unlock(mm); |
|---|
| 320 | 314 | if (!user_mode(regs)) |
|---|
| 321 | 315 | goto no_context; |
|---|
| 322 | 316 | pagefault_out_of_memory(); |
|---|
| 323 | 317 | return; |
|---|
| 324 | 318 | |
|---|
| 325 | 319 | do_sigbus: |
|---|
| 326 | | - up_read(&mm->mmap_sem); |
|---|
| 320 | + mmap_read_unlock(mm); |
|---|
| 327 | 321 | |
|---|
| 328 | 322 | /* Kernel mode? Handle exceptions or die */ |
|---|
| 329 | 323 | if (!user_mode(regs)) |
|---|
| .. | .. |
|---|
| 335 | 329 | tsk->thread.address = addr; |
|---|
| 336 | 330 | tsk->thread.error_code = error_code; |
|---|
| 337 | 331 | tsk->thread.trap_no = entry; |
|---|
| 338 | | - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk); |
|---|
| 332 | + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr); |
|---|
| 339 | 333 | |
|---|
| 340 | 334 | return; |
|---|
| 341 | 335 | |
|---|
| .. | .. |
|---|
| 354 | 348 | |
|---|
| 355 | 349 | unsigned int index = pgd_index(addr); |
|---|
| 356 | 350 | pgd_t *pgd, *pgd_k; |
|---|
| 351 | + p4d_t *p4d, *p4d_k; |
|---|
| 357 | 352 | pud_t *pud, *pud_k; |
|---|
| 358 | 353 | pmd_t *pmd, *pmd_k; |
|---|
| 359 | 354 | pte_t *pte_k; |
|---|
| .. | .. |
|---|
| 364 | 359 | if (!pgd_present(*pgd_k)) |
|---|
| 365 | 360 | goto no_context; |
|---|
| 366 | 361 | |
|---|
| 367 | | - pud = pud_offset(pgd, addr); |
|---|
| 368 | | - pud_k = pud_offset(pgd_k, addr); |
|---|
| 362 | + p4d = p4d_offset(pgd, addr); |
|---|
| 363 | + p4d_k = p4d_offset(pgd_k, addr); |
|---|
| 364 | + if (!p4d_present(*p4d_k)) |
|---|
| 365 | + goto no_context; |
|---|
| 366 | + |
|---|
| 367 | + pud = pud_offset(p4d, addr); |
|---|
| 368 | + pud_k = pud_offset(p4d_k, addr); |
|---|
| 369 | 369 | if (!pud_present(*pud_k)) |
|---|
| 370 | 370 | goto no_context; |
|---|
| 371 | 371 | |
|---|