| .. | .. |
| 1 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | | -#include <linux/mm.h> |
| 2 | +#include <linux/pagewalk.h> |
| 3 | 3 | #include <linux/vmacache.h> |
| 4 | 4 | #include <linux/hugetlb.h> |
| 5 | 5 | #include <linux/huge_mm.h> |
| .. | .. |
| 59 | 59 | SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); |
| 60 | 60 | SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); |
| 61 | 61 | SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); |
| 62 | | - SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm); |
| 62 | + SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm)); |
| 63 | 63 | SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss); |
| 64 | 64 | SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss); |
| 65 | 65 | SEQ_PUT_DEC(" kB\nRssAnon:\t", anon); |
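The VmPin hunk above tracks mm->pinned_vm changing type from unsigned long to atomic64_t, which is why task_mem() now samples it with atomic64_read(). A minimal sketch of why the conversion pays off — the helper names (account_pinned() and friends) are hypothetical, only the atomic64 calls mirror the kernel API:

```c
#include <linux/atomic.h>
#include <linux/mm_types.h>

/* Hypothetical helpers: with an atomic64_t counter, updaters need no
 * lock around the add/sub, and a reader such as task_mem() can sample
 * the value without taking any mm lock. */
static void account_pinned(struct mm_struct *mm, long npages)
{
	atomic64_add(npages, &mm->pinned_vm);
}

static void unaccount_pinned(struct mm_struct *mm, long npages)
{
	atomic64_sub(npages, &mm->pinned_vm);
}

static u64 sample_pinned(struct mm_struct *mm)
{
	return (u64)atomic64_read(&mm->pinned_vm);	/* what VmPin: prints */
}
```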
| .. | .. |
| 147 | 147 | long pages_pinned; |
| 148 | 148 | struct page *page; |
| 149 | 149 | |
| 150 | | - pages_pinned = get_user_pages_remote(current, mm, |
| 151 | | - page_start_vaddr, 1, 0, &page, NULL, NULL); |
| 150 | + pages_pinned = get_user_pages_remote(mm, page_start_vaddr, 1, 0, |
| 151 | + &page, NULL, NULL); |
| 152 | 152 | if (pages_pinned < 1) { |
| 153 | 153 | seq_puts(m, "<fault>]"); |
| 154 | 154 | return; |
| .. | .. |
| 159 | 159 | write_len = strnlen(kaddr + page_offset, len); |
| 160 | 160 | seq_write(m, kaddr + page_offset, write_len); |
| 161 | 161 | kunmap(page); |
| 162 | | - put_page(page); |
| 162 | + put_user_page(page); |
| 163 | 163 | |
| 164 | 164 | /* if strnlen hit a null terminator then we're done */ |
| 165 | 165 | if (write_len != len) |
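Two API shifts are visible in this hunk: get_user_pages_remote() no longer takes a task_struct argument (the target mm is sufficient), and the page reference is dropped with put_user_page() — later renamed unpin_user_page() upstream — rather than a bare put_page(), so pinned references stay distinguishable from ordinary ones. A hedged sketch of the resulting pin/use/release pattern; peek_user_page() is a made-up name, the two GUP calls mirror the hunk:

```c
#include <linux/mm.h>
#include <linux/highmem.h>

static void peek_user_page(struct mm_struct *mm, unsigned long vaddr)
{
	struct page *page;
	long pages_pinned;

	pages_pinned = get_user_pages_remote(mm, vaddr, 1, 0,
					     &page, NULL, NULL);
	if (pages_pinned < 1)
		return;			/* fault: nothing was pinned */

	/* ... kmap(page), copy the bytes out, kunmap(page) ... */

	put_user_page(page);		/* pairs with the pin above */
}
```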
| .. | .. |
| 173 | 173 | seq_putc(m, ']'); |
| 174 | 174 | } |
| 175 | 175 | |
| 176 | | -static void vma_stop(struct proc_maps_private *priv) |
| 177 | | -{ |
| 178 | | - struct mm_struct *mm = priv->mm; |
| 179 | | - |
| 180 | | - release_task_mempolicy(priv); |
| 181 | | - up_read(&mm->mmap_sem); |
| 182 | | - mmput(mm); |
| 183 | | -} |
| 184 | | - |
| 185 | | -static struct vm_area_struct * |
| 186 | | -m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma) |
| 187 | | -{ |
| 188 | | - if (vma == priv->tail_vma) |
| 189 | | - return NULL; |
| 190 | | - return vma->vm_next ?: priv->tail_vma; |
| 191 | | -} |
| 192 | | - |
| 193 | | -static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma) |
| 194 | | -{ |
| 195 | | - if (m->count < m->size) /* vma is copied successfully */ |
| 196 | | - m->version = m_next_vma(m->private, vma) ? vma->vm_end : -1UL; |
| 197 | | -} |
| 198 | | - |
| 199 | 176 | static void *m_start(struct seq_file *m, loff_t *ppos) |
| 200 | 177 | { |
| 201 | 178 | struct proc_maps_private *priv = m->private; |
| 202 | | - unsigned long last_addr = m->version; |
| 179 | + unsigned long last_addr = *ppos; |
| 203 | 180 | struct mm_struct *mm; |
| 204 | 181 | struct vm_area_struct *vma; |
| 205 | | - unsigned int pos = *ppos; |
| 206 | 182 | |
| 207 | | - /* See m_cache_vma(). Zero at the start or after lseek. */ |
| 183 | + /* See m_next(). Zero at the start or after lseek. */ |
| 208 | 184 | if (last_addr == -1UL) |
| 209 | 185 | return NULL; |
| 210 | 186 | |
| .. | .. |
| 213 | 189 | return ERR_PTR(-ESRCH); |
| 214 | 190 | |
| 215 | 191 | mm = priv->mm; |
| 216 | | - if (!mm || !mmget_not_zero(mm)) |
| 192 | + if (!mm || !mmget_not_zero(mm)) { |
| 193 | + put_task_struct(priv->task); |
| 194 | + priv->task = NULL; |
| 217 | 195 | return NULL; |
| 196 | + } |
| 218 | 197 | |
| 219 | | - if (down_read_killable(&mm->mmap_sem)) { |
| 198 | + if (mmap_read_lock_killable(mm)) { |
| 220 | 199 | mmput(mm); |
| 200 | + put_task_struct(priv->task); |
| 201 | + priv->task = NULL; |
| 221 | 202 | return ERR_PTR(-EINTR); |
| 222 | 203 | } |
| 223 | 204 | |
| 224 | 205 | hold_task_mempolicy(priv); |
| 225 | 206 | priv->tail_vma = get_gate_vma(mm); |
| 226 | 207 | |
| 227 | | - if (last_addr) { |
| 228 | | - vma = find_vma(mm, last_addr - 1); |
| 229 | | - if (vma && vma->vm_start <= last_addr) |
| 230 | | - vma = m_next_vma(priv, vma); |
| 231 | | - if (vma) |
| 232 | | - return vma; |
| 233 | | - } |
| 234 | | - |
| 235 | | - m->version = 0; |
| 236 | | - if (pos < mm->map_count) { |
| 237 | | - for (vma = mm->mmap; pos; pos--) { |
| 238 | | - m->version = vma->vm_start; |
| 239 | | - vma = vma->vm_next; |
| 240 | | - } |
| 208 | + vma = find_vma(mm, last_addr); |
| 209 | + if (vma) |
| 241 | 210 | return vma; |
| 242 | | - } |
| 243 | 211 | |
| 244 | | - /* we do not bother to update m->version in this case */ |
| 245 | | - if (pos == mm->map_count && priv->tail_vma) |
| 246 | | - return priv->tail_vma; |
| 247 | | - |
| 248 | | - vma_stop(priv); |
| 249 | | - return NULL; |
| 212 | + return priv->tail_vma; |
| 250 | 213 | } |
| 251 | 214 | |
| 252 | | -static void *m_next(struct seq_file *m, void *v, loff_t *pos) |
| 215 | +static void *m_next(struct seq_file *m, void *v, loff_t *ppos) |
| 253 | 216 | { |
| 254 | 217 | struct proc_maps_private *priv = m->private; |
| 255 | | - struct vm_area_struct *next; |
| 218 | + struct vm_area_struct *next, *vma = v; |
| 256 | 219 | |
| 257 | | - (*pos)++; |
| 258 | | - next = m_next_vma(priv, v); |
| 259 | | - if (!next) |
| 260 | | - vma_stop(priv); |
| 220 | + if (vma == priv->tail_vma) |
| 221 | + next = NULL; |
| 222 | + else if (vma->vm_next) |
| 223 | + next = vma->vm_next; |
| 224 | + else |
| 225 | + next = priv->tail_vma; |
| 226 | + |
| 227 | + *ppos = next ? next->vm_start : -1UL; |
| 228 | + |
| 261 | 229 | return next; |
| 262 | 230 | } |
| 263 | 231 | |
| 264 | 232 | static void m_stop(struct seq_file *m, void *v) |
| 265 | 233 | { |
| 266 | 234 | struct proc_maps_private *priv = m->private; |
| 235 | + struct mm_struct *mm = priv->mm; |
| 267 | 236 | |
| 268 | | - if (!IS_ERR_OR_NULL(v)) |
| 269 | | - vma_stop(priv); |
| 270 | | - if (priv->task) { |
| 271 | | - put_task_struct(priv->task); |
| 272 | | - priv->task = NULL; |
| 273 | | - } |
| 237 | + if (!priv->task) |
| 238 | + return; |
| 239 | + |
| 240 | + release_task_mempolicy(priv); |
| 241 | + mmap_read_unlock(mm); |
| 242 | + mmput(mm); |
| 243 | + put_task_struct(priv->task); |
| 244 | + priv->task = NULL; |
| 274 | 245 | } |
| 275 | 246 | |
| 276 | 247 | static int proc_maps_open(struct inode *inode, struct file *file, |
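The iterator rework above retires vma_stop(), m_next_vma() and the fragile m->version cache: the seq_file position (*ppos) now holds the virtual address at which the walk resumes, m_start() re-finds it with find_vma(), and m_next() advances it to the next VMA's vm_start (or -1UL at the end of the walk). One observable effect is that chunked reads of /proc/<pid>/maps resume by address rather than by a cached VMA pointer. A small userspace demo (error handling trimmed) that forces repeated m_start()/m_next() resumption once the seq_file buffer drains:

```c
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char buf[128];	/* small reads eventually re-enter m_start() */
	int fd = open("/proc/self/maps", O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return 1;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
```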
| .. | .. |
| 420 | 391 | static int show_map(struct seq_file *m, void *v) |
| 421 | 392 | { |
| 422 | 393 | show_map_vma(m, v); |
| 423 | | - m_cache_vma(m, v); |
| 424 | 394 | return 0; |
| 425 | 395 | } |
| 426 | 396 | |
| .. | .. |
| 474 | 444 | unsigned long lazyfree; |
| 475 | 445 | unsigned long anonymous_thp; |
| 476 | 446 | unsigned long shmem_thp; |
| 447 | + unsigned long file_thp; |
| 477 | 448 | unsigned long swap; |
| 478 | 449 | unsigned long shared_hugetlb; |
| 479 | 450 | unsigned long private_hugetlb; |
| 480 | 451 | u64 pss; |
| 452 | + u64 pss_anon; |
| 453 | + u64 pss_file; |
| 454 | + u64 pss_shmem; |
| 481 | 455 | u64 pss_locked; |
| 482 | 456 | u64 swap_pss; |
| 483 | 457 | bool check_shmem_swap; |
| 484 | 458 | }; |
| 485 | 459 | |
| 486 | | -static void smaps_account(struct mem_size_stats *mss, struct page *page, |
| 487 | | - bool compound, bool young, bool dirty, bool locked) |
| 460 | +static void smaps_page_accumulate(struct mem_size_stats *mss, |
| 461 | + struct page *page, unsigned long size, unsigned long pss, |
| 462 | + bool dirty, bool locked, bool private) |
| 488 | 463 | { |
| 489 | | - int i, nr = compound ? 1 << compound_order(page) : 1; |
| 464 | + mss->pss += pss; |
| 465 | + |
| 466 | + if (PageAnon(page)) |
| 467 | + mss->pss_anon += pss; |
| 468 | + else if (PageSwapBacked(page)) |
| 469 | + mss->pss_shmem += pss; |
| 470 | + else |
| 471 | + mss->pss_file += pss; |
| 472 | + |
| 473 | + if (locked) |
| 474 | + mss->pss_locked += pss; |
| 475 | + |
| 476 | + if (dirty || PageDirty(page)) { |
| 477 | + if (private) |
| 478 | + mss->private_dirty += size; |
| 479 | + else |
| 480 | + mss->shared_dirty += size; |
| 481 | + } else { |
| 482 | + if (private) |
| 483 | + mss->private_clean += size; |
| 484 | + else |
| 485 | + mss->shared_clean += size; |
| 486 | + } |
| 487 | +} |
| 488 | + |
| 489 | +static void smaps_account(struct mem_size_stats *mss, struct page *page, |
| 490 | + bool compound, bool young, bool dirty, bool locked, |
| 491 | + bool migration) |
| 492 | +{ |
| 493 | + int i, nr = compound ? compound_nr(page) : 1; |
| 490 | 494 | unsigned long size = nr * PAGE_SIZE; |
| 491 | 495 | |
| 496 | + /* |
| 497 | + * First accumulate quantities that depend only on |size| and the type |
| 498 | + * of the compound page. |
| 499 | + */ |
| 492 | 500 | if (PageAnon(page)) { |
| 493 | 501 | mss->anonymous += size; |
| 494 | 502 | if (!PageSwapBacked(page) && !dirty && !PageDirty(page)) |
| .. | .. |
| 501 | 509 | mss->referenced += size; |
| 502 | 510 | |
| 503 | 511 | /* |
| 512 | + * Then accumulate quantities that may depend on sharing, or that may |
| 513 | + * differ page-by-page. |
| 514 | + * |
| 504 | 515 | * page_count(page) == 1 guarantees the page is mapped exactly once. |
| 505 | 516 | * If any subpage of the compound page mapped with PTE it would elevate |
| 506 | 517 | * page_count(). |
| 518 | + * |
| 519 | + * The page_mapcount() is called to get a snapshot of the mapcount. |
| 520 | + * Without holding the page lock this snapshot can be slightly wrong as |
| 521 | + * we cannot always read the mapcount atomically. It is not safe to |
| 522 | + * call page_mapcount() even with PTL held if the page is not mapped, |
| 523 | + * especially for migration entries. Treat regular migration entries |
| 524 | + * as mapcount == 1. |
| 507 | 525 | */ |
| 508 | | - if (page_count(page) == 1) { |
| 509 | | - if (dirty || PageDirty(page)) |
| 510 | | - mss->private_dirty += size; |
| 511 | | - else |
| 512 | | - mss->private_clean += size; |
| 513 | | - mss->pss += (u64)size << PSS_SHIFT; |
| 514 | | - if (locked) |
| 515 | | - mss->pss_locked += (u64)size << PSS_SHIFT; |
| 526 | + if ((page_count(page) == 1) || migration) { |
| 527 | + smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty, |
| 528 | + locked, true); |
| 516 | 529 | return; |
| 517 | 530 | } |
| 518 | | - |
| 519 | 531 | for (i = 0; i < nr; i++, page++) { |
| 520 | 532 | int mapcount = page_mapcount(page); |
| 521 | | - unsigned long pss = (PAGE_SIZE << PSS_SHIFT); |
| 522 | | - |
| 523 | | - if (mapcount >= 2) { |
| 524 | | - if (dirty || PageDirty(page)) |
| 525 | | - mss->shared_dirty += PAGE_SIZE; |
| 526 | | - else |
| 527 | | - mss->shared_clean += PAGE_SIZE; |
| 528 | | - mss->pss += pss / mapcount; |
| 529 | | - if (locked) |
| 530 | | - mss->pss_locked += pss / mapcount; |
| 531 | | - } else { |
| 532 | | - if (dirty || PageDirty(page)) |
| 533 | | - mss->private_dirty += PAGE_SIZE; |
| 534 | | - else |
| 535 | | - mss->private_clean += PAGE_SIZE; |
| 536 | | - mss->pss += pss; |
| 537 | | - if (locked) |
| 538 | | - mss->pss_locked += pss; |
| 539 | | - } |
| 533 | + unsigned long pss = PAGE_SIZE << PSS_SHIFT; |
| 534 | + if (mapcount >= 2) |
| 535 | + pss /= mapcount; |
| 536 | + smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked, |
| 537 | + mapcount < 2); |
| 540 | 538 | } |
| 541 | 539 | } |
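The restructuring funnels all of the clean/dirty and PSS bookkeeping through smaps_page_accumulate(), so the new pss_anon/pss_file/pss_shmem buckets share one code path, and migration entries are treated like privately mapped pages because their mapcount cannot be sampled safely. The proportional-share arithmetic itself is unchanged: each page contributes (PAGE_SIZE << PSS_SHIFT) / mapcount in fixed point, rounded only when printed. A standalone worked example (PSS_SHIFT is 12 in this file):

```c
#include <stdio.h>

#define PSS_SHIFT 12
#define PAGE_SIZE 4096ULL

int main(void)
{
	unsigned long long pss = 0;
	int mapcount = 2;		/* each page shared by two processes */
	int npages = 300;		/* 1200 kB mapped in this "VMA" */

	/* same fixed-point accumulation as smaps_page_accumulate() */
	for (int i = 0; i < npages; i++)
		pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;

	/* prints 600: each sharer is charged half of the 1200 kB */
	printf("Pss: %llu kB\n", (pss >> PSS_SHIFT) >> 10);
	return 0;
}
```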
| 542 | 540 | |
| 543 | 541 | #ifdef CONFIG_SHMEM |
| 544 | 542 | static int smaps_pte_hole(unsigned long addr, unsigned long end, |
| 545 | | - struct mm_walk *walk) |
| 543 | + __always_unused int depth, struct mm_walk *walk) |
| 546 | 544 | { |
| 547 | 545 | struct mem_size_stats *mss = walk->private; |
| 548 | 546 | |
| .. | .. |
| 551 | 549 | |
| 552 | 550 | return 0; |
| 553 | 551 | } |
| 554 | | -#endif |
| 552 | +#else |
| 553 | +#define smaps_pte_hole NULL |
| 554 | +#endif /* CONFIG_SHMEM */ |
| 555 | 555 | |
| 556 | 556 | static void smaps_pte_entry(pte_t *pte, unsigned long addr, |
| 557 | 557 | struct mm_walk *walk) |
| .. | .. |
| 560 | 560 | struct vm_area_struct *vma = walk->vma; |
| 561 | 561 | bool locked = !!(vma->vm_flags & VM_LOCKED); |
| 562 | 562 | struct page *page = NULL; |
| 563 | + bool migration = false, young = false, dirty = false; |
| 563 | 564 | |
| 564 | 565 | if (pte_present(*pte)) { |
| 565 | 566 | page = vm_normal_page(vma, addr, *pte); |
| 567 | + young = pte_young(*pte); |
| 568 | + dirty = pte_dirty(*pte); |
| 566 | 569 | } else if (is_swap_pte(*pte)) { |
| 567 | 570 | swp_entry_t swpent = pte_to_swp_entry(*pte); |
| 568 | 571 | |
| .. | .. |
| 579 | 582 | } else { |
| 580 | 583 | mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; |
| 581 | 584 | } |
| 582 | | - } else if (is_migration_entry(swpent)) |
| 585 | + } else if (is_migration_entry(swpent)) { |
| 586 | + migration = true; |
| 583 | 587 | page = migration_entry_to_page(swpent); |
| 584 | | - else if (is_device_private_entry(swpent)) |
| 588 | + } else if (is_device_private_entry(swpent)) |
| 585 | 589 | page = device_private_entry_to_page(swpent); |
| 586 | 590 | } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap |
| 587 | 591 | && pte_none(*pte))) { |
| 588 | | - page = find_get_entry(vma->vm_file->f_mapping, |
| 592 | + page = xa_load(&vma->vm_file->f_mapping->i_pages, |
| 589 | 593 | linear_page_index(vma, addr)); |
| 590 | | - if (!page) |
| 591 | | - return; |
| 592 | | - |
| 593 | | - if (radix_tree_exceptional_entry(page)) |
| 594 | + if (xa_is_value(page)) |
| 594 | 595 | mss->swap += PAGE_SIZE; |
| 595 | | - else |
| 596 | | - put_page(page); |
| 597 | | - |
| 598 | 596 | return; |
| 599 | 597 | } |
| 600 | 598 | |
| 601 | 599 | if (!page) |
| 602 | 600 | return; |
| 603 | 601 | |
| 604 | | - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); |
| 602 | + smaps_account(mss, page, false, young, dirty, locked, migration); |
| 605 | 603 | } |
| 606 | 604 | |
| 607 | 605 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| .. | .. |
| 611 | 609 | struct mem_size_stats *mss = walk->private; |
| 612 | 610 | struct vm_area_struct *vma = walk->vma; |
| 613 | 611 | bool locked = !!(vma->vm_flags & VM_LOCKED); |
| 614 | | - struct page *page; |
| 612 | + struct page *page = NULL; |
| 613 | + bool migration = false; |
| 615 | 614 | |
| 616 | | - /* FOLL_DUMP will return -EFAULT on huge zero page */ |
| 617 | | - page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); |
| 615 | + if (pmd_present(*pmd)) { |
| 616 | + /* FOLL_DUMP will return -EFAULT on huge zero page */ |
| 617 | + page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); |
| 618 | + } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { |
| 619 | + swp_entry_t entry = pmd_to_swp_entry(*pmd); |
| 620 | + |
| 621 | + if (is_migration_entry(entry)) { |
| 622 | + migration = true; |
| 623 | + page = migration_entry_to_page(entry); |
| 624 | + } |
| 625 | + } |
| 618 | 626 | if (IS_ERR_OR_NULL(page)) |
| 619 | 627 | return; |
| 620 | 628 | if (PageAnon(page)) |
| .. | .. |
| 624 | 632 | else if (is_zone_device_page(page)) |
| 625 | 633 | /* pass */; |
| 626 | 634 | else |
| 627 | | - VM_BUG_ON_PAGE(1, page); |
| 628 | | - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); |
| 635 | + mss->file_thp += HPAGE_PMD_SIZE; |
| 636 | + |
| 637 | + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), |
| 638 | + locked, migration); |
| 629 | 639 | } |
| 630 | 640 | #else |
| 631 | 641 | static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, |
| .. | .. |
| 643 | 653 | |
| 644 | 654 | ptl = pmd_trans_huge_lock(pmd, vma); |
| 645 | 655 | if (ptl) { |
| 646 | | - if (pmd_present(*pmd)) |
| 647 | | - smaps_pmd_entry(pmd, addr, walk); |
| 656 | + smaps_pmd_entry(pmd, addr, walk); |
| 648 | 657 | spin_unlock(ptl); |
| 649 | 658 | goto out; |
| 650 | 659 | } |
| .. | .. |
| 652 | 661 | if (pmd_trans_unstable(pmd)) |
| 653 | 662 | goto out; |
| 654 | 663 | /* |
| 655 | | - * The mmap_sem held all the way back in m_start() is what |
| 664 | + * The mmap_lock held all the way back in m_start() is what |
| 656 | 665 | * keeps khugepaged out of here and from collapsing things |
| 657 | 666 | * in here. |
| 658 | 667 | */ |
| .. | .. |
| 687 | 696 | [ilog2(VM_GROWSDOWN)] = "gd", |
| 688 | 697 | [ilog2(VM_PFNMAP)] = "pf", |
| 689 | 698 | [ilog2(VM_DENYWRITE)] = "dw", |
| 690 | | -#ifdef CONFIG_X86_INTEL_MPX |
| 691 | | - [ilog2(VM_MPX)] = "mp", |
| 692 | | -#endif |
| 693 | 699 | [ilog2(VM_LOCKED)] = "lo", |
| 694 | 700 | [ilog2(VM_IO)] = "io", |
| 695 | 701 | [ilog2(VM_SEQ_READ)] = "sr", |
| .. | .. |
| 703 | 709 | [ilog2(VM_ARCH_1)] = "ar", |
| 704 | 710 | [ilog2(VM_WIPEONFORK)] = "wf", |
| 705 | 711 | [ilog2(VM_DONTDUMP)] = "dd", |
| 712 | +#ifdef CONFIG_ARM64_BTI |
| 713 | + [ilog2(VM_ARM64_BTI)] = "bt", |
| 714 | +#endif |
| 706 | 715 | #ifdef CONFIG_MEM_SOFT_DIRTY |
| 707 | 716 | [ilog2(VM_SOFTDIRTY)] = "sd", |
| 708 | 717 | #endif |
| .. | .. |
| 712 | 721 | [ilog2(VM_MERGEABLE)] = "mg", |
| 713 | 722 | [ilog2(VM_UFFD_MISSING)]= "um", |
| 714 | 723 | [ilog2(VM_UFFD_WP)] = "uw", |
| 724 | +#ifdef CONFIG_ARM64_MTE |
| 725 | + [ilog2(VM_MTE)] = "mt", |
| 726 | + [ilog2(VM_MTE_ALLOWED)] = "", |
| 727 | +#endif |
| 715 | 728 | #ifdef CONFIG_ARCH_HAS_PKEYS |
| 716 | 729 | /* These come out via ProtectionKey: */ |
| 717 | 730 | [ilog2(VM_PKEY_BIT0)] = "", |
| .. | .. |
| 722 | 735 | [ilog2(VM_PKEY_BIT4)] = "", |
| 723 | 736 | #endif |
| 724 | 737 | #endif /* CONFIG_ARCH_HAS_PKEYS */ |
| 738 | +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR |
| 739 | + [ilog2(VM_UFFD_MINOR)] = "ui", |
| 740 | +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ |
| 725 | 741 | }; |
| 726 | 742 | size_t i; |
| 727 | 743 | |
| .. | .. |
| 758 | 774 | page = device_private_entry_to_page(swpent); |
| 759 | 775 | } |
| 760 | 776 | if (page) { |
| 761 | | - int mapcount = page_mapcount(page); |
| 762 | | - |
| 763 | | - if (mapcount >= 2) |
| 777 | + if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte)) |
| 764 | 778 | mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); |
| 765 | 779 | else |
| 766 | 780 | mss->private_hugetlb += huge_page_size(hstate_vma(vma)); |
| 767 | 781 | } |
| 768 | 782 | return 0; |
| 769 | 783 | } |
| 784 | +#else |
| 785 | +#define smaps_hugetlb_range NULL |
| 770 | 786 | #endif /* HUGETLB_PAGE */ |
| 771 | 787 | |
| 772 | | -static void smap_gather_stats(struct vm_area_struct *vma, |
| 773 | | - struct mem_size_stats *mss) |
| 774 | | -{ |
| 775 | | - struct mm_walk smaps_walk = { |
| 776 | | - .pmd_entry = smaps_pte_range, |
| 777 | | -#ifdef CONFIG_HUGETLB_PAGE |
| 778 | | - .hugetlb_entry = smaps_hugetlb_range, |
| 779 | | -#endif |
| 780 | | - .mm = vma->vm_mm, |
| 781 | | - }; |
| 788 | +static const struct mm_walk_ops smaps_walk_ops = { |
| 789 | + .pmd_entry = smaps_pte_range, |
| 790 | + .hugetlb_entry = smaps_hugetlb_range, |
| 791 | +}; |
| 782 | 792 | |
| 783 | | - smaps_walk.private = mss; |
| 793 | +static const struct mm_walk_ops smaps_shmem_walk_ops = { |
| 794 | + .pmd_entry = smaps_pte_range, |
| 795 | + .hugetlb_entry = smaps_hugetlb_range, |
| 796 | + .pte_hole = smaps_pte_hole, |
| 797 | +}; |
| 798 | + |
| 799 | +/* |
| 800 | + * Gather mem stats from @vma with the indicated beginning |
| 801 | + * address @start, and keep them in @mss. |
| 802 | + * |
| 803 | + * Use vm_start of @vma as the beginning address if @start is 0. |
| 804 | + */ |
| 805 | +static void smap_gather_stats(struct vm_area_struct *vma, |
| 806 | + struct mem_size_stats *mss, unsigned long start) |
| 807 | +{ |
| 808 | + const struct mm_walk_ops *ops = &smaps_walk_ops; |
| 809 | + |
| 810 | + /* Invalid start */ |
| 811 | + if (start >= vma->vm_end) |
| 812 | + return; |
| 784 | 813 | |
| 785 | 814 | #ifdef CONFIG_SHMEM |
| 786 | 815 | /* In case of smaps_rollup, reset the value from previous vma */ |
| .. | .. |
| 798 | 827 | */ |
| 799 | 828 | unsigned long shmem_swapped = shmem_swap_usage(vma); |
| 800 | 829 | |
| 801 | | - if (!shmem_swapped || (vma->vm_flags & VM_SHARED) || |
| 802 | | - !(vma->vm_flags & VM_WRITE)) { |
| 830 | + if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) || |
| 831 | + !(vma->vm_flags & VM_WRITE))) { |
| 803 | 832 | mss->swap += shmem_swapped; |
| 804 | 833 | } else { |
| 805 | 834 | mss->check_shmem_swap = true; |
| 806 | | - smaps_walk.pte_hole = smaps_pte_hole; |
| 835 | + ops = &smaps_shmem_walk_ops; |
| 807 | 836 | } |
| 808 | 837 | } |
| 809 | 838 | #endif |
| 810 | | - /* mmap_sem is held in m_start */ |
| 811 | | - walk_page_vma(vma, &smaps_walk); |
| 839 | + /* mmap_lock is held in m_start */ |
| 840 | + if (!start) |
| 841 | + walk_page_vma(vma, ops, mss); |
| 842 | + else |
| 843 | + walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss); |
| 812 | 844 | } |
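smap_gather_stats() also illustrates the new pagewalk API: callbacks move out of an on-stack struct mm_walk into shared const struct mm_walk_ops tables, and walk_page_vma()/walk_page_range() receive the mm and the private pointer per call — which is what lets smaps_rollup resume mid-VMA through the new start argument. A hedged toy walker of the same shape; count_pmds/count_ops/count_range are invented names, the ops wiring mirrors <linux/pagewalk.h>:

```c
#include <linux/pagewalk.h>
#include <linux/mmap_lock.h>

/* Toy callback: count how many populated PMD-sized steps the walk takes. */
static int count_pmds(pmd_t *pmd, unsigned long addr, unsigned long next,
		      struct mm_walk *walk)
{
	(*(unsigned long *)walk->private)++;
	return 0;
}

static const struct mm_walk_ops count_ops = {
	.pmd_entry = count_pmds,
};

static unsigned long count_range(struct mm_struct *mm,
				 unsigned long start, unsigned long end)
{
	unsigned long n = 0;

	mmap_read_lock(mm);		/* page walkers require mmap_lock */
	walk_page_range(mm, start, end, &count_ops, &n);
	mmap_read_unlock(mm);
	return n;
}
```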
| 813 | 845 | |
| 814 | 846 | #define SEQ_PUT_DEC(str, val) \ |
| 815 | 847 | seq_put_decimal_ull_width(m, str, (val) >> 10, 8) |
| 816 | 848 | |
| 817 | 849 | /* Show the contents common for smaps and smaps_rollup */ |
| 818 | | -static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss) |
| 850 | +static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, |
| 851 | + bool rollup_mode) |
| 819 | 852 | { |
| 820 | 853 | SEQ_PUT_DEC("Rss: ", mss->resident); |
| 821 | 854 | SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT); |
| 855 | + if (rollup_mode) { |
| 856 | + /* |
| 857 | + * These are meaningful only for smaps_rollup, otherwise two of |
| 858 | + * them are zero, and the other one is the same as Pss. |
| 859 | + */ |
| 860 | + SEQ_PUT_DEC(" kB\nPss_Anon: ", |
| 861 | + mss->pss_anon >> PSS_SHIFT); |
| 862 | + SEQ_PUT_DEC(" kB\nPss_File: ", |
| 863 | + mss->pss_file >> PSS_SHIFT); |
| 864 | + SEQ_PUT_DEC(" kB\nPss_Shmem: ", |
| 865 | + mss->pss_shmem >> PSS_SHIFT); |
| 866 | + } |
| 822 | 867 | SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean); |
| 823 | 868 | SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty); |
| 824 | 869 | SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean); |
| .. | .. |
| 828 | 873 | SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree); |
| 829 | 874 | SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp); |
| 830 | 875 | SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp); |
| 876 | + SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp); |
| 831 | 877 | SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb); |
| 832 | 878 | seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ", |
| 833 | 879 | mss->private_hugetlb >> 10, 7); |
| .. | .. |
| 846 | 892 | |
| 847 | 893 | memset(&mss, 0, sizeof(mss)); |
| 848 | 894 | |
| 849 | | - smap_gather_stats(vma, &mss); |
| 895 | + smap_gather_stats(vma, &mss, 0); |
| 850 | 896 | |
| 851 | 897 | show_map_vma(m, vma); |
| 852 | 898 | if (vma_get_anon_name(vma)) { |
| .. | .. |
| 860 | 906 | SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma)); |
| 861 | 907 | seq_puts(m, " kB\n"); |
| 862 | 908 | |
| 863 | | - __show_smap(m, &mss); |
| 909 | + __show_smap(m, &mss, false); |
| 864 | 910 | |
| 865 | | - seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma)); |
| 911 | + seq_printf(m, "THPeligible: %d\n", |
| 912 | + transparent_hugepage_active(vma)); |
| 866 | 913 | |
| 867 | 914 | if (arch_pkeys_enabled()) |
| 868 | 915 | seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); |
| 869 | 916 | show_smap_vma_flags(m, vma); |
| 870 | | - |
| 871 | | - m_cache_vma(m, vma); |
| 872 | 917 | |
| 873 | 918 | return 0; |
| 874 | 919 | } |
| .. | .. |
| 894 | 939 | |
| 895 | 940 | memset(&mss, 0, sizeof(mss)); |
| 896 | 941 | |
| 897 | | - ret = down_read_killable(&mm->mmap_sem); |
| 942 | + ret = mmap_read_lock_killable(mm); |
| 898 | 943 | if (ret) |
| 899 | 944 | goto out_put_mm; |
| 900 | 945 | |
| 901 | 946 | hold_task_mempolicy(priv); |
| 902 | 947 | |
| 903 | | - for (vma = priv->mm->mmap; vma; vma = vma->vm_next) { |
| 904 | | - smap_gather_stats(vma, &mss); |
| 948 | + for (vma = priv->mm->mmap; vma;) { |
| 949 | + smap_gather_stats(vma, &mss, 0); |
| 905 | 950 | last_vma_end = vma->vm_end; |
| 951 | + |
| 952 | + /* |
| 953 | + * Release mmap_lock temporarily if someone wants to |
| 954 | + * access it for write request. |
| 955 | + */ |
| 956 | + if (mmap_lock_is_contended(mm)) { |
| 957 | + mmap_read_unlock(mm); |
| 958 | + ret = mmap_read_lock_killable(mm); |
| 959 | + if (ret) { |
| 960 | + release_task_mempolicy(priv); |
| 961 | + goto out_put_mm; |
| 962 | + } |
| 963 | + |
| 964 | + /* |
| 965 | + * After dropping the lock, there are four cases to |
| 966 | + * consider. See the following example for explanation. |
| 967 | + * |
| 968 | + * +------+------+-----------+ |
| 969 | + * | VMA1 | VMA2 | VMA3 | |
| 970 | + * +------+------+-----------+ |
| 971 | + * | | | | |
| 972 | + * 4k 8k 16k 400k |
| 973 | + * |
| 974 | + * Suppose we drop the lock after reading VMA2 due to |
| 975 | + * contention, then we get: |
| 976 | + * |
| 977 | + * last_vma_end = 16k |
| 978 | + * |
| 979 | + * 1) VMA2 is freed, but VMA3 exists: |
| 980 | + * |
| 981 | + * find_vma(mm, 16k - 1) will return VMA3. |
| 982 | + * In this case, just continue from VMA3. |
| 983 | + * |
| 984 | + * 2) VMA2 still exists: |
| 985 | + * |
| 986 | + * find_vma(mm, 16k - 1) will return VMA2. |
| 987 | + * Iterate the loop like the original one. |
| 988 | + * |
| 989 | + * 3) No more VMAs can be found: |
| 990 | + * |
| 991 | + * find_vma(mm, 16k - 1) will return NULL. |
| 992 | + * No more things to do, just break. |
| 993 | + * |
| 994 | + * 4) (last_vma_end - 1) is the middle of a vma (VMA'): |
| 995 | + * |
| 996 | + * find_vma(mm, 16k - 1) will return VMA' whose range |
| 997 | + * contains last_vma_end. |
| 998 | + * Iterate VMA' from last_vma_end. |
| 999 | + */ |
| 1000 | + vma = find_vma(mm, last_vma_end - 1); |
| 1001 | + /* Case 3 above */ |
| 1002 | + if (!vma) |
| 1003 | + break; |
| 1004 | + |
| 1005 | + /* Case 1 above */ |
| 1006 | + if (vma->vm_start >= last_vma_end) |
| 1007 | + continue; |
| 1008 | + |
| 1009 | + /* Case 4 above */ |
| 1010 | + if (vma->vm_end > last_vma_end) |
| 1011 | + smap_gather_stats(vma, &mss, last_vma_end); |
| 1012 | + } |
| 1013 | + /* Case 2 above */ |
| 1014 | + vma = vma->vm_next; |
| 906 | 1015 | } |
| 907 | 1016 | |
| 908 | | - show_vma_header_prefix(m, priv->mm->mmap->vm_start, |
| 1017 | + show_vma_header_prefix(m, priv->mm->mmap ? priv->mm->mmap->vm_start : 0, |
| 909 | 1018 | last_vma_end, 0, 0, 0, 0); |
| 910 | 1019 | seq_pad(m, ' '); |
| 911 | 1020 | seq_puts(m, "[rollup]\n"); |
| 912 | 1021 | |
| 913 | | - __show_smap(m, &mss); |
| 1022 | + __show_smap(m, &mss, true); |
| 914 | 1023 | |
| 915 | 1024 | release_task_mempolicy(priv); |
| 916 | | - up_read(&mm->mmap_sem); |
| 1025 | + mmap_read_unlock(mm); |
| 917 | 1026 | |
| 918 | 1027 | out_put_mm: |
| 919 | 1028 | mmput(mm); |
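With the loop above, one pass over /proc/<pid>/smaps_rollup may drop and re-take mmap_lock whenever a writer is waiting, resuming through find_vma(mm, last_vma_end - 1) according to the four commented cases. Userspace still sees a single aggregated record, now including the Pss_Anon/Pss_File/Pss_Shmem lines that __show_smap() emits only in rollup mode. A minimal reader:

```c
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/smaps_rollup", "r");

	if (!f)
		return 1;	/* needs a kernel with smaps_rollup */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* one "[rollup]" record */
	fclose(f);
	return 0;
}
```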
| .. | .. |
| 1006 | 1115 | }; |
| 1007 | 1116 | |
| 1008 | 1117 | #ifdef CONFIG_MEM_SOFT_DIRTY |
| 1118 | + |
| 1119 | +#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) |
| 1120 | + |
| 1121 | +static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) |
| 1122 | +{ |
| 1123 | + struct page *page; |
| 1124 | + |
| 1125 | + if (!pte_write(pte)) |
| 1126 | + return false; |
| 1127 | + if (!is_cow_mapping(vma->vm_flags)) |
| 1128 | + return false; |
| 1129 | + if (likely(!atomic_read(&vma->vm_mm->has_pinned))) |
| 1130 | + return false; |
| 1131 | + page = vm_normal_page(vma, addr, pte); |
| 1132 | + if (!page) |
| 1133 | + return false; |
| 1134 | + return page_maybe_dma_pinned(page); |
| 1135 | +} |
| 1136 | + |
| 1009 | 1137 | static inline void clear_soft_dirty(struct vm_area_struct *vma, |
| 1010 | 1138 | unsigned long addr, pte_t *pte) |
| 1011 | 1139 | { |
| .. | .. |
| 1018 | 1146 | pte_t ptent = *pte; |
| 1019 | 1147 | |
| 1020 | 1148 | if (pte_present(ptent)) { |
| 1021 | | - ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte); |
| 1022 | | - ptent = pte_wrprotect(ptent); |
| 1149 | + pte_t old_pte; |
| 1150 | + |
| 1151 | + if (pte_is_pinned(vma, addr, ptent)) |
| 1152 | + return; |
| 1153 | + old_pte = ptep_modify_prot_start(vma, addr, pte); |
| 1154 | + ptent = pte_wrprotect(old_pte); |
| 1023 | 1155 | ptent = pte_clear_soft_dirty(ptent); |
| 1024 | | - ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent); |
| 1156 | + ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); |
| 1025 | 1157 | } else if (is_swap_pte(ptent)) { |
| 1026 | 1158 | ptent = pte_swp_clear_soft_dirty(ptent); |
| 1027 | 1159 | set_pte_at(vma->vm_mm, addr, pte, ptent); |
| .. | .. |
| 1145 | 1277 | return 0; |
| 1146 | 1278 | } |
| 1147 | 1279 | |
| 1280 | +static const struct mm_walk_ops clear_refs_walk_ops = { |
| 1281 | + .pmd_entry = clear_refs_pte_range, |
| 1282 | + .test_walk = clear_refs_test_walk, |
| 1283 | +}; |
| 1284 | + |
| 1148 | 1285 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
| 1149 | 1286 | size_t count, loff_t *ppos) |
| 1150 | 1287 | { |
| .. | .. |
| 1153 | 1290 | struct mm_struct *mm; |
| 1154 | 1291 | struct vm_area_struct *vma; |
| 1155 | 1292 | enum clear_refs_types type; |
| 1156 | | - struct mmu_gather tlb; |
| 1157 | 1293 | int itype; |
| 1158 | 1294 | int rv; |
| 1159 | 1295 | |
| .. | .. |
| 1174 | 1310 | return -ESRCH; |
| 1175 | 1311 | mm = get_task_mm(task); |
| 1176 | 1312 | if (mm) { |
| 1313 | + struct mmu_notifier_range range; |
| 1177 | 1314 | struct clear_refs_private cp = { |
| 1178 | 1315 | .type = type, |
| 1179 | 1316 | }; |
| 1180 | | - struct mm_walk clear_refs_walk = { |
| 1181 | | - .pmd_entry = clear_refs_pte_range, |
| 1182 | | - .test_walk = clear_refs_test_walk, |
| 1183 | | - .mm = mm, |
| 1184 | | - .private = &cp, |
| 1185 | | - }; |
| 1186 | 1317 | |
| 1318 | + if (mmap_write_lock_killable(mm)) { |
| 1319 | + count = -EINTR; |
| 1320 | + goto out_mm; |
| 1321 | + } |
| 1187 | 1322 | if (type == CLEAR_REFS_MM_HIWATER_RSS) { |
| 1188 | | - if (down_write_killable(&mm->mmap_sem)) { |
| 1189 | | - count = -EINTR; |
| 1190 | | - goto out_mm; |
| 1191 | | - } |
| 1192 | | - |
| 1193 | 1323 | /* |
| 1194 | 1324 | * Writing 5 to /proc/pid/clear_refs resets the peak |
| 1195 | 1325 | * resident set size to this mm's current rss value. |
| 1196 | 1326 | */ |
| 1197 | 1327 | reset_mm_hiwater_rss(mm); |
| 1198 | | - up_write(&mm->mmap_sem); |
| 1199 | | - goto out_mm; |
| 1328 | + goto out_unlock; |
| 1200 | 1329 | } |
| 1201 | 1330 | |
| 1202 | | - if (down_read_killable(&mm->mmap_sem)) { |
| 1203 | | - count = -EINTR; |
| 1204 | | - goto out_mm; |
| 1205 | | - } |
| 1206 | | - tlb_gather_mmu(&tlb, mm, 0, -1); |
| 1207 | 1331 | if (type == CLEAR_REFS_SOFT_DIRTY) { |
| 1208 | 1332 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
| 1209 | 1333 | if (!(vma->vm_flags & VM_SOFTDIRTY)) |
| 1210 | 1334 | continue; |
| 1211 | | - up_read(&mm->mmap_sem); |
| 1212 | | - if (down_write_killable(&mm->mmap_sem)) { |
| 1213 | | - count = -EINTR; |
| 1214 | | - goto out_mm; |
| 1215 | | - } |
| 1216 | | - /* |
| 1217 | | - * Avoid to modify vma->vm_flags |
| 1218 | | - * without locked ops while the |
| 1219 | | - * coredump reads the vm_flags. |
| 1220 | | - */ |
| 1221 | | - if (!mmget_still_valid(mm)) { |
| 1222 | | - /* |
| 1223 | | - * Silently return "count" |
| 1224 | | - * like if get_task_mm() |
| 1225 | | - * failed. FIXME: should this |
| 1226 | | - * function have returned |
| 1227 | | - * -ESRCH if get_task_mm() |
| 1228 | | - * failed like if |
| 1229 | | - * get_proc_task() fails? |
| 1230 | | - */ |
| 1231 | | - up_write(&mm->mmap_sem); |
| 1232 | | - goto out_mm; |
| 1233 | | - } |
| 1234 | | - for (vma = mm->mmap; vma; vma = vma->vm_next) { |
| 1235 | | - vma->vm_flags &= ~VM_SOFTDIRTY; |
| 1236 | | - vma_set_page_prot(vma); |
| 1237 | | - } |
| 1238 | | - downgrade_write(&mm->mmap_sem); |
| 1239 | | - break; |
| 1335 | + vm_write_begin(vma); |
| 1336 | + WRITE_ONCE(vma->vm_flags, |
| 1337 | + vma->vm_flags & ~VM_SOFTDIRTY); |
| 1338 | + vma_set_page_prot(vma); |
| 1339 | + vm_write_end(vma); |
| 1240 | 1340 | } |
| 1241 | | - mmu_notifier_invalidate_range_start(mm, 0, -1); |
| 1341 | + |
| 1342 | + inc_tlb_flush_pending(mm); |
| 1343 | + mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, |
| 1344 | + 0, NULL, mm, 0, -1UL); |
| 1345 | + mmu_notifier_invalidate_range_start(&range); |
| 1242 | 1346 | } |
| 1243 | | - walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); |
| 1244 | | - if (type == CLEAR_REFS_SOFT_DIRTY) |
| 1245 | | - mmu_notifier_invalidate_range_end(mm, 0, -1); |
| 1246 | | - tlb_finish_mmu(&tlb, 0, -1); |
| 1247 | | - up_read(&mm->mmap_sem); |
| 1347 | + walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops, |
| 1348 | + &cp); |
| 1349 | + if (type == CLEAR_REFS_SOFT_DIRTY) { |
| 1350 | + mmu_notifier_invalidate_range_end(&range); |
| 1351 | + flush_tlb_mm(mm); |
| 1352 | + dec_tlb_flush_pending(mm); |
| 1353 | + } |
| 1354 | +out_unlock: |
| 1355 | + mmap_write_unlock(mm); |
| 1248 | 1356 | out_mm: |
| 1249 | 1357 | mmput(mm); |
| 1250 | 1358 | } |
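After this rewrite, clear_refs_write() holds mmap_lock for write across the whole operation (the old read-lock/upgrade dance and the mmu_gather are gone), and the soft-dirty pass refuses to write-protect pages that pte_is_pinned() identifies as DMA-pinned COW targets. The userspace protocol is untouched: write "4" to clear the soft-dirty bits, touch memory, then read bit 55 of the matching pagemap entry. A hedged demo for kernels built with CONFIG_MEM_SOFT_DIRTY (error handling trimmed):

```c
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	static char probe[4096] __attribute__((aligned(4096)));
	uint64_t entry;
	int cr = open("/proc/self/clear_refs", O_WRONLY);
	int pm = open("/proc/self/pagemap", O_RDONLY);

	write(cr, "4", 1);		/* "4" = clear soft-dirty bits */
	probe[0] = 1;			/* re-dirty one page */

	pread(pm, &entry, sizeof(entry),
	      ((uintptr_t)probe / 4096) * sizeof(entry));
	printf("soft-dirty: %d\n", (int)((entry >> 55) & 1));	/* -> 1 */
	return 0;
}
```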
| .. | .. |
| 1297 | 1405 | } |
| 1298 | 1406 | |
| 1299 | 1407 | static int pagemap_pte_hole(unsigned long start, unsigned long end, |
| 1300 | | - struct mm_walk *walk) |
| 1408 | + __always_unused int depth, struct mm_walk *walk) |
| 1301 | 1409 | { |
| 1302 | 1410 | struct pagemapread *pm = walk->private; |
| 1303 | 1411 | unsigned long addr = start; |
| .. | .. |
| 1341 | 1449 | { |
| 1342 | 1450 | u64 frame = 0, flags = 0; |
| 1343 | 1451 | struct page *page = NULL; |
| 1452 | + bool migration = false; |
| 1344 | 1453 | |
| 1345 | 1454 | if (pte_present(pte)) { |
| 1346 | 1455 | if (pm->show_pfn) |
| 1347 | 1456 | frame = pte_pfn(pte); |
| 1348 | 1457 | flags |= PM_PRESENT; |
| 1349 | | - page = _vm_normal_page(vma, addr, pte, true); |
| 1458 | + page = vm_normal_page(vma, addr, pte); |
| 1350 | 1459 | if (pte_soft_dirty(pte)) |
| 1351 | 1460 | flags |= PM_SOFT_DIRTY; |
| 1352 | 1461 | } else if (is_swap_pte(pte)) { |
| .. | .. |
| 1358 | 1467 | frame = swp_type(entry) | |
| 1359 | 1468 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); |
| 1360 | 1469 | flags |= PM_SWAP; |
| 1361 | | - if (is_migration_entry(entry)) |
| 1470 | + if (is_migration_entry(entry)) { |
| 1471 | + migration = true; |
| 1362 | 1472 | page = migration_entry_to_page(entry); |
| 1473 | + } |
| 1363 | 1474 | |
| 1364 | 1475 | if (is_device_private_entry(entry)) |
| 1365 | 1476 | page = device_private_entry_to_page(entry); |
| .. | .. |
| 1367 | 1478 | |
| 1368 | 1479 | if (page && !PageAnon(page)) |
| 1369 | 1480 | flags |= PM_FILE; |
| 1370 | | - if (page && page_mapcount(page) == 1) |
| 1481 | + if (page && !migration && page_mapcount(page) == 1) |
| 1371 | 1482 | flags |= PM_MMAP_EXCLUSIVE; |
| 1372 | 1483 | if (vma->vm_flags & VM_SOFTDIRTY) |
| 1373 | 1484 | flags |= PM_SOFT_DIRTY; |
| .. | .. |
| 1383 | 1494 | spinlock_t *ptl; |
| 1384 | 1495 | pte_t *pte, *orig_pte; |
| 1385 | 1496 | int err = 0; |
| 1386 | | - |
| 1387 | 1497 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
| 1498 | + bool migration = false; |
| 1499 | + |
| 1388 | 1500 | ptl = pmd_trans_huge_lock(pmdp, vma); |
| 1389 | 1501 | if (ptl) { |
| 1390 | 1502 | u64 flags = 0, frame = 0; |
| .. | .. |
| 1419 | 1531 | if (pmd_swp_soft_dirty(pmd)) |
| 1420 | 1532 | flags |= PM_SOFT_DIRTY; |
| 1421 | 1533 | VM_BUG_ON(!is_pmd_migration_entry(pmd)); |
| 1534 | + migration = is_migration_entry(entry); |
| 1422 | 1535 | page = migration_entry_to_page(entry); |
| 1423 | 1536 | } |
| 1424 | 1537 | #endif |
| 1425 | 1538 | |
| 1426 | | - if (page && page_mapcount(page) == 1) |
| 1539 | + if (page && !migration && page_mapcount(page) == 1) |
| 1427 | 1540 | flags |= PM_MMAP_EXCLUSIVE; |
| 1428 | 1541 | |
| 1429 | 1542 | for (; addr != end; addr += PAGE_SIZE) { |
| .. | .. |
| 1512 | 1625 | |
| 1513 | 1626 | return err; |
| 1514 | 1627 | } |
| 1628 | +#else |
| 1629 | +#define pagemap_hugetlb_range NULL |
| 1515 | 1630 | #endif /* HUGETLB_PAGE */ |
| 1631 | + |
| 1632 | +static const struct mm_walk_ops pagemap_ops = { |
| 1633 | + .pmd_entry = pagemap_pmd_range, |
| 1634 | + .pte_hole = pagemap_pte_hole, |
| 1635 | + .hugetlb_entry = pagemap_hugetlb_range, |
| 1636 | +}; |
| 1516 | 1637 | |
| 1517 | 1638 | /* |
| 1518 | 1639 | * /proc/pid/pagemap - an array mapping virtual pages to pfns |
| .. | .. |
| 1545 | 1666 | { |
| 1546 | 1667 | struct mm_struct *mm = file->private_data; |
| 1547 | 1668 | struct pagemapread pm; |
| 1548 | | - struct mm_walk pagemap_walk = {}; |
| 1549 | 1669 | unsigned long src; |
| 1550 | 1670 | unsigned long svpfn; |
| 1551 | 1671 | unsigned long start_vaddr; |
| .. | .. |
| 1573 | 1693 | if (!pm.buffer) |
| 1574 | 1694 | goto out_mm; |
| 1575 | 1695 | |
| 1576 | | - pagemap_walk.pmd_entry = pagemap_pmd_range; |
| 1577 | | - pagemap_walk.pte_hole = pagemap_pte_hole; |
| 1578 | | -#ifdef CONFIG_HUGETLB_PAGE |
| 1579 | | - pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; |
| 1580 | | -#endif |
| 1581 | | - pagemap_walk.mm = mm; |
| 1582 | | - pagemap_walk.private = &pm; |
| 1583 | | - |
| 1584 | 1696 | src = *ppos; |
| 1585 | 1697 | svpfn = src / PM_ENTRY_BYTES; |
| 1586 | | - start_vaddr = svpfn << PAGE_SHIFT; |
| 1587 | 1698 | end_vaddr = mm->task_size; |
| 1588 | 1699 | |
| 1589 | 1700 | /* watch out for wraparound */ |
| 1590 | | - if (svpfn > mm->task_size >> PAGE_SHIFT) |
| 1701 | + start_vaddr = end_vaddr; |
| 1702 | + if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) |
| 1703 | + start_vaddr = untagged_addr(svpfn << PAGE_SHIFT); |
| 1704 | + |
| 1705 | + /* Ensure the address is inside the task */ |
| 1706 | + if (start_vaddr > mm->task_size) |
| 1591 | 1707 | start_vaddr = end_vaddr; |
| 1592 | 1708 | |
| 1593 | 1709 | /* |
| .. | .. |
| 1606 | 1722 | /* overflow ? */ |
| 1607 | 1723 | if (end < start_vaddr || end > end_vaddr) |
| 1608 | 1724 | end = end_vaddr; |
| 1609 | | - ret = down_read_killable(&mm->mmap_sem); |
| 1725 | + ret = mmap_read_lock_killable(mm); |
| 1610 | 1726 | if (ret) |
| 1611 | 1727 | goto out_free; |
| 1612 | | - ret = walk_page_range(start_vaddr, end, &pagemap_walk); |
| 1613 | | - up_read(&mm->mmap_sem); |
| 1728 | + ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm); |
| 1729 | + mmap_read_unlock(mm); |
| 1614 | 1730 | start_vaddr = end; |
| 1615 | 1731 | |
| 1616 | 1732 | len = min(count, PM_ENTRY_BYTES * pm.pos); |
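pagemap_read() now derives start_vaddr defensively — the svpfn << PAGE_SHIFT shift only happens when it cannot overflow, the result is passed through untagged_addr(), and anything past mm->task_size reads as end-of-file — but the file layout is unchanged: one 8-byte entry per virtual page (PM_ENTRY_BYTES), which is exactly what the svpfn = src / PM_ENTRY_BYTES computation assumes. So the userspace indexing rule stays offset = (vaddr / page_size) * 8:

```c
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	int probe = 42;			/* any mapped address works */
	uint64_t entry;
	int fd = open("/proc/self/pagemap", O_RDONLY);

	pread(fd, &entry, sizeof(entry),
	      ((uintptr_t)&probe / psz) * sizeof(entry));	/* vpn * 8 */

	/* bit 63 = present; bits 0-54 = PFN (read as 0 without CAP_SYS_ADMIN) */
	printf("present=%d pfn=0x%llx\n",
	       (int)((entry >> 63) & 1),
	       (unsigned long long)(entry & ((1ULL << 55) - 1)));
	return 0;
}
```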
| .. | .. |
| 1821 | 1937 | } |
| 1822 | 1938 | #endif |
| 1823 | 1939 | |
| 1940 | +static const struct mm_walk_ops show_numa_ops = { |
| 1941 | + .hugetlb_entry = gather_hugetlb_stats, |
| 1942 | + .pmd_entry = gather_pte_stats, |
| 1943 | +}; |
| 1944 | + |
| 1824 | 1945 | /* |
| 1825 | 1946 | * Display pages allocated per node and memory policy via /proc. |
| 1826 | 1947 | */ |
| .. | .. |
| 1832 | 1953 | struct numa_maps *md = &numa_priv->md; |
| 1833 | 1954 | struct file *file = vma->vm_file; |
| 1834 | 1955 | struct mm_struct *mm = vma->vm_mm; |
| 1835 | | - struct mm_walk walk = { |
| 1836 | | - .hugetlb_entry = gather_hugetlb_stats, |
| 1837 | | - .pmd_entry = gather_pte_stats, |
| 1838 | | - .private = md, |
| 1839 | | - .mm = mm, |
| 1840 | | - }; |
| 1841 | 1956 | struct mempolicy *pol; |
| 1842 | 1957 | char buffer[64]; |
| 1843 | 1958 | int nid; |
| .. | .. |
| 1870 | 1985 | if (is_vm_hugetlb_page(vma)) |
| 1871 | 1986 | seq_puts(m, " huge"); |
| 1872 | 1987 | |
| 1873 | | - /* mmap_sem is held by m_start */ |
| 1874 | | - walk_page_vma(vma, &walk); |
| 1988 | + /* mmap_lock is held by m_start */ |
| 1989 | + walk_page_vma(vma, &show_numa_ops, md); |
| 1875 | 1990 | |
| 1876 | 1991 | if (!md->pages) |
| 1877 | 1992 | goto out; |
| .. | .. |
| 1904 | 2019 | seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10); |
| 1905 | 2020 | out: |
| 1906 | 2021 | seq_putc(m, '\n'); |
| 1907 | | - m_cache_vma(m, vma); |
| 1908 | 2022 | return 0; |
| 1909 | 2023 | } |
| 1910 | 2024 | |