.. | ..
1 | 1 | // SPDX-License-Identifier: GPL-2.0
2 | | -#include <linux/mm.h>
| 2 | +#include <linux/pagewalk.h>
3 | 3 | #include <linux/vmacache.h>
4 | 4 | #include <linux/hugetlb.h>
5 | 5 | #include <linux/huge_mm.h>
.. | ..
59 | 59 | SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
60 | 60 | SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
61 | 61 | SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
62 | | - SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
| 62 | + SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
63 | 63 | SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
64 | 64 | SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
65 | 65 | SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
.. | ..
147 | 147 | long pages_pinned;
148 | 148 | struct page *page;
149 | 149 |
150 | | - pages_pinned = get_user_pages_remote(current, mm,
151 | | - page_start_vaddr, 1, 0, &page, NULL, NULL);
| 150 | + pages_pinned = get_user_pages_remote(mm, page_start_vaddr, 1, 0,
| 151 | + &page, NULL, NULL);
152 | 152 | if (pages_pinned < 1) {
153 | 153 | seq_puts(m, "<fault>]");
154 | 154 | return;
.. | ..
159 | 159 | write_len = strnlen(kaddr + page_offset, len);
160 | 160 | seq_write(m, kaddr + page_offset, write_len);
161 | 161 | kunmap(page);
162 | | - put_page(page);
| 162 | + put_user_page(page);
163 | 163 |
164 | 164 | /* if strnlen hit a null terminator then we're done */
165 | 165 | if (write_len != len)
.. | ..
173 | 173 | seq_putc(m, ']');
174 | 174 | }
175 | 175 |
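The two hunks above switch this helper to the newer task-less get_user_pages_remote() signature and release the page with put_user_page() instead of put_page(). A minimal sketch of the resulting pin/map/unpin pattern (dump_one_user_page() is a hypothetical helper, not from this file; it only assumes the transitional put_user_page() API that this tree carries):

```c
static int dump_one_user_page(struct mm_struct *mm, unsigned long vaddr)
{
	struct page *page;
	long pages_pinned;

	/* Pin exactly one page of the target mm; note: no task argument anymore. */
	pages_pinned = get_user_pages_remote(mm, vaddr, 1, 0, &page, NULL, NULL);
	if (pages_pinned < 1)
		return -EFAULT;

	/* ... kmap(page), consume the bytes, kunmap(page) ... */

	put_user_page(page);	/* pairs with the GUP pin above, not put_page() */
	return 0;
}
```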
176 | | -static void vma_stop(struct proc_maps_private *priv)
177 | | -{
178 | | - struct mm_struct *mm = priv->mm;
179 | | -
180 | | - release_task_mempolicy(priv);
181 | | - up_read(&mm->mmap_sem);
182 | | - mmput(mm);
183 | | -}
184 | | -
185 | | -static struct vm_area_struct *
186 | | -m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma)
187 | | -{
188 | | - if (vma == priv->tail_vma)
189 | | - return NULL;
190 | | - return vma->vm_next ?: priv->tail_vma;
191 | | -}
192 | | -
193 | | -static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma)
194 | | -{
195 | | - if (m->count < m->size) /* vma is copied successfully */
196 | | - m->version = m_next_vma(m->private, vma) ? vma->vm_end : -1UL;
197 | | -}
198 | | -
199 | 176 | static void *m_start(struct seq_file *m, loff_t *ppos)
200 | 177 | {
201 | 178 | struct proc_maps_private *priv = m->private;
202 | | - unsigned long last_addr = m->version;
| 179 | + unsigned long last_addr = *ppos;
203 | 180 | struct mm_struct *mm;
204 | 181 | struct vm_area_struct *vma;
205 | | - unsigned int pos = *ppos;
206 | 182 |
207 | | - /* See m_cache_vma(). Zero at the start or after lseek. */
| 183 | + /* See m_next(). Zero at the start or after lseek. */
208 | 184 | if (last_addr == -1UL)
209 | 185 | return NULL;
210 | 186 |
.. | ..
213 | 189 | return ERR_PTR(-ESRCH);
214 | 190 |
215 | 191 | mm = priv->mm;
216 | | - if (!mm || !mmget_not_zero(mm))
| 192 | + if (!mm || !mmget_not_zero(mm)) {
| 193 | + put_task_struct(priv->task);
| 194 | + priv->task = NULL;
217 | 195 | return NULL;
| 196 | + }
218 | 197 |
219 | | - if (down_read_killable(&mm->mmap_sem)) {
| 198 | + if (mmap_read_lock_killable(mm)) {
220 | 199 | mmput(mm);
| 200 | + put_task_struct(priv->task);
| 201 | + priv->task = NULL;
221 | 202 | return ERR_PTR(-EINTR);
222 | 203 | }
223 | 204 |
224 | 205 | hold_task_mempolicy(priv);
225 | 206 | priv->tail_vma = get_gate_vma(mm);
226 | 207 |
227 | | - if (last_addr) {
228 | | - vma = find_vma(mm, last_addr - 1);
229 | | - if (vma && vma->vm_start <= last_addr)
230 | | - vma = m_next_vma(priv, vma);
231 | | - if (vma)
232 | | - return vma;
233 | | - }
234 | | -
235 | | - m->version = 0;
236 | | - if (pos < mm->map_count) {
237 | | - for (vma = mm->mmap; pos; pos--) {
238 | | - m->version = vma->vm_start;
239 | | - vma = vma->vm_next;
240 | | - }
| 208 | + vma = find_vma(mm, last_addr);
| 209 | + if (vma)
241 | 210 | return vma;
242 | | - }
243 | 211 |
244 | | - /* we do not bother to update m->version in this case */
245 | | - if (pos == mm->map_count && priv->tail_vma)
246 | | - return priv->tail_vma;
247 | | -
248 | | - vma_stop(priv);
249 | | - return NULL;
| 212 | + return priv->tail_vma;
250 | 213 | }
251 | 214 |
252 | | -static void *m_next(struct seq_file *m, void *v, loff_t *pos)
| 215 | +static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
253 | 216 | {
254 | 217 | struct proc_maps_private *priv = m->private;
255 | | - struct vm_area_struct *next;
| 218 | + struct vm_area_struct *next, *vma = v;
256 | 219 |
257 | | - (*pos)++;
258 | | - next = m_next_vma(priv, v);
259 | | - if (!next)
260 | | - vma_stop(priv);
| 220 | + if (vma == priv->tail_vma)
| 221 | + next = NULL;
| 222 | + else if (vma->vm_next)
| 223 | + next = vma->vm_next;
| 224 | + else
| 225 | + next = priv->tail_vma;
| 226 | +
| 227 | + *ppos = next ? next->vm_start : -1UL;
| 228 | +
261 | 229 | return next;
262 | 230 | }
263 | 231 |
264 | 232 | static void m_stop(struct seq_file *m, void *v)
265 | 233 | {
266 | 234 | struct proc_maps_private *priv = m->private;
| 235 | + struct mm_struct *mm = priv->mm;
267 | 236 |
268 | | - if (!IS_ERR_OR_NULL(v))
269 | | - vma_stop(priv);
270 | | - if (priv->task) {
271 | | - put_task_struct(priv->task);
272 | | - priv->task = NULL;
273 | | - }
| 237 | + if (!priv->task)
| 238 | + return;
| 239 | +
| 240 | + release_task_mempolicy(priv);
| 241 | + mmap_read_unlock(mm);
| 242 | + mmput(mm);
| 243 | + put_task_struct(priv->task);
| 244 | + priv->task = NULL;
274 | 245 | }
275 | 246 |
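With m_cache_vma() and vma_stop() removed, the iterator is driven entirely by *ppos: m_start() resumes via find_vma(mm, *ppos), m_next() writes the next VMA's vm_start (or -1UL at the end) back into *ppos, and m_stop() releases the mempolicy, the mmap lock, the mm and the task in one place. For orientation, these callbacks plug into the seq_file machinery elsewhere in this file roughly like this (a sketch of the wiring, not part of this hunk):

```c
static const struct seq_operations proc_pid_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_map,	/* show_smap / show_numa_map for the sibling files */
};
```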
276 | 247 | static int proc_maps_open(struct inode *inode, struct file *file,
.. | ..
420 | 391 | static int show_map(struct seq_file *m, void *v)
421 | 392 | {
422 | 393 | show_map_vma(m, v);
423 | | - m_cache_vma(m, v);
424 | 394 | return 0;
425 | 395 | }
426 | 396 |
.. | ..
474 | 444 | unsigned long lazyfree;
475 | 445 | unsigned long anonymous_thp;
476 | 446 | unsigned long shmem_thp;
| 447 | + unsigned long file_thp;
477 | 448 | unsigned long swap;
478 | 449 | unsigned long shared_hugetlb;
479 | 450 | unsigned long private_hugetlb;
480 | 451 | u64 pss;
| 452 | + u64 pss_anon;
| 453 | + u64 pss_file;
| 454 | + u64 pss_shmem;
481 | 455 | u64 pss_locked;
482 | 456 | u64 swap_pss;
483 | 457 | bool check_shmem_swap;
484 | 458 | };
485 | 459 |
486 | | -static void smaps_account(struct mem_size_stats *mss, struct page *page,
487 | | - bool compound, bool young, bool dirty, bool locked)
| 460 | +static void smaps_page_accumulate(struct mem_size_stats *mss,
| 461 | + struct page *page, unsigned long size, unsigned long pss,
| 462 | + bool dirty, bool locked, bool private)
488 | 463 | {
489 | | - int i, nr = compound ? 1 << compound_order(page) : 1;
| 464 | + mss->pss += pss;
| 465 | +
| 466 | + if (PageAnon(page))
| 467 | + mss->pss_anon += pss;
| 468 | + else if (PageSwapBacked(page))
| 469 | + mss->pss_shmem += pss;
| 470 | + else
| 471 | + mss->pss_file += pss;
| 472 | +
| 473 | + if (locked)
| 474 | + mss->pss_locked += pss;
| 475 | +
| 476 | + if (dirty || PageDirty(page)) {
| 477 | + if (private)
| 478 | + mss->private_dirty += size;
| 479 | + else
| 480 | + mss->shared_dirty += size;
| 481 | + } else {
| 482 | + if (private)
| 483 | + mss->private_clean += size;
| 484 | + else
| 485 | + mss->shared_clean += size;
| 486 | + }
| 487 | +}
| 488 | +
| 489 | +static void smaps_account(struct mem_size_stats *mss, struct page *page,
| 490 | + bool compound, bool young, bool dirty, bool locked,
| 491 | + bool migration)
| 492 | +{
| 493 | + int i, nr = compound ? compound_nr(page) : 1;
490 | 494 | unsigned long size = nr * PAGE_SIZE;
491 | 495 |
| 496 | + /*
| 497 | + * First accumulate quantities that depend only on |size| and the type
| 498 | + * of the compound page.
| 499 | + */
492 | 500 | if (PageAnon(page)) {
493 | 501 | mss->anonymous += size;
494 | 502 | if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
.. | ..
501 | 509 | mss->referenced += size;
502 | 510 |
503 | 511 | /*
| 512 | + * Then accumulate quantities that may depend on sharing, or that may
| 513 | + * differ page-by-page.
| 514 | + *
504 | 515 | * page_count(page) == 1 guarantees the page is mapped exactly once.
505 | 516 | * If any subpage of the compound page mapped with PTE it would elevate
506 | 517 | * page_count().
| 518 | + *
| 519 | + * The page_mapcount() is called to get a snapshot of the mapcount.
| 520 | + * Without holding the page lock this snapshot can be slightly wrong as
| 521 | + * we cannot always read the mapcount atomically. It is not safe to
| 522 | + * call page_mapcount() even with PTL held if the page is not mapped,
| 523 | + * especially for migration entries. Treat regular migration entries
| 524 | + * as mapcount == 1.
507 | 525 | */
508 | | - if (page_count(page) == 1) {
509 | | - if (dirty || PageDirty(page))
510 | | - mss->private_dirty += size;
511 | | - else
512 | | - mss->private_clean += size;
513 | | - mss->pss += (u64)size << PSS_SHIFT;
514 | | - if (locked)
515 | | - mss->pss_locked += (u64)size << PSS_SHIFT;
| 526 | + if ((page_count(page) == 1) || migration) {
| 527 | + smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
| 528 | + locked, true);
516 | 529 | return;
517 | 530 | }
518 | | -
519 | 531 | for (i = 0; i < nr; i++, page++) {
520 | 532 | int mapcount = page_mapcount(page);
521 | | - unsigned long pss = (PAGE_SIZE << PSS_SHIFT);
522 | | -
523 | | - if (mapcount >= 2) {
524 | | - if (dirty || PageDirty(page))
525 | | - mss->shared_dirty += PAGE_SIZE;
526 | | - else
527 | | - mss->shared_clean += PAGE_SIZE;
528 | | - mss->pss += pss / mapcount;
529 | | - if (locked)
530 | | - mss->pss_locked += pss / mapcount;
531 | | - } else {
532 | | - if (dirty || PageDirty(page))
533 | | - mss->private_dirty += PAGE_SIZE;
534 | | - else
535 | | - mss->private_clean += PAGE_SIZE;
536 | | - mss->pss += pss;
537 | | - if (locked)
538 | | - mss->pss_locked += pss;
539 | | - }
| 533 | + unsigned long pss = PAGE_SIZE << PSS_SHIFT;
| 534 | + if (mapcount >= 2)
| 535 | + pss /= mapcount;
| 536 | + smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked,
| 537 | + mapcount < 2);
540 | 538 | }
541 | 539 | }
542 | 540 |
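All of the clean/dirty, shared/private and PSS bookkeeping now funnels through smaps_page_accumulate(), with the PSS contribution pre-divided by the mapcount for shared pages. A worked example of the fixed-point arithmetic (illustrative numbers only, assuming PSS_SHIFT == 12 as defined earlier in this file):

```c
/* One 4 KiB page mapped by three processes. */
unsigned long size = PAGE_SIZE;			/* 4096 */
unsigned long pss  = PAGE_SIZE << PSS_SHIFT;	/* 4096 << 12 = 16777216 */
int mapcount = 3;

if (mapcount >= 2)
	pss /= mapcount;	/* 5592405, i.e. ~1365.33 bytes in <<PSS_SHIFT fixed point */

/*
 * Each of the three mappings adds 'size' to shared_clean/shared_dirty but
 * only 'pss' to mss->pss, so Pss (printed after >> PSS_SHIFT) sums back to
 * roughly one page across all three processes.
 */
```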
543 | 541 | #ifdef CONFIG_SHMEM
544 | 542 | static int smaps_pte_hole(unsigned long addr, unsigned long end,
545 | | - struct mm_walk *walk)
| 543 | + __always_unused int depth, struct mm_walk *walk)
546 | 544 | {
547 | 545 | struct mem_size_stats *mss = walk->private;
548 | 546 |
.. | ..
551 | 549 |
552 | 550 | return 0;
553 | 551 | }
554 | | -#endif
| 552 | +#else
| 553 | +#define smaps_pte_hole NULL
| 554 | +#endif /* CONFIG_SHMEM */
555 | 555 |
556 | 556 | static void smaps_pte_entry(pte_t *pte, unsigned long addr,
557 | 557 | struct mm_walk *walk)
.. | ..
560 | 560 | struct vm_area_struct *vma = walk->vma;
561 | 561 | bool locked = !!(vma->vm_flags & VM_LOCKED);
562 | 562 | struct page *page = NULL;
| 563 | + bool migration = false, young = false, dirty = false;
563 | 564 |
564 | 565 | if (pte_present(*pte)) {
565 | 566 | page = vm_normal_page(vma, addr, *pte);
| 567 | + young = pte_young(*pte);
| 568 | + dirty = pte_dirty(*pte);
566 | 569 | } else if (is_swap_pte(*pte)) {
567 | 570 | swp_entry_t swpent = pte_to_swp_entry(*pte);
568 | 571 |
.. | ..
579 | 582 | } else {
580 | 583 | mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
581 | 584 | }
582 | | - } else if (is_migration_entry(swpent))
| 585 | + } else if (is_migration_entry(swpent)) {
| 586 | + migration = true;
583 | 587 | page = migration_entry_to_page(swpent);
584 | | - else if (is_device_private_entry(swpent))
| 588 | + } else if (is_device_private_entry(swpent))
585 | 589 | page = device_private_entry_to_page(swpent);
586 | 590 | } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
587 | 591 | && pte_none(*pte))) {
588 | | - page = find_get_entry(vma->vm_file->f_mapping,
| 592 | + page = xa_load(&vma->vm_file->f_mapping->i_pages,
589 | 593 | linear_page_index(vma, addr));
590 | | - if (!page)
591 | | - return;
592 | | -
593 | | - if (radix_tree_exceptional_entry(page))
| 594 | + if (xa_is_value(page))
594 | 595 | mss->swap += PAGE_SIZE;
595 | | - else
596 | | - put_page(page);
597 | | -
598 | 596 | return;
599 | 597 | }
600 | 598 |
601 | 599 | if (!page)
602 | 600 | return;
603 | 601 |
604 | | - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
| 602 | + smaps_account(mss, page, false, young, dirty, locked, migration);
605 | 603 | }
606 | 604 |
607 | 605 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE
.. | ..
611 | 609 | struct mem_size_stats *mss = walk->private;
612 | 610 | struct vm_area_struct *vma = walk->vma;
613 | 611 | bool locked = !!(vma->vm_flags & VM_LOCKED);
614 | | - struct page *page;
| 612 | + struct page *page = NULL;
| 613 | + bool migration = false;
615 | 614 |
616 | | - /* FOLL_DUMP will return -EFAULT on huge zero page */
617 | | - page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
| 615 | + if (pmd_present(*pmd)) {
| 616 | + /* FOLL_DUMP will return -EFAULT on huge zero page */
| 617 | + page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
| 618 | + } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
| 619 | + swp_entry_t entry = pmd_to_swp_entry(*pmd);
| 620 | +
| 621 | + if (is_migration_entry(entry)) {
| 622 | + migration = true;
| 623 | + page = migration_entry_to_page(entry);
| 624 | + }
| 625 | + }
618 | 626 | if (IS_ERR_OR_NULL(page))
619 | 627 | return;
620 | 628 | if (PageAnon(page))
.. | ..
624 | 632 | else if (is_zone_device_page(page))
625 | 633 | /* pass */;
626 | 634 | else
627 | | - VM_BUG_ON_PAGE(1, page);
628 | | - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
| 635 | + mss->file_thp += HPAGE_PMD_SIZE;
| 636 | +
| 637 | + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
| 638 | + locked, migration);
629 | 639 | }
630 | 640 | #else
631 | 641 | static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
.. | ..
643 | 653 |
644 | 654 | ptl = pmd_trans_huge_lock(pmd, vma);
645 | 655 | if (ptl) {
646 | | - if (pmd_present(*pmd))
647 | | - smaps_pmd_entry(pmd, addr, walk);
| 656 | + smaps_pmd_entry(pmd, addr, walk);
648 | 657 | spin_unlock(ptl);
649 | 658 | goto out;
650 | 659 | }
.. | ..
652 | 661 | if (pmd_trans_unstable(pmd))
653 | 662 | goto out;
654 | 663 | /*
655 | | - * The mmap_sem held all the way back in m_start() is what
| 664 | + * The mmap_lock held all the way back in m_start() is what
656 | 665 | * keeps khugepaged out of here and from collapsing things
657 | 666 | * in here.
658 | 667 | */
.. | ..
687 | 696 | [ilog2(VM_GROWSDOWN)] = "gd",
688 | 697 | [ilog2(VM_PFNMAP)] = "pf",
689 | 698 | [ilog2(VM_DENYWRITE)] = "dw",
690 | | -#ifdef CONFIG_X86_INTEL_MPX
691 | | - [ilog2(VM_MPX)] = "mp",
692 | | -#endif
693 | 699 | [ilog2(VM_LOCKED)] = "lo",
694 | 700 | [ilog2(VM_IO)] = "io",
695 | 701 | [ilog2(VM_SEQ_READ)] = "sr",
.. | ..
703 | 709 | [ilog2(VM_ARCH_1)] = "ar",
704 | 710 | [ilog2(VM_WIPEONFORK)] = "wf",
705 | 711 | [ilog2(VM_DONTDUMP)] = "dd",
| 712 | +#ifdef CONFIG_ARM64_BTI
| 713 | + [ilog2(VM_ARM64_BTI)] = "bt",
| 714 | +#endif
706 | 715 | #ifdef CONFIG_MEM_SOFT_DIRTY
707 | 716 | [ilog2(VM_SOFTDIRTY)] = "sd",
708 | 717 | #endif
.. | ..
712 | 721 | [ilog2(VM_MERGEABLE)] = "mg",
713 | 722 | [ilog2(VM_UFFD_MISSING)]= "um",
714 | 723 | [ilog2(VM_UFFD_WP)] = "uw",
| 724 | +#ifdef CONFIG_ARM64_MTE
| 725 | + [ilog2(VM_MTE)] = "mt",
| 726 | + [ilog2(VM_MTE_ALLOWED)] = "",
| 727 | +#endif
715 | 728 | #ifdef CONFIG_ARCH_HAS_PKEYS
716 | 729 | /* These come out via ProtectionKey: */
717 | 730 | [ilog2(VM_PKEY_BIT0)] = "",
.. | ..
722 | 735 | [ilog2(VM_PKEY_BIT4)] = "",
723 | 736 | #endif
724 | 737 | #endif /* CONFIG_ARCH_HAS_PKEYS */
| 738 | +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
| 739 | + [ilog2(VM_UFFD_MINOR)] = "ui",
| 740 | +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
725 | 741 | };
726 | 742 | size_t i;
727 | 743 |
.. | ..
758 | 774 | page = device_private_entry_to_page(swpent);
759 | 775 | }
760 | 776 | if (page) {
761 | | - int mapcount = page_mapcount(page);
762 | | -
763 | | - if (mapcount >= 2)
| 777 | + if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte))
764 | 778 | mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
765 | 779 | else
766 | 780 | mss->private_hugetlb += huge_page_size(hstate_vma(vma));
767 | 781 | }
768 | 782 | return 0;
769 | 783 | }
| 784 | +#else
| 785 | +#define smaps_hugetlb_range NULL
770 | 786 | #endif /* HUGETLB_PAGE */
771 | 787 |
772 | | -static void smap_gather_stats(struct vm_area_struct *vma,
773 | | - struct mem_size_stats *mss)
774 | | -{
775 | | - struct mm_walk smaps_walk = {
776 | | - .pmd_entry = smaps_pte_range,
777 | | -#ifdef CONFIG_HUGETLB_PAGE
778 | | - .hugetlb_entry = smaps_hugetlb_range,
779 | | -#endif
780 | | - .mm = vma->vm_mm,
781 | | - };
| 788 | +static const struct mm_walk_ops smaps_walk_ops = {
| 789 | + .pmd_entry = smaps_pte_range,
| 790 | + .hugetlb_entry = smaps_hugetlb_range,
| 791 | +};
782 | 792 |
783 | | - smaps_walk.private = mss;
| 793 | +static const struct mm_walk_ops smaps_shmem_walk_ops = {
| 794 | + .pmd_entry = smaps_pte_range,
| 795 | + .hugetlb_entry = smaps_hugetlb_range,
| 796 | + .pte_hole = smaps_pte_hole,
| 797 | +};
| 798 | +
| 799 | +/*
| 800 | + * Gather mem stats from @vma with the indicated beginning
| 801 | + * address @start, and keep them in @mss.
| 802 | + *
| 803 | + * Use vm_start of @vma as the beginning address if @start is 0.
| 804 | + */
| 805 | +static void smap_gather_stats(struct vm_area_struct *vma,
| 806 | + struct mem_size_stats *mss, unsigned long start)
| 807 | +{
| 808 | + const struct mm_walk_ops *ops = &smaps_walk_ops;
| 809 | +
| 810 | + /* Invalid start */
| 811 | + if (start >= vma->vm_end)
| 812 | + return;
784 | 813 |
785 | 814 | #ifdef CONFIG_SHMEM
786 | 815 | /* In case of smaps_rollup, reset the value from previous vma */
.. | ..
798 | 827 | */
799 | 828 | unsigned long shmem_swapped = shmem_swap_usage(vma);
800 | 829 |
801 | | - if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
802 | | - !(vma->vm_flags & VM_WRITE)) {
| 830 | + if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
| 831 | + !(vma->vm_flags & VM_WRITE))) {
803 | 832 | mss->swap += shmem_swapped;
804 | 833 | } else {
805 | 834 | mss->check_shmem_swap = true;
806 | | - smaps_walk.pte_hole = smaps_pte_hole;
| 835 | + ops = &smaps_shmem_walk_ops;
807 | 836 | }
808 | 837 | }
809 | 838 | #endif
810 | | - /* mmap_sem is held in m_start */
811 | | - walk_page_vma(vma, &smaps_walk);
| 839 | + /* mmap_lock is held in m_start */
| 840 | + if (!start)
| 841 | + walk_page_vma(vma, ops, mss);
| 842 | + else
| 843 | + walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss);
812 | 844 | }
813 | 845 |
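This hunk shows the pattern applied to every walker converted by this patch: the callbacks move into a const struct mm_walk_ops, and the mm and the private pointer are supplied at call time to walk_page_vma()/walk_page_range() rather than being stored in an on-stack struct mm_walk. A minimal, self-contained sketch of a walker under the <linux/pagewalk.h> API (the names below are illustrative, not from this file):

```c
#include <linux/pagewalk.h>

struct present_count {
	unsigned long nr;
};

/* Called for every PTE in the walked range. */
static int count_pte(pte_t *pte, unsigned long addr,
		     unsigned long next, struct mm_walk *walk)
{
	struct present_count *pc = walk->private;

	if (pte_present(*pte))
		pc->nr++;
	return 0;
}

static const struct mm_walk_ops count_walk_ops = {
	.pte_entry = count_pte,
};

/* Caller: the mmap lock must be held for read, as m_start() ensures here. */
static unsigned long count_present_pages(struct mm_struct *mm,
					 unsigned long start, unsigned long end)
{
	struct present_count pc = { 0 };

	walk_page_range(mm, start, end, &count_walk_ops, &pc);
	return pc.nr;
}
```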
814 | 846 | #define SEQ_PUT_DEC(str, val) \
815 | 847 | seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
816 | 848 |
817 | 849 | /* Show the contents common for smaps and smaps_rollup */
818 | | -static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss)
| 850 | +static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
| 851 | + bool rollup_mode)
819 | 852 | {
820 | 853 | SEQ_PUT_DEC("Rss: ", mss->resident);
821 | 854 | SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
| 855 | + if (rollup_mode) {
| 856 | + /*
| 857 | + * These are meaningful only for smaps_rollup, otherwise two of
| 858 | + * them are zero, and the other one is the same as Pss.
| 859 | + */
| 860 | + SEQ_PUT_DEC(" kB\nPss_Anon: ",
| 861 | + mss->pss_anon >> PSS_SHIFT);
| 862 | + SEQ_PUT_DEC(" kB\nPss_File: ",
| 863 | + mss->pss_file >> PSS_SHIFT);
| 864 | + SEQ_PUT_DEC(" kB\nPss_Shmem: ",
| 865 | + mss->pss_shmem >> PSS_SHIFT);
| 866 | + }
822 | 867 | SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
823 | 868 | SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
824 | 869 | SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
.. | ..
828 | 873 | SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
829 | 874 | SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
830 | 875 | SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
| 876 | + SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
831 | 877 | SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
832 | 878 | seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
833 | 879 | mss->private_hugetlb >> 10, 7);
.. | ..
846 | 892 |
847 | 893 | memset(&mss, 0, sizeof(mss));
848 | 894 |
849 | | - smap_gather_stats(vma, &mss);
| 895 | + smap_gather_stats(vma, &mss, 0);
850 | 896 |
851 | 897 | show_map_vma(m, vma);
852 | 898 | if (vma_get_anon_name(vma)) {
.. | ..
860 | 906 | SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
861 | 907 | seq_puts(m, " kB\n");
862 | 908 |
863 | | - __show_smap(m, &mss);
| 909 | + __show_smap(m, &mss, false);
864 | 910 |
865 | | - seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma));
| 911 | + seq_printf(m, "THPeligible: %d\n",
| 912 | + transparent_hugepage_active(vma));
866 | 913 |
867 | 914 | if (arch_pkeys_enabled())
868 | 915 | seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
869 | 916 | show_smap_vma_flags(m, vma);
870 | | -
871 | | - m_cache_vma(m, vma);
872 | 917 |
873 | 918 | return 0;
874 | 919 | }
.. | ..
894 | 939 |
895 | 940 | memset(&mss, 0, sizeof(mss));
896 | 941 |
897 | | - ret = down_read_killable(&mm->mmap_sem);
| 942 | + ret = mmap_read_lock_killable(mm);
898 | 943 | if (ret)
899 | 944 | goto out_put_mm;
900 | 945 |
901 | 946 | hold_task_mempolicy(priv);
902 | 947 |
903 | | - for (vma = priv->mm->mmap; vma; vma = vma->vm_next) {
904 | | - smap_gather_stats(vma, &mss);
| 948 | + for (vma = priv->mm->mmap; vma;) {
| 949 | + smap_gather_stats(vma, &mss, 0);
905 | 950 | last_vma_end = vma->vm_end;
| 951 | +
| 952 | + /*
| 953 | + * Release mmap_lock temporarily if someone wants to
| 954 | + * access it for write request.
| 955 | + */
| 956 | + if (mmap_lock_is_contended(mm)) {
| 957 | + mmap_read_unlock(mm);
| 958 | + ret = mmap_read_lock_killable(mm);
| 959 | + if (ret) {
| 960 | + release_task_mempolicy(priv);
| 961 | + goto out_put_mm;
| 962 | + }
| 963 | +
| 964 | + /*
| 965 | + * After dropping the lock, there are four cases to
| 966 | + * consider. See the following example for explanation.
| 967 | + *
| 968 | + * +------+------+-----------+
| 969 | + * | VMA1 | VMA2 | VMA3 |
| 970 | + * +------+------+-----------+
| 971 | + * | | | |
| 972 | + * 4k 8k 16k 400k
| 973 | + *
| 974 | + * Suppose we drop the lock after reading VMA2 due to
| 975 | + * contention, then we get:
| 976 | + *
| 977 | + * last_vma_end = 16k
| 978 | + *
| 979 | + * 1) VMA2 is freed, but VMA3 exists:
| 980 | + *
| 981 | + * find_vma(mm, 16k - 1) will return VMA3.
| 982 | + * In this case, just continue from VMA3.
| 983 | + *
| 984 | + * 2) VMA2 still exists:
| 985 | + *
| 986 | + * find_vma(mm, 16k - 1) will return VMA2.
| 987 | + * Iterate the loop like the original one.
| 988 | + *
| 989 | + * 3) No more VMAs can be found:
| 990 | + *
| 991 | + * find_vma(mm, 16k - 1) will return NULL.
| 992 | + * No more things to do, just break.
| 993 | + *
| 994 | + * 4) (last_vma_end - 1) is the middle of a vma (VMA'):
| 995 | + *
| 996 | + * find_vma(mm, 16k - 1) will return VMA' whose range
| 997 | + * contains last_vma_end.
| 998 | + * Iterate VMA' from last_vma_end.
| 999 | + */
| 1000 | + vma = find_vma(mm, last_vma_end - 1);
| 1001 | + /* Case 3 above */
| 1002 | + if (!vma)
| 1003 | + break;
| 1004 | +
| 1005 | + /* Case 1 above */
| 1006 | + if (vma->vm_start >= last_vma_end)
| 1007 | + continue;
| 1008 | +
| 1009 | + /* Case 4 above */
| 1010 | + if (vma->vm_end > last_vma_end)
| 1011 | + smap_gather_stats(vma, &mss, last_vma_end);
| 1012 | + }
| 1013 | + /* Case 2 above */
| 1014 | + vma = vma->vm_next;
906 | 1015 | }
907 | 1016 |
908 | | - show_vma_header_prefix(m, priv->mm->mmap->vm_start,
| 1017 | + show_vma_header_prefix(m, priv->mm->mmap ? priv->mm->mmap->vm_start : 0,
909 | 1018 | last_vma_end, 0, 0, 0, 0);
910 | 1019 | seq_pad(m, ' ');
911 | 1020 | seq_puts(m, "[rollup]\n");
912 | 1021 |
913 | | - __show_smap(m, &mss);
| 1022 | + __show_smap(m, &mss, true);
914 | 1023 |
915 | 1024 | release_task_mempolicy(priv);
916 | | - up_read(&mm->mmap_sem);
| 1025 | + mmap_read_unlock(mm);
917 | 1026 |
918 | 1027 | out_put_mm:
919 | 1028 | mmput(mm);
.. | ..
1006 | 1115 | };
1007 | 1116 |
1008 | 1117 | #ifdef CONFIG_MEM_SOFT_DIRTY
| 1118 | +
| 1119 | +#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE)
| 1120 | +
| 1121 | +static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
| 1122 | +{
| 1123 | + struct page *page;
| 1124 | +
| 1125 | + if (!pte_write(pte))
| 1126 | + return false;
| 1127 | + if (!is_cow_mapping(vma->vm_flags))
| 1128 | + return false;
| 1129 | + if (likely(!atomic_read(&vma->vm_mm->has_pinned)))
| 1130 | + return false;
| 1131 | + page = vm_normal_page(vma, addr, pte);
| 1132 | + if (!page)
| 1133 | + return false;
| 1134 | + return page_maybe_dma_pinned(page);
| 1135 | +}
| 1136 | +
1009 | 1137 | static inline void clear_soft_dirty(struct vm_area_struct *vma,
1010 | 1138 | unsigned long addr, pte_t *pte)
1011 | 1139 | {
.. | ..
1018 | 1146 | pte_t ptent = *pte;
1019 | 1147 |
1020 | 1148 | if (pte_present(ptent)) {
1021 | | - ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte);
1022 | | - ptent = pte_wrprotect(ptent);
| 1149 | + pte_t old_pte;
| 1150 | +
| 1151 | + if (pte_is_pinned(vma, addr, ptent))
| 1152 | + return;
| 1153 | + old_pte = ptep_modify_prot_start(vma, addr, pte);
| 1154 | + ptent = pte_wrprotect(old_pte);
1023 | 1155 | ptent = pte_clear_soft_dirty(ptent);
1024 | | - ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent);
| 1156 | + ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
1025 | 1157 | } else if (is_swap_pte(ptent)) {
1026 | 1158 | ptent = pte_swp_clear_soft_dirty(ptent);
1027 | 1159 | set_pte_at(vma->vm_mm, addr, pte, ptent);
.. | ..
1145 | 1277 | return 0;
1146 | 1278 | }
1147 | 1279 |
| 1280 | +static const struct mm_walk_ops clear_refs_walk_ops = {
| 1281 | + .pmd_entry = clear_refs_pte_range,
| 1282 | + .test_walk = clear_refs_test_walk,
| 1283 | +};
| 1284 | +
1148 | 1285 | static ssize_t clear_refs_write(struct file *file, const char __user *buf,
1149 | 1286 | size_t count, loff_t *ppos)
1150 | 1287 | {
.. | ..
1153 | 1290 | struct mm_struct *mm;
1154 | 1291 | struct vm_area_struct *vma;
1155 | 1292 | enum clear_refs_types type;
1156 | | - struct mmu_gather tlb;
1157 | 1293 | int itype;
1158 | 1294 | int rv;
1159 | 1295 |
.. | ..
1174 | 1310 | return -ESRCH;
1175 | 1311 | mm = get_task_mm(task);
1176 | 1312 | if (mm) {
| 1313 | + struct mmu_notifier_range range;
1177 | 1314 | struct clear_refs_private cp = {
1178 | 1315 | .type = type,
1179 | 1316 | };
1180 | | - struct mm_walk clear_refs_walk = {
1181 | | - .pmd_entry = clear_refs_pte_range,
1182 | | - .test_walk = clear_refs_test_walk,
1183 | | - .mm = mm,
1184 | | - .private = &cp,
1185 | | - };
1186 | 1317 |
| 1318 | + if (mmap_write_lock_killable(mm)) {
| 1319 | + count = -EINTR;
| 1320 | + goto out_mm;
| 1321 | + }
1187 | 1322 | if (type == CLEAR_REFS_MM_HIWATER_RSS) {
1188 | | - if (down_write_killable(&mm->mmap_sem)) {
1189 | | - count = -EINTR;
1190 | | - goto out_mm;
1191 | | - }
1192 | | -
1193 | 1323 | /*
1194 | 1324 | * Writing 5 to /proc/pid/clear_refs resets the peak
1195 | 1325 | * resident set size to this mm's current rss value.
1196 | 1326 | */
1197 | 1327 | reset_mm_hiwater_rss(mm);
1198 | | - up_write(&mm->mmap_sem);
1199 | | - goto out_mm;
| 1328 | + goto out_unlock;
1200 | 1329 | }
1201 | 1330 |
1202 | | - if (down_read_killable(&mm->mmap_sem)) {
1203 | | - count = -EINTR;
1204 | | - goto out_mm;
1205 | | - }
1206 | | - tlb_gather_mmu(&tlb, mm, 0, -1);
1207 | 1331 | if (type == CLEAR_REFS_SOFT_DIRTY) {
1208 | 1332 | for (vma = mm->mmap; vma; vma = vma->vm_next) {
1209 | 1333 | if (!(vma->vm_flags & VM_SOFTDIRTY))
1210 | 1334 | continue;
1211 | | - up_read(&mm->mmap_sem);
1212 | | - if (down_write_killable(&mm->mmap_sem)) {
1213 | | - count = -EINTR;
1214 | | - goto out_mm;
1215 | | - }
1216 | | - /*
1217 | | - * Avoid to modify vma->vm_flags
1218 | | - * without locked ops while the
1219 | | - * coredump reads the vm_flags.
1220 | | - */
1221 | | - if (!mmget_still_valid(mm)) {
1222 | | - /*
1223 | | - * Silently return "count"
1224 | | - * like if get_task_mm()
1225 | | - * failed. FIXME: should this
1226 | | - * function have returned
1227 | | - * -ESRCH if get_task_mm()
1228 | | - * failed like if
1229 | | - * get_proc_task() fails?
1230 | | - */
1231 | | - up_write(&mm->mmap_sem);
1232 | | - goto out_mm;
1233 | | - }
1234 | | - for (vma = mm->mmap; vma; vma = vma->vm_next) {
1235 | | - vma->vm_flags &= ~VM_SOFTDIRTY;
1236 | | - vma_set_page_prot(vma);
1237 | | - }
1238 | | - downgrade_write(&mm->mmap_sem);
1239 | | - break;
| 1335 | + vm_write_begin(vma);
| 1336 | + WRITE_ONCE(vma->vm_flags,
| 1337 | + vma->vm_flags & ~VM_SOFTDIRTY);
| 1338 | + vma_set_page_prot(vma);
| 1339 | + vm_write_end(vma);
1240 | 1340 | }
1241 | | - mmu_notifier_invalidate_range_start(mm, 0, -1);
| 1341 | +
| 1342 | + inc_tlb_flush_pending(mm);
| 1343 | + mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
| 1344 | + 0, NULL, mm, 0, -1UL);
| 1345 | + mmu_notifier_invalidate_range_start(&range);
1242 | 1346 | }
1243 | | - walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
1244 | | - if (type == CLEAR_REFS_SOFT_DIRTY)
1245 | | - mmu_notifier_invalidate_range_end(mm, 0, -1);
1246 | | - tlb_finish_mmu(&tlb, 0, -1);
1247 | | - up_read(&mm->mmap_sem);
| 1347 | + walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
| 1348 | + &cp);
| 1349 | + if (type == CLEAR_REFS_SOFT_DIRTY) {
| 1350 | + mmu_notifier_invalidate_range_end(&range);
| 1351 | + flush_tlb_mm(mm);
| 1352 | + dec_tlb_flush_pending(mm);
| 1353 | + }
| 1354 | +out_unlock:
| 1355 | + mmap_write_unlock(mm);
1248 | 1356 | out_mm:
1249 | 1357 | mmput(mm);
1250 | 1358 | }
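For context, the values accepted by /proc/pid/clear_refs select the clear_refs_types handled above: 1-3 clear the referenced bits (all, anonymous-only, file-only), 4 clears the soft-dirty bits, and 5 resets the peak RSS, as the comment in the hunk notes. A small, hedged userspace sketch (hypothetical helper, not from the kernel tree) that clears soft-dirty before measuring a task's write working set:

```c
#include <stdio.h>
#include <sys/types.h>

/* Illustrative helper: write "4" (soft-dirty clear) to a task's clear_refs. */
static int clear_soft_dirty_bits(pid_t pid)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/clear_refs", (int)pid);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs("4", f);
	fclose(f);
	return 0;
}
```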
.. | ..
1297 | 1405 | }
1298 | 1406 |
1299 | 1407 | static int pagemap_pte_hole(unsigned long start, unsigned long end,
1300 | | - struct mm_walk *walk)
| 1408 | + __always_unused int depth, struct mm_walk *walk)
1301 | 1409 | {
1302 | 1410 | struct pagemapread *pm = walk->private;
1303 | 1411 | unsigned long addr = start;
.. | ..
1341 | 1449 | {
1342 | 1450 | u64 frame = 0, flags = 0;
1343 | 1451 | struct page *page = NULL;
| 1452 | + bool migration = false;
1344 | 1453 |
1345 | 1454 | if (pte_present(pte)) {
1346 | 1455 | if (pm->show_pfn)
1347 | 1456 | frame = pte_pfn(pte);
1348 | 1457 | flags |= PM_PRESENT;
1349 | | - page = _vm_normal_page(vma, addr, pte, true);
| 1458 | + page = vm_normal_page(vma, addr, pte);
1350 | 1459 | if (pte_soft_dirty(pte))
1351 | 1460 | flags |= PM_SOFT_DIRTY;
1352 | 1461 | } else if (is_swap_pte(pte)) {
.. | ..
1358 | 1467 | frame = swp_type(entry) |
1359 | 1468 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
1360 | 1469 | flags |= PM_SWAP;
1361 | | - if (is_migration_entry(entry))
| 1470 | + if (is_migration_entry(entry)) {
| 1471 | + migration = true;
1362 | 1472 | page = migration_entry_to_page(entry);
| 1473 | + }
1363 | 1474 |
1364 | 1475 | if (is_device_private_entry(entry))
1365 | 1476 | page = device_private_entry_to_page(entry);
.. | ..
1367 | 1478 |
1368 | 1479 | if (page && !PageAnon(page))
1369 | 1480 | flags |= PM_FILE;
1370 | | - if (page && page_mapcount(page) == 1)
| 1481 | + if (page && !migration && page_mapcount(page) == 1)
1371 | 1482 | flags |= PM_MMAP_EXCLUSIVE;
1372 | 1483 | if (vma->vm_flags & VM_SOFTDIRTY)
1373 | 1484 | flags |= PM_SOFT_DIRTY;
.. | ..
1383 | 1494 | spinlock_t *ptl;
1384 | 1495 | pte_t *pte, *orig_pte;
1385 | 1496 | int err = 0;
1386 | | -
1387 | 1497 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE
| 1498 | + bool migration = false;
| 1499 | +
1388 | 1500 | ptl = pmd_trans_huge_lock(pmdp, vma);
1389 | 1501 | if (ptl) {
1390 | 1502 | u64 flags = 0, frame = 0;
.. | ..
1419 | 1531 | if (pmd_swp_soft_dirty(pmd))
1420 | 1532 | flags |= PM_SOFT_DIRTY;
1421 | 1533 | VM_BUG_ON(!is_pmd_migration_entry(pmd));
| 1534 | + migration = is_migration_entry(entry);
1422 | 1535 | page = migration_entry_to_page(entry);
1423 | 1536 | }
1424 | 1537 | #endif
1425 | 1538 |
1426 | | - if (page && page_mapcount(page) == 1)
| 1539 | + if (page && !migration && page_mapcount(page) == 1)
1427 | 1540 | flags |= PM_MMAP_EXCLUSIVE;
1428 | 1541 |
1429 | 1542 | for (; addr != end; addr += PAGE_SIZE) {
.. | ..
1512 | 1625 |
1513 | 1626 | return err;
1514 | 1627 | }
| 1628 | +#else
| 1629 | +#define pagemap_hugetlb_range NULL
1515 | 1630 | #endif /* HUGETLB_PAGE */
| 1631 | +
| 1632 | +static const struct mm_walk_ops pagemap_ops = {
| 1633 | + .pmd_entry = pagemap_pmd_range,
| 1634 | + .pte_hole = pagemap_pte_hole,
| 1635 | + .hugetlb_entry = pagemap_hugetlb_range,
| 1636 | +};
1516 | 1637 |
1517 | 1638 | /*
1518 | 1639 | * /proc/pid/pagemap - an array mapping virtual pages to pfns
.. | ..
1545 | 1666 | {
1546 | 1667 | struct mm_struct *mm = file->private_data;
1547 | 1668 | struct pagemapread pm;
1548 | | - struct mm_walk pagemap_walk = {};
1549 | 1669 | unsigned long src;
1550 | 1670 | unsigned long svpfn;
1551 | 1671 | unsigned long start_vaddr;
.. | ..
1573 | 1693 | if (!pm.buffer)
1574 | 1694 | goto out_mm;
1575 | 1695 |
1576 | | - pagemap_walk.pmd_entry = pagemap_pmd_range;
1577 | | - pagemap_walk.pte_hole = pagemap_pte_hole;
1578 | | -#ifdef CONFIG_HUGETLB_PAGE
1579 | | - pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
1580 | | -#endif
1581 | | - pagemap_walk.mm = mm;
1582 | | - pagemap_walk.private = &pm;
1583 | | -
1584 | 1696 | src = *ppos;
1585 | 1697 | svpfn = src / PM_ENTRY_BYTES;
1586 | | - start_vaddr = svpfn << PAGE_SHIFT;
1587 | 1698 | end_vaddr = mm->task_size;
1588 | 1699 |
1589 | 1700 | /* watch out for wraparound */
1590 | | - if (svpfn > mm->task_size >> PAGE_SHIFT)
| 1701 | + start_vaddr = end_vaddr;
| 1702 | + if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
| 1703 | + start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
| 1704 | +
| 1705 | + /* Ensure the address is inside the task */
| 1706 | + if (start_vaddr > mm->task_size)
1591 | 1707 | start_vaddr = end_vaddr;
1592 | 1708 |
1593 | 1709 | /*
.. | ..
1606 | 1722 | /* overflow ? */
1607 | 1723 | if (end < start_vaddr || end > end_vaddr)
1608 | 1724 | end = end_vaddr;
1609 | | - ret = down_read_killable(&mm->mmap_sem);
| 1725 | + ret = mmap_read_lock_killable(mm);
1610 | 1726 | if (ret)
1611 | 1727 | goto out_free;
1612 | | - ret = walk_page_range(start_vaddr, end, &pagemap_walk);
1613 | | - up_read(&mm->mmap_sem);
| 1728 | + ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
| 1729 | + mmap_read_unlock(mm);
1614 | 1730 | start_vaddr = end;
1615 | 1731 |
1616 | 1732 | len = min(count, PM_ENTRY_BYTES * pm.pos);
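Each pagemap entry remains one 64-bit value per virtual page (PM_ENTRY_BYTES), so userspace seeks to (vaddr / page_size) * 8 and reads a u64 whose high bits carry the flags set above (bit 63 present, bit 62 swapped, bit 56 exclusively mapped, bit 55 soft-dirty) and whose low bits hold the PFN or swap entry. A hedged userspace sketch of reading a single entry (illustrative helper, not part of this patch):

```c
#include <fcntl.h>
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

/* Illustrative: fetch the pagemap entry describing one virtual address. */
static int read_pagemap_entry(const char *pagemap_path, unsigned long vaddr,
			      long page_size, uint64_t *entry)
{
	off_t off = (off_t)(vaddr / page_size) * sizeof(*entry);
	int fd = open(pagemap_path, O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = pread(fd, entry, sizeof(*entry), off);
	close(fd);
	return n == (ssize_t)sizeof(*entry) ? 0 : -1;
}
```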
.. | ..
1821 | 1937 | }
1822 | 1938 | #endif
1823 | 1939 |
| 1940 | +static const struct mm_walk_ops show_numa_ops = {
| 1941 | + .hugetlb_entry = gather_hugetlb_stats,
| 1942 | + .pmd_entry = gather_pte_stats,
| 1943 | +};
| 1944 | +
1824 | 1945 | /*
1825 | 1946 | * Display pages allocated per node and memory policy via /proc.
1826 | 1947 | */
.. | ..
1832 | 1953 | struct numa_maps *md = &numa_priv->md;
1833 | 1954 | struct file *file = vma->vm_file;
1834 | 1955 | struct mm_struct *mm = vma->vm_mm;
1835 | | - struct mm_walk walk = {
1836 | | - .hugetlb_entry = gather_hugetlb_stats,
1837 | | - .pmd_entry = gather_pte_stats,
1838 | | - .private = md,
1839 | | - .mm = mm,
1840 | | - };
1841 | 1956 | struct mempolicy *pol;
1842 | 1957 | char buffer[64];
1843 | 1958 | int nid;
.. | ..
1870 | 1985 | if (is_vm_hugetlb_page(vma))
1871 | 1986 | seq_puts(m, " huge");
1872 | 1987 |
1873 | | - /* mmap_sem is held by m_start */
1874 | | - walk_page_vma(vma, &walk);
| 1988 | + /* mmap_lock is held by m_start */
| 1989 | + walk_page_vma(vma, &show_numa_ops, md);
1875 | 1990 |
1876 | 1991 | if (!md->pages)
1877 | 1992 | goto out;
.. | ..
1904 | 2019 | seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
1905 | 2020 | out:
1906 | 2021 | seq_putc(m, '\n');
1907 | | - m_cache_vma(m, vma);
1908 | 2022 | return 0;
1909 | 2023 | }
1910 | 2024 |