From a5969cabbb4660eab42b6ef0412cbbd1200cf14d Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Sat, 12 Oct 2024 07:10:09 +0000 Subject: [PATCH] 修改led为gpio --- kernel/fs/proc/task_mmu.c | 592 +++++++++++++++++++++++++++++++++++----------------------- 1 files changed, 353 insertions(+), 239 deletions(-) diff --git a/kernel/fs/proc/task_mmu.c b/kernel/fs/proc/task_mmu.c index 0c31906..9d316d5 100644 --- a/kernel/fs/proc/task_mmu.c +++ b/kernel/fs/proc/task_mmu.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/mm.h> +#include <linux/pagewalk.h> #include <linux/vmacache.h> #include <linux/hugetlb.h> #include <linux/huge_mm.h> @@ -59,7 +59,7 @@ SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); - SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm); + SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm)); SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss); SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss); SEQ_PUT_DEC(" kB\nRssAnon:\t", anon); @@ -147,8 +147,8 @@ long pages_pinned; struct page *page; - pages_pinned = get_user_pages_remote(current, mm, - page_start_vaddr, 1, 0, &page, NULL, NULL); + pages_pinned = get_user_pages_remote(mm, page_start_vaddr, 1, 0, + &page, NULL, NULL); if (pages_pinned < 1) { seq_puts(m, "<fault>]"); return; @@ -159,7 +159,7 @@ write_len = strnlen(kaddr + page_offset, len); seq_write(m, kaddr + page_offset, write_len); kunmap(page); - put_page(page); + put_user_page(page); /* if strnlen hit a null terminator then we're done */ if (write_len != len) @@ -173,38 +173,14 @@ seq_putc(m, ']'); } -static void vma_stop(struct proc_maps_private *priv) -{ - struct mm_struct *mm = priv->mm; - - release_task_mempolicy(priv); - up_read(&mm->mmap_sem); - mmput(mm); -} - -static struct vm_area_struct * -m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma) -{ - if (vma == priv->tail_vma) - return NULL; - return vma->vm_next ?: priv->tail_vma; -} - -static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma) -{ - if (m->count < m->size) /* vma is copied successfully */ - m->version = m_next_vma(m->private, vma) ? vma->vm_end : -1UL; -} - static void *m_start(struct seq_file *m, loff_t *ppos) { struct proc_maps_private *priv = m->private; - unsigned long last_addr = m->version; + unsigned long last_addr = *ppos; struct mm_struct *mm; struct vm_area_struct *vma; - unsigned int pos = *ppos; - /* See m_cache_vma(). Zero at the start or after lseek. */ + /* See m_next(). Zero at the start or after lseek. */ if (last_addr == -1UL) return NULL; @@ -213,64 +189,59 @@ return ERR_PTR(-ESRCH); mm = priv->mm; - if (!mm || !mmget_not_zero(mm)) + if (!mm || !mmget_not_zero(mm)) { + put_task_struct(priv->task); + priv->task = NULL; return NULL; + } - if (down_read_killable(&mm->mmap_sem)) { + if (mmap_read_lock_killable(mm)) { mmput(mm); + put_task_struct(priv->task); + priv->task = NULL; return ERR_PTR(-EINTR); } hold_task_mempolicy(priv); priv->tail_vma = get_gate_vma(mm); - if (last_addr) { - vma = find_vma(mm, last_addr - 1); - if (vma && vma->vm_start <= last_addr) - vma = m_next_vma(priv, vma); - if (vma) - return vma; - } - - m->version = 0; - if (pos < mm->map_count) { - for (vma = mm->mmap; pos; pos--) { - m->version = vma->vm_start; - vma = vma->vm_next; - } + vma = find_vma(mm, last_addr); + if (vma) return vma; - } - /* we do not bother to update m->version in this case */ - if (pos == mm->map_count && priv->tail_vma) - return priv->tail_vma; - - vma_stop(priv); - return NULL; + return priv->tail_vma; } -static void *m_next(struct seq_file *m, void *v, loff_t *pos) +static void *m_next(struct seq_file *m, void *v, loff_t *ppos) { struct proc_maps_private *priv = m->private; - struct vm_area_struct *next; + struct vm_area_struct *next, *vma = v; - (*pos)++; - next = m_next_vma(priv, v); - if (!next) - vma_stop(priv); + if (vma == priv->tail_vma) + next = NULL; + else if (vma->vm_next) + next = vma->vm_next; + else + next = priv->tail_vma; + + *ppos = next ? next->vm_start : -1UL; + return next; } static void m_stop(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; + struct mm_struct *mm = priv->mm; - if (!IS_ERR_OR_NULL(v)) - vma_stop(priv); - if (priv->task) { - put_task_struct(priv->task); - priv->task = NULL; - } + if (!priv->task) + return; + + release_task_mempolicy(priv); + mmap_read_unlock(mm); + mmput(mm); + put_task_struct(priv->task); + priv->task = NULL; } static int proc_maps_open(struct inode *inode, struct file *file, @@ -420,7 +391,6 @@ static int show_map(struct seq_file *m, void *v) { show_map_vma(m, v); - m_cache_vma(m, v); return 0; } @@ -474,21 +444,59 @@ unsigned long lazyfree; unsigned long anonymous_thp; unsigned long shmem_thp; + unsigned long file_thp; unsigned long swap; unsigned long shared_hugetlb; unsigned long private_hugetlb; u64 pss; + u64 pss_anon; + u64 pss_file; + u64 pss_shmem; u64 pss_locked; u64 swap_pss; bool check_shmem_swap; }; -static void smaps_account(struct mem_size_stats *mss, struct page *page, - bool compound, bool young, bool dirty, bool locked) +static void smaps_page_accumulate(struct mem_size_stats *mss, + struct page *page, unsigned long size, unsigned long pss, + bool dirty, bool locked, bool private) { - int i, nr = compound ? 1 << compound_order(page) : 1; + mss->pss += pss; + + if (PageAnon(page)) + mss->pss_anon += pss; + else if (PageSwapBacked(page)) + mss->pss_shmem += pss; + else + mss->pss_file += pss; + + if (locked) + mss->pss_locked += pss; + + if (dirty || PageDirty(page)) { + if (private) + mss->private_dirty += size; + else + mss->shared_dirty += size; + } else { + if (private) + mss->private_clean += size; + else + mss->shared_clean += size; + } +} + +static void smaps_account(struct mem_size_stats *mss, struct page *page, + bool compound, bool young, bool dirty, bool locked, + bool migration) +{ + int i, nr = compound ? compound_nr(page) : 1; unsigned long size = nr * PAGE_SIZE; + /* + * First accumulate quantities that depend only on |size| and the type + * of the compound page. + */ if (PageAnon(page)) { mss->anonymous += size; if (!PageSwapBacked(page) && !dirty && !PageDirty(page)) @@ -501,48 +509,38 @@ mss->referenced += size; /* + * Then accumulate quantities that may depend on sharing, or that may + * differ page-by-page. + * * page_count(page) == 1 guarantees the page is mapped exactly once. * If any subpage of the compound page mapped with PTE it would elevate * page_count(). + * + * The page_mapcount() is called to get a snapshot of the mapcount. + * Without holding the page lock this snapshot can be slightly wrong as + * we cannot always read the mapcount atomically. It is not safe to + * call page_mapcount() even with PTL held if the page is not mapped, + * especially for migration entries. Treat regular migration entries + * as mapcount == 1. */ - if (page_count(page) == 1) { - if (dirty || PageDirty(page)) - mss->private_dirty += size; - else - mss->private_clean += size; - mss->pss += (u64)size << PSS_SHIFT; - if (locked) - mss->pss_locked += (u64)size << PSS_SHIFT; + if ((page_count(page) == 1) || migration) { + smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty, + locked, true); return; } - for (i = 0; i < nr; i++, page++) { int mapcount = page_mapcount(page); - unsigned long pss = (PAGE_SIZE << PSS_SHIFT); - - if (mapcount >= 2) { - if (dirty || PageDirty(page)) - mss->shared_dirty += PAGE_SIZE; - else - mss->shared_clean += PAGE_SIZE; - mss->pss += pss / mapcount; - if (locked) - mss->pss_locked += pss / mapcount; - } else { - if (dirty || PageDirty(page)) - mss->private_dirty += PAGE_SIZE; - else - mss->private_clean += PAGE_SIZE; - mss->pss += pss; - if (locked) - mss->pss_locked += pss; - } + unsigned long pss = PAGE_SIZE << PSS_SHIFT; + if (mapcount >= 2) + pss /= mapcount; + smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked, + mapcount < 2); } } #ifdef CONFIG_SHMEM static int smaps_pte_hole(unsigned long addr, unsigned long end, - struct mm_walk *walk) + __always_unused int depth, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; @@ -551,7 +549,9 @@ return 0; } -#endif +#else +#define smaps_pte_hole NULL +#endif /* CONFIG_SHMEM */ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct mm_walk *walk) @@ -560,9 +560,12 @@ struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool migration = false, young = false, dirty = false; if (pte_present(*pte)) { page = vm_normal_page(vma, addr, *pte); + young = pte_young(*pte); + dirty = pte_dirty(*pte); } else if (is_swap_pte(*pte)) { swp_entry_t swpent = pte_to_swp_entry(*pte); @@ -579,29 +582,24 @@ } else { mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } - } else if (is_migration_entry(swpent)) + } else if (is_migration_entry(swpent)) { + migration = true; page = migration_entry_to_page(swpent); - else if (is_device_private_entry(swpent)) + } else if (is_device_private_entry(swpent)) page = device_private_entry_to_page(swpent); } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap && pte_none(*pte))) { - page = find_get_entry(vma->vm_file->f_mapping, + page = xa_load(&vma->vm_file->f_mapping->i_pages, linear_page_index(vma, addr)); - if (!page) - return; - - if (radix_tree_exceptional_entry(page)) + if (xa_is_value(page)) mss->swap += PAGE_SIZE; - else - put_page(page); - return; } if (!page) return; - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); + smaps_account(mss, page, false, young, dirty, locked, migration); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -611,10 +609,20 @@ struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); - struct page *page; + struct page *page = NULL; + bool migration = false; - /* FOLL_DUMP will return -EFAULT on huge zero page */ - page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); + if (pmd_present(*pmd)) { + /* FOLL_DUMP will return -EFAULT on huge zero page */ + page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); + } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { + swp_entry_t entry = pmd_to_swp_entry(*pmd); + + if (is_migration_entry(entry)) { + migration = true; + page = migration_entry_to_page(entry); + } + } if (IS_ERR_OR_NULL(page)) return; if (PageAnon(page)) @@ -624,8 +632,10 @@ else if (is_zone_device_page(page)) /* pass */; else - VM_BUG_ON_PAGE(1, page); - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); + mss->file_thp += HPAGE_PMD_SIZE; + + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), + locked, migration); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -643,8 +653,7 @@ ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { - if (pmd_present(*pmd)) - smaps_pmd_entry(pmd, addr, walk); + smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); goto out; } @@ -652,7 +661,7 @@ if (pmd_trans_unstable(pmd)) goto out; /* - * The mmap_sem held all the way back in m_start() is what + * The mmap_lock held all the way back in m_start() is what * keeps khugepaged out of here and from collapsing things * in here. */ @@ -687,9 +696,6 @@ [ilog2(VM_GROWSDOWN)] = "gd", [ilog2(VM_PFNMAP)] = "pf", [ilog2(VM_DENYWRITE)] = "dw", -#ifdef CONFIG_X86_INTEL_MPX - [ilog2(VM_MPX)] = "mp", -#endif [ilog2(VM_LOCKED)] = "lo", [ilog2(VM_IO)] = "io", [ilog2(VM_SEQ_READ)] = "sr", @@ -703,6 +709,9 @@ [ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_WIPEONFORK)] = "wf", [ilog2(VM_DONTDUMP)] = "dd", +#ifdef CONFIG_ARM64_BTI + [ilog2(VM_ARM64_BTI)] = "bt", +#endif #ifdef CONFIG_MEM_SOFT_DIRTY [ilog2(VM_SOFTDIRTY)] = "sd", #endif @@ -712,6 +721,10 @@ [ilog2(VM_MERGEABLE)] = "mg", [ilog2(VM_UFFD_MISSING)]= "um", [ilog2(VM_UFFD_WP)] = "uw", +#ifdef CONFIG_ARM64_MTE + [ilog2(VM_MTE)] = "mt", + [ilog2(VM_MTE_ALLOWED)] = "", +#endif #ifdef CONFIG_ARCH_HAS_PKEYS /* These come out via ProtectionKey: */ [ilog2(VM_PKEY_BIT0)] = "", @@ -722,6 +735,9 @@ [ilog2(VM_PKEY_BIT4)] = "", #endif #endif /* CONFIG_ARCH_HAS_PKEYS */ +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR + [ilog2(VM_UFFD_MINOR)] = "ui", +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ }; size_t i; @@ -758,29 +774,42 @@ page = device_private_entry_to_page(swpent); } if (page) { - int mapcount = page_mapcount(page); - - if (mapcount >= 2) + if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte)) mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); } return 0; } +#else +#define smaps_hugetlb_range NULL #endif /* HUGETLB_PAGE */ -static void smap_gather_stats(struct vm_area_struct *vma, - struct mem_size_stats *mss) -{ - struct mm_walk smaps_walk = { - .pmd_entry = smaps_pte_range, -#ifdef CONFIG_HUGETLB_PAGE - .hugetlb_entry = smaps_hugetlb_range, -#endif - .mm = vma->vm_mm, - }; +static const struct mm_walk_ops smaps_walk_ops = { + .pmd_entry = smaps_pte_range, + .hugetlb_entry = smaps_hugetlb_range, +}; - smaps_walk.private = mss; +static const struct mm_walk_ops smaps_shmem_walk_ops = { + .pmd_entry = smaps_pte_range, + .hugetlb_entry = smaps_hugetlb_range, + .pte_hole = smaps_pte_hole, +}; + +/* + * Gather mem stats from @vma with the indicated beginning + * address @start, and keep them in @mss. + * + * Use vm_start of @vma as the beginning address if @start is 0. + */ +static void smap_gather_stats(struct vm_area_struct *vma, + struct mem_size_stats *mss, unsigned long start) +{ + const struct mm_walk_ops *ops = &smaps_walk_ops; + + /* Invalid start */ + if (start >= vma->vm_end) + return; #ifdef CONFIG_SHMEM /* In case of smaps_rollup, reset the value from previous vma */ @@ -798,27 +827,43 @@ */ unsigned long shmem_swapped = shmem_swap_usage(vma); - if (!shmem_swapped || (vma->vm_flags & VM_SHARED) || - !(vma->vm_flags & VM_WRITE)) { + if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) || + !(vma->vm_flags & VM_WRITE))) { mss->swap += shmem_swapped; } else { mss->check_shmem_swap = true; - smaps_walk.pte_hole = smaps_pte_hole; + ops = &smaps_shmem_walk_ops; } } #endif - /* mmap_sem is held in m_start */ - walk_page_vma(vma, &smaps_walk); + /* mmap_lock is held in m_start */ + if (!start) + walk_page_vma(vma, ops, mss); + else + walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss); } #define SEQ_PUT_DEC(str, val) \ seq_put_decimal_ull_width(m, str, (val) >> 10, 8) /* Show the contents common for smaps and smaps_rollup */ -static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss) +static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, + bool rollup_mode) { SEQ_PUT_DEC("Rss: ", mss->resident); SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT); + if (rollup_mode) { + /* + * These are meaningful only for smaps_rollup, otherwise two of + * them are zero, and the other one is the same as Pss. + */ + SEQ_PUT_DEC(" kB\nPss_Anon: ", + mss->pss_anon >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nPss_File: ", + mss->pss_file >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nPss_Shmem: ", + mss->pss_shmem >> PSS_SHIFT); + } SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean); SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty); SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean); @@ -828,6 +873,7 @@ SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree); SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp); SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp); + SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp); SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb); seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ", mss->private_hugetlb >> 10, 7); @@ -846,7 +892,7 @@ memset(&mss, 0, sizeof(mss)); - smap_gather_stats(vma, &mss); + smap_gather_stats(vma, &mss, 0); show_map_vma(m, vma); if (vma_get_anon_name(vma)) { @@ -860,15 +906,14 @@ SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma)); seq_puts(m, " kB\n"); - __show_smap(m, &mss); + __show_smap(m, &mss, false); - seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma)); + seq_printf(m, "THPeligible: %d\n", + transparent_hugepage_active(vma)); if (arch_pkeys_enabled()) seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); show_smap_vma_flags(m, vma); - - m_cache_vma(m, vma); return 0; } @@ -894,26 +939,90 @@ memset(&mss, 0, sizeof(mss)); - ret = down_read_killable(&mm->mmap_sem); + ret = mmap_read_lock_killable(mm); if (ret) goto out_put_mm; hold_task_mempolicy(priv); - for (vma = priv->mm->mmap; vma; vma = vma->vm_next) { - smap_gather_stats(vma, &mss); + for (vma = priv->mm->mmap; vma;) { + smap_gather_stats(vma, &mss, 0); last_vma_end = vma->vm_end; + + /* + * Release mmap_lock temporarily if someone wants to + * access it for write request. + */ + if (mmap_lock_is_contended(mm)) { + mmap_read_unlock(mm); + ret = mmap_read_lock_killable(mm); + if (ret) { + release_task_mempolicy(priv); + goto out_put_mm; + } + + /* + * After dropping the lock, there are four cases to + * consider. See the following example for explanation. + * + * +------+------+-----------+ + * | VMA1 | VMA2 | VMA3 | + * +------+------+-----------+ + * | | | | + * 4k 8k 16k 400k + * + * Suppose we drop the lock after reading VMA2 due to + * contention, then we get: + * + * last_vma_end = 16k + * + * 1) VMA2 is freed, but VMA3 exists: + * + * find_vma(mm, 16k - 1) will return VMA3. + * In this case, just continue from VMA3. + * + * 2) VMA2 still exists: + * + * find_vma(mm, 16k - 1) will return VMA2. + * Iterate the loop like the original one. + * + * 3) No more VMAs can be found: + * + * find_vma(mm, 16k - 1) will return NULL. + * No more things to do, just break. + * + * 4) (last_vma_end - 1) is the middle of a vma (VMA'): + * + * find_vma(mm, 16k - 1) will return VMA' whose range + * contains last_vma_end. + * Iterate VMA' from last_vma_end. + */ + vma = find_vma(mm, last_vma_end - 1); + /* Case 3 above */ + if (!vma) + break; + + /* Case 1 above */ + if (vma->vm_start >= last_vma_end) + continue; + + /* Case 4 above */ + if (vma->vm_end > last_vma_end) + smap_gather_stats(vma, &mss, last_vma_end); + } + /* Case 2 above */ + vma = vma->vm_next; } - show_vma_header_prefix(m, priv->mm->mmap->vm_start, + show_vma_header_prefix(m, priv->mm->mmap ? priv->mm->mmap->vm_start : 0, last_vma_end, 0, 0, 0, 0); seq_pad(m, ' '); seq_puts(m, "[rollup]\n"); - __show_smap(m, &mss); + __show_smap(m, &mss, true); release_task_mempolicy(priv); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out_put_mm: mmput(mm); @@ -1006,6 +1115,25 @@ }; #ifdef CONFIG_MEM_SOFT_DIRTY + +#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) + +static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) +{ + struct page *page; + + if (!pte_write(pte)) + return false; + if (!is_cow_mapping(vma->vm_flags)) + return false; + if (likely(!atomic_read(&vma->vm_mm->has_pinned))) + return false; + page = vm_normal_page(vma, addr, pte); + if (!page) + return false; + return page_maybe_dma_pinned(page); +} + static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { @@ -1018,10 +1146,14 @@ pte_t ptent = *pte; if (pte_present(ptent)) { - ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte); - ptent = pte_wrprotect(ptent); + pte_t old_pte; + + if (pte_is_pinned(vma, addr, ptent)) + return; + old_pte = ptep_modify_prot_start(vma, addr, pte); + ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); - ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent); + ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); } else if (is_swap_pte(ptent)) { ptent = pte_swp_clear_soft_dirty(ptent); set_pte_at(vma->vm_mm, addr, pte, ptent); @@ -1145,6 +1277,11 @@ return 0; } +static const struct mm_walk_ops clear_refs_walk_ops = { + .pmd_entry = clear_refs_pte_range, + .test_walk = clear_refs_test_walk, +}; + static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -1153,7 +1290,6 @@ struct mm_struct *mm; struct vm_area_struct *vma; enum clear_refs_types type; - struct mmu_gather tlb; int itype; int rv; @@ -1174,77 +1310,49 @@ return -ESRCH; mm = get_task_mm(task); if (mm) { + struct mmu_notifier_range range; struct clear_refs_private cp = { .type = type, }; - struct mm_walk clear_refs_walk = { - .pmd_entry = clear_refs_pte_range, - .test_walk = clear_refs_test_walk, - .mm = mm, - .private = &cp, - }; + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } if (type == CLEAR_REFS_MM_HIWATER_RSS) { - if (down_write_killable(&mm->mmap_sem)) { - count = -EINTR; - goto out_mm; - } - /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. */ reset_mm_hiwater_rss(mm); - up_write(&mm->mmap_sem); - goto out_mm; + goto out_unlock; } - if (down_read_killable(&mm->mmap_sem)) { - count = -EINTR; - goto out_mm; - } - tlb_gather_mmu(&tlb, mm, 0, -1); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; - up_read(&mm->mmap_sem); - if (down_write_killable(&mm->mmap_sem)) { - count = -EINTR; - goto out_mm; - } - /* - * Avoid to modify vma->vm_flags - * without locked ops while the - * coredump reads the vm_flags. - */ - if (!mmget_still_valid(mm)) { - /* - * Silently return "count" - * like if get_task_mm() - * failed. FIXME: should this - * function have returned - * -ESRCH if get_task_mm() - * failed like if - * get_proc_task() fails? - */ - up_write(&mm->mmap_sem); - goto out_mm; - } - for (vma = mm->mmap; vma; vma = vma->vm_next) { - vma->vm_flags &= ~VM_SOFTDIRTY; - vma_set_page_prot(vma); - } - downgrade_write(&mm->mmap_sem); - break; + vm_write_begin(vma); + WRITE_ONCE(vma->vm_flags, + vma->vm_flags & ~VM_SOFTDIRTY); + vma_set_page_prot(vma); + vm_write_end(vma); } - mmu_notifier_invalidate_range_start(mm, 0, -1); + + inc_tlb_flush_pending(mm); + mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, + 0, NULL, mm, 0, -1UL); + mmu_notifier_invalidate_range_start(&range); } - walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); - if (type == CLEAR_REFS_SOFT_DIRTY) - mmu_notifier_invalidate_range_end(mm, 0, -1); - tlb_finish_mmu(&tlb, 0, -1); - up_read(&mm->mmap_sem); + walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops, + &cp); + if (type == CLEAR_REFS_SOFT_DIRTY) { + mmu_notifier_invalidate_range_end(&range); + flush_tlb_mm(mm); + dec_tlb_flush_pending(mm); + } +out_unlock: + mmap_write_unlock(mm); out_mm: mmput(mm); } @@ -1297,7 +1405,7 @@ } static int pagemap_pte_hole(unsigned long start, unsigned long end, - struct mm_walk *walk) + __always_unused int depth, struct mm_walk *walk) { struct pagemapread *pm = walk->private; unsigned long addr = start; @@ -1341,12 +1449,13 @@ { u64 frame = 0, flags = 0; struct page *page = NULL; + bool migration = false; if (pte_present(pte)) { if (pm->show_pfn) frame = pte_pfn(pte); flags |= PM_PRESENT; - page = _vm_normal_page(vma, addr, pte, true); + page = vm_normal_page(vma, addr, pte); if (pte_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; } else if (is_swap_pte(pte)) { @@ -1358,8 +1467,10 @@ frame = swp_type(entry) | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; - if (is_migration_entry(entry)) + if (is_migration_entry(entry)) { + migration = true; page = migration_entry_to_page(entry); + } if (is_device_private_entry(entry)) page = device_private_entry_to_page(entry); @@ -1367,7 +1478,7 @@ if (page && !PageAnon(page)) flags |= PM_FILE; - if (page && page_mapcount(page) == 1) + if (page && !migration && page_mapcount(page) == 1) flags |= PM_MMAP_EXCLUSIVE; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1383,8 +1494,9 @@ spinlock_t *ptl; pte_t *pte, *orig_pte; int err = 0; - #ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool migration = false; + ptl = pmd_trans_huge_lock(pmdp, vma); if (ptl) { u64 flags = 0, frame = 0; @@ -1419,11 +1531,12 @@ if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; VM_BUG_ON(!is_pmd_migration_entry(pmd)); + migration = is_migration_entry(entry); page = migration_entry_to_page(entry); } #endif - if (page && page_mapcount(page) == 1) + if (page && !migration && page_mapcount(page) == 1) flags |= PM_MMAP_EXCLUSIVE; for (; addr != end; addr += PAGE_SIZE) { @@ -1512,7 +1625,15 @@ return err; } +#else +#define pagemap_hugetlb_range NULL #endif /* HUGETLB_PAGE */ + +static const struct mm_walk_ops pagemap_ops = { + .pmd_entry = pagemap_pmd_range, + .pte_hole = pagemap_pte_hole, + .hugetlb_entry = pagemap_hugetlb_range, +}; /* * /proc/pid/pagemap - an array mapping virtual pages to pfns @@ -1545,7 +1666,6 @@ { struct mm_struct *mm = file->private_data; struct pagemapread pm; - struct mm_walk pagemap_walk = {}; unsigned long src; unsigned long svpfn; unsigned long start_vaddr; @@ -1573,21 +1693,17 @@ if (!pm.buffer) goto out_mm; - pagemap_walk.pmd_entry = pagemap_pmd_range; - pagemap_walk.pte_hole = pagemap_pte_hole; -#ifdef CONFIG_HUGETLB_PAGE - pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; -#endif - pagemap_walk.mm = mm; - pagemap_walk.private = ± - src = *ppos; svpfn = src / PM_ENTRY_BYTES; - start_vaddr = svpfn << PAGE_SHIFT; end_vaddr = mm->task_size; /* watch out for wraparound */ - if (svpfn > mm->task_size >> PAGE_SHIFT) + start_vaddr = end_vaddr; + if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) + start_vaddr = untagged_addr(svpfn << PAGE_SHIFT); + + /* Ensure the address is inside the task */ + if (start_vaddr > mm->task_size) start_vaddr = end_vaddr; /* @@ -1606,11 +1722,11 @@ /* overflow ? */ if (end < start_vaddr || end > end_vaddr) end = end_vaddr; - ret = down_read_killable(&mm->mmap_sem); + ret = mmap_read_lock_killable(mm); if (ret) goto out_free; - ret = walk_page_range(start_vaddr, end, &pagemap_walk); - up_read(&mm->mmap_sem); + ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm); + mmap_read_unlock(mm); start_vaddr = end; len = min(count, PM_ENTRY_BYTES * pm.pos); @@ -1821,6 +1937,11 @@ } #endif +static const struct mm_walk_ops show_numa_ops = { + .hugetlb_entry = gather_hugetlb_stats, + .pmd_entry = gather_pte_stats, +}; + /* * Display pages allocated per node and memory policy via /proc. */ @@ -1832,12 +1953,6 @@ struct numa_maps *md = &numa_priv->md; struct file *file = vma->vm_file; struct mm_struct *mm = vma->vm_mm; - struct mm_walk walk = { - .hugetlb_entry = gather_hugetlb_stats, - .pmd_entry = gather_pte_stats, - .private = md, - .mm = mm, - }; struct mempolicy *pol; char buffer[64]; int nid; @@ -1870,8 +1985,8 @@ if (is_vm_hugetlb_page(vma)) seq_puts(m, " huge"); - /* mmap_sem is held by m_start */ - walk_page_vma(vma, &walk); + /* mmap_lock is held by m_start */ + walk_page_vma(vma, &show_numa_ops, md); if (!md->pages) goto out; @@ -1904,7 +2019,6 @@ seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10); out: seq_putc(m, '\n'); - m_cache_vma(m, vma); return 0; } -- Gitblit v1.6.2