From 102a0743326a03cd1a1202ceda21e175b7d3575c Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 20 Feb 2024 01:20:52 +0000
Subject: [PATCH] mm/mlock: update locking, THP accounting and pinner/speculative-fault hooks

Convert kernel/mm/mlock.c to the mmap_lock wrapper API, take the LRU lock
from the pgdat instead of the zone, and account THPs with thp_nr_pages()
and count_vm_events(). Annotate VM_LOCKED updates with vm_write_begin()/
vm_write_end() and WRITE_ONCE() for the speculative page fault path, reset
page pinner state for pages released in munlock_vma_pages_range(), and
reject mlockall(MCL_ONFAULT) without MCL_CURRENT or MCL_FUTURE.
---
 kernel/mm/mlock.c | 91 +++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 37 deletions(-)

diff --git a/kernel/mm/mlock.c b/kernel/mm/mlock.c
index ef2abaf..188aa74 100644
--- a/kernel/mm/mlock.c
+++ b/kernel/mm/mlock.c
@@ -17,6 +17,7 @@
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
+#include <linux/page_pinner.h>
 #include <linux/export.h>
 #include <linux/rmap.h>
 #include <linux/mmzone.h>
@@ -49,7 +50,7 @@
  * When lazy mlocking via vmscan, it is important to ensure that the
  * vma's VM_LOCKED status is not concurrently being modified, otherwise we
  * may have mlocked a page that is being munlocked. So lazy mlock must take
- * the mmap_sem for read, and verify that the vma really is locked
+ * the mmap_lock for read, and verify that the vma really is locked
  * (see mm/rmap.c).
  */

@@ -58,12 +59,14 @@
  */
 void clear_page_mlock(struct page *page)
 {
+	int nr_pages;
+
 	if (!TestClearPageMlocked(page))
 		return;

-	mod_zone_page_state(page_zone(page), NR_MLOCK,
-			    -hpage_nr_pages(page));
-	count_vm_event(UNEVICTABLE_PGCLEARED);
+	nr_pages = thp_nr_pages(page);
+	mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+	count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
 	/*
 	 * The previous TestClearPageMlocked() corresponds to the smp_mb()
 	 * in __pagevec_lru_add_fn().
@@ -77,7 +80,7 @@
 		 * We lost the race. the page already moved to evictable list.
 		 */
 		if (PageUnevictable(page))
-			count_vm_event(UNEVICTABLE_PGSTRANDED);
+			count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
 	}
 }

@@ -94,9 +97,10 @@
 	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
 	if (!TestSetPageMlocked(page)) {
-		mod_zone_page_state(page_zone(page), NR_MLOCK,
-				    hpage_nr_pages(page));
-		count_vm_event(UNEVICTABLE_PGMLOCKED);
+		int nr_pages = thp_nr_pages(page);
+
+		mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
+		count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
 		if (!isolate_lru_page(page))
 			putback_lru_page(page);
 	}
 }
@@ -139,7 +143,7 @@

 	/* Did try_to_unlock() succeed or punt? */
 	if (!PageMlocked(page))
-		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+		count_vm_events(UNEVICTABLE_PGMUNLOCKED, thp_nr_pages(page));

 	putback_lru_page(page);
 }
@@ -155,10 +159,12 @@
  */
 static void __munlock_isolation_failed(struct page *page)
 {
+	int nr_pages = thp_nr_pages(page);
+
 	if (PageUnevictable(page))
-		__count_vm_event(UNEVICTABLE_PGSTRANDED);
+		__count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
 	else
-		__count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+		__count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
 }

 /**
@@ -182,7 +188,7 @@
 unsigned int munlock_vma_page(struct page *page)
 {
 	int nr_pages;
-	struct zone *zone = page_zone(page);
+	pg_data_t *pgdat = page_pgdat(page);

 	/* For try_to_munlock() and to serialize with page migration */
 	BUG_ON(!PageLocked(page));
@@ -192,9 +198,9 @@
 	/*
 	 * Serialize with any parallel __split_huge_page_refcount() which
 	 * might otherwise copy PageMlocked to part of the tail pages before
-	 * we clear it in the head page. It also stabilizes hpage_nr_pages().
+	 * we clear it in the head page. It also stabilizes thp_nr_pages().
 	 */
-	spin_lock_irq(zone_lru_lock(zone));
+	spin_lock_irq(&pgdat->lru_lock);

 	if (!TestClearPageMlocked(page)) {
 		/* Potentially, PTE-mapped THP: do not skip the rest PTEs */
@@ -202,18 +208,18 @@
 		goto unlock_out;
 	}

-	nr_pages = hpage_nr_pages(page);
-	__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
+	nr_pages = thp_nr_pages(page);
+	__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);

 	if (__munlock_isolate_lru_page(page, true)) {
-		spin_unlock_irq(zone_lru_lock(zone));
+		spin_unlock_irq(&pgdat->lru_lock);
 		__munlock_isolated_page(page);
 		goto out;
 	}
 	__munlock_isolation_failed(page);

 unlock_out:
-	spin_unlock_irq(zone_lru_lock(zone));
+	spin_unlock_irq(&pgdat->lru_lock);

 out:
 	return nr_pages - 1;
@@ -298,7 +304,7 @@
 	pagevec_init(&pvec_putback);

 	/* Phase 1: page isolation */
-	spin_lock_irq(zone_lru_lock(zone));
+	spin_lock_irq(&zone->zone_pgdat->lru_lock);
 	for (i = 0; i < nr; i++) {
 		struct page *page = pvec->pages[i];

@@ -325,7 +331,7 @@
 		pvec->pages[i] = NULL;
 	}
 	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
-	spin_unlock_irq(zone_lru_lock(zone));
+	spin_unlock_irq(&zone->zone_pgdat->lru_lock);

 	/* Now we can release pins of pages that we are not munlocking */
 	pagevec_release(&pvec_putback);
@@ -381,7 +387,7 @@
 	/*
 	 * Initialize pte walk starting at the already pinned page where we
 	 * are sure that there is a pte, as it was pinned under the same
-	 * mmap_sem write op.
+	 * mmap_lock write op.
 	 */
 	pte = get_locked_pte(vma->vm_mm, start, &ptl);
 	/* Make sure we do not cross the page table boundary */
@@ -445,7 +451,9 @@
 void munlock_vma_pages_range(struct vm_area_struct *vma,
 			     unsigned long start, unsigned long end)
 {
-	vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+	vm_write_begin(vma);
+	WRITE_ONCE(vma->vm_flags, vma->vm_flags & VM_LOCKED_CLEAR_MASK);
+	vm_write_end(vma);

 	while (start < end) {
 		struct page *page;
@@ -463,8 +471,15 @@
 		 * has sneaked into the range, we won't oops here: great).
 		 */
 		page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);
-
 		if (page && !IS_ERR(page)) {
+			/*
+			 * munlock_vma_pages_range() uses follow_page(FOLL_GET),
+			 * so it should drop the reference with put_user_page(),
+			 * but the munlock path is too complicated to convert
+			 * every put site correctly, so just unattribute these
+			 * pages to avoid false positives for now.
+			 */
+			reset_page_pinner(page, compound_order(page));
 			if (PageTransTail(page)) {
 				VM_BUG_ON_PAGE(PageMlocked(page), page);
 				put_page(page); /* follow_page_mask() */
@@ -565,14 +580,15 @@
 	mm->locked_vm += nr_pages;

 	/*
-	 * vm_flags is protected by the mmap_sem held in write mode.
+	 * vm_flags is protected by the mmap_lock held in write mode.
 	 * It's okay if try_to_unmap_one unmaps a page just after we
 	 * set VM_LOCKED, populate_vma_page_range will bring it back.
 	 */
-
-	if (lock)
-		vma->vm_flags = newflags;
-	else
+	if (lock) {
+		vm_write_begin(vma);
+		WRITE_ONCE(vma->vm_flags, newflags);
+		vm_write_end(vma);
+	} else
 		munlock_vma_pages_range(vma, start, end);

 out:
@@ -686,7 +702,7 @@
 	lock_limit >>= PAGE_SHIFT;
 	locked = len >> PAGE_SHIFT;

-	if (down_write_killable(&current->mm->mmap_sem))
+	if (mmap_write_lock_killable(current->mm))
 		return -EINTR;

 	locked += current->mm->locked_vm;
@@ -705,7 +721,7 @@
 	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
 		error = apply_vma_lock_flags(start, len, flags);

-	up_write(&current->mm->mmap_sem);
+	mmap_write_unlock(current->mm);
 	if (error)
 		return error;

@@ -742,10 +758,10 @@
 	len = PAGE_ALIGN(len + (offset_in_page(start)));
 	start &= PAGE_MASK;

-	if (down_write_killable(&current->mm->mmap_sem))
+	if (mmap_write_lock_killable(current->mm))
 		return -EINTR;
 	ret = apply_vma_lock_flags(start, len, 0);
-	up_write(&current->mm->mmap_sem);
+	mmap_write_unlock(current->mm);

 	return ret;
 }
@@ -801,7 +817,8 @@
 	unsigned long lock_limit;
 	int ret;

-	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)))
+	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
+	    flags == MCL_ONFAULT)
 		return -EINVAL;

 	if (!can_do_mlock())
@@ -810,14 +827,14 @@
 	lock_limit = rlimit(RLIMIT_MEMLOCK);
 	lock_limit >>= PAGE_SHIFT;

-	if (down_write_killable(&current->mm->mmap_sem))
+	if (mmap_write_lock_killable(current->mm))
 		return -EINTR;

 	ret = -ENOMEM;
 	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
 	    capable(CAP_IPC_LOCK))
 		ret = apply_mlockall_flags(flags);
-	up_write(&current->mm->mmap_sem);
+	mmap_write_unlock(current->mm);
 	if (!ret && (flags & MCL_CURRENT))
 		mm_populate(0, TASK_SIZE);

@@ -828,10 +845,10 @@
 {
 	int ret;

-	if (down_write_killable(&current->mm->mmap_sem))
+	if (mmap_write_lock_killable(current->mm))
 		return -EINTR;
 	ret = apply_mlockall_flags(0);
-	up_write(&current->mm->mmap_sem);
+	mmap_write_unlock(current->mm);
 	return ret;
 }
--
Gitblit v1.6.2
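
Note for reviewers: the mmap_write_lock_killable()/mmap_write_unlock() calls
introduced above are the upstream mmap_lock wrapper API (include/linux/mmap_lock.h,
v5.8+). The sketch below is only an illustration of the calling convention on a tree
whose mm_struct still exposes mmap_sem directly, not the exact upstream
implementation.

/*
 * Minimal sketch of the mmap_lock wrappers this patch switches to, modeled
 * on upstream include/linux/mmap_lock.h. On a tree where mm_struct still
 * names the field mmap_sem, the wrappers can simply wrap that rwsem; the
 * call sites in the hunks above stay the same either way.
 */
#include <linux/mm_types.h>
#include <linux/rwsem.h>

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	/* was: down_write_killable(&mm->mmap_sem) */
	return down_write_killable(&mm->mmap_sem);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	/* was: up_write(&mm->mmap_sem) */
	up_write(&mm->mmap_sem);
}

With wrappers like these in place, the conversion in the syscall hunks is a
mechanical substitution of down_write_killable(&mm->mmap_sem) and
up_write(&mm->mmap_sem).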
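
The patch also calls helpers that are not in mainline: reset_page_pinner()
(Android page_pinner) and vm_write_begin()/vm_write_end() (speculative page
fault series). The stubs below are hypothetical no-op fallbacks, with signatures
inferred from the call sites in this diff and config symbol names assumed from
those series; they only show what a tree without either feature would need for
the patched mlock.c to build, not the real definitions.

/*
 * Hypothetical fallback stubs for the out-of-tree helpers used above.
 * Signatures follow the call sites in this patch:
 *   reset_page_pinner(page, compound_order(page));
 *   vm_write_begin(vma); ... vm_write_end(vma);
 */
#include <linux/mm_types.h>

#ifndef CONFIG_PAGE_PINNER
static inline void reset_page_pinner(struct page *page, unsigned int order)
{
	/* no page pinner tracking on this configuration */
}
#endif

#ifndef CONFIG_SPECULATIVE_PAGE_FAULT
static inline void vm_write_begin(struct vm_area_struct *vma)
{
	/* no speculative fault sequence counting on this configuration */
}

static inline void vm_write_end(struct vm_area_struct *vma)
{
}
#endif

In the speculative page fault series, vm_write_begin()/vm_write_end() bump a
per-VMA sequence count so concurrent speculative faults can detect vm_flags
updates, which is why the patch pairs them with WRITE_ONCE() on vma->vm_flags.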