From 244b2c5ca8b14627e4a17755e5922221e121c771 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 09 Oct 2024 06:15:07 +0000
Subject: [PATCH] mm/mlock: use mmap_lock API, thp_nr_pages() and per-node lru_lock; reset page_pinner on munlock
---
kernel/mm/mlock.c | 91 +++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 37 deletions(-)
diff --git a/kernel/mm/mlock.c b/kernel/mm/mlock.c
index ef2abaf..188aa74 100644
--- a/kernel/mm/mlock.c
+++ b/kernel/mm/mlock.c
@@ -17,6 +17,7 @@
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
+#include <linux/page_pinner.h>
#include <linux/export.h>
#include <linux/rmap.h>
#include <linux/mmzone.h>
@@ -49,7 +50,7 @@
* When lazy mlocking via vmscan, it is important to ensure that the
* vma's VM_LOCKED status is not concurrently being modified, otherwise we
* may have mlocked a page that is being munlocked. So lazy mlock must take
- * the mmap_sem for read, and verify that the vma really is locked
+ * the mmap_lock for read, and verify that the vma really is locked
* (see mm/rmap.c).
*/
@@ -58,12 +59,14 @@
*/
void clear_page_mlock(struct page *page)
{
+ int nr_pages;
+
if (!TestClearPageMlocked(page))
return;
- mod_zone_page_state(page_zone(page), NR_MLOCK,
- -hpage_nr_pages(page));
- count_vm_event(UNEVICTABLE_PGCLEARED);
+ nr_pages = thp_nr_pages(page);
+ mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+ count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
/*
* The previous TestClearPageMlocked() corresponds to the smp_mb()
* in __pagevec_lru_add_fn().
@@ -77,7 +80,7 @@
* We lost the race. the page already moved to evictable list.
*/
if (PageUnevictable(page))
- count_vm_event(UNEVICTABLE_PGSTRANDED);
+ count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
}
}
@@ -94,9 +97,10 @@
VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
if (!TestSetPageMlocked(page)) {
- mod_zone_page_state(page_zone(page), NR_MLOCK,
- hpage_nr_pages(page));
- count_vm_event(UNEVICTABLE_PGMLOCKED);
+ int nr_pages = thp_nr_pages(page);
+
+ mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
+ count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
if (!isolate_lru_page(page))
putback_lru_page(page);
}
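The hunks above replace count_vm_event() with count_vm_events() and take the event count from thp_nr_pages(), so an mlocked or cleared THP is accounted as all of its base pages instead of a single event. A minimal user-space sketch of how that change becomes visible, assuming THP actually ends up backing the mapping, a 2 MiB huge page size, and the unevictable_pgs_mlocked counter in /proc/vmstat; the program is illustrative only and not part of the patch:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

static long vmstat_value(const char *key)
{
        char line[256];
        long val = -1;
        FILE *f = fopen("/proc/vmstat", "r");

        if (!f)
                return -1;
        while (fgets(line, sizeof(line), f)) {
                if (!strncmp(line, key, strlen(key))) {
                        val = atol(line + strlen(key));
                        break;
                }
        }
        fclose(f);
        return val;
}

int main(void)
{
        size_t len = 2UL << 20;                 /* one assumed 2 MiB THP */
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                return 1;
        madvise(p, len, MADV_HUGEPAGE);         /* ask for THP backing */
        memset(p, 0, len);                      /* fault the range in */

        long before = vmstat_value("unevictable_pgs_mlocked ");
        if (mlock(p, len)) {                    /* may need RLIMIT_MEMLOCK or CAP_IPC_LOCK */
                perror("mlock");
                return 1;
        }
        long after = vmstat_value("unevictable_pgs_mlocked ");

        printf("unevictable_pgs_mlocked grew by %ld\n", after - before);
        munlock(p, len);
        munmap(p, len);
        return 0;
}

With per-base-page accounting the counter should grow by roughly 512 for one 2 MiB THP, whereas a kernel counting one event per call would only report 1.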
@@ -139,7 +143,7 @@
/* Did try_to_unlock() succeed or punt? */
if (!PageMlocked(page))
- count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+ count_vm_events(UNEVICTABLE_PGMUNLOCKED, thp_nr_pages(page));
putback_lru_page(page);
}
@@ -155,10 +159,12 @@
*/
static void __munlock_isolation_failed(struct page *page)
{
+ int nr_pages = thp_nr_pages(page);
+
if (PageUnevictable(page))
- __count_vm_event(UNEVICTABLE_PGSTRANDED);
+ __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
else
- __count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+ __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
}
/**
@@ -182,7 +188,7 @@
unsigned int munlock_vma_page(struct page *page)
{
int nr_pages;
- struct zone *zone = page_zone(page);
+ pg_data_t *pgdat = page_pgdat(page);
/* For try_to_munlock() and to serialize with page migration */
BUG_ON(!PageLocked(page));
@@ -192,9 +198,9 @@
/*
* Serialize with any parallel __split_huge_page_refcount() which
* might otherwise copy PageMlocked to part of the tail pages before
- * we clear it in the head page. It also stabilizes hpage_nr_pages().
+ * we clear it in the head page. It also stabilizes thp_nr_pages().
*/
- spin_lock_irq(zone_lru_lock(zone));
+ spin_lock_irq(&pgdat->lru_lock);
if (!TestClearPageMlocked(page)) {
/* Potentially, PTE-mapped THP: do not skip the rest PTEs */
@@ -202,18 +208,18 @@
goto unlock_out;
}
- nr_pages = hpage_nr_pages(page);
- __mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
+ nr_pages = thp_nr_pages(page);
+ __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
if (__munlock_isolate_lru_page(page, true)) {
- spin_unlock_irq(zone_lru_lock(zone));
+ spin_unlock_irq(&pgdat->lru_lock);
__munlock_isolated_page(page);
goto out;
}
__munlock_isolation_failed(page);
unlock_out:
- spin_unlock_irq(zone_lru_lock(zone));
+ spin_unlock_irq(&pgdat->lru_lock);
out:
return nr_pages - 1;
@@ -298,7 +304,7 @@
pagevec_init(&pvec_putback);
/* Phase 1: page isolation */
- spin_lock_irq(zone_lru_lock(zone));
+ spin_lock_irq(&zone->zone_pgdat->lru_lock);
for (i = 0; i < nr; i++) {
struct page *page = pvec->pages[i];
@@ -325,7 +331,7 @@
pvec->pages[i] = NULL;
}
__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
- spin_unlock_irq(zone_lru_lock(zone));
+ spin_unlock_irq(&zone->zone_pgdat->lru_lock);
/* Now we can release pins of pages that we are not munlocking */
pagevec_release(&pvec_putback);
@@ -381,7 +387,7 @@
/*
* Initialize pte walk starting at the already pinned page where we
* are sure that there is a pte, as it was pinned under the same
- * mmap_sem write op.
+ * mmap_lock write op.
*/
pte = get_locked_pte(vma->vm_mm, start, &ptl);
/* Make sure we do not cross the page table boundary */
@@ -445,7 +451,9 @@
void munlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+ vm_write_begin(vma);
+ WRITE_ONCE(vma->vm_flags, vma->vm_flags & VM_LOCKED_CLEAR_MASK);
+ vm_write_end(vma);
while (start < end) {
struct page *page;
@@ -463,8 +471,15 @@
* has sneaked into the range, we won't oops here: great).
*/
page = follow_page(vma, start, FOLL_GET | FOLL_DUMP);
-
if (page && !IS_ERR(page)) {
+ /*
+ * munlock_vma_pages_range() uses follow_page(FOLL_GET),
+ * so it should use put_user_page(), but the munlock path
+ * makes it hard to get every put site right, so just
+ * unattribute these pages to avoid false positives for
+ * now.
+ */
+ reset_page_pinner(page, compound_order(page));
if (PageTransTail(page)) {
VM_BUG_ON_PAGE(PageMlocked(page), page);
put_page(page); /* follow_page_mask() */
@@ -565,14 +580,15 @@
mm->locked_vm += nr_pages;
/*
- * vm_flags is protected by the mmap_sem held in write mode.
+ * vm_flags is protected by the mmap_lock held in write mode.
* It's okay if try_to_unmap_one unmaps a page just after we
* set VM_LOCKED, populate_vma_page_range will bring it back.
*/
-
- if (lock)
- vma->vm_flags = newflags;
- else
+ if (lock) {
+ vm_write_begin(vma);
+ WRITE_ONCE(vma->vm_flags, newflags);
+ vm_write_end(vma);
+ } else
munlock_vma_pages_range(vma, start, end);
out:
@@ -686,7 +702,7 @@
lock_limit >>= PAGE_SHIFT;
locked = len >> PAGE_SHIFT;
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
locked += current->mm->locked_vm;
@@ -705,7 +721,7 @@
if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
error = apply_vma_lock_flags(start, len, flags);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
if (error)
return error;
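Besides switching from down_write_killable(&current->mm->mmap_sem) to mmap_write_lock_killable(current->mm), the hunks above keep the RLIMIT_MEMLOCK check: the request plus mm->locked_vm, both counted in pages, must stay within lock_limit >> PAGE_SHIFT unless the caller has CAP_IPC_LOCK. A hedged user-space sketch of the same arithmetic from the caller's side; the buffer size is arbitrary and ENOMEM is only the typical failure mode when the limit is exceeded without CAP_IPC_LOCK:

#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
        struct rlimit rl;
        long page_size = sysconf(_SC_PAGESIZE);
        size_t len = 16 * page_size;            /* arbitrary example size */

        if (getrlimit(RLIMIT_MEMLOCK, &rl))
                return 1;
        printf("RLIMIT_MEMLOCK allows about %llu pages, request needs %zu\n",
               (unsigned long long)rl.rlim_cur / page_size,
               len / page_size);

        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
                return 1;
        if (mlock(p, len))                      /* typically ENOMEM over the limit */
                perror("mlock");
        munlock(p, len);
        munmap(p, len);
        return 0;
}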
@@ -742,10 +758,10 @@
len = PAGE_ALIGN(len + (offset_in_page(start)));
start &= PAGE_MASK;
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
ret = apply_vma_lock_flags(start, len, 0);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
return ret;
}
@@ -801,7 +817,8 @@
unsigned long lock_limit;
int ret;
- if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)))
+ if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
+ flags == MCL_ONFAULT)
return -EINVAL;
if (!can_do_mlock())
@@ -810,14 +827,14 @@
lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
ret = -ENOMEM;
if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
capable(CAP_IPC_LOCK))
ret = apply_mlockall_flags(flags);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
if (!ret && (flags & MCL_CURRENT))
mm_populate(0, TASK_SIZE);
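The two mlockall() hunks above add the flags == MCL_ONFAULT case to the -EINVAL check, since MCL_ONFAULT is only meaningful together with MCL_CURRENT and/or MCL_FUTURE, and they keep the RLIMIT_MEMLOCK test for MCL_CURRENT. A small illustrative check from user space, assuming a libc that defines MCL_ONFAULT; not part of the patch:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
        if (mlockall(MCL_ONFAULT) == -1)
                printf("MCL_ONFAULT alone: %s (expected EINVAL)\n",
                       strerror(errno));

        if (mlockall(MCL_CURRENT | MCL_ONFAULT) == 0) {
                printf("MCL_CURRENT | MCL_ONFAULT accepted\n");
                munlockall();
        } else {
                perror("mlockall");     /* e.g. ENOMEM over RLIMIT_MEMLOCK */
        }
        return 0;
}

MCL_CURRENT | MCL_ONFAULT marks existing mappings as locked but lets them be faulted in on first touch rather than populated immediately.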
@@ -828,10 +845,10 @@
{
int ret;
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
ret = apply_mlockall_flags(0);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
return ret;
}
--
Gitblit v1.6.2