From 8ac6c7a54ed1b98d142dce24b11c6de6a1e239a5 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Tue, 22 Oct 2024 10:36:11 +0000
Subject: [PATCH] Switch 4G dial-up to QMI; quectel-CM must be run in the background on the system
---
kernel/mm/rmap.c | 482 +++++++++++++++++++++++++++++++++++------------------
1 file changed, 316 insertions(+), 166 deletions(-)
diff --git a/kernel/mm/rmap.c b/kernel/mm/rmap.c
index 699f445..5338a8b 100644
--- a/kernel/mm/rmap.c
+++ b/kernel/mm/rmap.c
@@ -21,13 +21,14 @@
* Lock ordering in mm:
*
* inode->i_mutex (while writing or truncating, not reading or faulting)
- * mm->mmap_sem
- * page->flags PG_locked (lock_page)
+ * mm->mmap_lock
+ * page->flags PG_locked (lock_page) * (see hugetlbfs below)
* hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
* mapping->i_mmap_rwsem
+ * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
* anon_vma->rwsem
* mm->page_table_lock or pte_lock
- * zone_lru_lock (in mark_page_accessed, isolate_lru_page)
+ * pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
* swap_lock (in swap_duplicate, swap_info_get)
* mmlist_lock (in mmput, drain_mmlist and others)
* mapping->private_lock (in __set_page_dirty_buffers)
@@ -43,6 +44,11 @@
* anon_vma->rwsem,mapping->i_mutex (memory_failure, collect_procs_anon)
* ->tasklist_lock
* pte map lock
+ *
+ * * hugetlbfs PageHuge() pages take locks in this order:
+ * mapping->i_mmap_rwsem
+ * hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
+ * page->flags PG_locked (lock_page)
*/
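
The lock-ordering comment above is documentation only; as a hedged sketch (not part of this patch, assuming the usual <linux/fs.h>, <linux/pagemap.h> and <linux/mutex.h> headers), the hugetlbfs ordering it describes could be exercised like this, with fault_mutex standing in for the hashed hugetlb_fault_mutex_table entry the real fault path looks up:

    /*
     * Illustration of the hugetlbfs lock order documented above:
     * i_mmap_rwsem, then the hugetlb fault mutex, then the page lock,
     * released in reverse order.
     */
    static void hugetlb_lock_order_sketch(struct address_space *mapping,
                                          struct mutex *fault_mutex,
                                          struct page *page)
    {
            i_mmap_lock_read(mapping);      /* mapping->i_mmap_rwsem */
            mutex_lock(fault_mutex);        /* hugetlb_fault_mutex (hypothetical instance) */
            lock_page(page);                /* page->flags PG_locked */

            /* ... hugetlbfs fault handling would run here ... */

            unlock_page(page);              /* release in reverse order */
            mutex_unlock(fault_mutex);
            i_mmap_unlock_read(mapping);
    }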
#include <linux/mm.h>
@@ -61,6 +67,7 @@
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
+#include <linux/huge_mm.h>
#include <linux/backing-dev.h>
#include <linux/page_idle.h>
#include <linux/memremap.h>
@@ -69,6 +76,8 @@
#include <asm/tlbflush.h>
#include <trace/events/tlb.h>
+
+#include <trace/hooks/mm.h>
#include "internal.h"
@@ -82,7 +91,8 @@
anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
if (anon_vma) {
atomic_set(&anon_vma->refcount, 1);
- anon_vma->degree = 1; /* Reference for first vma */
+ anon_vma->num_children = 0;
+ anon_vma->num_active_vmas = 0;
anon_vma->parent = anon_vma;
/*
* Initialise the anon_vma root to point to itself. If called
@@ -170,7 +180,7 @@
* to do any locking for the common case of already having
* an anon_vma.
*
- * This must be called with the mmap_sem held for reading.
+ * This must be called with the mmap_lock held for reading.
*/
int __anon_vma_prepare(struct vm_area_struct *vma)
{
@@ -190,6 +200,7 @@
anon_vma = anon_vma_alloc();
if (unlikely(!anon_vma))
goto out_enomem_free_avc;
+ anon_vma->num_children++; /* self-parent link for new root */
allocated = anon_vma;
}
@@ -199,8 +210,7 @@
if (likely(!vma->anon_vma)) {
vma->anon_vma = anon_vma;
anon_vma_chain_link(vma, avc, anon_vma);
- /* vma reference or self-parent link for new root */
- anon_vma->degree++;
+ anon_vma->num_active_vmas++;
allocated = NULL;
avc = NULL;
}
@@ -250,13 +260,19 @@
* Attach the anon_vmas from src to dst.
* Returns 0 on success, -ENOMEM on failure.
*
- * If dst->anon_vma is NULL this function tries to find and reuse existing
- * anon_vma which has no vmas and only one child anon_vma. This prevents
- * degradation of anon_vma hierarchy to endless linear chain in case of
- * constantly forking task. On the other hand, an anon_vma with more than one
- * child isn't reused even if there was no alive vma, thus rmap walker has a
- * good chance of avoiding scanning the whole hierarchy when it searches where
- * page is mapped.
+ * anon_vma_clone() is called by __vma_adjust(), __split_vma(), copy_vma() and
+ * anon_vma_fork(). The first three want an exact copy of src, while the last
+ * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
+ * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
+ * we can identify this case by checking (!dst->anon_vma && src->anon_vma).
+ *
+ * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
+ * and reuse existing anon_vma which has no vmas and only one child anon_vma.
+ * This prevents degradation of anon_vma hierarchy to endless linear chain in
+ * case of constantly forking task. On the other hand, an anon_vma with more
+ * than one child isn't reused even if there was no alive vma, thus rmap
+ * walker has a good chance of avoiding scanning the whole hierarchy when it
+ * searches where page is mapped.
*/
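
As a hedged restatement (not part of the patch), the reuse rule spelled out above boils down to one test on the counters this patch introduces; the helper name below is hypothetical:

    /*
     * Hypothetical helper restating the reuse test described above:
     * an anon_vma may be handed to the forked child only if no live
     * VMA uses it and it has at most one child anon_vma, which keeps
     * rmap walks from degenerating into long chains.
     */
    static bool anon_vma_reusable(struct anon_vma *anon_vma)
    {
            return anon_vma->num_active_vmas == 0 &&
                   anon_vma->num_children < 2;
    }

In anon_vma_clone() this corresponds to the (!dst->anon_vma && src->anon_vma) branch a few lines below, which assigns dst->anon_vma only when both conditions hold.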
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
@@ -279,19 +295,19 @@
anon_vma_chain_link(dst, avc, anon_vma);
/*
- * Reuse existing anon_vma if its degree lower than two,
- * that means it has no vma and only one anon_vma child.
+ * Reuse existing anon_vma if it has no vma and only one
+ * anon_vma child.
*
- * Do not chose parent anon_vma, otherwise first child
- * will always reuse it. Root anon_vma is never reused:
+ * Root anon_vma is never reused:
* it has self-parent reference and at least one child.
*/
- if (!dst->anon_vma && anon_vma != src->anon_vma &&
- anon_vma->degree < 2)
+ if (!dst->anon_vma && src->anon_vma &&
+ anon_vma->num_children < 2 &&
+ anon_vma->num_active_vmas == 0)
dst->anon_vma = anon_vma;
}
if (dst->anon_vma)
- dst->anon_vma->degree++;
+ dst->anon_vma->num_active_vmas++;
unlock_anon_vma_root(root);
return 0;
@@ -341,6 +357,7 @@
anon_vma = anon_vma_alloc();
if (!anon_vma)
goto out_error;
+ anon_vma->num_active_vmas++;
avc = anon_vma_chain_alloc(GFP_KERNEL);
if (!avc)
goto out_error_free_anon_vma;
@@ -361,7 +378,7 @@
vma->anon_vma = anon_vma;
anon_vma_lock_write(anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
- anon_vma->parent->degree++;
+ anon_vma->parent->num_children++;
anon_vma_unlock_write(anon_vma);
return 0;
@@ -393,7 +410,7 @@
* to free them outside the lock.
*/
if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
- anon_vma->parent->degree--;
+ anon_vma->parent->num_children--;
continue;
}
@@ -401,7 +418,8 @@
anon_vma_chain_free(avc);
}
if (vma->anon_vma)
- vma->anon_vma->degree--;
+ vma->anon_vma->num_active_vmas--;
+
unlock_anon_vma_root(root);
/*
@@ -412,7 +430,8 @@
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma = avc->anon_vma;
- VM_WARN_ON(anon_vma->degree);
+ VM_WARN_ON(anon_vma->num_children);
+ VM_WARN_ON(anon_vma->num_active_vmas);
put_anon_vma(anon_vma);
list_del(&avc->same_vma);
@@ -457,9 +476,10 @@
* chain and verify that the page in question is indeed mapped in it
* [ something equivalent to page_mapped_in_vma() ].
*
- * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
- * that the anon_vma pointer from page->mapping is valid if there is a
- * mapcount, we can dereference the anon_vma after observing those.
+ * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
+ * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
+ * if there is a mapcount, we can dereference the anon_vma after observing
+ * those.
*/
struct anon_vma *page_get_anon_vma(struct page *page)
{
@@ -502,13 +522,16 @@
*
* Its a little more complex as it tries to keep the fast path to a single
* atomic op -- the trylock. If we fail the trylock, we fall back to getting a
- * reference like with page_get_anon_vma() and then block on the mutex.
+ * reference like with page_get_anon_vma() and then block on the mutex
+ * in the !rwc->try_lock case.
*/
-struct anon_vma *page_lock_anon_vma_read(struct page *page)
+struct anon_vma *page_lock_anon_vma_read(struct page *page,
+ struct rmap_walk_control *rwc)
{
struct anon_vma *anon_vma = NULL;
struct anon_vma *root_anon_vma;
unsigned long anon_mapping;
+ bool success = false;
rcu_read_lock();
anon_mapping = (unsigned long)READ_ONCE(page->mapping);
@@ -529,6 +552,17 @@
up_read(&root_anon_vma->rwsem);
anon_vma = NULL;
}
+ goto out;
+ }
+ trace_android_vh_do_page_trylock(page, NULL, NULL, &success);
+ if (success) {
+ anon_vma = NULL;
+ goto out;
+ }
+
+ if (rwc && rwc->try_lock) {
+ anon_vma = NULL;
+ rwc->contended = true;
goto out;
}
@@ -658,7 +692,7 @@
*/
void flush_tlb_batched_pending(struct mm_struct *mm)
{
- if (mm->tlb_flush_batched) {
+ if (data_race(mm->tlb_flush_batched)) {
flush_tlb_mm(mm);
/*
@@ -768,6 +802,7 @@
}
if (pvmw.pte) {
+ trace_android_vh_look_around(&pvmw, page, vma, &referenced);
if (ptep_clear_flush_young_notify(vma, address,
pvmw.pte)) {
/*
@@ -803,6 +838,7 @@
pra->vm_flags |= vma->vm_flags;
}
+ trace_android_vh_page_referenced_one_end(vma, page, referenced);
if (!pra->mapcount)
return false; /* To break the loop */
@@ -827,8 +863,10 @@
* @memcg: target memory cgroup
* @vm_flags: collect encountered vma->vm_flags who actually referenced the page
*
- * Quick test_and_clear_referenced for all mappings to a page,
- * returns the number of ptes which referenced the page.
+ * Quick test_and_clear_referenced for all mappings of a page,
+ *
+ * Return: The number of mappings which referenced the page. Return -1 if
+ * the function bailed out due to rmap lock contention.
*/
int page_referenced(struct page *page,
int is_locked,
@@ -844,10 +882,11 @@
.rmap_one = page_referenced_one,
.arg = (void *)&pra,
.anon_lock = page_lock_anon_vma_read,
+ .try_lock = true,
};
*vm_flags = 0;
- if (!page_mapped(page))
+ if (!pra.mapcount)
return 0;
if (!page_rmapping(page))
@@ -874,8 +913,9 @@
if (we_locked)
unlock_page(page);
- return pra.referenced;
+ return rwc.contended ? -1 : pra.referenced;
}
+EXPORT_SYMBOL_GPL(page_referenced);
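
Hedged usage sketch (not from this patch): with rwc.try_lock set, page_referenced() now reports -1 when the rmap lock was contended, so a reclaim-side caller would typically back off rather than treat the page as unreferenced. The helper name below is hypothetical:

    /*
     * Illustrative caller only: a return of -1 means page_referenced()
     * bailed out on rmap lock contention, so conservatively treat the
     * page as recently used instead of reclaiming it.
     */
    static bool page_recently_used(struct page *page, struct mem_cgroup *memcg)
    {
            unsigned long vm_flags;
            int referenced = page_referenced(page, 0, memcg, &vm_flags);

            if (referenced == -1)   /* lock contended: assume referenced */
                    return true;
            return referenced > 0;
    }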
static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
unsigned long address, void *arg)
@@ -886,21 +926,22 @@
.address = address,
.flags = PVMW_SYNC,
};
- unsigned long start = address, end;
+ struct mmu_notifier_range range;
int *cleaned = arg;
/*
* We have to assume the worse case ie pmd for invalidation. Note that
* the page can not be free from this function.
*/
- end = vma_address_end(page, vma);
- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
+ 0, vma, vma->vm_mm, address,
+ vma_address_end(page, vma));
+ mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
- unsigned long cstart;
int ret = 0;
- cstart = address = pvmw.address;
+ address = pvmw.address;
if (pvmw.pte) {
pte_t entry;
pte_t *pte = pvmw.pte;
@@ -915,7 +956,7 @@
set_pte_at(vma->vm_mm, address, pte, entry);
ret = 1;
} else {
-#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
pmd_t *pmd = pvmw.pmd;
pmd_t entry;
@@ -927,7 +968,6 @@
entry = pmd_wrprotect(entry);
entry = pmd_mkclean(entry);
set_pmd_at(vma->vm_mm, address, pmd, entry);
- cstart &= PMD_MASK;
ret = 1;
#else
/* unexpected pmd-mapped page? */
@@ -946,7 +986,7 @@
(*cleaned)++;
}
- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+ mmu_notifier_invalidate_range_end(&range);
return true;
}
@@ -1014,7 +1054,7 @@
/**
* __page_set_anon_rmap - set up new anonymous rmap
- * @page: Page to add to rmap
+ * @page: Page or Hugepage to add to rmap
* @vma: VM area to add page to.
* @address: User virtual address of the mapping
* @exclusive: the page is exclusively owned by the current process
@@ -1051,7 +1091,6 @@
static void __page_check_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
-#ifdef CONFIG_DEBUG_VM
/*
* The page's anon-rmap details (mapping and index) are guaranteed to
* be set up correctly at this point.
@@ -1064,9 +1103,9 @@
* are initially only visible via the pagetables, and the pte is locked
* over the call to page_add_new_anon_rmap.
*/
- BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
- BUG_ON(page_to_pgoff(page) != linear_page_index(vma, address));
-#endif
+ VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
+ VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
+ page);
}
/**
@@ -1097,6 +1136,12 @@
{
bool compound = flags & RMAP_COMPOUND;
bool first;
+ bool success = false;
+
+ if (unlikely(PageKsm(page)))
+ lock_page_memcg(page);
+ else
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
if (compound) {
atomic_t *mapcount;
@@ -1105,11 +1150,14 @@
mapcount = compound_mapcount_ptr(page);
first = atomic_inc_and_test(mapcount);
} else {
- first = atomic_inc_and_test(&page->_mapcount);
+ trace_android_vh_update_page_mapcount(page, true, compound,
+ &first, &success);
+ if (!success)
+ first = atomic_inc_and_test(&page->_mapcount);
}
if (first) {
- int nr = compound ? hpage_nr_pages(page) : 1;
+ int nr = compound ? thp_nr_pages(page) : 1;
/*
* We use the irq-unsafe __{inc|mod}_zone_page_stat because
* these counters are not modified in interrupt context, and
@@ -1117,13 +1165,14 @@
* disabled.
*/
if (compound)
- __inc_node_page_state(page, NR_ANON_THPS);
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
+ __inc_lruvec_page_state(page, NR_ANON_THPS);
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
}
- if (unlikely(PageKsm(page)))
- return;
- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ if (unlikely(PageKsm(page))) {
+ unlock_page_memcg(page);
+ return;
+ }
/* address might be in next vma when migration races vma_adjust */
if (first)
@@ -1134,7 +1183,7 @@
}
/**
- * page_add_new_anon_rmap - add pte mapping to a new anonymous page
+ * __page_add_new_anon_rmap - add pte mapping to a new anonymous page
* @page: the page to add the mapping to
* @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped
@@ -1144,25 +1193,27 @@
* This means the inc-and-test can be bypassed.
* Page does not have to be locked.
*/
-void page_add_new_anon_rmap(struct page *page,
+void __page_add_new_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address, bool compound)
{
- int nr = compound ? hpage_nr_pages(page) : 1;
+ int nr = compound ? thp_nr_pages(page) : 1;
- VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
__SetPageSwapBacked(page);
if (compound) {
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
/* increment count (starts at -1) */
atomic_set(compound_mapcount_ptr(page), 0);
- __inc_node_page_state(page, NR_ANON_THPS);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
+
+ __inc_lruvec_page_state(page, NR_ANON_THPS);
} else {
/* Anon THP always mapped first with PMD */
VM_BUG_ON_PAGE(PageTransCompound(page), page);
/* increment count (starts at -1) */
atomic_set(&page->_mapcount, 0);
}
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, nr);
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
__page_set_anon_rmap(page, vma, address, 1);
}
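
The "(starts at -1)" comments above rely on the usual _mapcount convention; a hedged illustration follows (hypothetical function, not part of the patch):

    /*
     * Illustration of the _mapcount convention used above: -1 means
     * unmapped, 0 means one mapping, and so on. Setting it straight to
     * 0 for a brand-new anonymous page records the first mapping, which
     * is why the atomic inc-and-test can be bypassed here.
     */
    static int sketch_mapcount(struct page *page)
    {
            return atomic_read(&page->_mapcount) + 1;
    }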
@@ -1176,18 +1227,29 @@
void page_add_file_rmap(struct page *page, bool compound)
{
int i, nr = 1;
+ bool first_mapping;
+ bool success = false;
VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
lock_page_memcg(page);
if (compound && PageTransHuge(page)) {
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
- if (atomic_inc_and_test(&page[i]._mapcount))
- nr++;
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+ trace_android_vh_update_page_mapcount(&page[i], true,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (first_mapping)
+ nr++;
+ } else {
+ if (atomic_inc_and_test(&page[i]._mapcount))
+ nr++;
+ }
}
if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
goto out;
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
- __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ if (PageSwapBacked(page))
+ __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ else
+ __inc_node_page_state(page, NR_FILE_PMDMAPPED);
} else {
if (PageTransCompound(page) && page_mapping(page)) {
VM_WARN_ON_ONCE(!PageLocked(page));
@@ -1196,8 +1258,15 @@
if (PageMlocked(page))
clear_page_mlock(compound_head(page));
}
- if (!atomic_inc_and_test(&page->_mapcount))
- goto out;
+ trace_android_vh_update_page_mapcount(page, true,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (!first_mapping)
+ goto out;
+ } else {
+ if (!atomic_inc_and_test(&page->_mapcount))
+ goto out;
+ }
}
__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
out:
@@ -1207,30 +1276,47 @@
static void page_remove_file_rmap(struct page *page, bool compound)
{
int i, nr = 1;
+ bool first_mapping;
+ bool success = false;
VM_BUG_ON_PAGE(compound && !PageHead(page), page);
- lock_page_memcg(page);
/* Hugepages are not counted in NR_FILE_MAPPED for now. */
if (unlikely(PageHuge(page))) {
/* hugetlb pages are always mapped with pmds */
atomic_dec(compound_mapcount_ptr(page));
- goto out;
+ return;
}
/* page still mapped by someone else? */
if (compound && PageTransHuge(page)) {
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
- if (atomic_add_negative(-1, &page[i]._mapcount))
- nr++;
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+ trace_android_vh_update_page_mapcount(&page[i], false,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (first_mapping)
+ nr++;
+ } else {
+ if (atomic_add_negative(-1, &page[i]._mapcount))
+ nr++;
+ }
}
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
- goto out;
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
- __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ return;
+ if (PageSwapBacked(page))
+ __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+ else
+ __dec_node_page_state(page, NR_FILE_PMDMAPPED);
} else {
- if (!atomic_add_negative(-1, &page->_mapcount))
- goto out;
+ trace_android_vh_update_page_mapcount(page, false,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (!first_mapping)
+ return;
+ } else {
+ if (!atomic_add_negative(-1, &page->_mapcount))
+ return;
+ }
}
/*
@@ -1242,13 +1328,13 @@
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
-out:
- unlock_page_memcg(page);
}
static void page_remove_anon_compound_rmap(struct page *page)
{
int i, nr;
+ bool first_mapping;
+ bool success = false;
if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
return;
@@ -1260,28 +1346,41 @@
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
return;
- __dec_node_page_state(page, NR_ANON_THPS);
+ __dec_lruvec_page_state(page, NR_ANON_THPS);
if (TestClearPageDoubleMap(page)) {
/*
* Subpages can be mapped with PTEs too. Check how many of
- * themi are still mapped.
+ * them are still mapped.
*/
- for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
- if (atomic_add_negative(-1, &page[i]._mapcount))
- nr++;
+ for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+ trace_android_vh_update_page_mapcount(&page[i], false,
+ false, &first_mapping, &success);
+ if (success) {
+ if (first_mapping)
+ nr++;
+ } else {
+ if (atomic_add_negative(-1, &page[i]._mapcount))
+ nr++;
+ }
}
+
+ /*
+ * Queue the page for deferred split if at least one small
+ * page of the compound page is unmapped, but at least one
+ * small page is still mapped.
+ */
+ if (nr && nr < thp_nr_pages(page))
+ deferred_split_huge_page(page);
} else {
- nr = HPAGE_PMD_NR;
+ nr = thp_nr_pages(page);
}
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
- if (nr) {
- __mod_node_page_state(page_pgdat(page), NR_ANON_MAPPED, -nr);
- deferred_split_huge_page(page);
- }
+ if (nr)
+ __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
}
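
A hedged restatement of the deferred-split condition added above (hypothetical helper, not part of the patch): a THP is queued for deferred splitting only when it ends up partially mapped.

    /*
     * nr_unmapped is the number of subpages that just lost their last
     * PTE mapping in the loop above; split later only if some, but not
     * all, subpages of the compound page are now unmapped.
     */
    static bool should_defer_split(struct page *head, int nr_unmapped)
    {
            return nr_unmapped && nr_unmapped < thp_nr_pages(head);
    }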
/**
@@ -1293,22 +1392,36 @@
*/
void page_remove_rmap(struct page *page, bool compound)
{
- if (!PageAnon(page))
- return page_remove_file_rmap(page, compound);
+ bool first_mapping;
+ bool success = false;
+ lock_page_memcg(page);
- if (compound)
- return page_remove_anon_compound_rmap(page);
+ if (!PageAnon(page)) {
+ page_remove_file_rmap(page, compound);
+ goto out;
+ }
- /* page still mapped by someone else? */
- if (!atomic_add_negative(-1, &page->_mapcount))
- return;
+ if (compound) {
+ page_remove_anon_compound_rmap(page);
+ goto out;
+ }
+ trace_android_vh_update_page_mapcount(page, false,
+ compound, &first_mapping, &success);
+ if (success) {
+ if (!first_mapping)
+ goto out;
+ } else {
+ /* page still mapped by someone else? */
+ if (!atomic_add_negative(-1, &page->_mapcount))
+ goto out;
+ }
/*
* We use the irq-unsafe __{inc|mod}_zone_page_stat because
* these counters are not modified in interrupt context, and
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
- __dec_node_page_state(page, NR_ANON_MAPPED);
+ __dec_lruvec_page_state(page, NR_ANON_MAPPED);
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
@@ -1325,6 +1438,8 @@
* Leaving it set also helps swapoff to reinstate ptes
* faster for those pages still in swapcache.
*/
+out:
+ unlock_page_memcg(page);
}
/*
@@ -1342,8 +1457,8 @@
pte_t pteval;
struct page *subpage;
bool ret = true;
- unsigned long start = address, end;
- enum ttu_flags flags = (enum ttu_flags)arg;
+ struct mmu_notifier_range range;
+ enum ttu_flags flags = (enum ttu_flags)(long)arg;
/*
* When racing against e.g. zap_pte_range() on another cpu,
@@ -1375,16 +1490,19 @@
* Note that the page can not be free in this function as call of
* try_to_unmap() must hold a reference on the page.
*/
- end = PageKsm(page) ?
+ range.end = PageKsm(page) ?
address + PAGE_SIZE : vma_address_end(page, vma);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+ address, range.end);
if (PageHuge(page)) {
/*
* If sharing is possible, start and end will be adjusted
* accordingly.
*/
- adjust_range_if_pmd_sharing_possible(vma, &start, &end);
+ adjust_range_if_pmd_sharing_possible(vma, &range.start,
+ &range.end);
}
- mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+ mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) {
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
@@ -1408,7 +1526,7 @@
if (!PageTransCompound(page)) {
/*
* Holding pte lock, we do *not* need
- * mmap_sem here
+ * mmap_lock here
*/
mlock_vma_page(page);
}
@@ -1426,8 +1544,14 @@
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
- if (PageHuge(page)) {
- if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+ if (PageHuge(page) && !PageAnon(page)) {
+ /*
+ * To call huge_pmd_unshare, i_mmap_rwsem must be
+ * held in write mode. Caller needs to explicitly
+ * do this outside rmap routines.
+ */
+ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
+ if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
/*
* huge_pmd_unshare unmapped an entire PMD
* page. There is no way of knowing exactly
@@ -1435,9 +1559,10 @@
* we must flush them all. start/end were
* already adjusted above to cover this range.
*/
- flush_cache_range(vma, start, end);
- flush_tlb_range(vma, start, end);
- mmu_notifier_invalidate_range(mm, start, end);
+ flush_cache_range(vma, range.start, range.end);
+ flush_tlb_range(vma, range.start, range.end);
+ mmu_notifier_invalidate_range(mm, range.start,
+ range.end);
/*
* The ref count of the PMD page was dropped
@@ -1468,8 +1593,15 @@
*/
entry = make_migration_entry(page, 0);
swp_pte = swp_entry_to_pte(entry);
- if (pte_soft_dirty(pteval))
+
+ /*
+ * pteval maps a zone device page and is therefore
+ * a swap pte.
+ */
+ if (pte_swp_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_swp_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
/*
* No need to invalidate here it will synchronize on
@@ -1484,15 +1616,6 @@
*/
subpage = page;
goto discard;
- }
-
- if (!(flags & TTU_IGNORE_ACCESS)) {
- if (ptep_clear_flush_young_notify(vma, address,
- pvmw.pte)) {
- ret = false;
- page_vma_mapped_walk_done(&pvmw);
- break;
- }
}
/* Nuke the page table entry. */
@@ -1523,8 +1646,7 @@
if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (PageHuge(page)) {
- int nr = 1 << compound_order(page);
- hugetlb_count_sub(nr, mm);
+ hugetlb_count_sub(compound_nr(page), mm);
set_huge_swap_pte_at(mm, address,
pvmw.pte, pteval,
vma_mmu_pagesize(vma));
@@ -1570,6 +1692,8 @@
swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
set_pte_at(mm, address, pvmw.pte, swp_pte);
/*
* No need to invalidate here it will synchronize on
@@ -1594,7 +1718,30 @@
/* MADV_FREE page check */
if (!PageSwapBacked(page)) {
- if (!PageDirty(page)) {
+ int ref_count, map_count;
+
+ /*
+ * Synchronize with gup_pte_range():
+ * - clear PTE; barrier; read refcount
+ * - inc refcount; barrier; read PTE
+ */
+ smp_mb();
+
+ ref_count = page_ref_count(page);
+ map_count = page_mapcount(page);
+
+ /*
+ * Order reads for page refcount and dirty flag
+ * (see comments in __remove_mapping()).
+ */
+ smp_rmb();
+
+ /*
+ * The only page refs must be one from isolation
+ * plus the rmap(s) (dropped by discard:).
+ */
+ if (ref_count == 1 + map_count &&
+ !PageDirty(page)) {
/* Invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm,
address, address + PAGE_SIZE);
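
The MADV_FREE check above can be read as one self-contained test; the sketch below is illustrative only (hypothetical helper name), restating that a clean lazily-freed page may be discarded only when nothing but the isolation reference and its remaining mappings pin it:

    /*
     * Restatement of the check above: any extra reference (for example
     * one taken by a concurrent GUP) makes the page unsafe to discard.
     */
    static bool can_discard_lazyfree_page(struct page *page)
    {
            int ref_count, map_count;

            smp_mb();               /* pairs with gup_pte_range(): clear PTE vs. read refcount */
            ref_count = page_ref_count(page);
            map_count = page_mapcount(page);
            smp_rmb();              /* order refcount read vs. dirty-flag read */

            return ref_count == 1 + map_count && !PageDirty(page);
    }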
@@ -1636,6 +1783,8 @@
swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ if (pte_uffd_wp(pteval))
+ swp_pte = pte_swp_mkuffd_wp(swp_pte);
set_pte_at(mm, address, pvmw.pte, swp_pte);
/* Invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm, address,
@@ -1665,28 +1814,15 @@
put_page(page);
}
- mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
+ mmu_notifier_invalidate_range_end(&range);
+ trace_android_vh_try_to_unmap_one(vma, page, address, ret);
return ret;
}
-bool is_vma_temporary_stack(struct vm_area_struct *vma)
-{
- int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
-
- if (!maybe_stack)
- return false;
-
- if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
- VM_STACK_INCOMPLETE_SETUP)
- return true;
-
- return false;
-}
-
static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
{
- return is_vma_temporary_stack(vma);
+ return vma_is_temporary_stack(vma);
}
static int page_not_mapped(struct page *page)
@@ -1779,19 +1915,29 @@
struct anon_vma *anon_vma;
if (rwc->anon_lock)
- return rwc->anon_lock(page);
+ return rwc->anon_lock(page, rwc);
/*
* Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
* because that depends on page_mapped(); but not all its usages
- * are holding mmap_sem. Users without mmap_sem are required to
+ * are holding mmap_lock. Users without mmap_lock are required to
* take a reference count to prevent the anon_vma disappearing
*/
anon_vma = page_anon_vma(page);
if (!anon_vma)
return NULL;
+ if (anon_vma_trylock_read(anon_vma))
+ goto out;
+
+ if (rwc->try_lock) {
+ anon_vma = NULL;
+ rwc->contended = true;
+ goto out;
+ }
+
anon_vma_lock_read(anon_vma);
+out:
return anon_vma;
}
@@ -1804,7 +1950,7 @@
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the anon_vma struct it points to.
*
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
* where the page was found will be held for write. So, we won't recheck
* vm_flags for that VMA. That should be OK, because that vma shouldn't be
* LOCKED.
@@ -1827,7 +1973,7 @@
return;
pgoff_start = page_to_pgoff(page);
- pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
+ pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
pgoff_start, pgoff_end) {
struct vm_area_struct *vma = avc->vma;
@@ -1857,7 +2003,7 @@
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the address_space struct it points to.
*
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
* where the page was found will be held for write. So, we won't recheck
* vm_flags for that VMA. That should be OK, because that vma shouldn't be
* LOCKED.
@@ -1868,6 +2014,7 @@
struct address_space *mapping = page_mapping(page);
pgoff_t pgoff_start, pgoff_end;
struct vm_area_struct *vma;
+ bool got_lock = false, success = false;
/*
* The page lock not only makes sure that page->mapping cannot
@@ -1881,9 +2028,26 @@
return;
pgoff_start = page_to_pgoff(page);
- pgoff_end = pgoff_start + hpage_nr_pages(page) - 1;
- if (!locked)
- i_mmap_lock_read(mapping);
+ pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
+ if (!locked) {
+ trace_android_vh_do_page_trylock(page,
+ &mapping->i_mmap_rwsem, &got_lock, &success);
+ if (success) {
+ if (!got_lock)
+ return;
+ } else {
+ if (i_mmap_trylock_read(mapping))
+ goto lookup;
+
+ if (rwc->try_lock) {
+ rwc->contended = true;
+ return;
+ }
+
+ i_mmap_lock_read(mapping);
+ }
+ }
+lookup:
vma_interval_tree_foreach(vma, &mapping->i_mmap,
pgoff_start, pgoff_end) {
unsigned long address = vma_address(page, vma);
@@ -1928,27 +2092,10 @@
#ifdef CONFIG_HUGETLB_PAGE
/*
- * The following three functions are for anonymous (private mapped) hugepages.
+ * The following two functions are for anonymous (private mapped) hugepages.
* Unlike common anonymous pages, anonymous hugepages have no accounting code
* and no lru code, because we handle hugepages differently from common pages.
*/
-static void __hugepage_set_anon_rmap(struct page *page,
- struct vm_area_struct *vma, unsigned long address, int exclusive)
-{
- struct anon_vma *anon_vma = vma->anon_vma;
-
- BUG_ON(!anon_vma);
-
- if (PageAnon(page))
- return;
- if (!exclusive)
- anon_vma = anon_vma->root;
-
- anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
- page->mapping = (struct address_space *) anon_vma;
- page->index = linear_page_index(vma, address);
-}
-
void hugepage_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
@@ -1960,7 +2107,7 @@
/* address might be in next vma when migration races vma_adjust */
first = atomic_inc_and_test(compound_mapcount_ptr(page));
if (first)
- __hugepage_set_anon_rmap(page, vma, address, 0);
+ __page_set_anon_rmap(page, vma, address, 0);
}
void hugepage_add_new_anon_rmap(struct page *page,
@@ -1968,6 +2115,9 @@
{
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
atomic_set(compound_mapcount_ptr(page), 0);
- __hugepage_set_anon_rmap(page, vma, address, 1);
+ if (hpage_pincount_available(page))
+ atomic_set(compound_pincount_ptr(page), 0);
+
+ __page_set_anon_rmap(page, vma, address, 1);
}
#endif /* CONFIG_HUGETLB_PAGE */
--
Gitblit v1.6.2