From f9004dbfff8a3fbbd7e2a88c8a4327c7f2f8e5b2 Mon Sep 17 00:00:00 2001 From: hc <hc@nodka.com> Date: Wed, 31 Jan 2024 01:04:47 +0000 Subject: [PATCH] add driver 5G --- kernel/include/asm-generic/tlb.h | 526 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 files changed, 437 insertions(+), 89 deletions(-) diff --git a/kernel/include/asm-generic/tlb.h b/kernel/include/asm-generic/tlb.h index db72ad3..f40c953 100644 --- a/kernel/include/asm-generic/tlb.h +++ b/kernel/include/asm-generic/tlb.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* include/asm-generic/tlb.h * * Generic TLB shootdown code @@ -6,51 +7,176 @@ * Based on code from mm/memory.c Copyright Linus Torvalds and others. * * Copyright 2011 Red Hat, Inc., Peter Zijlstra - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #ifndef _ASM_GENERIC__TLB_H #define _ASM_GENERIC__TLB_H #include <linux/mmu_notifier.h> #include <linux/swap.h> -#include <asm/pgalloc.h> +#include <linux/hugetlb_inline.h> #include <asm/tlbflush.h> +#include <asm/cacheflush.h> -#ifdef CONFIG_HAVE_RCU_TABLE_FREE /* - * Semi RCU freeing of the page directories. - * - * This is needed by some architectures to implement software pagetable walkers. - * - * gup_fast() and other software pagetable walkers do a lockless page-table - * walk and therefore needs some synchronization with the freeing of the page - * directories. The chosen means to accomplish that is by disabling IRQs over - * the walk. - * - * Architectures that use IPIs to flush TLBs will then automagically DTRT, - * since we unlink the page, flush TLBs, free the page. Since the disabling of - * IRQs delays the completion of the TLB flush we can never observe an already - * freed page. - * - * Architectures that do not have this (PPC) need to delay the freeing by some - * other means, this is that means. - * - * What we do is batch the freed directory pages (tables) and RCU free them. - * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling - * holds off grace periods. - * - * However, in order to batch these pages we need to allocate storage, this - * allocation is deep inside the MM code and can thus easily fail on memory - * pressure. To guarantee progress we fall back to single table freeing, see - * the implementation of tlb_remove_table_one(). - * + * Blindly accessing user memory from NMI context can be dangerous + * if we're in the middle of switching the current user task or switching + * the loaded mm. */ +#ifndef nmi_uaccess_okay +# define nmi_uaccess_okay() true +#endif + +#ifdef CONFIG_MMU + +/* + * Generic MMU-gather implementation. + * + * The mmu_gather data structure is used by the mm code to implement the + * correct and efficient ordering of freeing pages and TLB invalidations. + * + * This correct ordering is: + * + * 1) unhook page + * 2) TLB invalidate page + * 3) free page + * + * That is, we must never free a page before we have ensured there are no live + * translations left to it. Otherwise it might be possible to observe (or + * worse, change) the page content after it has been reused. + * + * The mmu_gather API consists of: + * + * - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather + * + * Finish in particular will issue a (final) TLB invalidate and free + * all (remaining) queued pages. + * + * - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA + * + * Defaults to flushing at tlb_end_vma() to reset the range; helps when + * there's large holes between the VMAs. + * + * - tlb_remove_table() + * + * tlb_remove_table() is the basic primitive to free page-table directories + * (__p*_free_tlb()). In it's most primitive form it is an alias for + * tlb_remove_page() below, for when page directories are pages and have no + * additional constraints. + * + * See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE. + * + * - tlb_remove_page() / __tlb_remove_page() + * - tlb_remove_page_size() / __tlb_remove_page_size() + * + * __tlb_remove_page_size() is the basic primitive that queues a page for + * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a + * boolean indicating if the queue is (now) full and a call to + * tlb_flush_mmu() is required. + * + * tlb_remove_page() and tlb_remove_page_size() imply the call to + * tlb_flush_mmu() when required and has no return value. + * + * - tlb_change_page_size() + * + * call before __tlb_remove_page*() to set the current page-size; implies a + * possible tlb_flush_mmu() call. + * + * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() + * + * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets + * related state, like the range) + * + * tlb_flush_mmu() - in addition to the above TLB invalidate, also frees + * whatever pages are still batched. + * + * - mmu_gather::fullmm + * + * A flag set by tlb_gather_mmu() to indicate we're going to free + * the entire mm; this allows a number of optimizations. + * + * - We can ignore tlb_{start,end}_vma(); because we don't + * care about ranges. Everything will be shot down. + * + * - (RISC) architectures that use ASIDs can cycle to a new ASID + * and delay the invalidation until ASID space runs out. + * + * - mmu_gather::need_flush_all + * + * A flag that can be set by the arch code if it wants to force + * flush the entire TLB irrespective of the range. For instance + * x86-PAE needs this when changing top-level entries. + * + * And allows the architecture to provide and implement tlb_flush(): + * + * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make + * use of: + * + * - mmu_gather::start / mmu_gather::end + * + * which provides the range that needs to be flushed to cover the pages to + * be freed. + * + * - mmu_gather::freed_tables + * + * set when we freed page table pages + * + * - tlb_get_unmap_shift() / tlb_get_unmap_size() + * + * returns the smallest TLB entry size unmapped in this range. + * + * If an architecture does not provide tlb_flush() a default implementation + * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is + * specified, in which case we'll default to flush_tlb_mm(). + * + * Additionally there are a few opt-in features: + * + * MMU_GATHER_PAGE_SIZE + * + * This ensures we call tlb_flush() every time tlb_change_page_size() actually + * changes the size and provides mmu_gather::page_size to tlb_flush(). + * + * This might be useful if your architecture has size specific TLB + * invalidation instructions. + * + * MMU_GATHER_TABLE_FREE + * + * This provides tlb_remove_table(), to be used instead of tlb_remove_page() + * for page directores (__p*_free_tlb()). + * + * Useful if your architecture has non-page page directories. + * + * When used, an architecture is expected to provide __tlb_remove_table() + * which does the actual freeing of these pages. + * + * MMU_GATHER_RCU_TABLE_FREE + * + * Like MMU_GATHER_TABLE_FREE, and adds semi-RCU semantics to the free (see + * comment below). + * + * Useful if your architecture doesn't use IPIs for remote TLB invalidates + * and therefore doesn't naturally serialize with software page-table walkers. + * + * MMU_GATHER_NO_RANGE + * + * Use this if your architecture lacks an efficient flush_tlb_range(). + * + * MMU_GATHER_NO_GATHER + * + * If the option is set the mmu_gather will not track individual pages for + * delayed page free anymore. A platform that enables the option needs to + * provide its own implementation of the __tlb_remove_page_size() function to + * free pages. + * + * This is useful if your architecture already flushes TLB entries in the + * various ptep_get_and_clear() functions. + */ + +#ifdef CONFIG_MMU_GATHER_TABLE_FREE + struct mmu_table_batch { +#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE struct rcu_head rcu; +#endif unsigned int nr; void *tables[0]; }; @@ -58,11 +184,41 @@ #define MAX_TABLE_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) -extern void tlb_table_flush(struct mmu_gather *tlb); extern void tlb_remove_table(struct mmu_gather *tlb, void *table); +#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ + +/* + * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based + * page directories and we can use the normal page batching to free them. + */ +#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) + +#endif /* CONFIG_MMU_GATHER_TABLE_FREE */ + +#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE +/* + * This allows an architecture that does not use the linux page-tables for + * hardware to skip the TLBI when freeing page tables. + */ +#ifndef tlb_needs_table_invalidate +#define tlb_needs_table_invalidate() (true) #endif +void tlb_remove_table_sync_one(void); + +#else + +#ifdef tlb_needs_table_invalidate +#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE +#endif + +static inline void tlb_remove_table_sync_one(void) { } + +#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ + + +#ifndef CONFIG_MMU_GATHER_NO_GATHER /* * If we can't allocate a page to make a big batch of page pointers * to work on, then just handle a few from the on-stack structure. @@ -87,41 +243,68 @@ */ #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) -/* struct mmu_gather is an opaque type used by the mm code for passing around +extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, + int page_size); +#endif + +/* + * struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. */ struct mmu_gather { struct mm_struct *mm; -#ifdef CONFIG_HAVE_RCU_TABLE_FREE + +#ifdef CONFIG_MMU_GATHER_TABLE_FREE struct mmu_table_batch *batch; #endif + unsigned long start; unsigned long end; - /* we are in the middle of an operation to clear - * a full mm and can make some optimizations */ - unsigned int fullmm : 1, - /* we have performed an operation which - * requires a complete flush of the tlb */ - need_flush_all : 1; + /* + * we are in the middle of an operation to clear + * a full mm and can make some optimizations + */ + unsigned int fullmm : 1; + /* + * we have performed an operation which + * requires a complete flush of the tlb + */ + unsigned int need_flush_all : 1; + + /* + * we have removed page directories + */ + unsigned int freed_tables : 1; + + /* + * at which levels have we cleared entries? + */ + unsigned int cleared_ptes : 1; + unsigned int cleared_pmds : 1; + unsigned int cleared_puds : 1; + unsigned int cleared_p4ds : 1; + + /* + * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma + */ + unsigned int vma_exec : 1; + unsigned int vma_huge : 1; + + unsigned int batch_count; + +#ifndef CONFIG_MMU_GATHER_NO_GATHER struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; - unsigned int batch_count; - int page_size; + +#ifdef CONFIG_MMU_GATHER_PAGE_SIZE + unsigned int page_size; +#endif +#endif }; -#define HAVE_GENERIC_MMU_GATHER - -void arch_tlb_gather_mmu(struct mmu_gather *tlb, - struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); -void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force); -void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, - unsigned long size); -extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, - int page_size); static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, @@ -139,11 +322,107 @@ tlb->start = TASK_SIZE; tlb->end = 0; } + tlb->freed_tables = 0; + tlb->cleared_ptes = 0; + tlb->cleared_pmds = 0; + tlb->cleared_puds = 0; + tlb->cleared_p4ds = 0; + /* + * Do not reset mmu_gather::vma_* fields here, we do not + * call into tlb_start_vma() again to set them if there is an + * intermediate flush. + */ } + +#ifdef CONFIG_MMU_GATHER_NO_RANGE + +#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma) +#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma() +#endif + +/* + * When an architecture does not have efficient means of range flushing TLBs + * there is no point in doing intermediate flushes on tlb_end_vma() to keep the + * range small. We equally don't have to worry about page granularity or other + * things. + * + * All we need to do is issue a full flush for any !0 range. + */ +static inline void tlb_flush(struct mmu_gather *tlb) +{ + if (tlb->end) + flush_tlb_mm(tlb->mm); +} + +static inline void +tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } + +#define tlb_end_vma tlb_end_vma +static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { } + +#else /* CONFIG_MMU_GATHER_NO_RANGE */ + +#ifndef tlb_flush + +#if defined(tlb_start_vma) || defined(tlb_end_vma) +#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma() +#endif + +/* + * When an architecture does not provide its own tlb_flush() implementation + * but does have a reasonably efficient flush_vma_range() implementation + * use that. + */ +static inline void tlb_flush(struct mmu_gather *tlb) +{ + if (tlb->fullmm || tlb->need_flush_all) { + flush_tlb_mm(tlb->mm); + } else if (tlb->end) { + struct vm_area_struct vma = { + .vm_mm = tlb->mm, + .vm_flags = (tlb->vma_exec ? VM_EXEC : 0) | + (tlb->vma_huge ? VM_HUGETLB : 0), + }; + + flush_tlb_range(&vma, tlb->start, tlb->end); + } +} + +static inline void +tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + /* + * flush_tlb_range() implementations that look at VM_HUGETLB (tile, + * mips-4k) flush only large pages. + * + * flush_tlb_range() implementations that flush I-TLB also flush D-TLB + * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing + * range. + * + * We rely on tlb_end_vma() to issue a flush, such that when we reset + * these values the batch is empty. + */ + tlb->vma_huge = is_vm_hugetlb_page(vma); + tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); +} + +#else + +static inline void +tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } + +#endif + +#endif /* CONFIG_MMU_GATHER_NO_RANGE */ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { - if (!tlb->end) + /* + * Anything calling __tlb_adjust_range() also sets at least one of + * these bits. + */ + if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds || + tlb->cleared_puds || tlb->cleared_p4ds)) return; tlb_flush(tlb); @@ -172,21 +451,37 @@ return tlb_remove_page_size(tlb, page, PAGE_SIZE); } -#ifndef tlb_remove_check_page_size_change -#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change -static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, +static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { - /* - * We don't care about page size change, just update - * mmu_gather page size here so that debug checks - * doesn't throw false warning. - */ -#ifdef CONFIG_DEBUG_VM +#ifdef CONFIG_MMU_GATHER_PAGE_SIZE + if (tlb->page_size && tlb->page_size != page_size) { + if (!tlb->fullmm && !tlb->need_flush_all) + tlb_flush_mmu(tlb); + } + tlb->page_size = page_size; #endif } -#endif + +static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) +{ + if (tlb->cleared_ptes) + return PAGE_SHIFT; + if (tlb->cleared_pmds) + return PMD_SHIFT; + if (tlb->cleared_puds) + return PUD_SHIFT; + if (tlb->cleared_p4ds) + return P4D_SHIFT; + + return PAGE_SHIFT; +} + +static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) +{ + return 1UL << tlb_get_unmap_shift(tlb); +} /* * In the case of tlb vma handling, we can optimise these away in the @@ -194,18 +489,63 @@ * the vmas are adjusted to only cover the region to be torn down. */ #ifndef tlb_start_vma -#define tlb_start_vma(tlb, vma) do { } while (0) -#endif +static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + if (tlb->fullmm) + return; -#define __tlb_end_vma(tlb, vma) \ - do { \ - if (!tlb->fullmm) \ - tlb_flush_mmu_tlbonly(tlb); \ - } while (0) + tlb_update_vma_flags(tlb, vma); + flush_cache_range(vma, vma->vm_start, vma->vm_end); +} +#endif #ifndef tlb_end_vma -#define tlb_end_vma __tlb_end_vma +static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + if (tlb->fullmm) + return; + + /* + * Do a TLB flush and reset the range at VMA boundaries; this avoids + * the ranges growing with the unused space between consecutive VMAs, + * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on + * this. + */ + tlb_flush_mmu_tlbonly(tlb); +} #endif + +/* + * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, + * and set corresponding cleared_*. + */ +static inline void tlb_flush_pte_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_ptes = 1; +} + +static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_pmds = 1; +} + +static inline void tlb_flush_pud_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_puds = 1; +} + +static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + __tlb_adjust_range(tlb, address, size); + tlb->cleared_p4ds = 1; +} #ifndef __tlb_remove_tlb_entry #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) @@ -220,14 +560,22 @@ */ #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb_flush_pte_range(tlb, address, PAGE_SIZE); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) -#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - do { \ - __tlb_adjust_range(tlb, address, huge_page_size(h)); \ - __tlb_remove_tlb_entry(tlb, ptep, address); \ +#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ + do { \ + unsigned long _sz = huge_page_size(h); \ + if (_sz >= P4D_SIZE) \ + tlb_flush_p4d_range(tlb, address, _sz); \ + else if (_sz >= PUD_SIZE) \ + tlb_flush_pud_range(tlb, address, _sz); \ + else if (_sz >= PMD_SIZE) \ + tlb_flush_pmd_range(tlb, address, _sz); \ + else \ + tlb_flush_pte_range(tlb, address, _sz); \ + __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) /** @@ -240,7 +588,7 @@ #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \ + tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE); \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) @@ -254,7 +602,7 @@ #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \ + tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE); \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) @@ -279,7 +627,8 @@ #ifndef pte_free_tlb #define pte_free_tlb(tlb, ptep, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb_flush_pmd_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif @@ -287,31 +636,30 @@ #ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb_flush_pud_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif -#ifndef __ARCH_HAS_4LEVEL_HACK #ifndef pud_free_tlb #define pud_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb_flush_p4d_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif -#endif -#ifndef __ARCH_HAS_5LEVEL_HACK #ifndef p4d_free_tlb #define p4d_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif -#endif -#define tlb_migrate_finish(mm) do {} while (0) +#endif /* CONFIG_MMU */ #endif /* _ASM_GENERIC__TLB_H */ -- Gitblit v1.6.2