| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * Based on arch/arm/mm/mmu.c |
---|
3 | 4 | * |
---|
4 | 5 | * Copyright (C) 1995-2005 Russell King |
---|
5 | 6 | * Copyright (C) 2012 ARM Ltd. |
---|
6 | | - * |
---|
7 | | - * This program is free software; you can redistribute it and/or modify |
---|
8 | | - * it under the terms of the GNU General Public License version 2 as |
---|
9 | | - * published by the Free Software Foundation. |
---|
10 | | - * |
---|
11 | | - * This program is distributed in the hope that it will be useful, |
---|
12 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
13 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
14 | | - * GNU General Public License for more details. |
---|
15 | | - * |
---|
16 | | - * You should have received a copy of the GNU General Public License |
---|
17 | | - * along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
18 | 7 | */ |
---|
19 | 8 | |
---|
20 | 9 | #include <linux/cache.h> |
---|
.. | .. |
---|
28 | 17 | #include <linux/mman.h> |
---|
29 | 18 | #include <linux/nodemask.h> |
---|
30 | 19 | #include <linux/memblock.h> |
---|
| 20 | +#include <linux/memory.h> |
---|
31 | 21 | #include <linux/fs.h> |
---|
32 | 22 | #include <linux/io.h> |
---|
33 | 23 | #include <linux/mm.h> |
---|
34 | 24 | #include <linux/vmalloc.h> |
---|
35 | | -#include <linux/dma-contiguous.h> |
---|
36 | | -#include <linux/cma.h> |
---|
37 | 25 | |
---|
38 | 26 | #include <asm/barrier.h> |
---|
39 | 27 | #include <asm/cputype.h> |
---|
.. | .. |
---|
42 | 30 | #include <asm/kernel-pgtable.h> |
---|
43 | 31 | #include <asm/sections.h> |
---|
44 | 32 | #include <asm/setup.h> |
---|
45 | | -#include <asm/sizes.h> |
---|
| 33 | +#include <linux/sizes.h> |
---|
46 | 34 | #include <asm/tlb.h> |
---|
47 | | -#include <asm/memblock.h> |
---|
48 | 35 | #include <asm/mmu_context.h> |
---|
49 | 36 | #include <asm/ptdump.h> |
---|
50 | 37 | #include <asm/tlbflush.h> |
---|
| 38 | +#include <asm/pgalloc.h> |
---|
51 | 39 | |
---|
52 | 40 | #define NO_BLOCK_MAPPINGS BIT(0) |
---|
53 | 41 | #define NO_CONT_MAPPINGS BIT(1) |
---|
54 | 42 | |
---|
55 | | -u64 idmap_t0sz = TCR_T0SZ(VA_BITS); |
---|
| 43 | +u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); |
---|
56 | 44 | u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; |
---|
| 45 | + |
---|
| 46 | +u64 __section(".mmuoff.data.write") vabits_actual; |
---|
| 47 | +EXPORT_SYMBOL(vabits_actual); |
---|
57 | 48 | |
---|
58 | 49 | u64 kimage_voffset __ro_after_init; |
---|
59 | 50 | EXPORT_SYMBOL(kimage_voffset); |
---|
.. | .. |
---|
69 | 60 | static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; |
---|
70 | 61 | static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; |
---|
71 | 62 | |
---|
72 | | -struct dma_contig_early_reserve { |
---|
73 | | - phys_addr_t base; |
---|
74 | | - unsigned long size; |
---|
75 | | -}; |
---|
| 63 | +static DEFINE_SPINLOCK(swapper_pgdir_lock); |
---|
| 64 | +static DEFINE_MUTEX(fixmap_lock); |
---|
76 | 65 | |
---|
77 | | -static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS]; |
---|
78 | | -static int dma_mmu_remap_num; |
---|
79 | | - |
---|
80 | | -void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) |
---|
| 66 | +void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) |
---|
81 | 67 | { |
---|
82 | | - if (dma_mmu_remap_num >= ARRAY_SIZE(dma_mmu_remap)) { |
---|
83 | | - pr_err("ARM64: Not enough slots for DMA fixup reserved regions!\n"); |
---|
84 | | - return; |
---|
85 | | - } |
---|
86 | | - dma_mmu_remap[dma_mmu_remap_num].base = base; |
---|
87 | | - dma_mmu_remap[dma_mmu_remap_num].size = size; |
---|
88 | | - dma_mmu_remap_num++; |
---|
89 | | -} |
---|
| 68 | + pgd_t *fixmap_pgdp; |
---|
90 | 69 | |
---|
91 | | -static bool dma_overlap(phys_addr_t start, phys_addr_t end) |
---|
92 | | -{ |
---|
93 | | - int i; |
---|
94 | | - |
---|
95 | | - for (i = 0; i < dma_mmu_remap_num; i++) { |
---|
96 | | - phys_addr_t dma_base = dma_mmu_remap[i].base; |
---|
97 | | - phys_addr_t dma_end = dma_mmu_remap[i].base + |
---|
98 | | - dma_mmu_remap[i].size; |
---|
99 | | - |
---|
100 | | - if ((dma_base < end) && (dma_end > start)) |
---|
101 | | - return true; |
---|
102 | | - } |
---|
103 | | - return false; |
---|
| 70 | + spin_lock(&swapper_pgdir_lock); |
---|
| 71 | + fixmap_pgdp = pgd_set_fixmap(__pa_symbol(pgdp)); |
---|
| 72 | + WRITE_ONCE(*fixmap_pgdp, pgd); |
---|
| 73 | + /* |
---|
| 74 | + * We need dsb(ishst) here to ensure the page-table-walker sees |
---|
| 75 | + * our new entry before set_p?d() returns. The fixmap's |
---|
| 76 | + * flush_tlb_kernel_range() via clear_fixmap() does this for us. |
---|
| 77 | + */ |
---|
| 78 | + pgd_clear_fixmap(); |
---|
| 79 | + spin_unlock(&swapper_pgdir_lock); |
---|
104 | 80 | } |
---|
105 | 81 | |
---|
106 | 82 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, |
---|
.. | .. |
---|
114 | 90 | } |
---|
115 | 91 | EXPORT_SYMBOL(phys_mem_access_prot); |
---|
116 | 92 | |
---|
117 | | -static phys_addr_t __init early_pgtable_alloc(void) |
---|
| 93 | +static phys_addr_t __init early_pgtable_alloc(int shift) |
---|
118 | 94 | { |
---|
119 | 95 | phys_addr_t phys; |
---|
120 | 96 | void *ptr; |
---|
121 | 97 | |
---|
122 | | - phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); |
---|
| 98 | + phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); |
---|
| 99 | + if (!phys) |
---|
| 100 | + panic("Failed to allocate page table page\n"); |
---|
123 | 101 | |
---|
124 | 102 | /* |
---|
125 | 103 | * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE |
---|
.. | .. |
---|
145 | 123 | * The following mapping attributes may be updated in live |
---|
146 | 124 | * kernel mappings without the need for break-before-make. |
---|
147 | 125 | */ |
---|
148 | | - static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; |
---|
| 126 | + pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG; |
---|
149 | 127 | |
---|
150 | 128 | /* creating or taking down mappings is always safe */ |
---|
151 | 129 | if (old == 0 || new == 0) |
---|
.. | .. |
---|
158 | 136 | /* Transitioning from Non-Global to Global is unsafe */ |
---|
159 | 137 | if (old & ~new & PTE_NG) |
---|
160 | 138 | return false; |
---|
| 139 | + |
---|
| 140 | + /* |
---|
| 141 | + * Changing the memory type between Normal and Normal-Tagged is safe |
---|
| 142 | + * since Tagged is considered a permission attribute from the |
---|
| 143 | + * mismatched attribute aliases perspective. |
---|
| 144 | + */ |
---|
| 145 | + if (((old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) || |
---|
| 146 | + (old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED)) && |
---|
| 147 | + ((new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) || |
---|
| 148 | + (new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED))) |
---|
| 149 | + mask |= PTE_ATTRINDX_MASK; |
---|
161 | 150 | |
---|
162 | 151 | return ((old ^ new) & ~mask) == 0; |
---|
163 | 152 | } |
---|
.. | .. |
---|
189 | 178 | static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, |
---|
190 | 179 | unsigned long end, phys_addr_t phys, |
---|
191 | 180 | pgprot_t prot, |
---|
192 | | - phys_addr_t (*pgtable_alloc)(void), |
---|
| 181 | + phys_addr_t (*pgtable_alloc)(int), |
---|
193 | 182 | int flags) |
---|
194 | 183 | { |
---|
195 | 184 | unsigned long next; |
---|
.. | .. |
---|
199 | 188 | if (pmd_none(pmd)) { |
---|
200 | 189 | phys_addr_t pte_phys; |
---|
201 | 190 | BUG_ON(!pgtable_alloc); |
---|
202 | | - pte_phys = pgtable_alloc(); |
---|
| 191 | + pte_phys = pgtable_alloc(PAGE_SHIFT); |
---|
203 | 192 | __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE); |
---|
204 | 193 | pmd = READ_ONCE(*pmdp); |
---|
205 | 194 | } |
---|
.. | .. |
---|
223 | 212 | |
---|
224 | 213 | static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, |
---|
225 | 214 | phys_addr_t phys, pgprot_t prot, |
---|
226 | | - phys_addr_t (*pgtable_alloc)(void), int flags) |
---|
| 215 | + phys_addr_t (*pgtable_alloc)(int), int flags) |
---|
227 | 216 | { |
---|
228 | 217 | unsigned long next; |
---|
229 | 218 | pmd_t *pmdp; |
---|
.. | .. |
---|
236 | 225 | |
---|
237 | 226 | /* try section mapping first */ |
---|
238 | 227 | if (((addr | next | phys) & ~SECTION_MASK) == 0 && |
---|
239 | | - (flags & NO_BLOCK_MAPPINGS) == 0 && |
---|
240 | | - !dma_overlap(phys, phys + next - addr)) { |
---|
| 228 | + (flags & NO_BLOCK_MAPPINGS) == 0) { |
---|
241 | 229 | pmd_set_huge(pmdp, phys, prot); |
---|
242 | 230 | |
---|
243 | 231 | /* |
---|
.. | .. |
---|
262 | 250 | static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, |
---|
263 | 251 | unsigned long end, phys_addr_t phys, |
---|
264 | 252 | pgprot_t prot, |
---|
265 | | - phys_addr_t (*pgtable_alloc)(void), int flags) |
---|
| 253 | + phys_addr_t (*pgtable_alloc)(int), int flags) |
---|
266 | 254 | { |
---|
267 | 255 | unsigned long next; |
---|
268 | 256 | pud_t pud = READ_ONCE(*pudp); |
---|
.. | .. |
---|
274 | 262 | if (pud_none(pud)) { |
---|
275 | 263 | phys_addr_t pmd_phys; |
---|
276 | 264 | BUG_ON(!pgtable_alloc); |
---|
277 | | - pmd_phys = pgtable_alloc(); |
---|
| 265 | + pmd_phys = pgtable_alloc(PMD_SHIFT); |
---|
278 | 266 | __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); |
---|
279 | 267 | pud = READ_ONCE(*pudp); |
---|
280 | 268 | } |
---|
.. | .. |
---|
310 | 298 | |
---|
311 | 299 | static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, |
---|
312 | 300 | phys_addr_t phys, pgprot_t prot, |
---|
313 | | - phys_addr_t (*pgtable_alloc)(void), |
---|
| 301 | + phys_addr_t (*pgtable_alloc)(int), |
---|
314 | 302 | int flags) |
---|
315 | 303 | { |
---|
316 | 304 | unsigned long next; |
---|
317 | 305 | pud_t *pudp; |
---|
318 | | - pgd_t pgd = READ_ONCE(*pgdp); |
---|
| 306 | + p4d_t *p4dp = p4d_offset(pgdp, addr); |
---|
| 307 | + p4d_t p4d = READ_ONCE(*p4dp); |
---|
319 | 308 | |
---|
320 | | - if (pgd_none(pgd)) { |
---|
| 309 | + if (p4d_none(p4d)) { |
---|
321 | 310 | phys_addr_t pud_phys; |
---|
322 | 311 | BUG_ON(!pgtable_alloc); |
---|
323 | | - pud_phys = pgtable_alloc(); |
---|
324 | | - __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE); |
---|
325 | | - pgd = READ_ONCE(*pgdp); |
---|
| 312 | + pud_phys = pgtable_alloc(PUD_SHIFT); |
---|
| 313 | + __p4d_populate(p4dp, pud_phys, PUD_TYPE_TABLE); |
---|
| 314 | + p4d = READ_ONCE(*p4dp); |
---|
326 | 315 | } |
---|
327 | | - BUG_ON(pgd_bad(pgd)); |
---|
| 316 | + BUG_ON(p4d_bad(p4d)); |
---|
328 | 317 | |
---|
329 | | - pudp = pud_set_fixmap_offset(pgdp, addr); |
---|
| 318 | + /* |
---|
| 319 | + * No need for locking during early boot. And it doesn't work as |
---|
| 320 | + * expected with KASLR enabled. |
---|
| 321 | + */ |
---|
| 322 | + if (system_state != SYSTEM_BOOTING) |
---|
| 323 | + mutex_lock(&fixmap_lock); |
---|
| 324 | + pudp = pud_set_fixmap_offset(p4dp, addr); |
---|
330 | 325 | do { |
---|
331 | 326 | pud_t old_pud = READ_ONCE(*pudp); |
---|
332 | 327 | |
---|
.. | .. |
---|
336 | 331 | * For 4K granule only, attempt to put down a 1GB block |
---|
337 | 332 | */ |
---|
338 | 333 | if (use_1G_block(addr, next, phys) && |
---|
339 | | - (flags & NO_BLOCK_MAPPINGS) == 0 && |
---|
340 | | - !dma_overlap(phys, phys + next - addr)) { |
---|
| 334 | + (flags & NO_BLOCK_MAPPINGS) == 0) { |
---|
341 | 335 | pud_set_huge(pudp, phys, prot); |
---|
342 | 336 | |
---|
343 | 337 | /* |
---|
.. | .. |
---|
357 | 351 | } while (pudp++, addr = next, addr != end); |
---|
358 | 352 | |
---|
359 | 353 | pud_clear_fixmap(); |
---|
| 354 | + if (system_state != SYSTEM_BOOTING) |
---|
| 355 | + mutex_unlock(&fixmap_lock); |
---|
360 | 356 | } |
---|
361 | 357 | |
---|
362 | 358 | static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, |
---|
363 | 359 | unsigned long virt, phys_addr_t size, |
---|
364 | 360 | pgprot_t prot, |
---|
365 | | - phys_addr_t (*pgtable_alloc)(void), |
---|
| 361 | + phys_addr_t (*pgtable_alloc)(int), |
---|
366 | 362 | int flags) |
---|
367 | 363 | { |
---|
368 | | - unsigned long addr, length, end, next; |
---|
369 | | - pgd_t *pgdp = pgd_offset_raw(pgdir, virt); |
---|
| 364 | + unsigned long addr, end, next; |
---|
| 365 | + pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); |
---|
370 | 366 | |
---|
371 | 367 | /* |
---|
372 | 368 | * If the virtual and physical address don't have the same offset |
---|
.. | .. |
---|
377 | 373 | |
---|
378 | 374 | phys &= PAGE_MASK; |
---|
379 | 375 | addr = virt & PAGE_MASK; |
---|
380 | | - length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); |
---|
| 376 | + end = PAGE_ALIGN(virt + size); |
---|
381 | 377 | |
---|
382 | | - end = addr + length; |
---|
383 | 378 | do { |
---|
384 | 379 | next = pgd_addr_end(addr, end); |
---|
385 | 380 | alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, |
---|
.. | .. |
---|
388 | 383 | } while (pgdp++, addr = next, addr != end); |
---|
389 | 384 | } |
---|
390 | 385 | |
---|
391 | | -static phys_addr_t pgd_pgtable_alloc(void) |
---|
| 386 | +static phys_addr_t __pgd_pgtable_alloc(int shift) |
---|
392 | 387 | { |
---|
393 | | - void *ptr = (void *)__get_free_page(PGALLOC_GFP); |
---|
394 | | - if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) |
---|
395 | | - BUG(); |
---|
| 388 | + void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); |
---|
| 389 | + BUG_ON(!ptr); |
---|
396 | 390 | |
---|
397 | 391 | /* Ensure the zeroed page is visible to the page table walker */ |
---|
398 | 392 | dsb(ishst); |
---|
399 | 393 | return __pa(ptr); |
---|
400 | 394 | } |
---|
401 | 395 | |
---|
402 | | -/** |
---|
403 | | - * create_pgtable_mapping - create a pagetable mapping for given |
---|
404 | | - * physical start and end addresses. |
---|
405 | | - * @start: physical start address. |
---|
406 | | - * @end: physical end address. |
---|
407 | | - */ |
---|
408 | | -void create_pgtable_mapping(phys_addr_t start, phys_addr_t end) |
---|
| 396 | +static phys_addr_t pgd_pgtable_alloc(int shift) |
---|
409 | 397 | { |
---|
410 | | - unsigned long virt = (unsigned long)phys_to_virt(start); |
---|
| 398 | + phys_addr_t pa = __pgd_pgtable_alloc(shift); |
---|
411 | 399 | |
---|
412 | | - if (virt < VMALLOC_START) { |
---|
413 | | - pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", |
---|
414 | | - &start, virt); |
---|
415 | | - return; |
---|
416 | | - } |
---|
| 400 | + /* |
---|
| 401 | + * Call proper page table ctor in case later we need to |
---|
| 402 | + * call core mm functions like apply_to_page_range() on |
---|
| 403 | + * this pre-allocated page table. |
---|
| 404 | + * |
---|
| 405 | + * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is |
---|
| 406 | + * folded, and if so pgtable_pmd_page_ctor() becomes nop. |
---|
| 407 | + */ |
---|
| 408 | + if (shift == PAGE_SHIFT) |
---|
| 409 | + BUG_ON(!pgtable_pte_page_ctor(phys_to_page(pa))); |
---|
| 410 | + else if (shift == PMD_SHIFT) |
---|
| 411 | + BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa))); |
---|
417 | 412 | |
---|
418 | | - __create_pgd_mapping(init_mm.pgd, start, virt, end - start, |
---|
419 | | - PAGE_KERNEL, NULL, 0); |
---|
| 413 | + return pa; |
---|
420 | 414 | } |
---|
421 | | -EXPORT_SYMBOL_GPL(create_pgtable_mapping); |
---|
422 | 415 | |
---|
423 | 416 | /* |
---|
424 | 417 | * This function can only be used to modify existing table entries, |
---|
.. | .. |
---|
428 | 421 | static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, |
---|
429 | 422 | phys_addr_t size, pgprot_t prot) |
---|
430 | 423 | { |
---|
431 | | - if (virt < VMALLOC_START) { |
---|
| 424 | + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { |
---|
432 | 425 | pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", |
---|
433 | 426 | &phys, virt); |
---|
434 | 427 | return; |
---|
.. | .. |
---|
455 | 448 | static void update_mapping_prot(phys_addr_t phys, unsigned long virt, |
---|
456 | 449 | phys_addr_t size, pgprot_t prot) |
---|
457 | 450 | { |
---|
458 | | - if (virt < VMALLOC_START) { |
---|
| 451 | + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { |
---|
459 | 452 | pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", |
---|
460 | 453 | &phys, virt); |
---|
461 | 454 | return; |
---|
.. | .. |
---|
485 | 478 | PAGE_KERNEL_RO); |
---|
486 | 479 | } |
---|
487 | 480 | |
---|
| 481 | +static bool crash_mem_map __initdata; |
---|
| 482 | + |
---|
| 483 | +static int __init enable_crash_mem_map(char *arg) |
---|
| 484 | +{ |
---|
| 485 | + /* |
---|
| 486 | + * Proper parameter parsing is done by reserve_crashkernel(). We only |
---|
| 487 | + * need to know if the linear map has to avoid block mappings so that |
---|
| 488 | + * the crashkernel reservations can be unmapped later. |
---|
| 489 | + */ |
---|
| 490 | + crash_mem_map = true; |
---|
| 491 | + |
---|
| 492 | + return 0; |
---|
| 493 | +} |
---|
| 494 | +early_param("crashkernel", enable_crash_mem_map); |
---|
| 495 | + |
---|
488 | 496 | static void __init map_mem(pgd_t *pgdp) |
---|
489 | 497 | { |
---|
490 | 498 | phys_addr_t kernel_start = __pa_symbol(_text); |
---|
491 | 499 | phys_addr_t kernel_end = __pa_symbol(__init_begin); |
---|
492 | | - struct memblock_region *reg; |
---|
| 500 | + phys_addr_t start, end; |
---|
493 | 501 | int flags = 0; |
---|
| 502 | + u64 i; |
---|
494 | 503 | |
---|
495 | | - if (debug_pagealloc_enabled()) |
---|
| 504 | + if (rodata_full || debug_pagealloc_enabled() || |
---|
| 505 | + IS_ENABLED(CONFIG_KFENCE)) |
---|
496 | 506 | flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; |
---|
497 | 507 | |
---|
498 | 508 | /* |
---|
.. | .. |
---|
502 | 512 | * the following for-loop |
---|
503 | 513 | */ |
---|
504 | 514 | memblock_mark_nomap(kernel_start, kernel_end - kernel_start); |
---|
| 515 | + |
---|
505 | 516 | #ifdef CONFIG_KEXEC_CORE |
---|
506 | | - if (crashk_res.end) |
---|
507 | | - memblock_mark_nomap(crashk_res.start, |
---|
508 | | - resource_size(&crashk_res)); |
---|
| 517 | + if (crash_mem_map) { |
---|
| 518 | + if (IS_ENABLED(CONFIG_ZONE_DMA) || |
---|
| 519 | + IS_ENABLED(CONFIG_ZONE_DMA32)) |
---|
| 520 | + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; |
---|
| 521 | + else if (crashk_res.end) |
---|
| 522 | + memblock_mark_nomap(crashk_res.start, |
---|
| 523 | + resource_size(&crashk_res)); |
---|
| 524 | + } |
---|
509 | 525 | #endif |
---|
510 | 526 | |
---|
511 | 527 | /* map all the memory banks */ |
---|
512 | | - for_each_memblock(memory, reg) { |
---|
513 | | - phys_addr_t start = reg->base; |
---|
514 | | - phys_addr_t end = start + reg->size; |
---|
515 | | - |
---|
| 528 | + for_each_mem_range(i, &start, &end) { |
---|
516 | 529 | if (start >= end) |
---|
517 | 530 | break; |
---|
518 | | - if (memblock_is_nomap(reg)) |
---|
519 | | - continue; |
---|
520 | | - |
---|
521 | | - __map_memblock(pgdp, start, end, PAGE_KERNEL, flags); |
---|
| 531 | + /* |
---|
| 532 | + * The linear map must allow allocation tags reading/writing |
---|
| 533 | + * if MTE is present. Otherwise, it has the same attributes as |
---|
| 534 | + * PAGE_KERNEL. |
---|
| 535 | + */ |
---|
| 536 | + __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), |
---|
| 537 | + flags); |
---|
522 | 538 | } |
---|
523 | 539 | |
---|
524 | 540 | /* |
---|
.. | .. |
---|
535 | 551 | PAGE_KERNEL, NO_CONT_MAPPINGS); |
---|
536 | 552 | memblock_clear_nomap(kernel_start, kernel_end - kernel_start); |
---|
537 | 553 | |
---|
538 | | -#ifdef CONFIG_KEXEC_CORE |
---|
539 | 554 | /* |
---|
540 | 555 | * Use page-level mappings here so that we can shrink the region |
---|
541 | 556 | * in page granularity and put back unused memory to buddy system |
---|
542 | 557 | * through /sys/kernel/kexec_crash_size interface. |
---|
543 | 558 | */ |
---|
544 | | - if (crashk_res.end) { |
---|
545 | | - __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1, |
---|
546 | | - PAGE_KERNEL, |
---|
547 | | - NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); |
---|
548 | | - memblock_clear_nomap(crashk_res.start, |
---|
549 | | - resource_size(&crashk_res)); |
---|
| 559 | +#ifdef CONFIG_KEXEC_CORE |
---|
| 560 | + if (crash_mem_map && |
---|
| 561 | + !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) { |
---|
| 562 | + if (crashk_res.end) { |
---|
| 563 | + __map_memblock(pgdp, crashk_res.start, |
---|
| 564 | + crashk_res.end + 1, |
---|
| 565 | + PAGE_KERNEL, |
---|
| 566 | + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); |
---|
| 567 | + memblock_clear_nomap(crashk_res.start, |
---|
| 568 | + resource_size(&crashk_res)); |
---|
| 569 | + } |
---|
550 | 570 | } |
---|
551 | 571 | #endif |
---|
552 | 572 | } |
---|
.. | .. |
---|
593 | 613 | |
---|
594 | 614 | static int __init parse_rodata(char *arg) |
---|
595 | 615 | { |
---|
596 | | - return strtobool(arg, &rodata_enabled); |
---|
| 616 | + int ret = strtobool(arg, &rodata_enabled); |
---|
| 617 | + if (!ret) { |
---|
| 618 | + rodata_full = false; |
---|
| 619 | + return 0; |
---|
| 620 | + } |
---|
| 621 | + |
---|
| 622 | + /* permit 'full' in addition to boolean options */ |
---|
| 623 | + if (strcmp(arg, "full")) |
---|
| 624 | + return -EINVAL; |
---|
| 625 | + |
---|
| 626 | + rodata_enabled = true; |
---|
| 627 | + rodata_full = true; |
---|
| 628 | + return 0; |
---|
597 | 629 | } |
---|
598 | 630 | early_param("rodata", parse_rodata); |
---|
599 | 631 | |
---|
600 | 632 | #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 |
---|
601 | 633 | static int __init map_entry_trampoline(void) |
---|
602 | 634 | { |
---|
| 635 | + int i; |
---|
| 636 | + |
---|
603 | 637 | pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; |
---|
604 | 638 | phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); |
---|
605 | 639 | |
---|
.. | .. |
---|
608 | 642 | |
---|
609 | 643 | /* Map only the text into the trampoline page table */ |
---|
610 | 644 | memset(tramp_pg_dir, 0, PGD_SIZE); |
---|
611 | | - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, |
---|
612 | | - prot, pgd_pgtable_alloc, 0); |
---|
| 645 | + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, |
---|
| 646 | + entry_tramp_text_size(), prot, |
---|
| 647 | + __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); |
---|
613 | 648 | |
---|
614 | 649 | /* Map both the text and data into the kernel page table */ |
---|
615 | | - __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); |
---|
| 650 | + for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) |
---|
| 651 | + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, |
---|
| 652 | + pa_start + i * PAGE_SIZE, prot); |
---|
| 653 | + |
---|
616 | 654 | if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { |
---|
617 | 655 | extern char __entry_tramp_data_start[]; |
---|
618 | 656 | |
---|
.. | .. |
---|
625 | 663 | } |
---|
626 | 664 | core_initcall(map_entry_trampoline); |
---|
627 | 665 | #endif |
---|
| 666 | + |
---|
| 667 | +/* |
---|
| 668 | + * Open coded check for BTI, only for use to determine configuration |
---|
| 669 | + * for early mappings for before the cpufeature code has run. |
---|
| 670 | + */ |
---|
| 671 | +static bool arm64_early_this_cpu_has_bti(void) |
---|
| 672 | +{ |
---|
| 673 | + u64 pfr1; |
---|
| 674 | + |
---|
| 675 | + if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) |
---|
| 676 | + return false; |
---|
| 677 | + |
---|
| 678 | + pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1); |
---|
| 679 | + return cpuid_feature_extract_unsigned_field(pfr1, |
---|
| 680 | + ID_AA64PFR1_BT_SHIFT); |
---|
| 681 | +} |
---|
628 | 682 | |
---|
629 | 683 | /* |
---|
630 | 684 | * Create fine-grained mappings for the kernel. |
---|
.. | .. |
---|
642 | 696 | pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; |
---|
643 | 697 | |
---|
644 | 698 | /* |
---|
| 699 | + * If we have a CPU that supports BTI and a kernel built for |
---|
| 700 | + * BTI then mark the kernel executable text as guarded pages |
---|
| 701 | + * now so we don't have to rewrite the page tables later. |
---|
| 702 | + */ |
---|
| 703 | + if (arm64_early_this_cpu_has_bti()) |
---|
| 704 | + text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP); |
---|
| 705 | + |
---|
| 706 | + /* |
---|
645 | 707 | * Only rodata will be remapped with different permissions later on, |
---|
646 | 708 | * all other segments are allowed to use contiguous mappings. |
---|
647 | 709 | */ |
---|
.. | .. |
---|
655 | 717 | &vmlinux_initdata, 0, VM_NO_GUARD); |
---|
656 | 718 | map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0); |
---|
657 | 719 | |
---|
658 | | - if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) { |
---|
| 720 | + if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdp, FIXADDR_START)))) { |
---|
659 | 721 | /* |
---|
660 | 722 | * The fixmap falls in a separate pgd to the kernel, and doesn't |
---|
661 | 723 | * live in the carveout for the swapper_pg_dir. We can simply |
---|
662 | 724 | * re-use the existing dir for the fixmap. |
---|
663 | 725 | */ |
---|
664 | | - set_pgd(pgd_offset_raw(pgdp, FIXADDR_START), |
---|
| 726 | + set_pgd(pgd_offset_pgd(pgdp, FIXADDR_START), |
---|
665 | 727 | READ_ONCE(*pgd_offset_k(FIXADDR_START))); |
---|
666 | 728 | } else if (CONFIG_PGTABLE_LEVELS > 3) { |
---|
| 729 | + pgd_t *bm_pgdp; |
---|
| 730 | + p4d_t *bm_p4dp; |
---|
| 731 | + pud_t *bm_pudp; |
---|
667 | 732 | /* |
---|
668 | 733 | * The fixmap shares its top level pgd entry with the kernel |
---|
669 | 734 | * mapping. This can really only occur when we are running |
---|
.. | .. |
---|
671 | 736 | * entry instead. |
---|
672 | 737 | */ |
---|
673 | 738 | BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); |
---|
674 | | - pud_populate(&init_mm, |
---|
675 | | - pud_set_fixmap_offset(pgdp, FIXADDR_START), |
---|
676 | | - lm_alias(bm_pmd)); |
---|
| 739 | + bm_pgdp = pgd_offset_pgd(pgdp, FIXADDR_START); |
---|
| 740 | + bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_START); |
---|
| 741 | + bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_START); |
---|
| 742 | + pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd)); |
---|
677 | 743 | pud_clear_fixmap(); |
---|
678 | 744 | } else { |
---|
679 | 745 | BUG(); |
---|
.. | .. |
---|
682 | 748 | kasan_copy_shadow(pgdp); |
---|
683 | 749 | } |
---|
684 | 750 | |
---|
685 | | -/* |
---|
686 | | - * paging_init() sets up the page tables, initialises the zone memory |
---|
687 | | - * maps and sets up the zero page. |
---|
688 | | - */ |
---|
689 | 751 | void __init paging_init(void) |
---|
690 | 752 | { |
---|
691 | | - phys_addr_t pgd_phys = early_pgtable_alloc(); |
---|
692 | | - pgd_t *pgdp = pgd_set_fixmap(pgd_phys); |
---|
| 753 | + pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir)); |
---|
693 | 754 | |
---|
694 | 755 | map_kernel(pgdp); |
---|
695 | 756 | map_mem(pgdp); |
---|
696 | 757 | |
---|
697 | | - /* |
---|
698 | | - * We want to reuse the original swapper_pg_dir so we don't have to |
---|
699 | | - * communicate the new address to non-coherent secondaries in |
---|
700 | | - * secondary_entry, and so cpu_switch_mm can generate the address with |
---|
701 | | - * adrp+add rather than a load from some global variable. |
---|
702 | | - * |
---|
703 | | - * To do this we need to go via a temporary pgd. |
---|
704 | | - */ |
---|
705 | | - cpu_replace_ttbr1(__va(pgd_phys)); |
---|
706 | | - memcpy(swapper_pg_dir, pgdp, PGD_SIZE); |
---|
707 | | - cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); |
---|
708 | | - |
---|
709 | 758 | pgd_clear_fixmap(); |
---|
710 | | - memblock_free(pgd_phys, PAGE_SIZE); |
---|
711 | 759 | |
---|
712 | | - /* |
---|
713 | | - * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd |
---|
714 | | - * allocated with it. |
---|
715 | | - */ |
---|
716 | | - memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, |
---|
717 | | - __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir) |
---|
718 | | - - PAGE_SIZE); |
---|
| 760 | + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); |
---|
| 761 | + init_mm.pgd = swapper_pg_dir; |
---|
| 762 | + |
---|
| 763 | + memblock_free(__pa_symbol(init_pg_dir), |
---|
| 764 | + __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); |
---|
| 765 | + |
---|
| 766 | + memblock_allow_resize(); |
---|
719 | 767 | } |
---|
720 | 768 | |
---|
721 | 769 | /* |
---|
.. | .. |
---|
724 | 772 | int kern_addr_valid(unsigned long addr) |
---|
725 | 773 | { |
---|
726 | 774 | pgd_t *pgdp; |
---|
| 775 | + p4d_t *p4dp; |
---|
727 | 776 | pud_t *pudp, pud; |
---|
728 | 777 | pmd_t *pmdp, pmd; |
---|
729 | 778 | pte_t *ptep, pte; |
---|
730 | 779 | |
---|
| 780 | + addr = arch_kasan_reset_tag(addr); |
---|
731 | 781 | if ((((long)addr) >> VA_BITS) != -1UL) |
---|
732 | 782 | return 0; |
---|
733 | 783 | |
---|
.. | .. |
---|
735 | 785 | if (pgd_none(READ_ONCE(*pgdp))) |
---|
736 | 786 | return 0; |
---|
737 | 787 | |
---|
738 | | - pudp = pud_offset(pgdp, addr); |
---|
| 788 | + p4dp = p4d_offset(pgdp, addr); |
---|
| 789 | + if (p4d_none(READ_ONCE(*p4dp))) |
---|
| 790 | + return 0; |
---|
| 791 | + |
---|
| 792 | + pudp = pud_offset(p4dp, addr); |
---|
739 | 793 | pud = READ_ONCE(*pudp); |
---|
740 | 794 | if (pud_none(pud)) |
---|
741 | 795 | return 0; |
---|
.. | .. |
---|
758 | 812 | |
---|
759 | 813 | return pfn_valid(pte_pfn(pte)); |
---|
760 | 814 | } |
---|
761 | | -EXPORT_SYMBOL_GPL(kern_addr_valid); |
---|
| 815 | + |
---|
| 816 | +#ifdef CONFIG_MEMORY_HOTPLUG |
---|
| 817 | +static void free_hotplug_page_range(struct page *page, size_t size, |
---|
| 818 | + struct vmem_altmap *altmap) |
---|
| 819 | +{ |
---|
| 820 | + if (altmap) { |
---|
| 821 | + vmem_altmap_free(altmap, size >> PAGE_SHIFT); |
---|
| 822 | + } else { |
---|
| 823 | + WARN_ON(PageReserved(page)); |
---|
| 824 | + free_pages((unsigned long)page_address(page), get_order(size)); |
---|
| 825 | + } |
---|
| 826 | +} |
---|
| 827 | + |
---|
| 828 | +static void free_hotplug_pgtable_page(struct page *page) |
---|
| 829 | +{ |
---|
| 830 | + free_hotplug_page_range(page, PAGE_SIZE, NULL); |
---|
| 831 | +} |
---|
| 832 | + |
---|
| 833 | +static bool pgtable_range_aligned(unsigned long start, unsigned long end, |
---|
| 834 | + unsigned long floor, unsigned long ceiling, |
---|
| 835 | + unsigned long mask) |
---|
| 836 | +{ |
---|
| 837 | + start &= mask; |
---|
| 838 | + if (start < floor) |
---|
| 839 | + return false; |
---|
| 840 | + |
---|
| 841 | + if (ceiling) { |
---|
| 842 | + ceiling &= mask; |
---|
| 843 | + if (!ceiling) |
---|
| 844 | + return false; |
---|
| 845 | + } |
---|
| 846 | + |
---|
| 847 | + if (end - 1 > ceiling - 1) |
---|
| 848 | + return false; |
---|
| 849 | + return true; |
---|
| 850 | +} |
---|
| 851 | + |
---|
| 852 | +static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr, |
---|
| 853 | + unsigned long end, bool free_mapped, |
---|
| 854 | + struct vmem_altmap *altmap) |
---|
| 855 | +{ |
---|
| 856 | + pte_t *ptep, pte; |
---|
| 857 | + |
---|
| 858 | + do { |
---|
| 859 | + ptep = pte_offset_kernel(pmdp, addr); |
---|
| 860 | + pte = READ_ONCE(*ptep); |
---|
| 861 | + if (pte_none(pte)) |
---|
| 862 | + continue; |
---|
| 863 | + |
---|
| 864 | + WARN_ON(!pte_present(pte)); |
---|
| 865 | + pte_clear(&init_mm, addr, ptep); |
---|
| 866 | + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); |
---|
| 867 | + if (free_mapped) |
---|
| 868 | + free_hotplug_page_range(pte_page(pte), |
---|
| 869 | + PAGE_SIZE, altmap); |
---|
| 870 | + } while (addr += PAGE_SIZE, addr < end); |
---|
| 871 | +} |
---|
| 872 | + |
---|
| 873 | +static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr, |
---|
| 874 | + unsigned long end, bool free_mapped, |
---|
| 875 | + struct vmem_altmap *altmap) |
---|
| 876 | +{ |
---|
| 877 | + unsigned long next; |
---|
| 878 | + pmd_t *pmdp, pmd; |
---|
| 879 | + |
---|
| 880 | + do { |
---|
| 881 | + next = pmd_addr_end(addr, end); |
---|
| 882 | + pmdp = pmd_offset(pudp, addr); |
---|
| 883 | + pmd = READ_ONCE(*pmdp); |
---|
| 884 | + if (pmd_none(pmd)) |
---|
| 885 | + continue; |
---|
| 886 | + |
---|
| 887 | + WARN_ON(!pmd_present(pmd)); |
---|
| 888 | + if (pmd_sect(pmd)) { |
---|
| 889 | + pmd_clear(pmdp); |
---|
| 890 | + |
---|
| 891 | + /* |
---|
| 892 | + * One TLBI should be sufficient here as the PMD_SIZE |
---|
| 893 | + * range is mapped with a single block entry. |
---|
| 894 | + */ |
---|
| 895 | + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); |
---|
| 896 | + if (free_mapped) |
---|
| 897 | + free_hotplug_page_range(pmd_page(pmd), |
---|
| 898 | + PMD_SIZE, altmap); |
---|
| 899 | + continue; |
---|
| 900 | + } |
---|
| 901 | + WARN_ON(!pmd_table(pmd)); |
---|
| 902 | + unmap_hotplug_pte_range(pmdp, addr, next, free_mapped, altmap); |
---|
| 903 | + } while (addr = next, addr < end); |
---|
| 904 | +} |
---|
| 905 | + |
---|
| 906 | +static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr, |
---|
| 907 | + unsigned long end, bool free_mapped, |
---|
| 908 | + struct vmem_altmap *altmap) |
---|
| 909 | +{ |
---|
| 910 | + unsigned long next; |
---|
| 911 | + pud_t *pudp, pud; |
---|
| 912 | + |
---|
| 913 | + do { |
---|
| 914 | + next = pud_addr_end(addr, end); |
---|
| 915 | + pudp = pud_offset(p4dp, addr); |
---|
| 916 | + pud = READ_ONCE(*pudp); |
---|
| 917 | + if (pud_none(pud)) |
---|
| 918 | + continue; |
---|
| 919 | + |
---|
| 920 | + WARN_ON(!pud_present(pud)); |
---|
| 921 | + if (pud_sect(pud)) { |
---|
| 922 | + pud_clear(pudp); |
---|
| 923 | + |
---|
| 924 | + /* |
---|
| 925 | + * One TLBI should be sufficient here as the PUD_SIZE |
---|
| 926 | + * range is mapped with a single block entry. |
---|
| 927 | + */ |
---|
| 928 | + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); |
---|
| 929 | + if (free_mapped) |
---|
| 930 | + free_hotplug_page_range(pud_page(pud), |
---|
| 931 | + PUD_SIZE, altmap); |
---|
| 932 | + continue; |
---|
| 933 | + } |
---|
| 934 | + WARN_ON(!pud_table(pud)); |
---|
| 935 | + unmap_hotplug_pmd_range(pudp, addr, next, free_mapped, altmap); |
---|
| 936 | + } while (addr = next, addr < end); |
---|
| 937 | +} |
---|
| 938 | + |
---|
| 939 | +static void unmap_hotplug_p4d_range(pgd_t *pgdp, unsigned long addr, |
---|
| 940 | + unsigned long end, bool free_mapped, |
---|
| 941 | + struct vmem_altmap *altmap) |
---|
| 942 | +{ |
---|
| 943 | + unsigned long next; |
---|
| 944 | + p4d_t *p4dp, p4d; |
---|
| 945 | + |
---|
| 946 | + do { |
---|
| 947 | + next = p4d_addr_end(addr, end); |
---|
| 948 | + p4dp = p4d_offset(pgdp, addr); |
---|
| 949 | + p4d = READ_ONCE(*p4dp); |
---|
| 950 | + if (p4d_none(p4d)) |
---|
| 951 | + continue; |
---|
| 952 | + |
---|
| 953 | + WARN_ON(!p4d_present(p4d)); |
---|
| 954 | + unmap_hotplug_pud_range(p4dp, addr, next, free_mapped, altmap); |
---|
| 955 | + } while (addr = next, addr < end); |
---|
| 956 | +} |
---|
| 957 | + |
---|
| 958 | +static void unmap_hotplug_range(unsigned long addr, unsigned long end, |
---|
| 959 | + bool free_mapped, struct vmem_altmap *altmap) |
---|
| 960 | +{ |
---|
| 961 | + unsigned long next; |
---|
| 962 | + pgd_t *pgdp, pgd; |
---|
| 963 | + |
---|
| 964 | + /* |
---|
| 965 | + * altmap can only be used as vmemmap mapping backing memory. |
---|
| 966 | + * In case the backing memory itself is not being freed, then |
---|
| 967 | + * altmap is irrelevant. Warn about this inconsistency when |
---|
| 968 | + * encountered. |
---|
| 969 | + */ |
---|
| 970 | + WARN_ON(!free_mapped && altmap); |
---|
| 971 | + |
---|
| 972 | + do { |
---|
| 973 | + next = pgd_addr_end(addr, end); |
---|
| 974 | + pgdp = pgd_offset_k(addr); |
---|
| 975 | + pgd = READ_ONCE(*pgdp); |
---|
| 976 | + if (pgd_none(pgd)) |
---|
| 977 | + continue; |
---|
| 978 | + |
---|
| 979 | + WARN_ON(!pgd_present(pgd)); |
---|
| 980 | + unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped, altmap); |
---|
| 981 | + } while (addr = next, addr < end); |
---|
| 982 | +} |
---|
| 983 | + |
---|
| 984 | +static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr, |
---|
| 985 | + unsigned long end, unsigned long floor, |
---|
| 986 | + unsigned long ceiling) |
---|
| 987 | +{ |
---|
| 988 | + pte_t *ptep, pte; |
---|
| 989 | + unsigned long i, start = addr; |
---|
| 990 | + |
---|
| 991 | + do { |
---|
| 992 | + ptep = pte_offset_kernel(pmdp, addr); |
---|
| 993 | + pte = READ_ONCE(*ptep); |
---|
| 994 | + |
---|
| 995 | + /* |
---|
| 996 | + * This is just a sanity check here which verifies that |
---|
| 997 | + * pte clearing has been done by earlier unmap loops. |
---|
| 998 | + */ |
---|
| 999 | + WARN_ON(!pte_none(pte)); |
---|
| 1000 | + } while (addr += PAGE_SIZE, addr < end); |
---|
| 1001 | + |
---|
| 1002 | + if (!pgtable_range_aligned(start, end, floor, ceiling, PMD_MASK)) |
---|
| 1003 | + return; |
---|
| 1004 | + |
---|
| 1005 | + /* |
---|
| 1006 | + * Check whether we can free the pte page if the rest of the |
---|
| 1007 | + * entries are empty. Overlap with other regions have been |
---|
| 1008 | + * handled by the floor/ceiling check. |
---|
| 1009 | + */ |
---|
| 1010 | + ptep = pte_offset_kernel(pmdp, 0UL); |
---|
| 1011 | + for (i = 0; i < PTRS_PER_PTE; i++) { |
---|
| 1012 | + if (!pte_none(READ_ONCE(ptep[i]))) |
---|
| 1013 | + return; |
---|
| 1014 | + } |
---|
| 1015 | + |
---|
| 1016 | + pmd_clear(pmdp); |
---|
| 1017 | + __flush_tlb_kernel_pgtable(start); |
---|
| 1018 | + free_hotplug_pgtable_page(virt_to_page(ptep)); |
---|
| 1019 | +} |
---|
| 1020 | + |
---|
| 1021 | +static void free_empty_pmd_table(pud_t *pudp, unsigned long addr, |
---|
| 1022 | + unsigned long end, unsigned long floor, |
---|
| 1023 | + unsigned long ceiling) |
---|
| 1024 | +{ |
---|
| 1025 | + pmd_t *pmdp, pmd; |
---|
| 1026 | + unsigned long i, next, start = addr; |
---|
| 1027 | + |
---|
| 1028 | + do { |
---|
| 1029 | + next = pmd_addr_end(addr, end); |
---|
| 1030 | + pmdp = pmd_offset(pudp, addr); |
---|
| 1031 | + pmd = READ_ONCE(*pmdp); |
---|
| 1032 | + if (pmd_none(pmd)) |
---|
| 1033 | + continue; |
---|
| 1034 | + |
---|
| 1035 | + WARN_ON(!pmd_present(pmd) || !pmd_table(pmd) || pmd_sect(pmd)); |
---|
| 1036 | + free_empty_pte_table(pmdp, addr, next, floor, ceiling); |
---|
| 1037 | + } while (addr = next, addr < end); |
---|
| 1038 | + |
---|
| 1039 | + if (CONFIG_PGTABLE_LEVELS <= 2) |
---|
| 1040 | + return; |
---|
| 1041 | + |
---|
| 1042 | + if (!pgtable_range_aligned(start, end, floor, ceiling, PUD_MASK)) |
---|
| 1043 | + return; |
---|
| 1044 | + |
---|
| 1045 | + /* |
---|
| 1046 | + * Check whether we can free the pmd page if the rest of the |
---|
| 1047 | + * entries are empty. Overlap with other regions have been |
---|
| 1048 | + * handled by the floor/ceiling check. |
---|
| 1049 | + */ |
---|
| 1050 | + pmdp = pmd_offset(pudp, 0UL); |
---|
| 1051 | + for (i = 0; i < PTRS_PER_PMD; i++) { |
---|
| 1052 | + if (!pmd_none(READ_ONCE(pmdp[i]))) |
---|
| 1053 | + return; |
---|
| 1054 | + } |
---|
| 1055 | + |
---|
| 1056 | + pud_clear(pudp); |
---|
| 1057 | + __flush_tlb_kernel_pgtable(start); |
---|
| 1058 | + free_hotplug_pgtable_page(virt_to_page(pmdp)); |
---|
| 1059 | +} |
---|
| 1060 | + |
---|
| 1061 | +static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr, |
---|
| 1062 | + unsigned long end, unsigned long floor, |
---|
| 1063 | + unsigned long ceiling) |
---|
| 1064 | +{ |
---|
| 1065 | + pud_t *pudp, pud; |
---|
| 1066 | + unsigned long i, next, start = addr; |
---|
| 1067 | + |
---|
| 1068 | + do { |
---|
| 1069 | + next = pud_addr_end(addr, end); |
---|
| 1070 | + pudp = pud_offset(p4dp, addr); |
---|
| 1071 | + pud = READ_ONCE(*pudp); |
---|
| 1072 | + if (pud_none(pud)) |
---|
| 1073 | + continue; |
---|
| 1074 | + |
---|
| 1075 | + WARN_ON(!pud_present(pud) || !pud_table(pud) || pud_sect(pud)); |
---|
| 1076 | + free_empty_pmd_table(pudp, addr, next, floor, ceiling); |
---|
| 1077 | + } while (addr = next, addr < end); |
---|
| 1078 | + |
---|
| 1079 | + if (CONFIG_PGTABLE_LEVELS <= 3) |
---|
| 1080 | + return; |
---|
| 1081 | + |
---|
| 1082 | + if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK)) |
---|
| 1083 | + return; |
---|
| 1084 | + |
---|
| 1085 | + /* |
---|
| 1086 | + * Check whether we can free the pud page if the rest of the |
---|
| 1087 | + * entries are empty. Overlap with other regions have been |
---|
| 1088 | + * handled by the floor/ceiling check. |
---|
| 1089 | + */ |
---|
| 1090 | + pudp = pud_offset(p4dp, 0UL); |
---|
| 1091 | + for (i = 0; i < PTRS_PER_PUD; i++) { |
---|
| 1092 | + if (!pud_none(READ_ONCE(pudp[i]))) |
---|
| 1093 | + return; |
---|
| 1094 | + } |
---|
| 1095 | + |
---|
| 1096 | + p4d_clear(p4dp); |
---|
| 1097 | + __flush_tlb_kernel_pgtable(start); |
---|
| 1098 | + free_hotplug_pgtable_page(virt_to_page(pudp)); |
---|
| 1099 | +} |
---|
| 1100 | + |
---|
| 1101 | +static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr, |
---|
| 1102 | + unsigned long end, unsigned long floor, |
---|
| 1103 | + unsigned long ceiling) |
---|
| 1104 | +{ |
---|
| 1105 | + unsigned long next; |
---|
| 1106 | + p4d_t *p4dp, p4d; |
---|
| 1107 | + |
---|
| 1108 | + do { |
---|
| 1109 | + next = p4d_addr_end(addr, end); |
---|
| 1110 | + p4dp = p4d_offset(pgdp, addr); |
---|
| 1111 | + p4d = READ_ONCE(*p4dp); |
---|
| 1112 | + if (p4d_none(p4d)) |
---|
| 1113 | + continue; |
---|
| 1114 | + |
---|
| 1115 | + WARN_ON(!p4d_present(p4d)); |
---|
| 1116 | + free_empty_pud_table(p4dp, addr, next, floor, ceiling); |
---|
| 1117 | + } while (addr = next, addr < end); |
---|
| 1118 | +} |
---|
| 1119 | + |
---|
| 1120 | +static void free_empty_tables(unsigned long addr, unsigned long end, |
---|
| 1121 | + unsigned long floor, unsigned long ceiling) |
---|
| 1122 | +{ |
---|
| 1123 | + unsigned long next; |
---|
| 1124 | + pgd_t *pgdp, pgd; |
---|
| 1125 | + |
---|
| 1126 | + do { |
---|
| 1127 | + next = pgd_addr_end(addr, end); |
---|
| 1128 | + pgdp = pgd_offset_k(addr); |
---|
| 1129 | + pgd = READ_ONCE(*pgdp); |
---|
| 1130 | + if (pgd_none(pgd)) |
---|
| 1131 | + continue; |
---|
| 1132 | + |
---|
| 1133 | + WARN_ON(!pgd_present(pgd)); |
---|
| 1134 | + free_empty_p4d_table(pgdp, addr, next, floor, ceiling); |
---|
| 1135 | + } while (addr = next, addr < end); |
---|
| 1136 | +} |
---|
| 1137 | +#endif |
---|
| 1138 | + |
---|
762 | 1139 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
---|
763 | 1140 | #if !ARM64_SWAPPER_USES_SECTION_MAPS |
---|
764 | 1141 | int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, |
---|
765 | 1142 | struct vmem_altmap *altmap) |
---|
766 | 1143 | { |
---|
767 | | - return vmemmap_populate_basepages(start, end, node); |
---|
| 1144 | + return vmemmap_populate_basepages(start, end, node, altmap); |
---|
768 | 1145 | } |
---|
769 | 1146 | #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ |
---|
770 | 1147 | int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, |
---|
.. | .. |
---|
773 | 1150 | unsigned long addr = start; |
---|
774 | 1151 | unsigned long next; |
---|
775 | 1152 | pgd_t *pgdp; |
---|
| 1153 | + p4d_t *p4dp; |
---|
776 | 1154 | pud_t *pudp; |
---|
777 | 1155 | pmd_t *pmdp; |
---|
778 | 1156 | |
---|
.. | .. |
---|
783 | 1161 | if (!pgdp) |
---|
784 | 1162 | return -ENOMEM; |
---|
785 | 1163 | |
---|
786 | | - pudp = vmemmap_pud_populate(pgdp, addr, node); |
---|
| 1164 | + p4dp = vmemmap_p4d_populate(pgdp, addr, node); |
---|
| 1165 | + if (!p4dp) |
---|
| 1166 | + return -ENOMEM; |
---|
| 1167 | + |
---|
| 1168 | + pudp = vmemmap_pud_populate(p4dp, addr, node); |
---|
787 | 1169 | if (!pudp) |
---|
788 | 1170 | return -ENOMEM; |
---|
789 | 1171 | |
---|
.. | .. |
---|
791 | 1173 | if (pmd_none(READ_ONCE(*pmdp))) { |
---|
792 | 1174 | void *p = NULL; |
---|
793 | 1175 | |
---|
794 | | - p = vmemmap_alloc_block_buf(PMD_SIZE, node); |
---|
795 | | - if (!p) |
---|
796 | | - return -ENOMEM; |
---|
| 1176 | + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); |
---|
| 1177 | + if (!p) { |
---|
| 1178 | + if (vmemmap_populate_basepages(addr, next, node, altmap)) |
---|
| 1179 | + return -ENOMEM; |
---|
| 1180 | + continue; |
---|
| 1181 | + } |
---|
797 | 1182 | |
---|
798 | 1183 | pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL)); |
---|
799 | 1184 | } else |
---|
.. | .. |
---|
802 | 1187 | |
---|
803 | 1188 | return 0; |
---|
804 | 1189 | } |
---|
805 | | -#endif /* CONFIG_ARM64_64K_PAGES */ |
---|
| 1190 | +#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */ |
---|
806 | 1191 | void vmemmap_free(unsigned long start, unsigned long end, |
---|
807 | 1192 | struct vmem_altmap *altmap) |
---|
808 | 1193 | { |
---|
| 1194 | +#ifdef CONFIG_MEMORY_HOTPLUG |
---|
| 1195 | + WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); |
---|
| 1196 | + |
---|
| 1197 | + unmap_hotplug_range(start, end, true, altmap); |
---|
| 1198 | + free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END); |
---|
| 1199 | +#endif |
---|
809 | 1200 | } |
---|
810 | 1201 | #endif /* CONFIG_SPARSEMEM_VMEMMAP */ |
---|
811 | 1202 | |
---|
812 | 1203 | static inline pud_t * fixmap_pud(unsigned long addr) |
---|
813 | 1204 | { |
---|
814 | 1205 | pgd_t *pgdp = pgd_offset_k(addr); |
---|
815 | | - pgd_t pgd = READ_ONCE(*pgdp); |
---|
| 1206 | + p4d_t *p4dp = p4d_offset(pgdp, addr); |
---|
| 1207 | + p4d_t p4d = READ_ONCE(*p4dp); |
---|
816 | 1208 | |
---|
817 | | - BUG_ON(pgd_none(pgd) || pgd_bad(pgd)); |
---|
| 1209 | + BUG_ON(p4d_none(p4d) || p4d_bad(p4d)); |
---|
818 | 1210 | |
---|
819 | | - return pud_offset_kimg(pgdp, addr); |
---|
| 1211 | + return pud_offset_kimg(p4dp, addr); |
---|
820 | 1212 | } |
---|
821 | 1213 | |
---|
822 | 1214 | static inline pmd_t * fixmap_pmd(unsigned long addr) |
---|
.. | .. |
---|
842 | 1234 | */ |
---|
843 | 1235 | void __init early_fixmap_init(void) |
---|
844 | 1236 | { |
---|
845 | | - pgd_t *pgdp, pgd; |
---|
| 1237 | + pgd_t *pgdp; |
---|
| 1238 | + p4d_t *p4dp, p4d; |
---|
846 | 1239 | pud_t *pudp; |
---|
847 | 1240 | pmd_t *pmdp; |
---|
848 | 1241 | unsigned long addr = FIXADDR_START; |
---|
849 | 1242 | |
---|
850 | 1243 | pgdp = pgd_offset_k(addr); |
---|
851 | | - pgd = READ_ONCE(*pgdp); |
---|
| 1244 | + p4dp = p4d_offset(pgdp, addr); |
---|
| 1245 | + p4d = READ_ONCE(*p4dp); |
---|
852 | 1246 | if (CONFIG_PGTABLE_LEVELS > 3 && |
---|
853 | | - !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) { |
---|
| 1247 | + !(p4d_none(p4d) || p4d_page_paddr(p4d) == __pa_symbol(bm_pud))) { |
---|
854 | 1248 | /* |
---|
855 | 1249 | * We only end up here if the kernel mapping and the fixmap |
---|
856 | 1250 | * share the top level pgd entry, which should only happen on |
---|
857 | 1251 | * 16k/4 levels configurations. |
---|
858 | 1252 | */ |
---|
859 | 1253 | BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); |
---|
860 | | - pudp = pud_offset_kimg(pgdp, addr); |
---|
| 1254 | + pudp = pud_offset_kimg(p4dp, addr); |
---|
861 | 1255 | } else { |
---|
862 | | - if (pgd_none(pgd)) |
---|
863 | | - __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE); |
---|
| 1256 | + if (p4d_none(p4d)) |
---|
| 1257 | + __p4d_populate(p4dp, __pa_symbol(bm_pud), PUD_TYPE_TABLE); |
---|
864 | 1258 | pudp = fixmap_pud(addr); |
---|
865 | 1259 | } |
---|
866 | 1260 | if (pud_none(READ_ONCE(*pudp))) |
---|
.. | .. |
---|
978 | 1372 | * SW table walks can't handle removal of intermediate entries. |
---|
979 | 1373 | */ |
---|
980 | 1374 | return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && |
---|
981 | | - !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); |
---|
| 1375 | + !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); |
---|
982 | 1376 | } |
---|
983 | 1377 | |
---|
984 | 1378 | int __init arch_ioremap_pmd_supported(void) |
---|
985 | 1379 | { |
---|
986 | 1380 | /* See arch_ioremap_pud_supported() */ |
---|
987 | | - return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); |
---|
| 1381 | + return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); |
---|
988 | 1382 | } |
---|
989 | 1383 | |
---|
990 | 1384 | int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) |
---|
991 | 1385 | { |
---|
992 | | - pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | |
---|
993 | | - pgprot_val(mk_sect_prot(prot))); |
---|
994 | | - pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); |
---|
| 1386 | + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); |
---|
995 | 1387 | |
---|
996 | 1388 | /* Only allow permission changes for now */ |
---|
997 | 1389 | if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), |
---|
998 | 1390 | pud_val(new_pud))) |
---|
999 | 1391 | return 0; |
---|
1000 | 1392 | |
---|
1001 | | - BUG_ON(phys & ~PUD_MASK); |
---|
| 1393 | + VM_BUG_ON(phys & ~PUD_MASK); |
---|
1002 | 1394 | set_pud(pudp, new_pud); |
---|
1003 | 1395 | return 1; |
---|
1004 | 1396 | } |
---|
1005 | 1397 | |
---|
1006 | 1398 | int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) |
---|
1007 | 1399 | { |
---|
1008 | | - pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | |
---|
1009 | | - pgprot_val(mk_sect_prot(prot))); |
---|
1010 | | - pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); |
---|
| 1400 | + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); |
---|
1011 | 1401 | |
---|
1012 | 1402 | /* Only allow permission changes for now */ |
---|
1013 | 1403 | if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), |
---|
1014 | 1404 | pmd_val(new_pmd))) |
---|
1015 | 1405 | return 0; |
---|
1016 | 1406 | |
---|
1017 | | - BUG_ON(phys & ~PMD_MASK); |
---|
| 1407 | + VM_BUG_ON(phys & ~PMD_MASK); |
---|
1018 | 1408 | set_pmd(pmdp, new_pmd); |
---|
1019 | 1409 | return 1; |
---|
1020 | 1410 | } |
---|
.. | .. |
---|
1042 | 1432 | |
---|
1043 | 1433 | pmd = READ_ONCE(*pmdp); |
---|
1044 | 1434 | |
---|
1045 | | - if (!pmd_present(pmd)) |
---|
1046 | | - return 1; |
---|
1047 | 1435 | if (!pmd_table(pmd)) { |
---|
1048 | | - VM_WARN_ON(!pmd_table(pmd)); |
---|
| 1436 | + VM_WARN_ON(1); |
---|
1049 | 1437 | return 1; |
---|
1050 | 1438 | } |
---|
1051 | 1439 | |
---|
.. | .. |
---|
1065 | 1453 | |
---|
1066 | 1454 | pud = READ_ONCE(*pudp); |
---|
1067 | 1455 | |
---|
1068 | | - if (!pud_present(pud)) |
---|
1069 | | - return 1; |
---|
1070 | 1456 | if (!pud_table(pud)) { |
---|
1071 | | - VM_WARN_ON(!pud_table(pud)); |
---|
| 1457 | + VM_WARN_ON(1); |
---|
1072 | 1458 | return 1; |
---|
1073 | 1459 | } |
---|
1074 | 1460 | |
---|
.. | .. |
---|
1085 | 1471 | pmd_free(NULL, table); |
---|
1086 | 1472 | return 1; |
---|
1087 | 1473 | } |
---|
| 1474 | + |
---|
| 1475 | +int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) |
---|
| 1476 | +{ |
---|
| 1477 | + return 0; /* Don't attempt a block mapping */ |
---|
| 1478 | +} |
---|
| 1479 | + |
---|
| 1480 | +#ifdef CONFIG_MEMORY_HOTPLUG |
---|
| 1481 | +static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) |
---|
| 1482 | +{ |
---|
| 1483 | + unsigned long end = start + size; |
---|
| 1484 | + |
---|
| 1485 | + WARN_ON(pgdir != init_mm.pgd); |
---|
| 1486 | + WARN_ON((start < PAGE_OFFSET) || (end > PAGE_END)); |
---|
| 1487 | + |
---|
| 1488 | + unmap_hotplug_range(start, end, false, NULL); |
---|
| 1489 | + free_empty_tables(start, end, PAGE_OFFSET, PAGE_END); |
---|
| 1490 | +} |
---|
| 1491 | + |
---|
| 1492 | +static bool inside_linear_region(u64 start, u64 size) |
---|
| 1493 | +{ |
---|
| 1494 | + u64 start_linear_pa = __pa(_PAGE_OFFSET(vabits_actual)); |
---|
| 1495 | + u64 end_linear_pa = __pa(PAGE_END - 1); |
---|
| 1496 | + |
---|
| 1497 | + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { |
---|
| 1498 | + /* |
---|
| 1499 | + * Check for a wrap, it is possible because of randomized linear |
---|
| 1500 | + * mapping the start physical address is actually bigger than |
---|
| 1501 | + * the end physical address. In this case set start to zero |
---|
| 1502 | + * because [0, end_linear_pa] range must still be able to cover |
---|
| 1503 | + * all addressable physical addresses. |
---|
| 1504 | + */ |
---|
| 1505 | + if (start_linear_pa > end_linear_pa) |
---|
| 1506 | + start_linear_pa = 0; |
---|
| 1507 | + } |
---|
| 1508 | + |
---|
| 1509 | + WARN_ON(start_linear_pa > end_linear_pa); |
---|
| 1510 | + |
---|
| 1511 | + /* |
---|
| 1512 | + * Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)] |
---|
| 1513 | + * accommodating both its ends but excluding PAGE_END. Max physical |
---|
| 1514 | + * range which can be mapped inside this linear mapping range, must |
---|
| 1515 | + * also be derived from its end points. |
---|
| 1516 | + */ |
---|
| 1517 | + return start >= start_linear_pa && (start + size - 1) <= end_linear_pa; |
---|
| 1518 | +} |
---|
| 1519 | + |
---|
| 1520 | +int arch_add_memory(int nid, u64 start, u64 size, |
---|
| 1521 | + struct mhp_params *params) |
---|
| 1522 | +{ |
---|
| 1523 | + int ret, flags = 0; |
---|
| 1524 | + |
---|
| 1525 | + if (!inside_linear_region(start, size)) { |
---|
| 1526 | + pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size); |
---|
| 1527 | + return -EINVAL; |
---|
| 1528 | + } |
---|
| 1529 | + |
---|
| 1530 | + /* |
---|
| 1531 | + * KFENCE requires linear map to be mapped at page granularity, so that |
---|
| 1532 | + * it is possible to protect/unprotect single pages in the KFENCE pool. |
---|
| 1533 | + */ |
---|
| 1534 | + if (rodata_full || debug_pagealloc_enabled() || |
---|
| 1535 | + IS_ENABLED(CONFIG_KFENCE)) |
---|
| 1536 | + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; |
---|
| 1537 | + |
---|
| 1538 | + __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), |
---|
| 1539 | + size, params->pgprot, __pgd_pgtable_alloc, |
---|
| 1540 | + flags); |
---|
| 1541 | + |
---|
| 1542 | + memblock_clear_nomap(start, size); |
---|
| 1543 | + |
---|
| 1544 | + ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, |
---|
| 1545 | + params); |
---|
| 1546 | + if (ret) |
---|
| 1547 | + __remove_pgd_mapping(swapper_pg_dir, |
---|
| 1548 | + __phys_to_virt(start), size); |
---|
| 1549 | + else { |
---|
| 1550 | + max_pfn = PFN_UP(start + size); |
---|
| 1551 | + max_low_pfn = max_pfn; |
---|
| 1552 | + } |
---|
| 1553 | + |
---|
| 1554 | + return ret; |
---|
| 1555 | +} |
---|
| 1556 | + |
---|
| 1557 | +void arch_remove_memory(int nid, u64 start, u64 size, |
---|
| 1558 | + struct vmem_altmap *altmap) |
---|
| 1559 | +{ |
---|
| 1560 | + unsigned long start_pfn = start >> PAGE_SHIFT; |
---|
| 1561 | + unsigned long nr_pages = size >> PAGE_SHIFT; |
---|
| 1562 | + |
---|
| 1563 | + __remove_pages(start_pfn, nr_pages, altmap); |
---|
| 1564 | + __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); |
---|
| 1565 | +} |
---|
| 1566 | + |
---|
| 1567 | +int check_range_driver_managed(u64 start, u64 size, const char *resource_name) |
---|
| 1568 | +{ |
---|
| 1569 | + struct mem_section *ms; |
---|
| 1570 | + unsigned long pfn = __phys_to_pfn(start); |
---|
| 1571 | + unsigned long end_pfn = __phys_to_pfn(start + size); |
---|
| 1572 | + struct resource *res; |
---|
| 1573 | + unsigned long flags; |
---|
| 1574 | + |
---|
| 1575 | + res = lookup_resource(&iomem_resource, start); |
---|
| 1576 | + if (!res) { |
---|
| 1577 | + pr_err("%s: couldn't find memory resource for start 0x%llx\n", |
---|
| 1578 | + __func__, start); |
---|
| 1579 | + return -EINVAL; |
---|
| 1580 | + } |
---|
| 1581 | + |
---|
| 1582 | + flags = res->flags; |
---|
| 1583 | + |
---|
| 1584 | + if (!(flags & IORESOURCE_SYSRAM_DRIVER_MANAGED) || |
---|
| 1585 | + strstr(resource_name, "System RAM (") != resource_name) |
---|
| 1586 | + return -EINVAL; |
---|
| 1587 | + |
---|
| 1588 | + for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) { |
---|
| 1589 | + ms = __pfn_to_section(pfn); |
---|
| 1590 | + if (early_section(ms)) |
---|
| 1591 | + return -EINVAL; |
---|
| 1592 | + } |
---|
| 1593 | + |
---|
| 1594 | + return 0; |
---|
| 1595 | +} |
---|
| 1596 | + |
---|
| 1597 | +int populate_range_driver_managed(u64 start, u64 size, |
---|
| 1598 | + const char *resource_name) |
---|
| 1599 | +{ |
---|
| 1600 | + unsigned long virt = (unsigned long)phys_to_virt(start); |
---|
| 1601 | + int flags = 0; |
---|
| 1602 | + |
---|
| 1603 | + if (check_range_driver_managed(start, size, resource_name)) |
---|
| 1604 | + return -EINVAL; |
---|
| 1605 | + |
---|
| 1606 | + /* |
---|
| 1607 | + * When rodata_full is enabled, memory is mapped at page size granule, |
---|
| 1608 | + * as opposed to block mapping. |
---|
| 1609 | + */ |
---|
| 1610 | + if (rodata_full || debug_pagealloc_enabled()) |
---|
| 1611 | + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; |
---|
| 1612 | + |
---|
| 1613 | + __create_pgd_mapping(init_mm.pgd, start, virt, size, |
---|
| 1614 | + PAGE_KERNEL, NULL, flags); |
---|
| 1615 | + |
---|
| 1616 | + return 0; |
---|
| 1617 | +} |
---|
| 1618 | +EXPORT_SYMBOL_GPL(populate_range_driver_managed); |
---|
| 1619 | + |
---|
| 1620 | +int depopulate_range_driver_managed(u64 start, u64 size, |
---|
| 1621 | + const char *resource_name) |
---|
| 1622 | +{ |
---|
| 1623 | + if (check_range_driver_managed(start, size, resource_name)) |
---|
| 1624 | + return -EINVAL; |
---|
| 1625 | + |
---|
| 1626 | + unmap_hotplug_range(start, start + size, false, NULL); |
---|
| 1627 | + |
---|
| 1628 | + return 0; |
---|
| 1629 | +} |
---|
| 1630 | +EXPORT_SYMBOL_GPL(depopulate_range_driver_managed); |
---|
| 1631 | + |
---|
| 1632 | +/* |
---|
| 1633 | + * This memory hotplug notifier helps prevent boot memory from being |
---|
| 1634 | + * inadvertently removed as it blocks pfn range offlining process in |
---|
| 1635 | + * __offline_pages(). Hence this prevents both offlining as well as |
---|
| 1636 | + * removal process for boot memory which is initially always online. |
---|
| 1637 | + * In future if and when boot memory could be removed, this notifier |
---|
| 1638 | + * should be dropped and free_hotplug_page_range() should handle any |
---|
| 1639 | + * reserved pages allocated during boot. |
---|
| 1640 | + */ |
---|
| 1641 | +static int prevent_bootmem_remove_notifier(struct notifier_block *nb, |
---|
| 1642 | + unsigned long action, void *data) |
---|
| 1643 | +{ |
---|
| 1644 | + struct mem_section *ms; |
---|
| 1645 | + struct memory_notify *arg = data; |
---|
| 1646 | + unsigned long end_pfn = arg->start_pfn + arg->nr_pages; |
---|
| 1647 | + unsigned long pfn = arg->start_pfn; |
---|
| 1648 | + |
---|
| 1649 | + if (action != MEM_GOING_OFFLINE) |
---|
| 1650 | + return NOTIFY_OK; |
---|
| 1651 | + |
---|
| 1652 | + for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) { |
---|
| 1653 | + ms = __pfn_to_section(pfn); |
---|
| 1654 | + if (early_section(ms)) |
---|
| 1655 | + return NOTIFY_BAD; |
---|
| 1656 | + } |
---|
| 1657 | + return NOTIFY_OK; |
---|
| 1658 | +} |
---|
| 1659 | + |
---|
| 1660 | +static struct notifier_block prevent_bootmem_remove_nb = { |
---|
| 1661 | + .notifier_call = prevent_bootmem_remove_notifier, |
---|
| 1662 | +}; |
---|
| 1663 | + |
---|
| 1664 | +static int __init prevent_bootmem_remove_init(void) |
---|
| 1665 | +{ |
---|
| 1666 | + return register_memory_notifier(&prevent_bootmem_remove_nb); |
---|
| 1667 | +} |
---|
| 1668 | +device_initcall(prevent_bootmem_remove_init); |
---|
| 1669 | +#endif |
---|
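One recurring theme in the hunks above is that every pgtable_alloc callback now receives an int shift (PAGE_SHIFT, PMD_SHIFT or PUD_SHIFT) identifying the page-table level being allocated, which is what lets pgd_pgtable_alloc() run the matching page-table constructor before handing the page back. The following is a minimal, standalone sketch of that dispatch pattern only; it is illustration code rather than part of the patch, the stub ctor names are hypothetical, and the shift values assume a 4K granule.

/*
 * Illustrative sketch only -- not kernel code. It mimics the dispatch done
 * by pgd_pgtable_alloc(int shift) in the diff above: the allocator is told
 * which level it is allocating for via a page-shift value and invokes the
 * matching constructor. fake_pte_ctor()/fake_pmd_ctor() are hypothetical
 * stand-ins for pgtable_pte_page_ctor()/pgtable_pmd_page_ctor().
 */
#include <stdio.h>

#define PAGE_SHIFT 12	/* 4K base pages assumed for this example */
#define PMD_SHIFT  21
#define PUD_SHIFT  30

static void fake_pte_ctor(void) { puts("running PTE-level ctor"); }
static void fake_pmd_ctor(void) { puts("running PMD-level ctor"); }

/* analogous to pgd_pgtable_alloc(int shift) in the patch */
static void alloc_table_for_level(int shift)
{
	if (shift == PAGE_SHIFT)
		fake_pte_ctor();
	else if (shift == PMD_SHIFT)
		fake_pmd_ctor();
	/* PUD-level tables get no ctor, mirroring the kernel code */
}

int main(void)
{
	alloc_table_for_level(PAGE_SHIFT);
	alloc_table_for_level(PMD_SHIFT);
	alloc_table_for_level(PUD_SHIFT);
	return 0;
}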