| .. | .. |
|---|
| 15 | 15 | #include <linux/export.h> |
|---|
| 16 | 16 | #include <linux/of_fdt.h> |
|---|
| 17 | 17 | #include <linux/memblock.h> |
|---|
| 18 | | -#include <linux/bootmem.h> |
|---|
| 19 | 18 | #include <linux/moduleparam.h> |
|---|
| 20 | 19 | #include <linux/swap.h> |
|---|
| 21 | 20 | #include <linux/swapops.h> |
|---|
| 22 | 21 | #include <linux/kmemleak.h> |
|---|
| 23 | | -#include <asm/pgtable.h> |
|---|
| 24 | 22 | #include <asm/pgalloc.h> |
|---|
| 25 | 23 | #include <asm/tlb.h> |
|---|
| 26 | 24 | #include <asm/setup.h> |
|---|
| 27 | 25 | #include <asm/hugetlb.h> |
|---|
| 28 | 26 | #include <asm/pte-walk.h> |
|---|
| 29 | 27 | |
|---|
| 30 | | - |
|---|
| 31 | | -#ifdef CONFIG_HUGETLB_PAGE |
|---|
| 32 | | - |
|---|
| 33 | | -#define PAGE_SHIFT_64K 16 |
|---|
| 34 | | -#define PAGE_SHIFT_512K 19 |
|---|
| 35 | | -#define PAGE_SHIFT_8M 23 |
|---|
| 36 | | -#define PAGE_SHIFT_16M 24 |
|---|
| 37 | | -#define PAGE_SHIFT_16G 34 |
|---|
| 38 | | - |
|---|
| 39 | 28 | bool hugetlb_disabled = false; |
|---|
| 40 | 29 | |
|---|
| 41 | | -unsigned int HPAGE_SHIFT; |
|---|
| 42 | | -EXPORT_SYMBOL(HPAGE_SHIFT); |
|---|
| 43 | | - |
|---|
| 44 | 30 | #define hugepd_none(hpd) (hpd_val(hpd) == 0) |
|---|
| 31 | + |
|---|
| 32 | +#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \ |
|---|
| 33 | + __builtin_ffs(sizeof(void *))) |
|---|
| 45 | 34 | |
|---|
| 46 | 35 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) |
|---|
| 47 | 36 | { |
|---|
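The new PTE_T_ORDER macro converts the size of a PTE entry (pte_basic_t) into the pointer-sized-entry order that the generic PGT_CACHE() caches are indexed by, which is what lets the dedicated hugepte_cache be dropped later in this diff. Because __builtin_ffs() of a power-of-two size is log2(size) + 1, the difference of the two ffs values is log2(sizeof(pte_basic_t) / sizeof(void *)). A minimal standalone sketch of the arithmetic (the pte_basic_t typedef here is a stand-in, not the kernel's):

```c
#include <stdio.h>

/* Stand-in type: the size is chosen only to illustrate the arithmetic. */
typedef unsigned long long pte_basic_t;	/* pretend a PTE is 8 bytes */

#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \
		     __builtin_ffs(sizeof(void *)))

int main(void)
{
	/* For power-of-two sizes, ffs(size) == log2(size) + 1, so the
	 * difference is log2(sizeof(pte_basic_t) / sizeof(void *)). */
	printf("pte %zu bytes, pointer %zu bytes -> order %d\n",
	       sizeof(pte_basic_t), sizeof(void *), PTE_T_ORDER);
	return 0;
}
```

On an LP64 build this prints order 0 (the cache objects are pointer sized); on a 32-bit build with 8-byte PTEs it would print order 1.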
| .. | .. |
|---|
| 62 | 51 | int num_hugepd; |
|---|
| 63 | 52 | |
|---|
| 64 | 53 | if (pshift >= pdshift) { |
|---|
| 65 | | - cachep = hugepte_cache; |
|---|
| 54 | + cachep = PGT_CACHE(PTE_T_ORDER); |
|---|
| 66 | 55 | num_hugepd = 1 << (pshift - pdshift); |
|---|
| 67 | 56 | } else { |
|---|
| 68 | 57 | cachep = PGT_CACHE(pdshift - pshift); |
|---|
| 69 | 58 | num_hugepd = 1; |
|---|
| 70 | 59 | } |
|---|
| 71 | 60 | |
|---|
| 72 | | - new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); |
|---|
| 61 | + if (!cachep) { |
|---|
| 62 | + WARN_ONCE(1, "No page table cache created for hugetlb tables"); |
|---|
| 63 | + return -ENOMEM; |
|---|
| 64 | + } |
|---|
| 65 | + |
|---|
| 66 | + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); |
|---|
| 73 | 67 | |
|---|
| 74 | 68 | BUG_ON(pshift > HUGEPD_SHIFT_MASK); |
|---|
| 75 | 69 | BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); |
|---|
| 76 | 70 | |
|---|
| 77 | | - if (! new) |
|---|
| 71 | + if (!new) |
|---|
| 78 | 72 | return -ENOMEM; |
|---|
| 79 | 73 | |
|---|
| 80 | 74 | /* |
|---|
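In the allocation path above, the pshift >= pdshift case means one huge page spans more than a single directory entry, so num_hugepd = 1 << (pshift - pdshift) consecutive directory slots will all be pointed at the same hugepte table, and that table is now taken from PGT_CACHE(PTE_T_ORDER) rather than the old hugepte_cache; the added WARN_ONCE/-ENOMEM branch catches a missing cache instead of dereferencing a NULL kmem_cache. A toy calculation with made-up shifts, just to show the slot count:

```c
#include <stdio.h>

int main(void)
{
	/* Hypothetical values: an 8M huge page (shift 23) placed at a
	 * directory level whose entries each cover 4M (shift 22). */
	unsigned int pshift = 23, pdshift = 22;
	unsigned int num_hugepd;

	if (pshift >= pdshift)
		num_hugepd = 1u << (pshift - pdshift);	/* 2 slots share one table */
	else
		num_hugepd = 1;				/* table hangs off one slot */

	printf("num_hugepd = %u\n", num_hugepd);
	return 0;
}
```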
| .. | .. |
|---|
| 94 | 88 | for (i = 0; i < num_hugepd; i++, hpdp++) { |
|---|
| 95 | 89 | if (unlikely(!hugepd_none(*hpdp))) |
|---|
| 96 | 90 | break; |
|---|
| 97 | | - else { |
|---|
| 98 | | -#ifdef CONFIG_PPC_BOOK3S_64 |
|---|
| 99 | | - *hpdp = __hugepd(__pa(new) | |
|---|
| 100 | | - (shift_to_mmu_psize(pshift) << 2)); |
|---|
| 101 | | -#elif defined(CONFIG_PPC_8xx) |
|---|
| 102 | | - *hpdp = __hugepd(__pa(new) | _PMD_USER | |
|---|
| 103 | | - (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : |
|---|
| 104 | | - _PMD_PAGE_512K) | _PMD_PRESENT); |
|---|
| 105 | | -#else |
|---|
| 106 | | - /* We use the old format for PPC_FSL_BOOK3E */ |
|---|
| 107 | | - *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); |
|---|
| 108 | | -#endif |
|---|
| 109 | | - } |
|---|
| 91 | + hugepd_populate(hpdp, new, pshift); |
|---|
| 110 | 92 | } |
|---|
| 111 | 93 | /* If we bailed from the for loop early, an error occurred, clean up */ |
|---|
| 112 | 94 | if (i < num_hugepd) { |
|---|
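The platform-specific #ifdef ladder that used to encode the hugepd entry inline is collapsed into a single hugepd_populate() call, with the encoding presumably moving into per-subarch helpers. Reconstructed purely from the removed lines, the logic being wrapped looks roughly like this (a sketch, not the literal source of the new helpers):

```c
/* Sketch reconstructed from the removed #ifdef block above; the real
 * hugepd_populate() implementations live in the subarch headers. */
static inline void hugepd_populate_sketch(hugepd_t *hpdp, pte_t *new,
					  unsigned int pshift)
{
#ifdef CONFIG_PPC_BOOK3S_64
	/* Physical address of the hugepte table plus the MMU page-size index. */
	*hpdp = __hugepd(__pa(new) | (shift_to_mmu_psize(pshift) << 2));
#elif defined(CONFIG_PPC_8xx)
	/* 8xx: mark the entry present, user accessible, and 8M or 512K sized. */
	*hpdp = __hugepd(__pa(new) | _PMD_USER |
			 (pshift == 23 /* PAGE_SHIFT_8M */ ?
				_PMD_PAGE_8M : _PMD_PAGE_512K) |
			 _PMD_PRESENT);
#else
	/* FSL Book3E keeps the old format: table address with the shift encoded. */
	*hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift);
#endif
}
```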
| .. | .. |
|---|
| 124 | 106 | * At this point we do the placement change only for BOOK3S 64. This would |
|---|
| 125 | 107 | * possibly work on other subarchs. |
|---|
| 126 | 108 | */ |
|---|
| 127 | | -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) |
|---|
| 109 | +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, |
|---|
| 110 | + unsigned long addr, unsigned long sz) |
|---|
| 128 | 111 | { |
|---|
| 129 | 112 | pgd_t *pg; |
|---|
| 113 | + p4d_t *p4; |
|---|
| 130 | 114 | pud_t *pu; |
|---|
| 131 | 115 | pmd_t *pm; |
|---|
| 132 | 116 | hugepd_t *hpdp = NULL; |
|---|
| .. | .. |
|---|
| 136 | 120 | |
|---|
| 137 | 121 | addr &= ~(sz-1); |
|---|
| 138 | 122 | pg = pgd_offset(mm, addr); |
|---|
| 123 | + p4 = p4d_offset(pg, addr); |
|---|
| 139 | 124 | |
|---|
| 140 | 125 | #ifdef CONFIG_PPC_BOOK3S_64 |
|---|
| 141 | 126 | if (pshift == PGDIR_SHIFT) |
|---|
| 142 | 127 | /* 16GB huge page */ |
|---|
| 143 | | - return (pte_t *) pg; |
|---|
| 128 | + return (pte_t *) p4; |
|---|
| 144 | 129 | else if (pshift > PUD_SHIFT) { |
|---|
| 145 | 130 | /* |
|---|
| 146 | 131 | * We need to use hugepd table |
|---|
| 147 | 132 | */ |
|---|
| 148 | 133 | ptl = &mm->page_table_lock; |
|---|
| 149 | | - hpdp = (hugepd_t *)pg; |
|---|
| 134 | + hpdp = (hugepd_t *)p4; |
|---|
| 150 | 135 | } else { |
|---|
| 151 | 136 | pdshift = PUD_SHIFT; |
|---|
| 152 | | - pu = pud_alloc(mm, pg, addr); |
|---|
| 137 | + pu = pud_alloc(mm, p4, addr); |
|---|
| 153 | 138 | if (!pu) |
|---|
| 154 | 139 | return NULL; |
|---|
| 155 | 140 | if (pshift == PUD_SHIFT) |
|---|
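This and the following hunks thread a p4d level between the pgd and the pud: huge_pte_alloc() now does pgd_offset() followed by p4d_offset(), and the Book3S 16G and hugepd cases operate on the p4d entry instead of the pgd entry. On powerpc the p4d is folded, so this is a source-level change rather than an extra table in memory; a conceptual sketch mirroring the generic folded implementation (not powerpc's actual header):

```c
/* Conceptual sketch of a folded p4d level: the "p4d" is just another view
 * of the same pgd slot, so the walk gains a level in the source code
 * without allocating anything new. */
static inline p4d_t *p4d_offset_folded_sketch(pgd_t *pgd, unsigned long addr)
{
	(void)addr;			/* a folded level has only one entry */
	return (p4d_t *)pgd;
}
```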
| .. | .. |
|---|
| 174 | 159 | #else |
|---|
| 175 | 160 | if (pshift >= PGDIR_SHIFT) { |
|---|
| 176 | 161 | ptl = &mm->page_table_lock; |
|---|
| 177 | | - hpdp = (hugepd_t *)pg; |
|---|
| 162 | + hpdp = (hugepd_t *)p4; |
|---|
| 178 | 163 | } else { |
|---|
| 179 | 164 | pdshift = PUD_SHIFT; |
|---|
| 180 | | - pu = pud_alloc(mm, pg, addr); |
|---|
| 165 | + pu = pud_alloc(mm, p4, addr); |
|---|
| 181 | 166 | if (!pu) |
|---|
| 182 | 167 | return NULL; |
|---|
| 183 | 168 | if (pshift >= PUD_SHIFT) { |
|---|
| .. | .. |
|---|
| 195 | 180 | #endif |
|---|
| 196 | 181 | if (!hpdp) |
|---|
| 197 | 182 | return NULL; |
|---|
| 183 | + |
|---|
| 184 | + if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT) |
|---|
| 185 | + return pte_alloc_map(mm, (pmd_t *)hpdp, addr); |
|---|
| 198 | 186 | |
|---|
| 199 | 187 | BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); |
|---|
| 200 | 188 | |
|---|
| .. | .. |
|---|
| 254 | 242 | return __alloc_bootmem_huge_page(h); |
|---|
| 255 | 243 | } |
|---|
| 256 | 244 | |
|---|
| 257 | | -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) |
|---|
| 245 | +#ifndef CONFIG_PPC_BOOK3S_64 |
|---|
| 258 | 246 | #define HUGEPD_FREELIST_SIZE \ |
|---|
| 259 | 247 | ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) |
|---|
| 260 | 248 | |
|---|
| 261 | 249 | struct hugepd_freelist { |
|---|
| 262 | 250 | struct rcu_head rcu; |
|---|
| 263 | 251 | unsigned int index; |
|---|
| 264 | | - void *ptes[0]; |
|---|
| 252 | + void *ptes[]; |
|---|
| 265 | 253 | }; |
|---|
| 266 | 254 | |
|---|
| 267 | 255 | static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur); |
|---|
| .. | .. |
|---|
| 273 | 261 | unsigned int i; |
|---|
| 274 | 262 | |
|---|
| 275 | 263 | for (i = 0; i < batch->index; i++) |
|---|
| 276 | | - kmem_cache_free(hugepte_cache, batch->ptes[i]); |
|---|
| 264 | + kmem_cache_free(PGT_CACHE(PTE_T_ORDER), batch->ptes[i]); |
|---|
| 277 | 265 | |
|---|
| 278 | 266 | free_page((unsigned long)batch); |
|---|
| 279 | 267 | } |
|---|
| .. | .. |
|---|
| 286 | 274 | |
|---|
| 287 | 275 | if (atomic_read(&tlb->mm->mm_users) < 2 || |
|---|
| 288 | 276 | mm_is_thread_local(tlb->mm)) { |
|---|
| 289 | | - kmem_cache_free(hugepte_cache, hugepte); |
|---|
| 277 | + kmem_cache_free(PGT_CACHE(PTE_T_ORDER), hugepte); |
|---|
| 290 | 278 | put_cpu_var(hugepd_freelist_cur); |
|---|
| 291 | 279 | return; |
|---|
| 292 | 280 | } |
|---|
| .. | .. |
|---|
| 298 | 286 | |
|---|
| 299 | 287 | (*batchp)->ptes[(*batchp)->index++] = hugepte; |
|---|
| 300 | 288 | if ((*batchp)->index == HUGEPD_FREELIST_SIZE) { |
|---|
| 301 | | - call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback); |
|---|
| 289 | + call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback); |
|---|
| 302 | 290 | *batchp = NULL; |
|---|
| 303 | 291 | } |
|---|
| 304 | 292 | put_cpu_var(hugepd_freelist_cur); |
|---|
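For the non-Book3S case, freed hugepte tables are batched per CPU in a hugepd_freelist and released from an RCU callback once HUGEPD_FREELIST_SIZE entries have accumulated (or immediately when the mm is single threaded). The frees now go back to PGT_CACHE(PTE_T_ORDER) instead of the removed hugepte_cache, and call_rcu_sched() becomes call_rcu() since the separate sched RCU flavor was consolidated into plain RCU. A simplified userspace-only sketch of the batch-then-flush control flow (no RCU, malloc/free stand in for the kmem cache):

```c
#include <stdio.h>
#include <stdlib.h>

#define BATCH_SIZE 4			/* stands in for HUGEPD_FREELIST_SIZE */

struct freelist {
	unsigned int index;
	void *ptes[BATCH_SIZE];
};

/* In the kernel this runs as the RCU callback; here it is called directly. */
static void flush_batch(struct freelist *batch)
{
	for (unsigned int i = 0; i < batch->index; i++)
		free(batch->ptes[i]);
	batch->index = 0;
}

static void defer_free(struct freelist *batch, void *pte)
{
	batch->ptes[batch->index++] = pte;
	if (batch->index == BATCH_SIZE)
		flush_batch(batch);		/* kernel: call_rcu() here */
}

int main(void)
{
	struct freelist batch = { 0 };

	for (int i = 0; i < 10; i++)
		defer_free(&batch, malloc(16));
	flush_batch(&batch);			/* drain the partial batch */
	printf("freed everything\n");
	return 0;
}
```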
| .. | .. |
|---|
| 343 | 331 | get_hugepd_cache_index(pdshift - shift)); |
|---|
| 344 | 332 | } |
|---|
| 345 | 333 | |
|---|
| 334 | +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, |
|---|
| 335 | + unsigned long addr, unsigned long end, |
|---|
| 336 | + unsigned long floor, unsigned long ceiling) |
|---|
| 337 | +{ |
|---|
| 338 | + unsigned long start = addr; |
|---|
| 339 | + pgtable_t token = pmd_pgtable(*pmd); |
|---|
| 340 | + |
|---|
| 341 | + start &= PMD_MASK; |
|---|
| 342 | + if (start < floor) |
|---|
| 343 | + return; |
|---|
| 344 | + if (ceiling) { |
|---|
| 345 | + ceiling &= PMD_MASK; |
|---|
| 346 | + if (!ceiling) |
|---|
| 347 | + return; |
|---|
| 348 | + } |
|---|
| 349 | + if (end - 1 > ceiling - 1) |
|---|
| 350 | + return; |
|---|
| 351 | + |
|---|
| 352 | + pmd_clear(pmd); |
|---|
| 353 | + pte_free_tlb(tlb, token, addr); |
|---|
| 354 | + mm_dec_nr_ptes(tlb->mm); |
|---|
| 355 | +} |
|---|
| 356 | + |
|---|
| 346 | 357 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, |
|---|
| 347 | 358 | unsigned long addr, unsigned long end, |
|---|
| 348 | 359 | unsigned long floor, unsigned long ceiling) |
|---|
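The new hugetlb_free_pte_range() mirrors the generic free_pte_range(): on 8xx, 512K and 8M huge pages now sit in an ordinary PTE page under the PMD (see the pte_alloc_map() call added to huge_pte_alloc() earlier), so hugetlb teardown has to be able to clear the PMD and free that PTE page. It is called from hugetlb_free_pmd_range() below when a PMD entry turns out to be a normal page table rather than a hugepd. The floor/ceiling tests only allow the free when the whole PMD-aligned slot lies inside the [floor, ceiling) window; a standalone sketch of just that guard, with a made-up PMD span:

```c
#include <stdio.h>
#include <stdbool.h>

#define PMD_MASK_SKETCH (~((1UL << 22) - 1))	/* pretend a PMD spans 4M */

/* True when the PMD-aligned slot containing [addr, end) lies entirely
 * inside [floor, ceiling) and its PTE table may therefore be freed.
 * ceiling == 0 means "no upper limit", as in the kernel helpers. */
static bool may_free_pte_table(unsigned long addr, unsigned long end,
			       unsigned long floor, unsigned long ceiling)
{
	unsigned long start = addr & PMD_MASK_SKETCH;

	if (start < floor)
		return false;
	if (ceiling) {
		ceiling &= PMD_MASK_SKETCH;
		if (!ceiling)
			return false;
	}
	if (end - 1 > ceiling - 1)
		return false;
	return true;
}

int main(void)
{
	/* Fully contained in the window: may free. */
	printf("%d\n", may_free_pte_table(0x00900000, 0x00a00000,
					  0x00800000, 0x00c00000));
	/* Slot starts below the floor: must not free. */
	printf("%d\n", may_free_pte_table(0x00900000, 0x00a00000,
					  0x00a00000, 0x00c00000));
	return 0;
}
```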
| .. | .. |
|---|
| 358 | 369 | pmd = pmd_offset(pud, addr); |
|---|
| 359 | 370 | next = pmd_addr_end(addr, end); |
|---|
| 360 | 371 | if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { |
|---|
| 372 | + if (pmd_none_or_clear_bad(pmd)) |
|---|
| 373 | + continue; |
|---|
| 374 | + |
|---|
| 361 | 375 | /* |
|---|
| 362 | 376 | * if it is not hugepd pointer, we should already find |
|---|
| 363 | 377 | * it cleared. |
|---|
| 364 | 378 | */ |
|---|
| 365 | | - WARN_ON(!pmd_none_or_clear_bad(pmd)); |
|---|
| 379 | + WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx)); |
|---|
| 380 | + |
|---|
| 381 | + hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling); |
|---|
| 382 | + |
|---|
| 366 | 383 | continue; |
|---|
| 367 | 384 | } |
|---|
| 368 | 385 | /* |
|---|
| .. | .. |
|---|
| 396 | 413 | mm_dec_nr_pmds(tlb->mm); |
|---|
| 397 | 414 | } |
|---|
| 398 | 415 | |
|---|
| 399 | | -static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, |
|---|
| 416 | +static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, |
|---|
| 400 | 417 | unsigned long addr, unsigned long end, |
|---|
| 401 | 418 | unsigned long floor, unsigned long ceiling) |
|---|
| 402 | 419 | { |
|---|
| .. | .. |
|---|
| 406 | 423 | |
|---|
| 407 | 424 | start = addr; |
|---|
| 408 | 425 | do { |
|---|
| 409 | | - pud = pud_offset(pgd, addr); |
|---|
| 426 | + pud = pud_offset(p4d, addr); |
|---|
| 410 | 427 | next = pud_addr_end(addr, end); |
|---|
| 411 | 428 | if (!is_hugepd(__hugepd(pud_val(*pud)))) { |
|---|
| 412 | 429 | if (pud_none_or_clear_bad(pud)) |
|---|
| .. | .. |
|---|
| 441 | 458 | if (end - 1 > ceiling - 1) |
|---|
| 442 | 459 | return; |
|---|
| 443 | 460 | |
|---|
| 444 | | - pud = pud_offset(pgd, start); |
|---|
| 445 | | - pgd_clear(pgd); |
|---|
| 461 | + pud = pud_offset(p4d, start); |
|---|
| 462 | + p4d_clear(p4d); |
|---|
| 446 | 463 | pud_free_tlb(tlb, pud, start); |
|---|
| 447 | 464 | mm_dec_nr_puds(tlb->mm); |
|---|
| 448 | 465 | } |
|---|
| .. | .. |
|---|
| 455 | 472 | unsigned long floor, unsigned long ceiling) |
|---|
| 456 | 473 | { |
|---|
| 457 | 474 | pgd_t *pgd; |
|---|
| 475 | + p4d_t *p4d; |
|---|
| 458 | 476 | unsigned long next; |
|---|
| 459 | 477 | |
|---|
| 460 | 478 | /* |
|---|
| .. | .. |
|---|
| 477 | 495 | do { |
|---|
| 478 | 496 | next = pgd_addr_end(addr, end); |
|---|
| 479 | 497 | pgd = pgd_offset(tlb->mm, addr); |
|---|
| 498 | + p4d = p4d_offset(pgd, addr); |
|---|
| 480 | 499 | if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { |
|---|
| 481 | | - if (pgd_none_or_clear_bad(pgd)) |
|---|
| 500 | + if (p4d_none_or_clear_bad(p4d)) |
|---|
| 482 | 501 | continue; |
|---|
| 483 | | - hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); |
|---|
| 502 | + hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling); |
|---|
| 484 | 503 | } else { |
|---|
| 485 | 504 | unsigned long more; |
|---|
| 486 | 505 | /* |
|---|
| .. | .. |
|---|
| 493 | 512 | if (more > next) |
|---|
| 494 | 513 | next = more; |
|---|
| 495 | 514 | |
|---|
| 496 | | - free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, |
|---|
| 515 | + free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT, |
|---|
| 497 | 516 | addr, next, floor, ceiling); |
|---|
| 498 | 517 | } |
|---|
| 499 | 518 | } while (addr = next, addr != end); |
|---|
| .. | .. |
|---|
| 536 | 555 | return page; |
|---|
| 537 | 556 | } |
|---|
| 538 | 557 | |
|---|
| 539 | | -static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, |
|---|
| 540 | | - unsigned long sz) |
|---|
| 541 | | -{ |
|---|
| 542 | | - unsigned long __boundary = (addr + sz) & ~(sz-1); |
|---|
| 543 | | - return (__boundary - 1 < end - 1) ? __boundary : end; |
|---|
| 544 | | -} |
|---|
| 545 | | - |
|---|
| 546 | | -int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, |
|---|
| 547 | | - unsigned long end, int write, struct page **pages, int *nr) |
|---|
| 548 | | -{ |
|---|
| 549 | | - pte_t *ptep; |
|---|
| 550 | | - unsigned long sz = 1UL << hugepd_shift(hugepd); |
|---|
| 551 | | - unsigned long next; |
|---|
| 552 | | - |
|---|
| 553 | | - ptep = hugepte_offset(hugepd, addr, pdshift); |
|---|
| 554 | | - do { |
|---|
| 555 | | - next = hugepte_addr_end(addr, end, sz); |
|---|
| 556 | | - if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) |
|---|
| 557 | | - return 0; |
|---|
| 558 | | - } while (ptep++, addr = next, addr != end); |
|---|
| 559 | | - |
|---|
| 560 | | - return 1; |
|---|
| 561 | | -} |
|---|
| 562 | | - |
|---|
| 563 | 558 | #ifdef CONFIG_PPC_MM_SLICES |
|---|
| 564 | 559 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, |
|---|
| 565 | 560 | unsigned long len, unsigned long pgoff, |
|---|
| .. | .. |
|---|
| 579 | 574 | |
|---|
| 580 | 575 | unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) |
|---|
| 581 | 576 | { |
|---|
| 582 | | -#ifdef CONFIG_PPC_MM_SLICES |
|---|
| 583 | 577 | /* With radix we don't use slice, so derive it from vma*/ |
|---|
| 584 | | - if (!radix_enabled()) { |
|---|
| 578 | + if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) { |
|---|
| 585 | 579 | unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); |
|---|
| 586 | 580 | |
|---|
| 587 | 581 | return 1UL << mmu_psize_to_shift(psize); |
|---|
| 588 | 582 | } |
|---|
| 589 | | -#endif |
|---|
| 590 | 583 | return vma_kernel_pagesize(vma); |
|---|
| 591 | 584 | } |
|---|
| 592 | 585 | |
|---|
| 593 | | -static inline bool is_power_of_4(unsigned long x) |
|---|
| 594 | | -{ |
|---|
| 595 | | - if (is_power_of_2(x)) |
|---|
| 596 | | - return (__ilog2(x) % 2) ? false : true; |
|---|
| 597 | | - return false; |
|---|
| 598 | | -} |
|---|
| 599 | | - |
|---|
| 600 | | -static int __init add_huge_page_size(unsigned long long size) |
|---|
| 586 | +bool __init arch_hugetlb_valid_size(unsigned long size) |
|---|
| 601 | 587 | { |
|---|
| 602 | 588 | int shift = __ffs(size); |
|---|
| 603 | 589 | int mmu_psize; |
|---|
| 604 | 590 | |
|---|
| 605 | 591 | /* Check that it is a page size supported by the hardware and |
|---|
| 606 | 592 | * that it fits within pagetable and slice limits. */ |
|---|
| 607 | | - if (size <= PAGE_SIZE) |
|---|
| 608 | | - return -EINVAL; |
|---|
| 609 | | -#if defined(CONFIG_PPC_FSL_BOOK3E) |
|---|
| 610 | | - if (!is_power_of_4(size)) |
|---|
| 611 | | - return -EINVAL; |
|---|
| 612 | | -#elif !defined(CONFIG_PPC_8xx) |
|---|
| 613 | | - if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT)) |
|---|
| 614 | | - return -EINVAL; |
|---|
| 615 | | -#endif |
|---|
| 593 | + if (size <= PAGE_SIZE || !is_power_of_2(size)) |
|---|
| 594 | + return false; |
|---|
| 616 | 595 | |
|---|
| 617 | | - if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) |
|---|
| 618 | | - return -EINVAL; |
|---|
| 619 | | - |
|---|
| 620 | | -#ifdef CONFIG_PPC_BOOK3S_64 |
|---|
| 621 | | - /* |
|---|
| 622 | | - * We need to make sure that for different page sizes reported by |
|---|
| 623 | | - * firmware we only add hugetlb support for page sizes that can be |
|---|
| 624 | | - * supported by linux page table layout. |
|---|
| 625 | | - * For now we have |
|---|
| 626 | | - * Radix: 2M and 1G |
|---|
| 627 | | - * Hash: 16M and 16G |
|---|
| 628 | | - */ |
|---|
| 629 | | - if (radix_enabled()) { |
|---|
| 630 | | - if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G) |
|---|
| 631 | | - return -EINVAL; |
|---|
| 632 | | - } else { |
|---|
| 633 | | - if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) |
|---|
| 634 | | - return -EINVAL; |
|---|
| 635 | | - } |
|---|
| 636 | | -#endif |
|---|
| 596 | + mmu_psize = check_and_get_huge_psize(shift); |
|---|
| 597 | + if (mmu_psize < 0) |
|---|
| 598 | + return false; |
|---|
| 637 | 599 | |
|---|
| 638 | 600 | BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); |
|---|
| 639 | 601 | |
|---|
| 640 | | - /* Return if huge page size has already been setup */ |
|---|
| 641 | | - if (size_to_hstate(size)) |
|---|
| 642 | | - return 0; |
|---|
| 602 | + return true; |
|---|
| 603 | +} |
|---|
| 604 | + |
|---|
| 605 | +static int __init add_huge_page_size(unsigned long long size) |
|---|
| 606 | +{ |
|---|
| 607 | + int shift = __ffs(size); |
|---|
| 608 | + |
|---|
| 609 | + if (!arch_hugetlb_valid_size((unsigned long)size)) |
|---|
| 610 | + return -EINVAL; |
|---|
| 643 | 611 | |
|---|
| 644 | 612 | hugetlb_add_hstate(shift - PAGE_SHIFT); |
|---|
| 645 | | - |
|---|
| 646 | 613 | return 0; |
|---|
| 647 | 614 | } |
|---|
| 648 | 615 | |
|---|
| 649 | | -static int __init hugepage_setup_sz(char *str) |
|---|
| 650 | | -{ |
|---|
| 651 | | - unsigned long long size; |
|---|
| 652 | | - |
|---|
| 653 | | - size = memparse(str, &str); |
|---|
| 654 | | - |
|---|
| 655 | | - if (add_huge_page_size(size) != 0) { |
|---|
| 656 | | - hugetlb_bad_size(); |
|---|
| 657 | | - pr_err("Invalid huge page size specified(%llu)\n", size); |
|---|
| 658 | | - } |
|---|
| 659 | | - |
|---|
| 660 | | - return 1; |
|---|
| 661 | | -} |
|---|
| 662 | | -__setup("hugepagesz=", hugepage_setup_sz); |
|---|
| 663 | | - |
|---|
| 664 | | -struct kmem_cache *hugepte_cache; |
|---|
| 665 | 616 | static int __init hugetlbpage_init(void) |
|---|
| 666 | 617 | { |
|---|
| 618 | + bool configured = false; |
|---|
| 667 | 619 | int psize; |
|---|
| 668 | 620 | |
|---|
| 669 | 621 | if (hugetlb_disabled) { |
|---|
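Size validation is restructured around the generic hugetlbfs hook: arch_hugetlb_valid_size() keeps only the arch-neutral checks (larger than PAGE_SIZE, a power of two, and a shift the MMU reports via check_and_get_huge_psize()), while the MMU-specific constraints that used to live here, such as the Radix 2M/1G versus Hash 16M/16G restriction noted in the removed comment and the power-of-4 rule for FSL Book3E, move behind that per-MMU helper. The arch-private hugepagesz= parser is removed as well, with generic parsing expected to call back into arch_hugetlb_valid_size() for the arch-specific part. A small standalone illustration of the size-to-shift step (example sizes, no claim about any particular MMU):

```c
#include <stdio.h>
#include <stdbool.h>

static bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

/* __ffs()-style: index of the lowest set bit, i.e. log2 for powers of two. */
static int size_to_shift(unsigned long size)
{
	return __builtin_ctzl(size);
}

int main(void)
{
	unsigned long page_size = 4096;		/* example base page size */
	unsigned long sizes[] = { 2UL << 20, 16UL << 20, 3UL << 20 };

	for (int i = 0; i < 3; i++) {
		unsigned long sz = sizes[i];

		if (sz <= page_size || !is_power_of_2(sz)) {
			printf("%lu: rejected\n", sz);
			continue;
		}
		printf("%lu: shift %d\n", sz, size_to_shift(sz));
	}
	return 0;
}
```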
| .. | .. |
|---|
| 671 | 623 | return 0; |
|---|
| 672 | 624 | } |
|---|
| 673 | 625 | |
|---|
| 674 | | -#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx) |
|---|
| 675 | | - if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) |
|---|
| 626 | + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() && |
|---|
| 627 | + !mmu_has_feature(MMU_FTR_16M_PAGE)) |
|---|
| 676 | 628 | return -ENODEV; |
|---|
| 677 | | -#endif |
|---|
| 629 | + |
|---|
| 678 | 630 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { |
|---|
| 679 | 631 | unsigned shift; |
|---|
| 680 | 632 | unsigned pdshift; |
|---|
| .. | .. |
|---|
| 708 | 660 | * if we have pdshift and shift value same, we don't |
|---|
| 709 | 661 | * use pgt cache for hugepd. |
|---|
| 710 | 662 | */ |
|---|
| 711 | | - if (pdshift > shift) |
|---|
| 712 | | - pgtable_cache_add(pdshift - shift, NULL); |
|---|
| 713 | | -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) |
|---|
| 714 | | - else if (!hugepte_cache) { |
|---|
| 715 | | - /* |
|---|
| 716 | | - * Create a kmem cache for hugeptes. The bottom bits in |
|---|
| 717 | | - * the pte have size information encoded in them, so |
|---|
| 718 | | - * align them to allow this |
|---|
| 719 | | - */ |
|---|
| 720 | | - hugepte_cache = kmem_cache_create("hugepte-cache", |
|---|
| 721 | | - sizeof(pte_t), |
|---|
| 722 | | - HUGEPD_SHIFT_MASK + 1, |
|---|
| 723 | | - 0, NULL); |
|---|
| 724 | | - if (hugepte_cache == NULL) |
|---|
| 725 | | - panic("%s: Unable to create kmem cache " |
|---|
| 726 | | - "for hugeptes\n", __func__); |
|---|
| 727 | | - |
|---|
| 663 | + if (pdshift > shift) { |
|---|
| 664 | + if (!IS_ENABLED(CONFIG_PPC_8xx)) |
|---|
| 665 | + pgtable_cache_add(pdshift - shift); |
|---|
| 666 | + } else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || |
|---|
| 667 | + IS_ENABLED(CONFIG_PPC_8xx)) { |
|---|
| 668 | + pgtable_cache_add(PTE_T_ORDER); |
|---|
| 728 | 669 | } |
|---|
| 729 | | -#endif |
|---|
| 670 | + |
|---|
| 671 | + configured = true; |
|---|
| 730 | 672 | } |
|---|
| 731 | 673 | |
|---|
| 732 | | -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) |
|---|
| 733 | | - /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */ |
|---|
| 734 | | - if (mmu_psize_defs[MMU_PAGE_4M].shift) |
|---|
| 735 | | - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; |
|---|
| 736 | | - else if (mmu_psize_defs[MMU_PAGE_512K].shift) |
|---|
| 737 | | - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift; |
|---|
| 738 | | -#else |
|---|
| 739 | | - /* Set default large page size. Currently, we pick 16M or 1M |
|---|
| 740 | | - * depending on what is available |
|---|
| 741 | | - */ |
|---|
| 742 | | - if (mmu_psize_defs[MMU_PAGE_16M].shift) |
|---|
| 743 | | - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; |
|---|
| 744 | | - else if (mmu_psize_defs[MMU_PAGE_1M].shift) |
|---|
| 745 | | - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; |
|---|
| 746 | | - else if (mmu_psize_defs[MMU_PAGE_2M].shift) |
|---|
| 747 | | - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift; |
|---|
| 748 | | -#endif |
|---|
| 674 | + if (configured) { |
|---|
| 675 | + if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) |
|---|
| 676 | + hugetlbpage_init_default(); |
|---|
| 677 | + } else |
|---|
| 678 | + pr_info("Failed to initialize. Disabling HugeTLB"); |
|---|
| 679 | + |
|---|
| 749 | 680 | return 0; |
|---|
| 750 | 681 | } |
|---|
| 751 | 682 | |
|---|
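In hugetlbpage_init(), hugepte allocations now reuse pgtable_cache_add(PTE_T_ORDER) on FSL Book3E and 8xx instead of creating the bespoke hugepte_cache, and the open-coded selection of the default huge page size (which set the exported HPAGE_SHIFT) is gone: when at least one size was configured and CONFIG_HUGETLB_PAGE_SIZE_VARIABLE is set, a separate hugetlbpage_init_default() takes over, otherwise hugetlb is reported as disabled. Reconstructed from the removed lines, the default selection that moves out of this file amounted to a simple preference order (sketch only, not the new function's source):

```c
/* Sketch of the removed default-size selection, Book3S-64 flavour:
 * prefer 16M, then 1M, then 2M, whichever the MMU reports as available.
 * (FSL Book3E / 8xx preferred 4M, then 512K.) */
static void __init default_hpage_shift_sketch(void)
{
	if (mmu_psize_defs[MMU_PAGE_16M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
	else if (mmu_psize_defs[MMU_PAGE_2M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
}
```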
| .. | .. |
|---|
| 758 | 689 | |
|---|
| 759 | 690 | BUG_ON(!PageCompound(page)); |
|---|
| 760 | 691 | |
|---|
| 761 | | - for (i = 0; i < (1UL << compound_order(page)); i++) { |
|---|
| 692 | + for (i = 0; i < compound_nr(page); i++) { |
|---|
| 762 | 693 | if (!PageHighMem(page)) { |
|---|
| 763 | 694 | __flush_dcache_icache(page_address(page+i)); |
|---|
| 764 | 695 | } else { |
|---|
| .. | .. |
|---|
| 769 | 700 | } |
|---|
| 770 | 701 | } |
|---|
| 771 | 702 | |
|---|
| 772 | | -#endif /* CONFIG_HUGETLB_PAGE */ |
|---|
| 773 | | - |
|---|
| 774 | | -/* |
|---|
| 775 | | - * We have 4 cases for pgds and pmds: |
|---|
| 776 | | - * (1) invalid (all zeroes) |
|---|
| 777 | | - * (2) pointer to next table, as normal; bottom 6 bits == 0 |
|---|
| 778 | | - * (3) leaf pte for huge page _PAGE_PTE set |
|---|
| 779 | | - * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table |
|---|
| 780 | | - * |
|---|
| 781 | | - * So long as we atomically load page table pointers we are safe against teardown, |
|---|
| 782 | | - * we can follow the address down to the the page and take a ref on it. |
|---|
| 783 | | - * This function need to be called with interrupts disabled. We use this variant |
|---|
| 784 | | - * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED |
|---|
| 785 | | - */ |
|---|
| 786 | | -pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, |
|---|
| 787 | | - bool *is_thp, unsigned *hpage_shift) |
|---|
| 703 | +void __init gigantic_hugetlb_cma_reserve(void) |
|---|
| 788 | 704 | { |
|---|
| 789 | | - pgd_t pgd, *pgdp; |
|---|
| 790 | | - pud_t pud, *pudp; |
|---|
| 791 | | - pmd_t pmd, *pmdp; |
|---|
| 792 | | - pte_t *ret_pte; |
|---|
| 793 | | - hugepd_t *hpdp = NULL; |
|---|
| 794 | | - unsigned pdshift = PGDIR_SHIFT; |
|---|
| 705 | + unsigned long order = 0; |
|---|
| 795 | 706 | |
|---|
| 796 | | - if (hpage_shift) |
|---|
| 797 | | - *hpage_shift = 0; |
|---|
| 798 | | - |
|---|
| 799 | | - if (is_thp) |
|---|
| 800 | | - *is_thp = false; |
|---|
| 801 | | - |
|---|
| 802 | | - pgdp = pgdir + pgd_index(ea); |
|---|
| 803 | | - pgd = READ_ONCE(*pgdp); |
|---|
| 804 | | - /* |
|---|
| 805 | | - * Always operate on the local stack value. This make sure the |
|---|
| 806 | | - * value don't get updated by a parallel THP split/collapse, |
|---|
| 807 | | - * page fault or a page unmap. The return pte_t * is still not |
|---|
| 808 | | - * stable. So should be checked there for above conditions. |
|---|
| 809 | | - */ |
|---|
| 810 | | - if (pgd_none(pgd)) |
|---|
| 811 | | - return NULL; |
|---|
| 812 | | - else if (pgd_huge(pgd)) { |
|---|
| 813 | | - ret_pte = (pte_t *) pgdp; |
|---|
| 814 | | - goto out; |
|---|
| 815 | | - } else if (is_hugepd(__hugepd(pgd_val(pgd)))) |
|---|
| 816 | | - hpdp = (hugepd_t *)&pgd; |
|---|
| 817 | | - else { |
|---|
| 707 | + if (radix_enabled()) |
|---|
| 708 | + order = PUD_SHIFT - PAGE_SHIFT; |
|---|
| 709 | + else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift) |
|---|
| 818 | 710 | /* |
|---|
| 819 | | - * Even if we end up with an unmap, the pgtable will not |
|---|
| 820 | | - * be freed, because we do an rcu free and here we are |
|---|
| 821 | | - * irq disabled |
|---|
| 711 | + * For pseries we do use ibm,expected#pages for reserving 16G pages. |
|---|
| 822 | 712 | */ |
|---|
| 823 | | - pdshift = PUD_SHIFT; |
|---|
| 824 | | - pudp = pud_offset(&pgd, ea); |
|---|
| 825 | | - pud = READ_ONCE(*pudp); |
|---|
| 713 | + order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT; |
|---|
| 826 | 714 | |
|---|
| 827 | | - if (pud_none(pud)) |
|---|
| 828 | | - return NULL; |
|---|
| 829 | | - else if (pud_huge(pud)) { |
|---|
| 830 | | - ret_pte = (pte_t *) pudp; |
|---|
| 831 | | - goto out; |
|---|
| 832 | | - } else if (is_hugepd(__hugepd(pud_val(pud)))) |
|---|
| 833 | | - hpdp = (hugepd_t *)&pud; |
|---|
| 834 | | - else { |
|---|
| 835 | | - pdshift = PMD_SHIFT; |
|---|
| 836 | | - pmdp = pmd_offset(&pud, ea); |
|---|
| 837 | | - pmd = READ_ONCE(*pmdp); |
|---|
| 838 | | - /* |
|---|
| 839 | | - * A hugepage collapse is captured by pmd_none, because |
|---|
| 840 | | - * it mark the pmd none and do a hpte invalidate. |
|---|
| 841 | | - */ |
|---|
| 842 | | - if (pmd_none(pmd)) |
|---|
| 843 | | - return NULL; |
|---|
| 844 | | - |
|---|
| 845 | | - if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { |
|---|
| 846 | | - if (is_thp) |
|---|
| 847 | | - *is_thp = true; |
|---|
| 848 | | - ret_pte = (pte_t *) pmdp; |
|---|
| 849 | | - goto out; |
|---|
| 850 | | - } |
|---|
| 851 | | - |
|---|
| 852 | | - if (pmd_huge(pmd)) { |
|---|
| 853 | | - ret_pte = (pte_t *) pmdp; |
|---|
| 854 | | - goto out; |
|---|
| 855 | | - } else if (is_hugepd(__hugepd(pmd_val(pmd)))) |
|---|
| 856 | | - hpdp = (hugepd_t *)&pmd; |
|---|
| 857 | | - else |
|---|
| 858 | | - return pte_offset_kernel(&pmd, ea); |
|---|
| 859 | | - } |
|---|
| 715 | + if (order) { |
|---|
| 716 | + VM_WARN_ON(order < MAX_ORDER); |
|---|
| 717 | + hugetlb_cma_reserve(order); |
|---|
| 860 | 718 | } |
|---|
| 861 | | - if (!hpdp) |
|---|
| 862 | | - return NULL; |
|---|
| 863 | | - |
|---|
| 864 | | - ret_pte = hugepte_offset(*hpdp, ea, pdshift); |
|---|
| 865 | | - pdshift = hugepd_shift(*hpdp); |
|---|
| 866 | | -out: |
|---|
| 867 | | - if (hpage_shift) |
|---|
| 868 | | - *hpage_shift = pdshift; |
|---|
| 869 | | - return ret_pte; |
|---|
| 870 | | -} |
|---|
| 871 | | -EXPORT_SYMBOL_GPL(__find_linux_pte); |
|---|
| 872 | | - |
|---|
| 873 | | -int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, |
|---|
| 874 | | - unsigned long end, int write, struct page **pages, int *nr) |
|---|
| 875 | | -{ |
|---|
| 876 | | - unsigned long pte_end; |
|---|
| 877 | | - struct page *head, *page; |
|---|
| 878 | | - pte_t pte; |
|---|
| 879 | | - int refs; |
|---|
| 880 | | - |
|---|
| 881 | | - pte_end = (addr + sz) & ~(sz-1); |
|---|
| 882 | | - if (pte_end < end) |
|---|
| 883 | | - end = pte_end; |
|---|
| 884 | | - |
|---|
| 885 | | - pte = READ_ONCE(*ptep); |
|---|
| 886 | | - |
|---|
| 887 | | - if (!pte_access_permitted(pte, write)) |
|---|
| 888 | | - return 0; |
|---|
| 889 | | - |
|---|
| 890 | | - /* hugepages are never "special" */ |
|---|
| 891 | | - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); |
|---|
| 892 | | - |
|---|
| 893 | | - refs = 0; |
|---|
| 894 | | - head = pte_page(pte); |
|---|
| 895 | | - |
|---|
| 896 | | - page = head + ((addr & (sz-1)) >> PAGE_SHIFT); |
|---|
| 897 | | - do { |
|---|
| 898 | | - VM_BUG_ON(compound_head(page) != head); |
|---|
| 899 | | - pages[*nr] = page; |
|---|
| 900 | | - (*nr)++; |
|---|
| 901 | | - page++; |
|---|
| 902 | | - refs++; |
|---|
| 903 | | - } while (addr += PAGE_SIZE, addr != end); |
|---|
| 904 | | - |
|---|
| 905 | | - if (!page_cache_add_speculative(head, refs)) { |
|---|
| 906 | | - *nr -= refs; |
|---|
| 907 | | - return 0; |
|---|
| 908 | | - } |
|---|
| 909 | | - |
|---|
| 910 | | - if (unlikely(pte_val(pte) != pte_val(*ptep))) { |
|---|
| 911 | | - /* Could be optimized better */ |
|---|
| 912 | | - *nr -= refs; |
|---|
| 913 | | - while (refs--) |
|---|
| 914 | | - put_page(head); |
|---|
| 915 | | - return 0; |
|---|
| 916 | | - } |
|---|
| 917 | | - |
|---|
| 918 | | - return 1; |
|---|
| 919 | 719 | } |
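The tail of the file loses the open-coded __find_linux_pte() walker and the powerpc-private gup_huge_pd()/gup_hugepte() fast-GUP helpers (page-table walking and fast GUP are handled outside this file after these changes), and gains gigantic_hugetlb_cma_reserve(), which picks a gigantic-page order, PUD-sized pages on Radix or the 16G MMU size on non-LPAR Hash, and passes it to the generic hugetlb CMA reservation. A tiny standalone sketch of the order arithmetic, using example shifts rather than the real powerpc values:

```c
#include <stdio.h>

/* Example shifts only: assume 64K base pages and a PUD level spanning 1G;
 * the 16G shift (34) matches the PAGE_SHIFT_16G define removed above. */
#define PAGE_SHIFT_EX	16
#define PUD_SHIFT_EX	30
#define SHIFT_16G	34

int main(void)
{
	int radix = 1;			/* pretend the Radix MMU is active */
	unsigned long order;

	if (radix)
		order = PUD_SHIFT_EX - PAGE_SHIFT_EX;	/* 1G in 64K pages */
	else
		order = SHIFT_16G - PAGE_SHIFT_EX;	/* 16G in 64K pages */

	printf("gigantic page order = %lu (%lu base pages per gigantic page)\n",
	       order, 1UL << order);
	return 0;
}
```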
|---|