.. | .. |
---|
15 | 15 | #include <linux/export.h> |
---|
16 | 16 | #include <linux/of_fdt.h> |
---|
17 | 17 | #include <linux/memblock.h> |
---|
18 | | -#include <linux/bootmem.h> |
---|
19 | 18 | #include <linux/moduleparam.h> |
---|
20 | 19 | #include <linux/swap.h> |
---|
21 | 20 | #include <linux/swapops.h> |
---|
22 | 21 | #include <linux/kmemleak.h> |
---|
23 | | -#include <asm/pgtable.h> |
---|
24 | 22 | #include <asm/pgalloc.h> |
---|
25 | 23 | #include <asm/tlb.h> |
---|
26 | 24 | #include <asm/setup.h> |
---|
27 | 25 | #include <asm/hugetlb.h> |
---|
28 | 26 | #include <asm/pte-walk.h> |
---|
29 | 27 | |
---|
30 | | - |
---|
31 | | -#ifdef CONFIG_HUGETLB_PAGE |
---|
32 | | - |
---|
33 | | -#define PAGE_SHIFT_64K 16 |
---|
34 | | -#define PAGE_SHIFT_512K 19 |
---|
35 | | -#define PAGE_SHIFT_8M 23 |
---|
36 | | -#define PAGE_SHIFT_16M 24 |
---|
37 | | -#define PAGE_SHIFT_16G 34 |
---|
38 | | - |
---|
39 | 28 | bool hugetlb_disabled = false; |
---|
40 | 29 | |
---|
41 | | -unsigned int HPAGE_SHIFT; |
---|
42 | | -EXPORT_SYMBOL(HPAGE_SHIFT); |
---|
43 | | - |
---|
44 | 30 | #define hugepd_none(hpd) (hpd_val(hpd) == 0) |
---|
| 31 | + |
---|
| 32 | +#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \ |
---|
| 33 | + __builtin_ffs(sizeof(void *))) |
---|
45 | 34 | |
---|
46 | 35 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) |
---|
47 | 36 | { |
---|
.. | .. |
---|
62 | 51 | int num_hugepd; |
---|
63 | 52 | |
---|
64 | 53 | if (pshift >= pdshift) { |
---|
65 | | - cachep = hugepte_cache; |
---|
| 54 | + cachep = PGT_CACHE(PTE_T_ORDER); |
---|
66 | 55 | num_hugepd = 1 << (pshift - pdshift); |
---|
67 | 56 | } else { |
---|
68 | 57 | cachep = PGT_CACHE(pdshift - pshift); |
---|
69 | 58 | num_hugepd = 1; |
---|
70 | 59 | } |
---|
71 | 60 | |
---|
72 | | - new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); |
---|
| 61 | + if (!cachep) { |
---|
| 62 | + WARN_ONCE(1, "No page table cache created for hugetlb tables"); |
---|
| 63 | + return -ENOMEM; |
---|
| 64 | + } |
---|
| 65 | + |
---|
| 66 | + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); |
---|
73 | 67 | |
---|
74 | 68 | BUG_ON(pshift > HUGEPD_SHIFT_MASK); |
---|
75 | 69 | BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); |
---|
76 | 70 | |
---|
77 | | - if (! new) |
---|
| 71 | + if (!new) |
---|
78 | 72 | return -ENOMEM; |
---|
79 | 73 | |
---|
80 | 74 | /* |
---|
.. | .. |
---|
94 | 88 | for (i = 0; i < num_hugepd; i++, hpdp++) { |
---|
95 | 89 | if (unlikely(!hugepd_none(*hpdp))) |
---|
96 | 90 | break; |
---|
97 | | - else { |
---|
98 | | -#ifdef CONFIG_PPC_BOOK3S_64 |
---|
99 | | - *hpdp = __hugepd(__pa(new) | |
---|
100 | | - (shift_to_mmu_psize(pshift) << 2)); |
---|
101 | | -#elif defined(CONFIG_PPC_8xx) |
---|
102 | | - *hpdp = __hugepd(__pa(new) | _PMD_USER | |
---|
103 | | - (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : |
---|
104 | | - _PMD_PAGE_512K) | _PMD_PRESENT); |
---|
105 | | -#else |
---|
106 | | - /* We use the old format for PPC_FSL_BOOK3E */ |
---|
107 | | - *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); |
---|
108 | | -#endif |
---|
109 | | - } |
---|
| 91 | + hugepd_populate(hpdp, new, pshift); |
---|
110 | 92 | } |
---|
111 | 93 | /* If we bailed from the for loop early, an error occurred, clean up */ |
---|
112 | 94 | if (i < num_hugepd) { |
---|
.. | .. |
---|
124 | 106 | * At this point we do the placement change only for BOOK3S 64. This would |
---|
125 | 107 | * possibly work on other subarchs. |
---|
126 | 108 | */ |
---|
127 | | -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) |
---|
| 109 | +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, |
---|
| 110 | + unsigned long addr, unsigned long sz) |
---|
128 | 111 | { |
---|
129 | 112 | pgd_t *pg; |
---|
| 113 | + p4d_t *p4; |
---|
130 | 114 | pud_t *pu; |
---|
131 | 115 | pmd_t *pm; |
---|
132 | 116 | hugepd_t *hpdp = NULL; |
---|
.. | .. |
---|
136 | 120 | |
---|
137 | 121 | addr &= ~(sz-1); |
---|
138 | 122 | pg = pgd_offset(mm, addr); |
---|
| 123 | + p4 = p4d_offset(pg, addr); |
---|
139 | 124 | |
---|
140 | 125 | #ifdef CONFIG_PPC_BOOK3S_64 |
---|
141 | 126 | if (pshift == PGDIR_SHIFT) |
---|
142 | 127 | /* 16GB huge page */ |
---|
143 | | - return (pte_t *) pg; |
---|
| 128 | + return (pte_t *) p4; |
---|
144 | 129 | else if (pshift > PUD_SHIFT) { |
---|
145 | 130 | /* |
---|
146 | 131 | * We need to use hugepd table |
---|
147 | 132 | */ |
---|
148 | 133 | ptl = &mm->page_table_lock; |
---|
149 | | - hpdp = (hugepd_t *)pg; |
---|
| 134 | + hpdp = (hugepd_t *)p4; |
---|
150 | 135 | } else { |
---|
151 | 136 | pdshift = PUD_SHIFT; |
---|
152 | | - pu = pud_alloc(mm, pg, addr); |
---|
| 137 | + pu = pud_alloc(mm, p4, addr); |
---|
153 | 138 | if (!pu) |
---|
154 | 139 | return NULL; |
---|
155 | 140 | if (pshift == PUD_SHIFT) |
---|
.. | .. |
---|
174 | 159 | #else |
---|
175 | 160 | if (pshift >= PGDIR_SHIFT) { |
---|
176 | 161 | ptl = &mm->page_table_lock; |
---|
177 | | - hpdp = (hugepd_t *)pg; |
---|
| 162 | + hpdp = (hugepd_t *)p4; |
---|
178 | 163 | } else { |
---|
179 | 164 | pdshift = PUD_SHIFT; |
---|
180 | | - pu = pud_alloc(mm, pg, addr); |
---|
| 165 | + pu = pud_alloc(mm, p4, addr); |
---|
181 | 166 | if (!pu) |
---|
182 | 167 | return NULL; |
---|
183 | 168 | if (pshift >= PUD_SHIFT) { |
---|
.. | .. |
---|
195 | 180 | #endif |
---|
196 | 181 | if (!hpdp) |
---|
197 | 182 | return NULL; |
---|
| 183 | + |
---|
| 184 | + if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT) |
---|
| 185 | + return pte_alloc_map(mm, (pmd_t *)hpdp, addr); |
---|
198 | 186 | |
---|
199 | 187 | BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); |
---|
200 | 188 | |
---|
.. | .. |
---|
254 | 242 | return __alloc_bootmem_huge_page(h); |
---|
255 | 243 | } |
---|
256 | 244 | |
---|
257 | | -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) |
---|
| 245 | +#ifndef CONFIG_PPC_BOOK3S_64 |
---|
258 | 246 | #define HUGEPD_FREELIST_SIZE \ |
---|
259 | 247 | ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) |
---|
260 | 248 | |
---|
261 | 249 | struct hugepd_freelist { |
---|
262 | 250 | struct rcu_head rcu; |
---|
263 | 251 | unsigned int index; |
---|
264 | | - void *ptes[0]; |
---|
| 252 | + void *ptes[]; |
---|
265 | 253 | }; |
---|
266 | 254 | |
---|
267 | 255 | static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur); |
---|
.. | .. |
---|
273 | 261 | unsigned int i; |
---|
274 | 262 | |
---|
275 | 263 | for (i = 0; i < batch->index; i++) |
---|
276 | | - kmem_cache_free(hugepte_cache, batch->ptes[i]); |
---|
| 264 | + kmem_cache_free(PGT_CACHE(PTE_T_ORDER), batch->ptes[i]); |
---|
277 | 265 | |
---|
278 | 266 | free_page((unsigned long)batch); |
---|
279 | 267 | } |
---|
.. | .. |
---|
286 | 274 | |
---|
287 | 275 | if (atomic_read(&tlb->mm->mm_users) < 2 || |
---|
288 | 276 | mm_is_thread_local(tlb->mm)) { |
---|
289 | | - kmem_cache_free(hugepte_cache, hugepte); |
---|
| 277 | + kmem_cache_free(PGT_CACHE(PTE_T_ORDER), hugepte); |
---|
290 | 278 | put_cpu_var(hugepd_freelist_cur); |
---|
291 | 279 | return; |
---|
292 | 280 | } |
---|
.. | .. |
---|
298 | 286 | |
---|
299 | 287 | (*batchp)->ptes[(*batchp)->index++] = hugepte; |
---|
300 | 288 | if ((*batchp)->index == HUGEPD_FREELIST_SIZE) { |
---|
301 | | - call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback); |
---|
| 289 | + call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback); |
---|
302 | 290 | *batchp = NULL; |
---|
303 | 291 | } |
---|
304 | 292 | put_cpu_var(hugepd_freelist_cur); |
---|
.. | .. |
---|
343 | 331 | get_hugepd_cache_index(pdshift - shift)); |
---|
344 | 332 | } |
---|
345 | 333 | |
---|
| 334 | +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, |
---|
| 335 | + unsigned long addr, unsigned long end, |
---|
| 336 | + unsigned long floor, unsigned long ceiling) |
---|
| 337 | +{ |
---|
| 338 | + unsigned long start = addr; |
---|
| 339 | + pgtable_t token = pmd_pgtable(*pmd); |
---|
| 340 | + |
---|
| 341 | + start &= PMD_MASK; |
---|
| 342 | + if (start < floor) |
---|
| 343 | + return; |
---|
| 344 | + if (ceiling) { |
---|
| 345 | + ceiling &= PMD_MASK; |
---|
| 346 | + if (!ceiling) |
---|
| 347 | + return; |
---|
| 348 | + } |
---|
| 349 | + if (end - 1 > ceiling - 1) |
---|
| 350 | + return; |
---|
| 351 | + |
---|
| 352 | + pmd_clear(pmd); |
---|
| 353 | + pte_free_tlb(tlb, token, addr); |
---|
| 354 | + mm_dec_nr_ptes(tlb->mm); |
---|
| 355 | +} |
---|
| 356 | + |
---|
346 | 357 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, |
---|
347 | 358 | unsigned long addr, unsigned long end, |
---|
348 | 359 | unsigned long floor, unsigned long ceiling) |
---|
.. | .. |
---|
358 | 369 | pmd = pmd_offset(pud, addr); |
---|
359 | 370 | next = pmd_addr_end(addr, end); |
---|
360 | 371 | if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { |
---|
| 372 | + if (pmd_none_or_clear_bad(pmd)) |
---|
| 373 | + continue; |
---|
| 374 | + |
---|
361 | 375 | /* |
---|
362 | 376 | * if it is not hugepd pointer, we should already find |
---|
363 | 377 | * it cleared. |
---|
364 | 378 | */ |
---|
365 | | - WARN_ON(!pmd_none_or_clear_bad(pmd)); |
---|
| 379 | + WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx)); |
---|
| 380 | + |
---|
| 381 | + hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling); |
---|
| 382 | + |
---|
366 | 383 | continue; |
---|
367 | 384 | } |
---|
368 | 385 | /* |
---|
.. | .. |
---|
396 | 413 | mm_dec_nr_pmds(tlb->mm); |
---|
397 | 414 | } |
---|
398 | 415 | |
---|
399 | | -static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, |
---|
| 416 | +static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, |
---|
400 | 417 | unsigned long addr, unsigned long end, |
---|
401 | 418 | unsigned long floor, unsigned long ceiling) |
---|
402 | 419 | { |
---|
.. | .. |
---|
406 | 423 | |
---|
407 | 424 | start = addr; |
---|
408 | 425 | do { |
---|
409 | | - pud = pud_offset(pgd, addr); |
---|
| 426 | + pud = pud_offset(p4d, addr); |
---|
410 | 427 | next = pud_addr_end(addr, end); |
---|
411 | 428 | if (!is_hugepd(__hugepd(pud_val(*pud)))) { |
---|
412 | 429 | if (pud_none_or_clear_bad(pud)) |
---|
.. | .. |
---|
441 | 458 | if (end - 1 > ceiling - 1) |
---|
442 | 459 | return; |
---|
443 | 460 | |
---|
444 | | - pud = pud_offset(pgd, start); |
---|
445 | | - pgd_clear(pgd); |
---|
| 461 | + pud = pud_offset(p4d, start); |
---|
| 462 | + p4d_clear(p4d); |
---|
446 | 463 | pud_free_tlb(tlb, pud, start); |
---|
447 | 464 | mm_dec_nr_puds(tlb->mm); |
---|
448 | 465 | } |
---|
.. | .. |
---|
455 | 472 | unsigned long floor, unsigned long ceiling) |
---|
456 | 473 | { |
---|
457 | 474 | pgd_t *pgd; |
---|
| 475 | + p4d_t *p4d; |
---|
458 | 476 | unsigned long next; |
---|
459 | 477 | |
---|
460 | 478 | /* |
---|
.. | .. |
---|
477 | 495 | do { |
---|
478 | 496 | next = pgd_addr_end(addr, end); |
---|
479 | 497 | pgd = pgd_offset(tlb->mm, addr); |
---|
| 498 | + p4d = p4d_offset(pgd, addr); |
---|
480 | 499 | if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { |
---|
481 | | - if (pgd_none_or_clear_bad(pgd)) |
---|
| 500 | + if (p4d_none_or_clear_bad(p4d)) |
---|
482 | 501 | continue; |
---|
483 | | - hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); |
---|
| 502 | + hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling); |
---|
484 | 503 | } else { |
---|
485 | 504 | unsigned long more; |
---|
486 | 505 | /* |
---|
.. | .. |
---|
493 | 512 | if (more > next) |
---|
494 | 513 | next = more; |
---|
495 | 514 | |
---|
496 | | - free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, |
---|
| 515 | + free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT, |
---|
497 | 516 | addr, next, floor, ceiling); |
---|
498 | 517 | } |
---|
499 | 518 | } while (addr = next, addr != end); |
---|
.. | .. |
---|
536 | 555 | return page; |
---|
537 | 556 | } |
---|
538 | 557 | |
---|
539 | | -static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, |
---|
540 | | - unsigned long sz) |
---|
541 | | -{ |
---|
542 | | - unsigned long __boundary = (addr + sz) & ~(sz-1); |
---|
543 | | - return (__boundary - 1 < end - 1) ? __boundary : end; |
---|
544 | | -} |
---|
545 | | - |
---|
546 | | -int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, |
---|
547 | | - unsigned long end, int write, struct page **pages, int *nr) |
---|
548 | | -{ |
---|
549 | | - pte_t *ptep; |
---|
550 | | - unsigned long sz = 1UL << hugepd_shift(hugepd); |
---|
551 | | - unsigned long next; |
---|
552 | | - |
---|
553 | | - ptep = hugepte_offset(hugepd, addr, pdshift); |
---|
554 | | - do { |
---|
555 | | - next = hugepte_addr_end(addr, end, sz); |
---|
556 | | - if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) |
---|
557 | | - return 0; |
---|
558 | | - } while (ptep++, addr = next, addr != end); |
---|
559 | | - |
---|
560 | | - return 1; |
---|
561 | | -} |
---|
562 | | - |
---|
563 | 558 | #ifdef CONFIG_PPC_MM_SLICES |
---|
564 | 559 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, |
---|
565 | 560 | unsigned long len, unsigned long pgoff, |
---|
.. | .. |
---|
579 | 574 | |
---|
580 | 575 | unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) |
---|
581 | 576 | { |
---|
582 | | -#ifdef CONFIG_PPC_MM_SLICES |
---|
583 | 577 | /* With radix we don't use slice, so derive it from vma*/ |
---|
584 | | - if (!radix_enabled()) { |
---|
| 578 | + if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) { |
---|
585 | 579 | unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); |
---|
586 | 580 | |
---|
587 | 581 | return 1UL << mmu_psize_to_shift(psize); |
---|
588 | 582 | } |
---|
589 | | -#endif |
---|
590 | 583 | return vma_kernel_pagesize(vma); |
---|
591 | 584 | } |
---|
592 | 585 | |
---|
593 | | -static inline bool is_power_of_4(unsigned long x) |
---|
594 | | -{ |
---|
595 | | - if (is_power_of_2(x)) |
---|
596 | | - return (__ilog2(x) % 2) ? false : true; |
---|
597 | | - return false; |
---|
598 | | -} |
---|
599 | | - |
---|
600 | | -static int __init add_huge_page_size(unsigned long long size) |
---|
| 586 | +bool __init arch_hugetlb_valid_size(unsigned long size) |
---|
601 | 587 | { |
---|
602 | 588 | int shift = __ffs(size); |
---|
603 | 589 | int mmu_psize; |
---|
604 | 590 | |
---|
605 | 591 | /* Check that it is a page size supported by the hardware and |
---|
606 | 592 | * that it fits within pagetable and slice limits. */ |
---|
607 | | - if (size <= PAGE_SIZE) |
---|
608 | | - return -EINVAL; |
---|
609 | | -#if defined(CONFIG_PPC_FSL_BOOK3E) |
---|
610 | | - if (!is_power_of_4(size)) |
---|
611 | | - return -EINVAL; |
---|
612 | | -#elif !defined(CONFIG_PPC_8xx) |
---|
613 | | - if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT)) |
---|
614 | | - return -EINVAL; |
---|
615 | | -#endif |
---|
| 593 | + if (size <= PAGE_SIZE || !is_power_of_2(size)) |
---|
| 594 | + return false; |
---|
616 | 595 | |
---|
617 | | - if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) |
---|
618 | | - return -EINVAL; |
---|
619 | | - |
---|
620 | | -#ifdef CONFIG_PPC_BOOK3S_64 |
---|
621 | | - /* |
---|
622 | | - * We need to make sure that for different page sizes reported by |
---|
623 | | - * firmware we only add hugetlb support for page sizes that can be |
---|
624 | | - * supported by linux page table layout. |
---|
625 | | - * For now we have |
---|
626 | | - * Radix: 2M and 1G |
---|
627 | | - * Hash: 16M and 16G |
---|
628 | | - */ |
---|
629 | | - if (radix_enabled()) { |
---|
630 | | - if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G) |
---|
631 | | - return -EINVAL; |
---|
632 | | - } else { |
---|
633 | | - if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) |
---|
634 | | - return -EINVAL; |
---|
635 | | - } |
---|
636 | | -#endif |
---|
| 596 | + mmu_psize = check_and_get_huge_psize(shift); |
---|
| 597 | + if (mmu_psize < 0) |
---|
| 598 | + return false; |
---|
637 | 599 | |
---|
638 | 600 | BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); |
---|
639 | 601 | |
---|
640 | | - /* Return if huge page size has already been setup */ |
---|
641 | | - if (size_to_hstate(size)) |
---|
642 | | - return 0; |
---|
| 602 | + return true; |
---|
| 603 | +} |
---|
| 604 | + |
---|
| 605 | +static int __init add_huge_page_size(unsigned long long size) |
---|
| 606 | +{ |
---|
| 607 | + int shift = __ffs(size); |
---|
| 608 | + |
---|
| 609 | + if (!arch_hugetlb_valid_size((unsigned long)size)) |
---|
| 610 | + return -EINVAL; |
---|
643 | 611 | |
---|
644 | 612 | hugetlb_add_hstate(shift - PAGE_SHIFT); |
---|
645 | | - |
---|
646 | 613 | return 0; |
---|
647 | 614 | } |
---|
648 | 615 | |
---|
649 | | -static int __init hugepage_setup_sz(char *str) |
---|
650 | | -{ |
---|
651 | | - unsigned long long size; |
---|
652 | | - |
---|
653 | | - size = memparse(str, &str); |
---|
654 | | - |
---|
655 | | - if (add_huge_page_size(size) != 0) { |
---|
656 | | - hugetlb_bad_size(); |
---|
657 | | - pr_err("Invalid huge page size specified(%llu)\n", size); |
---|
658 | | - } |
---|
659 | | - |
---|
660 | | - return 1; |
---|
661 | | -} |
---|
662 | | -__setup("hugepagesz=", hugepage_setup_sz); |
---|
663 | | - |
---|
664 | | -struct kmem_cache *hugepte_cache; |
---|
665 | 616 | static int __init hugetlbpage_init(void) |
---|
666 | 617 | { |
---|
| 618 | + bool configured = false; |
---|
667 | 619 | int psize; |
---|
668 | 620 | |
---|
669 | 621 | if (hugetlb_disabled) { |
---|
.. | .. |
---|
671 | 623 | return 0; |
---|
672 | 624 | } |
---|
673 | 625 | |
---|
674 | | -#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx) |
---|
675 | | - if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) |
---|
| 626 | + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() && |
---|
| 627 | + !mmu_has_feature(MMU_FTR_16M_PAGE)) |
---|
676 | 628 | return -ENODEV; |
---|
677 | | -#endif |
---|
| 629 | + |
---|
678 | 630 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { |
---|
679 | 631 | unsigned shift; |
---|
680 | 632 | unsigned pdshift; |
---|
.. | .. |
---|
708 | 660 | * if we have pdshift and shift value same, we don't |
---|
709 | 661 | * use pgt cache for hugepd. |
---|
710 | 662 | */ |
---|
711 | | - if (pdshift > shift) |
---|
712 | | - pgtable_cache_add(pdshift - shift, NULL); |
---|
713 | | -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) |
---|
714 | | - else if (!hugepte_cache) { |
---|
715 | | - /* |
---|
716 | | - * Create a kmem cache for hugeptes. The bottom bits in |
---|
717 | | - * the pte have size information encoded in them, so |
---|
718 | | - * align them to allow this |
---|
719 | | - */ |
---|
720 | | - hugepte_cache = kmem_cache_create("hugepte-cache", |
---|
721 | | - sizeof(pte_t), |
---|
722 | | - HUGEPD_SHIFT_MASK + 1, |
---|
723 | | - 0, NULL); |
---|
724 | | - if (hugepte_cache == NULL) |
---|
725 | | - panic("%s: Unable to create kmem cache " |
---|
726 | | - "for hugeptes\n", __func__); |
---|
727 | | - |
---|
| 663 | + if (pdshift > shift) { |
---|
| 664 | + if (!IS_ENABLED(CONFIG_PPC_8xx)) |
---|
| 665 | + pgtable_cache_add(pdshift - shift); |
---|
| 666 | + } else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || |
---|
| 667 | + IS_ENABLED(CONFIG_PPC_8xx)) { |
---|
| 668 | + pgtable_cache_add(PTE_T_ORDER); |
---|
728 | 669 | } |
---|
729 | | -#endif |
---|
| 670 | + |
---|
| 671 | + configured = true; |
---|
730 | 672 | } |
---|
731 | 673 | |
---|
732 | | -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) |
---|
733 | | - /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */ |
---|
734 | | - if (mmu_psize_defs[MMU_PAGE_4M].shift) |
---|
735 | | - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; |
---|
736 | | - else if (mmu_psize_defs[MMU_PAGE_512K].shift) |
---|
737 | | - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift; |
---|
738 | | -#else |
---|
739 | | - /* Set default large page size. Currently, we pick 16M or 1M |
---|
740 | | - * depending on what is available |
---|
741 | | - */ |
---|
742 | | - if (mmu_psize_defs[MMU_PAGE_16M].shift) |
---|
743 | | - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; |
---|
744 | | - else if (mmu_psize_defs[MMU_PAGE_1M].shift) |
---|
745 | | - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; |
---|
746 | | - else if (mmu_psize_defs[MMU_PAGE_2M].shift) |
---|
747 | | - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift; |
---|
748 | | -#endif |
---|
| 674 | + if (configured) { |
---|
| 675 | + if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) |
---|
| 676 | + hugetlbpage_init_default(); |
---|
| 677 | + } else |
---|
| 678 | + pr_info("Failed to initialize. Disabling HugeTLB"); |
---|
| 679 | + |
---|
749 | 680 | return 0; |
---|
750 | 681 | } |
---|
751 | 682 | |
---|
.. | .. |
---|
758 | 689 | |
---|
759 | 690 | BUG_ON(!PageCompound(page)); |
---|
760 | 691 | |
---|
761 | | - for (i = 0; i < (1UL << compound_order(page)); i++) { |
---|
| 692 | + for (i = 0; i < compound_nr(page); i++) { |
---|
762 | 693 | if (!PageHighMem(page)) { |
---|
763 | 694 | __flush_dcache_icache(page_address(page+i)); |
---|
764 | 695 | } else { |
---|
.. | .. |
---|
769 | 700 | } |
---|
770 | 701 | } |
---|
771 | 702 | |
---|
772 | | -#endif /* CONFIG_HUGETLB_PAGE */ |
---|
773 | | - |
---|
774 | | -/* |
---|
775 | | - * We have 4 cases for pgds and pmds: |
---|
776 | | - * (1) invalid (all zeroes) |
---|
777 | | - * (2) pointer to next table, as normal; bottom 6 bits == 0 |
---|
778 | | - * (3) leaf pte for huge page _PAGE_PTE set |
---|
779 | | - * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table |
---|
780 | | - * |
---|
781 | | - * So long as we atomically load page table pointers we are safe against teardown, |
---|
782 | | - * we can follow the address down to the the page and take a ref on it. |
---|
783 | | - * This function need to be called with interrupts disabled. We use this variant |
---|
784 | | - * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED |
---|
785 | | - */ |
---|
786 | | -pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, |
---|
787 | | - bool *is_thp, unsigned *hpage_shift) |
---|
| 703 | +void __init gigantic_hugetlb_cma_reserve(void) |
---|
788 | 704 | { |
---|
789 | | - pgd_t pgd, *pgdp; |
---|
790 | | - pud_t pud, *pudp; |
---|
791 | | - pmd_t pmd, *pmdp; |
---|
792 | | - pte_t *ret_pte; |
---|
793 | | - hugepd_t *hpdp = NULL; |
---|
794 | | - unsigned pdshift = PGDIR_SHIFT; |
---|
| 705 | + unsigned long order = 0; |
---|
795 | 706 | |
---|
796 | | - if (hpage_shift) |
---|
797 | | - *hpage_shift = 0; |
---|
798 | | - |
---|
799 | | - if (is_thp) |
---|
800 | | - *is_thp = false; |
---|
801 | | - |
---|
802 | | - pgdp = pgdir + pgd_index(ea); |
---|
803 | | - pgd = READ_ONCE(*pgdp); |
---|
804 | | - /* |
---|
805 | | - * Always operate on the local stack value. This make sure the |
---|
806 | | - * value don't get updated by a parallel THP split/collapse, |
---|
807 | | - * page fault or a page unmap. The return pte_t * is still not |
---|
808 | | - * stable. So should be checked there for above conditions. |
---|
809 | | - */ |
---|
810 | | - if (pgd_none(pgd)) |
---|
811 | | - return NULL; |
---|
812 | | - else if (pgd_huge(pgd)) { |
---|
813 | | - ret_pte = (pte_t *) pgdp; |
---|
814 | | - goto out; |
---|
815 | | - } else if (is_hugepd(__hugepd(pgd_val(pgd)))) |
---|
816 | | - hpdp = (hugepd_t *)&pgd; |
---|
817 | | - else { |
---|
| 707 | + if (radix_enabled()) |
---|
| 708 | + order = PUD_SHIFT - PAGE_SHIFT; |
---|
| 709 | + else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift) |
---|
818 | 710 | /* |
---|
819 | | - * Even if we end up with an unmap, the pgtable will not |
---|
820 | | - * be freed, because we do an rcu free and here we are |
---|
821 | | - * irq disabled |
---|
| 711 | + * For pseries we do use ibm,expected#pages for reserving 16G pages. |
---|
822 | 712 | */ |
---|
823 | | - pdshift = PUD_SHIFT; |
---|
824 | | - pudp = pud_offset(&pgd, ea); |
---|
825 | | - pud = READ_ONCE(*pudp); |
---|
| 713 | + order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT; |
---|
826 | 714 | |
---|
827 | | - if (pud_none(pud)) |
---|
828 | | - return NULL; |
---|
829 | | - else if (pud_huge(pud)) { |
---|
830 | | - ret_pte = (pte_t *) pudp; |
---|
831 | | - goto out; |
---|
832 | | - } else if (is_hugepd(__hugepd(pud_val(pud)))) |
---|
833 | | - hpdp = (hugepd_t *)&pud; |
---|
834 | | - else { |
---|
835 | | - pdshift = PMD_SHIFT; |
---|
836 | | - pmdp = pmd_offset(&pud, ea); |
---|
837 | | - pmd = READ_ONCE(*pmdp); |
---|
838 | | - /* |
---|
839 | | - * A hugepage collapse is captured by pmd_none, because |
---|
840 | | - * it mark the pmd none and do a hpte invalidate. |
---|
841 | | - */ |
---|
842 | | - if (pmd_none(pmd)) |
---|
843 | | - return NULL; |
---|
844 | | - |
---|
845 | | - if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { |
---|
846 | | - if (is_thp) |
---|
847 | | - *is_thp = true; |
---|
848 | | - ret_pte = (pte_t *) pmdp; |
---|
849 | | - goto out; |
---|
850 | | - } |
---|
851 | | - |
---|
852 | | - if (pmd_huge(pmd)) { |
---|
853 | | - ret_pte = (pte_t *) pmdp; |
---|
854 | | - goto out; |
---|
855 | | - } else if (is_hugepd(__hugepd(pmd_val(pmd)))) |
---|
856 | | - hpdp = (hugepd_t *)&pmd; |
---|
857 | | - else |
---|
858 | | - return pte_offset_kernel(&pmd, ea); |
---|
859 | | - } |
---|
| 715 | + if (order) { |
---|
| 716 | + VM_WARN_ON(order < MAX_ORDER); |
---|
| 717 | + hugetlb_cma_reserve(order); |
---|
860 | 718 | } |
---|
861 | | - if (!hpdp) |
---|
862 | | - return NULL; |
---|
863 | | - |
---|
864 | | - ret_pte = hugepte_offset(*hpdp, ea, pdshift); |
---|
865 | | - pdshift = hugepd_shift(*hpdp); |
---|
866 | | -out: |
---|
867 | | - if (hpage_shift) |
---|
868 | | - *hpage_shift = pdshift; |
---|
869 | | - return ret_pte; |
---|
870 | | -} |
---|
871 | | -EXPORT_SYMBOL_GPL(__find_linux_pte); |
---|
872 | | - |
---|
873 | | -int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, |
---|
874 | | - unsigned long end, int write, struct page **pages, int *nr) |
---|
875 | | -{ |
---|
876 | | - unsigned long pte_end; |
---|
877 | | - struct page *head, *page; |
---|
878 | | - pte_t pte; |
---|
879 | | - int refs; |
---|
880 | | - |
---|
881 | | - pte_end = (addr + sz) & ~(sz-1); |
---|
882 | | - if (pte_end < end) |
---|
883 | | - end = pte_end; |
---|
884 | | - |
---|
885 | | - pte = READ_ONCE(*ptep); |
---|
886 | | - |
---|
887 | | - if (!pte_access_permitted(pte, write)) |
---|
888 | | - return 0; |
---|
889 | | - |
---|
890 | | - /* hugepages are never "special" */ |
---|
891 | | - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); |
---|
892 | | - |
---|
893 | | - refs = 0; |
---|
894 | | - head = pte_page(pte); |
---|
895 | | - |
---|
896 | | - page = head + ((addr & (sz-1)) >> PAGE_SHIFT); |
---|
897 | | - do { |
---|
898 | | - VM_BUG_ON(compound_head(page) != head); |
---|
899 | | - pages[*nr] = page; |
---|
900 | | - (*nr)++; |
---|
901 | | - page++; |
---|
902 | | - refs++; |
---|
903 | | - } while (addr += PAGE_SIZE, addr != end); |
---|
904 | | - |
---|
905 | | - if (!page_cache_add_speculative(head, refs)) { |
---|
906 | | - *nr -= refs; |
---|
907 | | - return 0; |
---|
908 | | - } |
---|
909 | | - |
---|
910 | | - if (unlikely(pte_val(pte) != pte_val(*ptep))) { |
---|
911 | | - /* Could be optimized better */ |
---|
912 | | - *nr -= refs; |
---|
913 | | - while (refs--) |
---|
914 | | - put_page(head); |
---|
915 | | - return 0; |
---|
916 | | - } |
---|
917 | | - |
---|
918 | | - return 1; |
---|
919 | 719 | } |
---|