forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-11 04dd17822334871b23ea2862f7798fb0e0007777
kernel/arch/powerpc/mm/hugetlbpage.c
@@ -15,33 +15,22 @@
 #include <linux/export.h>
 #include <linux/of_fdt.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 #include <linux/moduleparam.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
-#include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/setup.h>
 #include <asm/hugetlb.h>
 #include <asm/pte-walk.h>
 
-
-#ifdef CONFIG_HUGETLB_PAGE
-
-#define PAGE_SHIFT_64K	16
-#define PAGE_SHIFT_512K	19
-#define PAGE_SHIFT_8M	23
-#define PAGE_SHIFT_16M	24
-#define PAGE_SHIFT_16G	34
-
 bool hugetlb_disabled = false;
 
-unsigned int HPAGE_SHIFT;
-EXPORT_SYMBOL(HPAGE_SHIFT);
-
 #define hugepd_none(hpd)	(hpd_val(hpd) == 0)
+
+#define PTE_T_ORDER	(__builtin_ffs(sizeof(pte_basic_t)) - \
+			 __builtin_ffs(sizeof(void *)))
 
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
 {
@@ -62,19 +51,24 @@
 	int num_hugepd;
 
 	if (pshift >= pdshift) {
-		cachep = hugepte_cache;
+		cachep = PGT_CACHE(PTE_T_ORDER);
 		num_hugepd = 1 << (pshift - pdshift);
 	} else {
 		cachep = PGT_CACHE(pdshift - pshift);
 		num_hugepd = 1;
 	}
 
-	new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
+	if (!cachep) {
+		WARN_ONCE(1, "No page table cache created for hugetlb tables");
+		return -ENOMEM;
+	}
+
+	new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
 
 	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
 	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
 
-	if (! new)
+	if (!new)
 		return -ENOMEM;
 
 	/*
@@ -94,19 +88,7 @@
 	for (i = 0; i < num_hugepd; i++, hpdp++) {
 		if (unlikely(!hugepd_none(*hpdp)))
 			break;
-		else {
-#ifdef CONFIG_PPC_BOOK3S_64
-			*hpdp = __hugepd(__pa(new) |
-					 (shift_to_mmu_psize(pshift) << 2));
-#elif defined(CONFIG_PPC_8xx)
-			*hpdp = __hugepd(__pa(new) | _PMD_USER |
-					 (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M :
-					  _PMD_PAGE_512K) | _PMD_PRESENT);
-#else
-			/* We use the old format for PPC_FSL_BOOK3E */
-			*hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift);
-#endif
-		}
+		hugepd_populate(hpdp, new, pshift);
 	}
 	/* If we bailed from the for loop early, an error occurred, clean up */
 	if (i < num_hugepd) {
@@ -124,9 +106,11 @@
  * At this point we do the placement change only for BOOK3S 64. This would
  * possibly work on other subarchs.
  */
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+		      unsigned long addr, unsigned long sz)
 {
 	pgd_t *pg;
+	p4d_t *p4;
 	pud_t *pu;
 	pmd_t *pm;
 	hugepd_t *hpdp = NULL;
@@ -136,20 +120,21 @@
 
 	addr &= ~(sz-1);
 	pg = pgd_offset(mm, addr);
+	p4 = p4d_offset(pg, addr);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (pshift == PGDIR_SHIFT)
 		/* 16GB huge page */
-		return (pte_t *) pg;
+		return (pte_t *) p4;
 	else if (pshift > PUD_SHIFT) {
 		/*
 		 * We need to use hugepd table
 		 */
 		ptl = &mm->page_table_lock;
-		hpdp = (hugepd_t *)pg;
+		hpdp = (hugepd_t *)p4;
 	} else {
 		pdshift = PUD_SHIFT;
-		pu = pud_alloc(mm, pg, addr);
+		pu = pud_alloc(mm, p4, addr);
 		if (!pu)
 			return NULL;
 		if (pshift == PUD_SHIFT)
@@ -174,10 +159,10 @@
 #else
 	if (pshift >= PGDIR_SHIFT) {
 		ptl = &mm->page_table_lock;
-		hpdp = (hugepd_t *)pg;
+		hpdp = (hugepd_t *)p4;
 	} else {
 		pdshift = PUD_SHIFT;
-		pu = pud_alloc(mm, pg, addr);
+		pu = pud_alloc(mm, p4, addr);
 		if (!pu)
 			return NULL;
 		if (pshift >= PUD_SHIFT) {
@@ -195,6 +180,9 @@
 #endif
 	if (!hpdp)
 		return NULL;
+
+	if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT)
+		return pte_alloc_map(mm, (pmd_t *)hpdp, addr);
 
 	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
 
@@ -254,14 +242,14 @@
 	return __alloc_bootmem_huge_page(h);
 }
 
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
+#ifndef CONFIG_PPC_BOOK3S_64
 #define HUGEPD_FREELIST_SIZE \
 	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
 
 struct hugepd_freelist {
 	struct rcu_head rcu;
 	unsigned int index;
-	void *ptes[0];
+	void *ptes[];
 };
 
 static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
@@ -273,7 +261,7 @@
 	unsigned int i;
 
 	for (i = 0; i < batch->index; i++)
-		kmem_cache_free(hugepte_cache, batch->ptes[i]);
+		kmem_cache_free(PGT_CACHE(PTE_T_ORDER), batch->ptes[i]);
 
 	free_page((unsigned long)batch);
 }
@@ -286,7 +274,7 @@
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    mm_is_thread_local(tlb->mm)) {
-		kmem_cache_free(hugepte_cache, hugepte);
+		kmem_cache_free(PGT_CACHE(PTE_T_ORDER), hugepte);
 		put_cpu_var(hugepd_freelist_cur);
 		return;
 	}
@@ -298,7 +286,7 @@
 
 	(*batchp)->ptes[(*batchp)->index++] = hugepte;
 	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
-		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
+		call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback);
 		*batchp = NULL;
 	}
 	put_cpu_var(hugepd_freelist_cur);
@@ -343,6 +331,29 @@
 				 get_hugepd_cache_index(pdshift - shift));
 }
 
+static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	unsigned long start = addr;
+	pgtable_t token = pmd_pgtable(*pmd);
+
+	start &= PMD_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PMD_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pmd_clear(pmd);
+	pte_free_tlb(tlb, token, addr);
+	mm_dec_nr_ptes(tlb->mm);
+}
+
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 				   unsigned long addr, unsigned long end,
 				   unsigned long floor, unsigned long ceiling)
@@ -358,11 +369,17 @@
 		pmd = pmd_offset(pud, addr);
 		next = pmd_addr_end(addr, end);
 		if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
+			if (pmd_none_or_clear_bad(pmd))
+				continue;
+
 			/*
 			 * if it is not hugepd pointer, we should already find
 			 * it cleared.
 			 */
-			WARN_ON(!pmd_none_or_clear_bad(pmd));
+			WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx));
+
+			hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling);
+
 			continue;
 		}
 		/*
@@ -396,7 +413,7 @@
 	mm_dec_nr_pmds(tlb->mm);
 }
 
-static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
 				   unsigned long addr, unsigned long end,
 				   unsigned long floor, unsigned long ceiling)
 {
@@ -406,7 +423,7 @@
 
 	start = addr;
 	do {
-		pud = pud_offset(pgd, addr);
+		pud = pud_offset(p4d, addr);
 		next = pud_addr_end(addr, end);
 		if (!is_hugepd(__hugepd(pud_val(*pud)))) {
 			if (pud_none_or_clear_bad(pud))
@@ -441,8 +458,8 @@
 	if (end - 1 > ceiling - 1)
 		return;
 
-	pud = pud_offset(pgd, start);
-	pgd_clear(pgd);
+	pud = pud_offset(p4d, start);
+	p4d_clear(p4d);
 	pud_free_tlb(tlb, pud, start);
 	mm_dec_nr_puds(tlb->mm);
 }
@@ -455,6 +472,7 @@
 			    unsigned long floor, unsigned long ceiling)
 {
 	pgd_t *pgd;
+	p4d_t *p4d;
 	unsigned long next;
 
 	/*
@@ -477,10 +495,11 @@
 	do {
 		next = pgd_addr_end(addr, end);
 		pgd = pgd_offset(tlb->mm, addr);
+		p4d = p4d_offset(pgd, addr);
 		if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
-			if (pgd_none_or_clear_bad(pgd))
+			if (p4d_none_or_clear_bad(p4d))
 				continue;
-			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+			hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
 		} else {
 			unsigned long more;
 			/*
@@ -493,7 +512,7 @@
 			if (more > next)
 				next = more;
 
-			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
+			free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT,
 					  addr, next, floor, ceiling);
 		}
 	} while (addr = next, addr != end);
@@ -536,30 +555,6 @@
 	return page;
 }
 
-static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
-				      unsigned long sz)
-{
-	unsigned long __boundary = (addr + sz) & ~(sz-1);
-	return (__boundary - 1 < end - 1) ? __boundary : end;
-}
-
-int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
-		unsigned long end, int write, struct page **pages, int *nr)
-{
-	pte_t *ptep;
-	unsigned long sz = 1UL << hugepd_shift(hugepd);
-	unsigned long next;
-
-	ptep = hugepte_offset(hugepd, addr, pdshift);
-	do {
-		next = hugepte_addr_end(addr, end, sz);
-		if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
-			return 0;
-	} while (ptep++, addr = next, addr != end);
-
-	return 1;
-}
-
 #ifdef CONFIG_PPC_MM_SLICES
 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 					unsigned long len, unsigned long pgoff,
@@ -579,91 +574,48 @@
 
 unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 {
-#ifdef CONFIG_PPC_MM_SLICES
 	/* With radix we don't use slice, so derive it from vma*/
-	if (!radix_enabled()) {
+	if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) {
 		unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
 
 		return 1UL << mmu_psize_to_shift(psize);
 	}
-#endif
 	return vma_kernel_pagesize(vma);
 }
 
-static inline bool is_power_of_4(unsigned long x)
-{
-	if (is_power_of_2(x))
-		return (__ilog2(x) % 2) ? false : true;
-	return false;
-}
-
-static int __init add_huge_page_size(unsigned long long size)
+bool __init arch_hugetlb_valid_size(unsigned long size)
 {
 	int shift = __ffs(size);
 	int mmu_psize;
 
 	/* Check that it is a page size supported by the hardware and
 	 * that it fits within pagetable and slice limits. */
-	if (size <= PAGE_SIZE)
-		return -EINVAL;
-#if defined(CONFIG_PPC_FSL_BOOK3E)
-	if (!is_power_of_4(size))
-		return -EINVAL;
-#elif !defined(CONFIG_PPC_8xx)
-	if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT))
-		return -EINVAL;
-#endif
+	if (size <= PAGE_SIZE || !is_power_of_2(size))
+		return false;
 
-	if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
-		return -EINVAL;
-
-#ifdef CONFIG_PPC_BOOK3S_64
-	/*
-	 * We need to make sure that for different page sizes reported by
-	 * firmware we only add hugetlb support for page sizes that can be
-	 * supported by linux page table layout.
-	 * For now we have
-	 * Radix: 2M and 1G
-	 * Hash: 16M and 16G
-	 */
-	if (radix_enabled()) {
-		if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
-			return -EINVAL;
-	} else {
-		if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
-			return -EINVAL;
-	}
-#endif
+	mmu_psize = check_and_get_huge_psize(shift);
+	if (mmu_psize < 0)
+		return false;
 
 	BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);
 
-	/* Return if huge page size has already been setup */
-	if (size_to_hstate(size))
-		return 0;
+	return true;
+}
+
+static int __init add_huge_page_size(unsigned long long size)
+{
+	int shift = __ffs(size);
+
+	if (!arch_hugetlb_valid_size((unsigned long)size))
+		return -EINVAL;
 
 	hugetlb_add_hstate(shift - PAGE_SHIFT);
-
 	return 0;
 }
 
-static int __init hugepage_setup_sz(char *str)
-{
-	unsigned long long size;
-
-	size = memparse(str, &str);
-
-	if (add_huge_page_size(size) != 0) {
-		hugetlb_bad_size();
-		pr_err("Invalid huge page size specified(%llu)\n", size);
-	}
-
-	return 1;
-}
-__setup("hugepagesz=", hugepage_setup_sz);
-
-struct kmem_cache *hugepte_cache;
 static int __init hugetlbpage_init(void)
 {
+	bool configured = false;
 	int psize;
 
 	if (hugetlb_disabled) {
@@ -671,10 +623,10 @@
 		return 0;
 	}
 
-#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx)
-	if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() &&
+	    !mmu_has_feature(MMU_FTR_16M_PAGE))
 		return -ENODEV;
-#endif
+
 	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
 		unsigned shift;
 		unsigned pdshift;
@@ -708,44 +660,23 @@
 		 * if we have pdshift and shift value same, we don't
 		 * use pgt cache for hugepd.
 		 */
-		if (pdshift > shift)
-			pgtable_cache_add(pdshift - shift, NULL);
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
-		else if (!hugepte_cache) {
-			/*
-			 * Create a kmem cache for hugeptes. The bottom bits in
-			 * the pte have size information encoded in them, so
-			 * align them to allow this
-			 */
-			hugepte_cache = kmem_cache_create("hugepte-cache",
-							  sizeof(pte_t),
-							  HUGEPD_SHIFT_MASK + 1,
-							  0, NULL);
-			if (hugepte_cache == NULL)
-				panic("%s: Unable to create kmem cache "
-				      "for hugeptes\n", __func__);
-
+		if (pdshift > shift) {
+			if (!IS_ENABLED(CONFIG_PPC_8xx))
+				pgtable_cache_add(pdshift - shift);
+		} else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) ||
+			   IS_ENABLED(CONFIG_PPC_8xx)) {
+			pgtable_cache_add(PTE_T_ORDER);
 		}
-#endif
+
+		configured = true;
 	}
 
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
-	/* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */
-	if (mmu_psize_defs[MMU_PAGE_4M].shift)
-		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
-	else if (mmu_psize_defs[MMU_PAGE_512K].shift)
-		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift;
-#else
-	/* Set default large page size. Currently, we pick 16M or 1M
-	 * depending on what is available
-	 */
-	if (mmu_psize_defs[MMU_PAGE_16M].shift)
-		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
-	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
-		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
-	else if (mmu_psize_defs[MMU_PAGE_2M].shift)
-		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
-#endif
+	if (configured) {
+		if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
+			hugetlbpage_init_default();
+	} else
+		pr_info("Failed to initialize. Disabling HugeTLB");
+
 	return 0;
 }
 
@@ -758,7 +689,7 @@
 
 	BUG_ON(!PageCompound(page));
 
-	for (i = 0; i < (1UL << compound_order(page)); i++) {
+	for (i = 0; i < compound_nr(page); i++) {
 		if (!PageHighMem(page)) {
 			__flush_dcache_icache(page_address(page+i));
 		} else {
@@ -769,151 +700,20 @@
 	}
 }
 
-#endif /* CONFIG_HUGETLB_PAGE */
-
-/*
- * We have 4 cases for pgds and pmds:
- * (1) invalid (all zeroes)
- * (2) pointer to next table, as normal; bottom 6 bits == 0
- * (3) leaf pte for huge page _PAGE_PTE set
- * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
- *
- * So long as we atomically load page table pointers we are safe against teardown,
- * we can follow the address down to the the page and take a ref on it.
- * This function need to be called with interrupts disabled. We use this variant
- * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
- */
-pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
-			bool *is_thp, unsigned *hpage_shift)
+void __init gigantic_hugetlb_cma_reserve(void)
 {
-	pgd_t pgd, *pgdp;
-	pud_t pud, *pudp;
-	pmd_t pmd, *pmdp;
-	pte_t *ret_pte;
-	hugepd_t *hpdp = NULL;
-	unsigned pdshift = PGDIR_SHIFT;
+	unsigned long order = 0;
 
-	if (hpage_shift)
-		*hpage_shift = 0;
-
-	if (is_thp)
-		*is_thp = false;
-
-	pgdp = pgdir + pgd_index(ea);
-	pgd = READ_ONCE(*pgdp);
-	/*
-	 * Always operate on the local stack value. This make sure the
-	 * value don't get updated by a parallel THP split/collapse,
-	 * page fault or a page unmap. The return pte_t * is still not
-	 * stable. So should be checked there for above conditions.
-	 */
-	if (pgd_none(pgd))
-		return NULL;
-	else if (pgd_huge(pgd)) {
-		ret_pte = (pte_t *) pgdp;
-		goto out;
-	} else if (is_hugepd(__hugepd(pgd_val(pgd))))
-		hpdp = (hugepd_t *)&pgd;
-	else {
+	if (radix_enabled())
+		order = PUD_SHIFT - PAGE_SHIFT;
+	else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift)
 		/*
-		 * Even if we end up with an unmap, the pgtable will not
-		 * be freed, because we do an rcu free and here we are
-		 * irq disabled
+		 * For pseries we do use ibm,expected#pages for reserving 16G pages.
 		 */
-		pdshift = PUD_SHIFT;
-		pudp = pud_offset(&pgd, ea);
-		pud = READ_ONCE(*pudp);
+		order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT;
 
-		if (pud_none(pud))
-			return NULL;
-		else if (pud_huge(pud)) {
-			ret_pte = (pte_t *) pudp;
-			goto out;
-		} else if (is_hugepd(__hugepd(pud_val(pud))))
-			hpdp = (hugepd_t *)&pud;
-		else {
-			pdshift = PMD_SHIFT;
-			pmdp = pmd_offset(&pud, ea);
-			pmd = READ_ONCE(*pmdp);
-			/*
-			 * A hugepage collapse is captured by pmd_none, because
-			 * it mark the pmd none and do a hpte invalidate.
-			 */
-			if (pmd_none(pmd))
-				return NULL;
-
-			if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
-				if (is_thp)
-					*is_thp = true;
-				ret_pte = (pte_t *) pmdp;
-				goto out;
-			}
-
-			if (pmd_huge(pmd)) {
-				ret_pte = (pte_t *) pmdp;
-				goto out;
-			} else if (is_hugepd(__hugepd(pmd_val(pmd))))
-				hpdp = (hugepd_t *)&pmd;
-			else
-				return pte_offset_kernel(&pmd, ea);
-		}
+	if (order) {
+		VM_WARN_ON(order < MAX_ORDER);
+		hugetlb_cma_reserve(order);
 	}
-	if (!hpdp)
-		return NULL;
-
-	ret_pte = hugepte_offset(*hpdp, ea, pdshift);
-	pdshift = hugepd_shift(*hpdp);
-out:
-	if (hpage_shift)
-		*hpage_shift = pdshift;
-	return ret_pte;
-}
-EXPORT_SYMBOL_GPL(__find_linux_pte);
-
-int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
-		unsigned long end, int write, struct page **pages, int *nr)
-{
-	unsigned long pte_end;
-	struct page *head, *page;
-	pte_t pte;
-	int refs;
-
-	pte_end = (addr + sz) & ~(sz-1);
-	if (pte_end < end)
-		end = pte_end;
-
-	pte = READ_ONCE(*ptep);
-
-	if (!pte_access_permitted(pte, write))
-		return 0;
-
-	/* hugepages are never "special" */
-	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
-	refs = 0;
-	head = pte_page(pte);
-
-	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
-	do {
-		VM_BUG_ON(compound_head(page) != head);
-		pages[*nr] = page;
-		(*nr)++;
-		page++;
-		refs++;
-	} while (addr += PAGE_SIZE, addr != end);
-
-	if (!page_cache_add_speculative(head, refs)) {
-		*nr -= refs;
-		return 0;
-	}
-
-	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-		/* Could be optimized better */
-		*nr -= refs;
-		while (refs--)
-			put_page(head);
-		return 0;
-	}
-
-	return 1;
 }
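
Note (not part of the patch): the new PTE_T_ORDER macro replaces the old hugepte-cache kmem cache with a PGT_CACHE() order derived purely from type sizes. Below is a small userspace sketch of that arithmetic only; pte_basic_t is mocked as a 64-bit value here (an assumption standing in for the real powerpc type), and nothing else from the kernel is used.

```c
/*
 * Sketch of the PTE_T_ORDER arithmetic from the patch above.
 * Assumption: pte_basic_t is 8 bytes, as on configurations where a PTE
 * is wider than a native pointer; the real definition lives in the
 * powerpc headers and may differ.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t pte_basic_t;	/* hypothetical stand-in for the kernel type */

#define PTE_T_ORDER	(__builtin_ffs(sizeof(pte_basic_t)) - \
			 __builtin_ffs(sizeof(void *)))

int main(void)
{
	/*
	 * __builtin_ffs(x) is 1 + log2(x) when x is a power of two, so the
	 * difference is log2(sizeof(pte_basic_t) / sizeof(void *)): 0 when a
	 * PTE and a pointer are the same width, 1 when the PTE is twice as
	 * wide (e.g. 8-byte PTEs on a 32-bit build).
	 */
	printf("PTE_T_ORDER = %d\n", (int)PTE_T_ORDER);
	return 0;
}
```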