2024-05-10 23fa18eaa71266feff7ba8d83022d9e1cc83c65a
kernel/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -2,21 +2,18 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
 #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
 
-#include <asm-generic/5level-fixup.h>
+#include <asm-generic/pgtable-nop4d.h>
 
 #ifndef __ASSEMBLY__
 #include <linux/mmdebug.h>
 #include <linux/bug.h>
+#include <linux/sizes.h>
 #endif
 
 /*
  * Common bits between hash and Radix page table
  */
 #define _PAGE_BIT_SWAP_TYPE 0
-
-#define _PAGE_NA 0
-#define _PAGE_RO 0
-#define _PAGE_USER 0
 
 #define _PAGE_EXEC 0x00001 /* execute permission */
 #define _PAGE_WRITE 0x00002 /* write access allowed */
@@ -36,11 +33,13 @@
 #define _RPAGE_SW1 0x00800
 #define _RPAGE_SW2 0x00400
 #define _RPAGE_SW3 0x00200
-#define _RPAGE_RSV1 0x1000000000000000UL
-#define _RPAGE_RSV2 0x0800000000000000UL
-#define _RPAGE_RSV3 0x0400000000000000UL
-#define _RPAGE_RSV4 0x0200000000000000UL
-#define _RPAGE_RSV5 0x00040UL
+#define _RPAGE_RSV1 0x00040UL
+
+#define _RPAGE_PKEY_BIT4 0x1000000000000000UL
+#define _RPAGE_PKEY_BIT3 0x0800000000000000UL
+#define _RPAGE_PKEY_BIT2 0x0400000000000000UL
+#define _RPAGE_PKEY_BIT1 0x0200000000000000UL
+#define _RPAGE_PKEY_BIT0 0x0100000000000000UL
 
 #define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */
 #define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */
@@ -62,13 +61,12 @@
  */
 #define _RPAGE_RPN0 0x01000
 #define _RPAGE_RPN1 0x02000
-#define _RPAGE_RPN44 0x0100000000000000UL
 #define _RPAGE_RPN43 0x0080000000000000UL
 #define _RPAGE_RPN42 0x0040000000000000UL
 #define _RPAGE_RPN41 0x0020000000000000UL
 
 /* Max physical address bit as per radix table */
-#define _RPAGE_PA_MAX 57
+#define _RPAGE_PA_MAX 56
 
 /*
  * Max physical address bit we will use for now.
@@ -94,7 +92,6 @@
 #define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */
 #define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */
 #define _PAGE_DEVMAP _RPAGE_SW1 /* software: ZONE_DEVICE page */
-#define __HAVE_ARCH_PTE_DEVMAP
 
 /*
  * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
@@ -123,10 +120,6 @@
 #define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \
                          _PAGE_RW | _PAGE_EXEC)
 /*
- * No page size encoding in the linux PTE
- */
-#define _PAGE_PSIZE 0
-/*
  * _PAGE_CHG_MASK masks of bits that are to be preserved across
  * pgprot changes
  */
@@ -134,22 +127,13 @@
                        _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
                        _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
 
-#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
-                    H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
-/*
- * Mask of bits returned by pte_pgprot()
- */
-#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
-                        H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
-                        _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
-                        _PAGE_SOFT_DIRTY | H_PTE_PKEY)
 /*
  * We define 2 sets of base prot bits, one for basic pages (ie,
  * cacheable kernel and user pages) and one for non cacheable
  * pages. We always set _PAGE_COHERENT when SMP is enabled or
  * the processor might need it for DMA coherency.
  */
-#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
 #define _PAGE_BASE (_PAGE_BASE_NC)
 
 /* Permission masks used to generate the __P and __S table,
@@ -159,8 +143,6 @@
  * Write permissions imply read permissions for now (we could make write-only
  * pages on BookE but we don't bother for now). Execute permission control is
  * possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
  */
 #define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
 #define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW)
@@ -169,24 +151,6 @@
 #define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 #define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ)
 #define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
-
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_X
-#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY_X
-#define __P111 PAGE_COPY_X
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_X
-#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED_X
-#define __S111 PAGE_SHARED_X
 
 /* Permission masks used for kernel mappings */
 #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
@@ -287,7 +251,7 @@
 /* Bits to mask out from a PUD to get to the PMD page */
 #define PUD_MASKED_BITS 0xc0000000000000ffUL
 /* Bits to mask out from a PGD to get to the PUD page */
-#define PGD_MASKED_BITS 0xc0000000000000ffUL
+#define P4D_MASKED_BITS 0xc0000000000000ffUL
 
 /*
  * Used as an indicator for rcu callback functions
@@ -309,19 +273,34 @@
 #define VMALLOC_START __vmalloc_start
 #define VMALLOC_END __vmalloc_end
 
+static inline unsigned int ioremap_max_order(void)
+{
+        if (radix_enabled())
+                return PUD_SHIFT;
+        return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
+}
+#define IOREMAP_MAX_ORDER ioremap_max_order()
+
 extern unsigned long __kernel_virt_start;
-extern unsigned long __kernel_virt_size;
 extern unsigned long __kernel_io_start;
+extern unsigned long __kernel_io_end;
 #define KERN_VIRT_START __kernel_virt_start
-#define KERN_VIRT_SIZE __kernel_virt_size
 #define KERN_IO_START __kernel_io_start
+#define KERN_IO_END __kernel_io_end
+
 extern struct page *vmemmap;
-extern unsigned long ioremap_bot;
 extern unsigned long pci_io_base;
 #endif /* __ASSEMBLY__ */
 
 #include <asm/book3s/64/hash.h>
 #include <asm/book3s/64/radix.h>
+
+#if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS
+#define MAX_PHYSMEM_BITS H_MAX_PHYSMEM_BITS
+#else
+#define MAX_PHYSMEM_BITS R_MAX_PHYSMEM_BITS
+#endif
+
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/pgtable-64k.h>
@@ -331,8 +310,7 @@
 
 #include <asm/barrier.h>
 /*
- * The second half of the kernel virtual space is used for IO mappings,
- * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * IO space itself carved into the PIO region (ISA and PHB IO space) and
  * the ioremap space
 *
 * ISA_IO_BASE = KERN_IO_START, 64K reserved area
@@ -345,7 +323,9 @@
 #define PHB_IO_BASE (ISA_IO_END)
 #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
 #define IOREMAP_BASE (PHB_IO_END)
-#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
+#define IOREMAP_START (ioremap_bot)
+#define IOREMAP_END (KERN_IO_END - FIXADDR_SIZE)
+#define FIXADDR_SIZE SZ_32M
 
 /* Advertise special mapping type for AGP */
 #define HAVE_PAGE_AGP
@@ -461,6 +441,7 @@
         pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
 }
 
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
                                            unsigned long addr, pte_t *ptep)
 {
@@ -519,7 +500,11 @@
         return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL));
 }
 
-static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+static inline bool pte_exec(pte_t pte)
+{
+        return !!(pte_raw(pte) & cpu_to_be64(_PAGE_EXEC));
+}
+
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline bool pte_soft_dirty(pte_t pte)
@@ -529,12 +514,12 @@
 
 static inline pte_t pte_mksoft_dirty(pte_t pte)
 {
-        return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
+        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SOFT_DIRTY));
 }
 
 static inline pte_t pte_clear_soft_dirty(pte_t pte)
 {
-        return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY);
+        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SOFT_DIRTY));
 }
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
@@ -555,7 +540,7 @@
          */
         VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) !=
                   cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED));
-        return __pte(pte_val(pte) & ~_PAGE_PRIVILEGED);
+        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
 }
 
 #define pte_clear_savedwrite pte_clear_savedwrite
@@ -565,16 +550,22 @@
          * Used by KSM subsystem to make a protnone pte readonly.
          */
         VM_BUG_ON(!pte_protnone(pte));
-        return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
+        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
 }
 #else
 #define pte_clear_savedwrite pte_clear_savedwrite
 static inline pte_t pte_clear_savedwrite(pte_t pte)
 {
         VM_WARN_ON(1);
-        return __pte(pte_val(pte) & ~_PAGE_WRITE);
+        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
 }
 #endif /* CONFIG_NUMA_BALANCING */
+
+static inline bool pte_hw_valid(pte_t pte)
+{
+        return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE)) ==
+               cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
+}
 
 static inline int pte_present(pte_t pte)
 {
584575 * invalid during ptep_set_access_flags. Hence we look for _PAGE_INVALID
585576 * if we find _PAGE_PRESENT cleared.
586577 */
587
- return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
578
+
579
+ if (pte_hw_valid(pte))
580
+ return true;
581
+ return (pte_raw(pte) & cpu_to_be64(_PAGE_INVALID | _PAGE_PTE)) ==
582
+ cpu_to_be64(_PAGE_INVALID | _PAGE_PTE);
588583 }
589584
590585 #ifdef CONFIG_PPC_MEM_KEYS
@@ -596,25 +591,22 @@
 }
 #endif /* CONFIG_PPC_MEM_KEYS */
 
+static inline bool pte_user(pte_t pte)
+{
+        return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
 #define pte_access_permitted pte_access_permitted
 static inline bool pte_access_permitted(pte_t pte, bool write)
 {
-        unsigned long pteval = pte_val(pte);
-        /* Also check for pte_user */
-        unsigned long clear_pte_bits = _PAGE_PRIVILEGED;
         /*
          * _PAGE_READ is needed for any access and will be
          * cleared for PROT_NONE
          */
-        unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ;
-
-        if (write)
-                need_pte_bits |= _PAGE_WRITE;
-
-        if ((pteval & need_pte_bits) != need_pte_bits)
+        if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
                 return false;
 
-        if ((pteval & clear_pte_bits) == clear_pte_bits)
+        if (write && !pte_write(pte))
                 return false;
 
         return arch_pte_access_permitted(pte_val(pte), write, 0);
@@ -629,8 +621,10 @@
  */
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
 {
-        return __pte((((pte_basic_t)(pfn) << PAGE_SHIFT) & PTE_RPN_MASK) |
-                     pgprot_val(pgprot));
+        VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
+        VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
+
+        return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
 }
 
 static inline unsigned long pte_pfn(pte_t pte)
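pfn_pte() now asserts that the pfn neither overflows the 64-bit pte when shifted nor escapes the RPN field, and it sets _PAGE_PTE itself instead of silently masking. A sketch of the two checks in isolation; PAGE_SHIFT and PTE_RPN_MASK below are illustrative placeholders, not the header's real derivation (which comes from _PAGE_PA_MAX).

    /* The two range checks pfn_pte() makes, runnable in user space. */
    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SHIFT   12                    /* assumed: 4K pages */
    #define PTE_RPN_MASK 0x00fffffffffff000UL  /* placeholder mask */

    static int pfn_checks_pass(uint64_t pfn)
    {
            if (pfn >> (64 - PAGE_SHIFT))            /* shift would overflow */
                    return 0;
            if ((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK) /* outside the RPN field */
                    return 0;
            return 1;
    }

    int main(void)
    {
            assert(pfn_checks_pass(0x12345));
            assert(!pfn_checks_pass(1UL << 53));  /* overflows the 64-bit pte */
            return 0;
    }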
@@ -643,17 +637,27 @@
 {
         if (unlikely(pte_savedwrite(pte)))
                 return pte_clear_savedwrite(pte);
-        return __pte(pte_val(pte) & ~_PAGE_WRITE);
+        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
+}
+
+static inline pte_t pte_exprotect(pte_t pte)
+{
+        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_EXEC));
 }
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-        return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_DIRTY));
 }
 
 static inline pte_t pte_mkold(pte_t pte)
 {
-        return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_ACCESSED));
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
 }
 
 static inline pte_t pte_mkwrite(pte_t pte)
@@ -661,22 +665,22 @@
         /*
          * write implies read, hence set both
          */
-        return __pte(pte_val(pte) | _PAGE_RW);
+        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_RW));
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-        return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_DIRTY | _PAGE_SOFT_DIRTY));
 }
 
 static inline pte_t pte_mkyoung(pte_t pte)
 {
-        return __pte(pte_val(pte) | _PAGE_ACCESSED);
+        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_ACCESSED));
 }
 
 static inline pte_t pte_mkspecial(pte_t pte)
 {
-        return __pte(pte_val(pte) | _PAGE_SPECIAL);
+        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL));
 }
 
 static inline pte_t pte_mkhuge(pte_t pte)
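These helpers now mask the raw big-endian pte image via pte_raw()/__pte_raw() instead of round-tripping through pte_val(). That is safe because byte swapping commutes with bitwise AND/OR/NOT, so cpu_to_be64(flag) folds to a compile-time constant and the pte itself never needs swapping. A quick user-space check of that identity, using the GCC/Clang bswap builtin and arbitrary values:

    /* op(bswap(x), bswap(flag)) == bswap(op(x, flag)) for bitwise ops. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t pte  = 0x80000000000001a5UL;  /* arbitrary pte image */
            uint64_t flag = 0x0000000000000002UL;  /* e.g. _PAGE_WRITE */

            assert((__builtin_bswap64(pte) | __builtin_bswap64(flag)) ==
                   __builtin_bswap64(pte | flag));
            assert((__builtin_bswap64(pte) & ~__builtin_bswap64(flag)) ==
                   __builtin_bswap64(pte & ~flag));
            return 0;
    }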
@@ -686,7 +690,17 @@
 
 static inline pte_t pte_mkdevmap(pte_t pte)
 {
-        return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
+        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP));
+}
+
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
 }
 
 /*
@@ -705,12 +719,8 @@
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
         /* FIXME!! check whether this needs to be a conditional */
-        return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
-
-static inline bool pte_user(pte_t pte)
-{
-        return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
+        return __pte_raw((pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK)) |
+                         cpu_to_be64(pgprot_val(newprot)));
 }
 
 /* Encode and de-code a swap entry */
@@ -723,9 +733,7 @@
         BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
         BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \
 } while (0)
-/*
- * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
- */
+
 #define SWP_TYPE_BITS 5
 #define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \
                        & ((1UL << SWP_TYPE_BITS) - 1))
@@ -741,6 +749,8 @@
  */
 #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
 #define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE)
+#define __pmd_to_swp_entry(pmd) (__pte_to_swp_entry(pmd_pte(pmd)))
+#define __swp_entry_to_pmd(x) (pte_pmd(__swp_entry_to_pte(x)))
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
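For reference, a tiny runnable model of the swap-type field used by __swp_type() above: the type occupies SWP_TYPE_BITS (5) bits starting at _PAGE_BIT_SWAP_TYPE (0). The offset encoding is not shown in these hunks, so it is deliberately left out of the sketch.

    /* Check of the 5-bit swap-type packing from the macros above. */
    #include <assert.h>
    #include <stdint.h>

    #define _PAGE_BIT_SWAP_TYPE 0
    #define SWP_TYPE_BITS       5

    static unsigned int swp_type(uint64_t val)
    {
            return (val >> _PAGE_BIT_SWAP_TYPE) & ((1UL << SWP_TYPE_BITS) - 1);
    }

    int main(void)
    {
            uint64_t val = 19UL << _PAGE_BIT_SWAP_TYPE;  /* type 19 */

            assert(swp_type(val) == 19);
            assert(swp_type(31) == 31 && swp_type(32) == 0);  /* 5-bit field */
            return 0;
    }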
@@ -751,7 +761,7 @@
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
 {
-        return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+        return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
 }
 
 static inline bool pte_swp_soft_dirty(pte_t pte)
@@ -761,7 +771,7 @@
 
 static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 {
-        return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
+        return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY));
 }
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
@@ -813,12 +823,20 @@
 static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
                                 pte_t *ptep, pte_t pte, int percpu)
 {
+
+        VM_WARN_ON(!(pte_raw(pte) & cpu_to_be64(_PAGE_PTE)));
+        /*
+         * Keep the _PAGE_PTE added till we are sure we handle _PAGE_PTE
+         * in all the callers.
+         */
+        pte = __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
+
         if (radix_enabled())
                 return radix__set_pte_at(mm, addr, ptep, pte, percpu);
         return hash__set_pte_at(mm, addr, ptep, pte, percpu);
 }
 
-#define _PAGE_CACHE_CTL (_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
+#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
 
 #define pgprot_noncached pgprot_noncached
 static inline pgprot_t pgprot_noncached(pgprot_t prot)
@@ -850,21 +868,23 @@
  */
 static inline bool pte_ci(pte_t pte)
 {
-        unsigned long pte_v = pte_val(pte);
+        __be64 pte_v = pte_raw(pte);
 
-        if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
-            ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
+        if (((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_TOLERANT)) ||
+            ((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_NON_IDEMPOTENT)))
                 return true;
         return false;
 }
 
-static inline void pmd_set(pmd_t *pmdp, unsigned long val)
-{
-        *pmdp = __pmd(val);
-}
-
 static inline void pmd_clear(pmd_t *pmdp)
 {
+        if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
+                /*
+                 * Don't use this if we can possibly have a hash page table
+                 * entry mapping this.
+                 */
+                WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
+        }
         *pmdp = __pmd(0);
 }
 
@@ -875,8 +895,33 @@
 
 static inline int pmd_present(pmd_t pmd)
 {
+        /*
+         * A pmd is considered present if _PAGE_PRESENT is set.
+         * We also need to consider the pmd present which is marked
+         * invalid during a split. Hence we look for _PAGE_INVALID
+         * if we find _PAGE_PRESENT cleared.
+         */
+        if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
+                return true;
 
-        return !pmd_none(pmd);
+        return false;
+}
+
+static inline int pmd_is_serializing(pmd_t pmd)
+{
+        /*
+         * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear
+         * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate).
+         *
+         * This condition may also occur when flushing a pmd while flushing
+         * it (see ptep_modify_prot_start), so callers must ensure this
+         * case is fine as well.
+         */
+        if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) ==
+            cpu_to_be64(_PAGE_INVALID))
+                return true;
+
+        return false;
 }
 
 static inline int pmd_bad(pmd_t pmd)
@@ -886,13 +931,15 @@
                 return hash__pmd_bad(pmd);
 }
 
-static inline void pud_set(pud_t *pudp, unsigned long val)
-{
-        *pudp = __pud(val);
-}
-
 static inline void pud_clear(pud_t *pudp)
 {
+        if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
+                /*
+                 * Don't use this if we can possibly have a hash page table
+                 * entry mapping this.
+                 */
+                WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
+        }
         *pudp = __pud(0);
 }
 
@@ -903,7 +950,7 @@
 
 static inline int pud_present(pud_t pud)
 {
-        return !pud_none(pud);
+        return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
 }
 
 extern struct page *pud_page(pud_t pud);
@@ -932,99 +979,66 @@
         return pte_access_permitted(pud_pte(pud), write);
 }
 
-#define pgd_write(pgd) pte_write(pgd_pte(pgd))
-static inline void pgd_set(pgd_t *pgdp, unsigned long val)
+#define __p4d_raw(x) ((p4d_t) { __pgd_raw(x) })
+static inline __be64 p4d_raw(p4d_t x)
 {
-        *pgdp = __pgd(val);
+        return pgd_raw(x.pgd);
 }
 
-static inline void pgd_clear(pgd_t *pgdp)
+#define p4d_write(p4d) pte_write(p4d_pte(p4d))
+
+static inline void p4d_clear(p4d_t *p4dp)
 {
-        *pgdp = __pgd(0);
+        *p4dp = __p4d(0);
 }
 
-static inline int pgd_none(pgd_t pgd)
+static inline int p4d_none(p4d_t p4d)
 {
-        return !pgd_raw(pgd);
+        return !p4d_raw(p4d);
 }
 
-static inline int pgd_present(pgd_t pgd)
+static inline int p4d_present(p4d_t p4d)
 {
-        return !pgd_none(pgd);
+        return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT));
 }
 
-static inline pte_t pgd_pte(pgd_t pgd)
+static inline pte_t p4d_pte(p4d_t p4d)
 {
-        return __pte_raw(pgd_raw(pgd));
+        return __pte_raw(p4d_raw(p4d));
 }
 
-static inline pgd_t pte_pgd(pte_t pte)
+static inline p4d_t pte_p4d(pte_t pte)
 {
-        return __pgd_raw(pte_raw(pte));
+        return __p4d_raw(pte_raw(pte));
 }
 
-static inline int pgd_bad(pgd_t pgd)
+static inline int p4d_bad(p4d_t p4d)
 {
         if (radix_enabled())
-                return radix__pgd_bad(pgd);
-        return hash__pgd_bad(pgd);
+                return radix__p4d_bad(p4d);
+        return hash__p4d_bad(p4d);
 }
 
-#define pgd_access_permitted pgd_access_permitted
-static inline bool pgd_access_permitted(pgd_t pgd, bool write)
+#define p4d_access_permitted p4d_access_permitted
+static inline bool p4d_access_permitted(p4d_t p4d, bool write)
 {
-        return pte_access_permitted(pgd_pte(pgd), write);
+        return pte_access_permitted(p4d_pte(p4d), write);
 }
 
-extern struct page *pgd_page(pgd_t pgd);
+extern struct page *p4d_page(p4d_t p4d);
 
 /* Pointers in the page table tree are physical addresses */
 #define __pgtable_ptr_val(ptr) __pa(ptr)
 
-#define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS)
-#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS)
-#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS)
-
-static inline unsigned long pgd_index(unsigned long address)
+static inline pud_t *p4d_pgtable(p4d_t p4d)
 {
-        return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
+        return (pud_t *)__va(p4d_val(p4d) & ~P4D_MASKED_BITS);
 }
 
-static inline unsigned long pud_index(unsigned long address)
+static inline pmd_t *pud_pgtable(pud_t pud)
 {
-        return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+        return (pmd_t *)__va(pud_val(pud) & ~PUD_MASKED_BITS);
 }
-
-static inline unsigned long pmd_index(unsigned long address)
-{
-        return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
-}
-
-static inline unsigned long pte_index(unsigned long address)
-{
-        return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
-}
-
-/*
- * Find an entry in a page-table-directory. We combine the address region
- * (the high order N bits) and the pgd portion of the address.
- */
-
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
-
-#define pud_offset(pgdp, addr) \
-        (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr))
-#define pmd_offset(pudp,addr) \
-        (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
-#define pte_offset_kernel(dir,addr) \
-        (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
-
-#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while(0)
-
-/* to find an entry in a kernel page-table-directory */
-/* This now only contains the vmalloc pages */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
 #define pte_ERROR(e) \
         pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
@@ -1035,18 +1049,19 @@
 #define pgd_ERROR(e) \
         pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
-static inline int map_kernel_page(unsigned long ea, unsigned long pa,
-                                  unsigned long flags)
+static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
 {
         if (radix_enabled()) {
 #if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM)
                 unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
                 WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
 #endif
-                return radix__map_kernel_page(ea, pa, __pgprot(flags), PAGE_SIZE);
+                return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE);
         }
-        return hash__map_kernel_page(ea, pa, flags);
+        return hash__map_kernel_page(ea, pa, prot);
 }
+
+void unmap_kernel_page(unsigned long va);
 
 static inline int __meminit vmemmap_create_mapping(unsigned long start,
                                                    unsigned long page_size,
@@ -1097,6 +1112,12 @@
 #define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd))
 #define pmd_mksoft_dirty(pmd) pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
 #define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define pmd_swp_mksoft_dirty(pmd) pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_swp_soft_dirty(pmd) pte_swp_soft_dirty(pmd_pte(pmd))
+#define pmd_swp_clear_soft_dirty(pmd) pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
+#endif
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -1113,6 +1134,19 @@
 #define pmd_access_permitted pmd_access_permitted
 static inline bool pmd_access_permitted(pmd_t pmd, bool write)
 {
+        /*
+         * pmdp_invalidate sets this combination (which is not caught by
+         * !pte_present() check in pte_access_permitted), to prevent
+         * lock-free lookups, as part of the serialize_against_pte_lookup()
+         * synchronisation.
+         *
+         * This also catches the case where the PTE's hardware PRESENT bit is
+         * cleared while TLB is flushed, which is suboptimal but should not
+         * be frequent.
+         */
+        if (pmd_is_serializing(pmd))
+                return false;
+
         return pte_access_permitted(pmd_pte(pmd), write);
 }
 
@@ -1122,8 +1156,11 @@
 extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
 extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                        pmd_t *pmdp, pmd_t pmd);
-extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
-                                 pmd_t *pmd);
+static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+                                        unsigned long addr, pmd_t *pmd)
+{
+}
+
 extern int hash__has_transparent_hugepage(void);
 static inline int has_transparent_hugepage(void)
 {
@@ -1142,15 +1179,15 @@
         return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
 }
 
+/*
+ * returns true for pmd migration entries, THP, devmap, hugetlb,
+ * but is compile-time dependent on the THP config
+ */
 static inline int pmd_large(pmd_t pmd)
 {
         return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
 }
 
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-        return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
-}
 /*
  * For radix we should always find H_PAGE_HASHPTE zero. Hence
  * the below will work for radix too
@@ -1176,8 +1213,22 @@
         pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
 }
 
+/*
+ * Only returns true for a THP. False for pmd migration entry.
+ * We also need to return true when we come across a pmd that is
+ * in between a thp split. While splitting THP, we mark the pmd
+ * invalid (pmdp_invalidate()) before we set it with pte page
+ * address. A pmd_trans_huge() check against a pmd entry during that time
+ * should return true.
+ * We should not call this on a hugetlb entry. We should check for HugeTLB
+ * entry using vma->vm_flags
+ * The page table walk rule is explained in Documentation/vm/transhuge.rst
+ */
 static inline int pmd_trans_huge(pmd_t pmd)
 {
+        if (!pmd_present(pmd))
+                return false;
+
         if (radix_enabled())
                 return radix__pmd_trans_huge(pmd);
         return hash__pmd_trans_huge(pmd);
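Per the comments above, pmd_large() stays true for migration entries (swap pmds carry _PAGE_PTE via __swp_entry_to_pmd()), while pmd_trans_huge() now bails as soon as pmd_present() fails. A hedged sketch of that distinction: the radix__/hash__ backend checks are not shown in this diff, so a present THP is modelled here as a bare _PAGE_PTE test, and _PAGE_INVALID is again assumed to be _RPAGE_SW0.

    /* pmd_large() vs pmd_trans_huge() for the three pmd shapes above. */
    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define _PAGE_PTE     0x4000000000000000UL
    #define _PAGE_PRESENT 0x8000000000000000UL
    #define _PAGE_INVALID 0x2000000000000000UL  /* assumed */

    static bool pmd_present(uint64_t pmd)
    {
            return pmd & (_PAGE_PRESENT | _PAGE_INVALID);
    }

    static bool pmd_large(uint64_t pmd)
    {
            return pmd & _PAGE_PTE;
    }

    static bool pmd_trans_huge(uint64_t pmd)
    {
            if (!pmd_present(pmd))
                    return false;
            return pmd & _PAGE_PTE;  /* simplified stand-in for the backends */
    }

    int main(void)
    {
            uint64_t thp       = _PAGE_PRESENT | _PAGE_PTE;
            uint64_t migration = _PAGE_PTE;                  /* swap pmd */
            uint64_t splitting = _PAGE_INVALID | _PAGE_PTE;  /* mid-split */

            assert(pmd_trans_huge(thp) && pmd_large(thp));
            assert(!pmd_trans_huge(migration) && pmd_large(migration));
            assert(pmd_trans_huge(splitting));
            return 0;
    }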
@@ -1224,6 +1275,11 @@
         return hash__pmdp_collapse_flush(vma, address, pmdp);
 }
 #define pmdp_collapse_flush pmdp_collapse_flush
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
+pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
+                                   unsigned long addr,
+                                   pmd_t *pmdp, int full);
 
 #define __HAVE_ARCH_PGTABLE_DEPOSIT
 static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
@@ -1289,7 +1345,7 @@
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline const int pud_pfn(pud_t pud)
+static inline int pud_pfn(pud_t pud)
 {
         /*
          * Currently all calls to pud_pfn() are gated around a pud_devmap()
@@ -1299,6 +1355,48 @@
         BUILD_BUG();
         return 0;
 }
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+                             pte_t *, pte_t, pte_t);
+
+/*
+ * Returns true for a R -> RW upgrade of pte
+ */
+static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
+{
+        if (!(old_val & _PAGE_READ))
+                return false;
+
+        if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
+                return true;
+
+        return false;
+}
+
+/*
+ * Like pmd_huge() and pmd_large(), but works regardless of config options
+ */
+#define pmd_is_leaf pmd_is_leaf
+#define pmd_leaf pmd_is_leaf
+static inline bool pmd_is_leaf(pmd_t pmd)
+{
+        return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
+}
+
+#define pud_is_leaf pud_is_leaf
+#define pud_leaf pud_is_leaf
+static inline bool pud_is_leaf(pud_t pud)
+{
+        return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
+}
+
+#define p4d_is_leaf p4d_is_leaf
+#define p4d_leaf p4d_is_leaf
+static inline bool p4d_is_leaf(p4d_t p4d)
+{
+        return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE));
+}
 
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
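A quick runnable check of is_pte_rw_upgrade() above: only a pte that was readable and non-writable and becomes writable counts as an upgrade. _PAGE_WRITE appears in this diff; _PAGE_READ's value (0x00004) is taken from the full header and is an assumption here.

    /* Only R -> RW transitions report true. */
    #include <assert.h>
    #include <stdbool.h>

    #define _PAGE_WRITE 0x00002UL
    #define _PAGE_READ  0x00004UL  /* assumed from the full header */

    static bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
    {
            if (!(old_val & _PAGE_READ))
                    return false;
            if (!(old_val & _PAGE_WRITE) && (new_val & _PAGE_WRITE))
                    return true;
            return false;
    }

    int main(void)
    {
            assert(is_pte_rw_upgrade(_PAGE_READ, _PAGE_READ | _PAGE_WRITE));
            assert(!is_pte_rw_upgrade(0, _PAGE_WRITE));                        /* was not readable */
            assert(!is_pte_rw_upgrade(_PAGE_READ | _PAGE_WRITE, _PAGE_READ)); /* downgrade */
            return 0;
    }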