hc
2024-01-03 2f7c68cb55ecb7331f2381deb497c27155f32faf
kernel/arch/x86/include/asm/pgtable.h
....@@ -23,12 +23,16 @@
2323
2424 #ifndef __ASSEMBLY__
2525 #include <asm/x86_init.h>
26
+#include <asm/fpu/xstate.h>
27
+#include <asm/fpu/api.h>
28
+#include <asm-generic/pgtable_uffd.h>
2629
2730 extern pgd_t early_top_pgt[PTRS_PER_PGD];
28
-int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
31
+bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
2932
30
-void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
31
-void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
33
+void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
34
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
35
+ bool user);
3236 void ptdump_walk_pgd_level_checkwx(void);
3337 void ptdump_walk_user_pgd_level_checkwx(void);
3438
....@@ -46,7 +50,7 @@
4650 */
4751 extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
4852 __visible;
49
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
53
+#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
5054
5155 extern spinlock_t pgd_lock;
5256 extern struct list_head pgd_list;
....@@ -55,11 +59,10 @@
5559
5660 extern pmdval_t early_pmd_flags;
5761
58
-#ifdef CONFIG_PARAVIRT
62
+#ifdef CONFIG_PARAVIRT_XXL
5963 #include <asm/paravirt.h>
60
-#else /* !CONFIG_PARAVIRT */
64
+#else /* !CONFIG_PARAVIRT_XXL */
6165 #define set_pte(ptep, pte) native_set_pte(ptep, pte)
62
-#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
6366
6467 #define set_pte_atomic(ptep, pte) \
6568 native_set_pte_atomic(ptep, pte)
....@@ -112,8 +115,7 @@
112115 #define __pte(x) native_make_pte(x)
113116
114117 #define arch_end_context_switch(prev) do {} while(0)
115
-
116
-#endif /* CONFIG_PARAVIRT */
118
+#endif /* CONFIG_PARAVIRT_XXL */
117119
118120 /*
119121 * The following only work if pte_present() is true.
....@@ -128,14 +130,29 @@
128130 static inline u32 read_pkru(void)
129131 {
130132 if (boot_cpu_has(X86_FEATURE_OSPKE))
131
- return __read_pkru();
133
+ return rdpkru();
132134 return 0;
133135 }
134136
135137 static inline void write_pkru(u32 pkru)
136138 {
137
- if (boot_cpu_has(X86_FEATURE_OSPKE))
138
- __write_pkru(pkru);
139
+ struct pkru_state *pk;
140
+
141
+ if (!boot_cpu_has(X86_FEATURE_OSPKE))
142
+ return;
143
+
144
+ pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU);
145
+
146
+ /*
147
+ * The PKRU value in xstate needs to be in sync with the value that is
148
+ * written to the CPU. The FPU restore on return to userland would
149
+ * otherwise load the previous value again.
150
+ */
151
+ fpregs_lock();
152
+ if (pk)
153
+ pk->pkru = pkru;
154
+ __write_pkru(pkru);
155
+ fpregs_unlock();
139156 }
140157
141158 static inline int pte_young(pte_t pte)
....@@ -223,6 +240,7 @@
223240 return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
224241 }
225242
243
+#define p4d_leaf p4d_large
226244 static inline int p4d_large(p4d_t p4d)
227245 {
228246 /* No 512 GiB pages yet */
....@@ -231,6 +249,7 @@
231249
232250 #define pte_page(pte) pfn_to_page(pte_pfn(pte))
233251
252
+#define pmd_leaf pmd_large
234253 static inline int pmd_large(pmd_t pte)
235254 {
236255 return pmd_flags(pte) & _PAGE_PSE;
....@@ -256,7 +275,7 @@
256275 return boot_cpu_has(X86_FEATURE_PSE);
257276 }
258277
259
-#ifdef __HAVE_ARCH_PTE_DEVMAP
278
+#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
260279 static inline int pmd_devmap(pmd_t pmd)
261280 {
262281 return !!(pmd_val(pmd) & _PAGE_DEVMAP);
....@@ -294,6 +313,23 @@
294313
295314 return native_make_pte(v & ~clear);
296315 }
316
+
317
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
318
+static inline int pte_uffd_wp(pte_t pte)
319
+{
320
+ return pte_flags(pte) & _PAGE_UFFD_WP;
321
+}
322
+
323
+static inline pte_t pte_mkuffd_wp(pte_t pte)
324
+{
325
+ return pte_set_flags(pte, _PAGE_UFFD_WP);
326
+}
327
+
328
+static inline pte_t pte_clear_uffd_wp(pte_t pte)
329
+{
330
+ return pte_clear_flags(pte, _PAGE_UFFD_WP);
331
+}
332
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
297333
298334 static inline pte_t pte_mkclean(pte_t pte)
299335 {
....@@ -373,6 +409,23 @@
373409
374410 return native_make_pmd(v & ~clear);
375411 }
412
+
413
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
414
+static inline int pmd_uffd_wp(pmd_t pmd)
415
+{
416
+ return pmd_flags(pmd) & _PAGE_UFFD_WP;
417
+}
418
+
419
+static inline pmd_t pmd_mkuffd_wp(pmd_t pmd)
420
+{
421
+ return pmd_set_flags(pmd, _PAGE_UFFD_WP);
422
+}
423
+
424
+static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd)
425
+{
426
+ return pmd_clear_flags(pmd, _PAGE_UFFD_WP);
427
+}
428
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
376429
377430 static inline pmd_t pmd_mkold(pmd_t pmd)
378431 {
....@@ -571,16 +624,10 @@
571624 return __pud(pfn | check_pgprot(pgprot));
572625 }
573626
574
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
627
+static inline pmd_t pmd_mkinvalid(pmd_t pmd)
575628 {
576629 return pfn_pmd(pmd_pfn(pmd),
577630 __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
578
-}
579
-
580
-static inline pud_t pud_mknotpresent(pud_t pud)
581
-{
582
- return pfn_pud(pud_pfn(pud),
583
- __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
584631 }
585632
586633 static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
....@@ -720,7 +767,7 @@
720767 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
721768 }
722769
723
-#ifdef __HAVE_ARCH_PTE_DEVMAP
770
+#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
724771 static inline int pte_devmap(pte_t a)
725772 {
726773 return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP;
....@@ -754,7 +801,7 @@
754801 #ifdef CONFIG_NUMA_BALANCING
755802 /*
756803 * These work without NUMA balancing but the kernel does not care. See the
757
- * comment in include/asm-generic/pgtable.h
804
+ * comment in include/linux/pgtable.h
758805 */
759806 static inline int pte_protnone(pte_t pte)
760807 {
....@@ -789,17 +836,6 @@
789836 #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
790837
791838 /*
792
- * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
793
- *
794
- * this macro returns the index of the entry in the pmd page which would
795
- * control the given virtual address
796
- */
797
-static inline unsigned long pmd_index(unsigned long address)
798
-{
799
- return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
800
-}
801
-
802
-/*
803839 * Conversion functions: convert a page and protection to a page entry,
804840 * and a page entry and page directory to the page they refer to.
805841 *
....@@ -807,22 +843,6 @@
807843 * to linux/mm.h:page_to_nid())
808844 */
809845 #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
810
-
811
-/*
812
- * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
813
- *
814
- * this function returns the index of the entry in the pte page which would
815
- * control the given virtual address
816
- */
817
-static inline unsigned long pte_index(unsigned long address)
818
-{
819
- return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
820
-}
821
-
822
-static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
823
-{
824
- return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
825
-}
826846
827847 static inline int pmd_bad(pmd_t pmd)
828848 {
....@@ -845,9 +865,9 @@
845865 return pud_flags(pud) & _PAGE_PRESENT;
846866 }
847867
848
-static inline unsigned long pud_page_vaddr(pud_t pud)
868
+static inline pmd_t *pud_pgtable(pud_t pud)
849869 {
850
- return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
870
+ return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud));
851871 }
852872
853873 /*
....@@ -856,12 +876,7 @@
856876 */
857877 #define pud_page(pud) pfn_to_page(pud_pfn(pud))
858878
859
-/* Find an entry in the second-level page table.. */
860
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
861
-{
862
- return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
863
-}
864
-
879
+#define pud_leaf pud_large
865880 static inline int pud_large(pud_t pud)
866881 {
867882 return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
....@@ -873,16 +888,12 @@
873888 return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
874889 }
875890 #else
891
+#define pud_leaf pud_large
876892 static inline int pud_large(pud_t pud)
877893 {
878894 return 0;
879895 }
880896 #endif /* CONFIG_PGTABLE_LEVELS > 2 */
881
-
882
-static inline unsigned long pud_index(unsigned long address)
883
-{
884
- return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
885
-}
886897
887898 #if CONFIG_PGTABLE_LEVELS > 3
888899 static inline int p4d_none(p4d_t p4d)
....@@ -895,9 +906,9 @@
895906 return p4d_flags(p4d) & _PAGE_PRESENT;
896907 }
897908
898
-static inline unsigned long p4d_page_vaddr(p4d_t p4d)
909
+static inline pud_t *p4d_pgtable(p4d_t p4d)
899910 {
900
- return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
911
+ return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
901912 }
902913
903914 /*
....@@ -905,12 +916,6 @@
905916 * linux/mmzone.h's __section_mem_map_addr() definition:
906917 */
907918 #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
908
-
909
-/* Find an entry in the third-level page table.. */
910
-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
911
-{
912
- return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
913
-}
914919
915920 static inline int p4d_bad(p4d_t p4d)
916921 {
....@@ -984,30 +989,6 @@
984989
985990 #endif /* __ASSEMBLY__ */
986991
987
-/*
988
- * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
989
- *
990
- * this macro returns the index of the entry in the pgd page which would
991
- * control the given virtual address
992
- */
993
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
994
-
995
-/*
996
- * pgd_offset() returns a (pgd_t *)
997
- * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
998
- */
999
-#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
1000
-/*
1001
- * a shortcut to get a pgd_t in a given mm
1002
- */
1003
-#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
1004
-/*
1005
- * a shortcut which implies the use of the kernel's pgd, instead
1006
- * of a process's
1007
- */
1008
-#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
1009
-
1010
-
1011992 #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
1012993 #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
1013994
....@@ -1017,22 +998,12 @@
1017998 void init_mem_mapping(void);
1018999 void early_alloc_pgt_buf(void);
10191000 extern void memblock_find_dma_reserve(void);
1001
+void __init poking_init(void);
1002
+unsigned long init_memory_mapping(unsigned long start,
1003
+ unsigned long end, pgprot_t prot);
10201004
10211005 #ifdef CONFIG_X86_64
1022
-/* Realmode trampoline initialization. */
10231006 extern pgd_t trampoline_pgd_entry;
1024
-static inline void __meminit init_trampoline_default(void)
1025
-{
1026
- /* Default trampoline pgd value */
1027
- trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
1028
-}
1029
-# ifdef CONFIG_RANDOMIZE_MEMORY
1030
-void __meminit init_trampoline(void);
1031
-# else
1032
-# define init_trampoline init_trampoline_default
1033
-# endif
1034
-#else
1035
-static inline void init_trampoline(void) { }
10361007 #endif
10371008
10381009 /* local pte updates need not use xchg for locking */
....@@ -1061,16 +1032,16 @@
10611032 return res;
10621033 }
10631034
1064
-static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
1065
- pte_t *ptep , pte_t pte)
1035
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
1036
+ pte_t *ptep, pte_t pte)
10661037 {
1067
- native_set_pte(ptep, pte);
1038
+ set_pte(ptep, pte);
10681039 }
10691040
10701041 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
10711042 pmd_t *pmdp, pmd_t pmd)
10721043 {
1073
- native_set_pmd(pmdp, pmd);
1044
+ set_pmd(pmdp, pmd);
10741045 }
10751046
10761047 static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
....@@ -1218,6 +1189,7 @@
12181189 return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
12191190 }
12201191
1192
+#define pgd_leaf pgd_large
12211193 static inline int pgd_large(pgd_t pgd) { return 0; }
12221194
12231195 #ifdef CONFIG_PAGE_TABLE_ISOLATION
....@@ -1356,9 +1328,47 @@
13561328 #endif
13571329 #endif
13581330
1331
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
1332
+static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
1333
+{
1334
+ return pte_set_flags(pte, _PAGE_SWP_UFFD_WP);
1335
+}
1336
+
1337
+static inline int pte_swp_uffd_wp(pte_t pte)
1338
+{
1339
+ return pte_flags(pte) & _PAGE_SWP_UFFD_WP;
1340
+}
1341
+
1342
+static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
1343
+{
1344
+ return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP);
1345
+}
1346
+
1347
+static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd)
1348
+{
1349
+ return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP);
1350
+}
1351
+
1352
+static inline int pmd_swp_uffd_wp(pmd_t pmd)
1353
+{
1354
+ return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP;
1355
+}
1356
+
1357
+static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
1358
+{
1359
+ return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP);
1360
+}
1361
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
1362
+
13591363 #define PKRU_AD_BIT 0x1u
13601364 #define PKRU_WD_BIT 0x2u
13611365 #define PKRU_BITS_PER_PKEY 2
1366
+
1367
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
1368
+extern u32 init_pkru_value;
1369
+#else
1370
+#define init_pkru_value 0
1371
+#endif
13621372
13631373 static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
13641374 {
....@@ -1442,7 +1452,12 @@
14421452 return boot_cpu_has_bug(X86_BUG_L1TF);
14431453 }
14441454
1445
-#include <asm-generic/pgtable.h>
1455
+#define arch_faults_on_old_pte arch_faults_on_old_pte
1456
+static inline bool arch_faults_on_old_pte(void)
1457
+{
1458
+ return false;
1459
+}
1460
+
14461461 #endif /* __ASSEMBLY__ */
14471462
14481463 #endif /* _ASM_X86_PGTABLE_H */