forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-11 04dd17822334871b23ea2862f7798fb0e0007777
kernel/arch/powerpc/mm/pgtable.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * This file contains common routines for dealing with free of page tables
  * Along with common page table handling code
@@ -14,11 +15,6 @@
  *
  *  Dave Engebretsen <engebret@us.ibm.com>
  *      Rework for PPC64 port.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/kernel.h>
@@ -27,9 +23,9 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
+#include <asm/hugetlb.h>
 
 static inline int is_exec_fault(void)
 {
@@ -44,20 +40,13 @@
 static inline int pte_looks_normal(pte_t pte)
 {
 
-#if defined(CONFIG_PPC_BOOK3S_64)
-        if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
+        if (pte_present(pte) && !pte_special(pte)) {
                 if (pte_ci(pte))
                         return 0;
                 if (pte_user(pte))
                         return 1;
         }
         return 0;
-#else
-        return (pte_val(pte) &
-                (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER |
-                 _PAGE_PRIVILEGED)) ==
-                (_PAGE_PRESENT | _PAGE_USER);
-#endif
 }
 
 static struct page *maybe_pte_to_page(pte_t pte)
@@ -73,7 +62,7 @@
         return page;
 }
 
-#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0
+#ifdef CONFIG_PPC_BOOK3S
 
 /* Server-style MMU handles coherency when hashing if HW exec permission
  * is supposed per page (currently 64-bit only). If not, then, we always
@@ -81,7 +70,7 @@
  * support falls into the same category.
  */
 
-static pte_t set_pte_filter(pte_t pte)
+static pte_t set_pte_filter_hash(pte_t pte)
 {
         if (radix_enabled())
                 return pte;
@@ -100,24 +89,25 @@
         return pte;
 }
 
-static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
-                                     int dirty)
-{
-        return pte;
-}
+#else /* CONFIG_PPC_BOOK3S */
 
-#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */
+static pte_t set_pte_filter_hash(pte_t pte) { return pte; }
+
+#endif /* CONFIG_PPC_BOOK3S */
 
 /* Embedded type MMU with HW exec support. This is a bit more complicated
  * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
  * instead we "filter out" the exec permission for non clean pages.
  */
-static pte_t set_pte_filter(pte_t pte)
+static inline pte_t set_pte_filter(pte_t pte)
 {
         struct page *pg;
 
+        if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+                return set_pte_filter_hash(pte);
+
         /* No exec permission in the first place, move on */
-        if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte))
+        if (!pte_exec(pte) || !pte_looks_normal(pte))
                 return pte;
 
         /* If you set _PAGE_EXEC on weird pages you're on your own */
@@ -137,7 +127,7 @@
         }
 
         /* Else, we filter out _PAGE_EXEC */
-        return __pte(pte_val(pte) & ~_PAGE_EXEC);
+        return pte_exprotect(pte);
 }
 
 static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
@@ -145,12 +135,15 @@
 {
         struct page *pg;
 
+        if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+                return pte;
+
         /* So here, we only care about exec faults, as we use them
          * to recover lost _PAGE_EXEC and perform I$/D$ coherency
          * if necessary. Also if _PAGE_EXEC is already set, same deal,
          * we just bail out
          */
-        if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault())
+        if (dirty || pte_exec(pte) || !is_exec_fault())
                 return pte;
 
 #ifdef CONFIG_DEBUG_VM
@@ -176,10 +169,8 @@
         set_bit(PG_arch_1, &pg->flags);
 
 bail:
-        return __pte(pte_val(pte) | _PAGE_EXEC);
+        return pte_mkexec(pte);
 }
-
-#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */
 
 /*
  * set_pte stores a linux PTE into the linux page table.
@@ -188,14 +179,10 @@
                 pte_t pte)
 {
         /*
-         * When handling numa faults, we already have the pte marked
-         * _PAGE_PRESENT, but we can be sure that it is not in hpte.
-         * Hence we can use set_pte_at for them.
+         * Make sure hardware valid bit is not set. We don't do
+         * tlb flush for this update.
          */
-        VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
-
-        /* Add the pte bit when trying to set a pte */
-        pte = __pte(pte_val(pte) | _PAGE_PTE);
+        VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
 
         /* Note: mm->context.id might not yet have been assigned as
          * this context might not have been activated yet when this
@@ -205,6 +192,15 @@
 
         /* Perform the setting of the PTE */
         __set_pte_at(mm, addr, ptep, pte, 0);
+}
+
+void unmap_kernel_page(unsigned long va)
+{
+        pmd_t *pmdp = pmd_off_k(va);
+        pte_t *ptep = pte_offset_kernel(pmdp, va);
+
+        pte_clear(&init_mm, va, ptep);
+        flush_tlb_kernel_range(va, va + PAGE_SIZE);
 }
 
 /*
@@ -229,9 +225,9 @@
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-                                      unsigned long addr, pte_t *ptep,
-                                      pte_t pte, int dirty)
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+                               unsigned long addr, pte_t *ptep,
+                               pte_t pte, int dirty)
 {
 #ifdef HUGETLB_NEED_PRELOAD
         /*
@@ -258,22 +254,49 @@
 
 #else
                 /*
-                 * Not used on non book3s64 platforms. But 8xx
-                 * can possibly use tsize derived from hstate.
+                 * Not used on non book3s64 platforms.
+                 * 8xx compares it with mmu_virtual_psize to
+                 * know if it is a huge page or not.
                  */
-                psize = 0;
+                psize = MMU_PAGE_COUNT;
 #endif
                 __ptep_set_access_flags(vma, ptep, pte, addr, psize);
         }
         return changed;
 #endif
 }
+
+#if defined(CONFIG_PPC_8xx)
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+{
+        pmd_t *pmd = pmd_off(mm, addr);
+        pte_basic_t val;
+        pte_basic_t *entry = &ptep->pte;
+        int num, i;
+
+        /*
+         * Make sure hardware valid bit is not set. We don't do
+         * tlb flush for this update.
+         */
+        VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
+
+        pte = set_pte_filter(pte);
+
+        val = pte_val(pte);
+
+        num = number_of_cells_per_pte(pmd, val, 1);
+
+        for (i = 0; i < num; i++, entry++, val += SZ_4K)
+                *entry = val;
+}
+#endif
 #endif /* CONFIG_HUGETLB_PAGE */
 
 #ifdef CONFIG_DEBUG_VM
 void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
 {
         pgd_t *pgd;
+        p4d_t *p4d;
         pud_t *pud;
         pmd_t *pmd;
 
@@ -281,12 +304,14 @@
                 return;
         pgd = mm->pgd + pgd_index(addr);
         BUG_ON(pgd_none(*pgd));
-        pud = pud_offset(pgd, addr);
+        p4d = p4d_offset(pgd, addr);
+        BUG_ON(p4d_none(*p4d));
+        pud = pud_offset(p4d, addr);
         BUG_ON(pud_none(*pud));
         pmd = pmd_offset(pud, addr);
         /*
          * khugepaged to collapse normal pages to hugepage, first set
-         * pmd to none to force page fault/gup to take mmap_sem. After
+         * pmd to none to force page fault/gup to take mmap_lock. After
          * pmd is set to none, we do a pte_clear which does this assertion
          * so if we find pmd none, return.
          */
@@ -305,3 +330,133 @@
         return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
 }
 EXPORT_SYMBOL_GPL(vmalloc_to_phys);
+
+/*
+ * We have 4 cases for pgds and pmds:
+ * (1) invalid (all zeroes)
+ * (2) pointer to next table, as normal; bottom 6 bits == 0
+ * (3) leaf pte for huge page _PAGE_PTE set
+ * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
+ *
+ * So long as we atomically load page table pointers we are safe against teardown,
+ * we can follow the address down to the the page and take a ref on it.
+ * This function need to be called with interrupts disabled. We use this variant
+ * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
+ */
+pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+                        bool *is_thp, unsigned *hpage_shift)
+{
+        pgd_t *pgdp;
+        p4d_t p4d, *p4dp;
+        pud_t pud, *pudp;
+        pmd_t pmd, *pmdp;
+        pte_t *ret_pte;
+        hugepd_t *hpdp = NULL;
+        unsigned pdshift;
+
+        if (hpage_shift)
+                *hpage_shift = 0;
+
+        if (is_thp)
+                *is_thp = false;
+
+        /*
+         * Always operate on the local stack value. This make sure the
+         * value don't get updated by a parallel THP split/collapse,
+         * page fault or a page unmap. The return pte_t * is still not
+         * stable. So should be checked there for above conditions.
+         * Top level is an exception because it is folded into p4d.
+         */
+        pgdp = pgdir + pgd_index(ea);
+        p4dp = p4d_offset(pgdp, ea);
+        p4d  = READ_ONCE(*p4dp);
+        pdshift = P4D_SHIFT;
+
+        if (p4d_none(p4d))
+                return NULL;
+
+        if (p4d_is_leaf(p4d)) {
+                ret_pte = (pte_t *)p4dp;
+                goto out;
+        }
+
+        if (is_hugepd(__hugepd(p4d_val(p4d)))) {
+                hpdp = (hugepd_t *)&p4d;
+                goto out_huge;
+        }
+
+        /*
+         * Even if we end up with an unmap, the pgtable will not
+         * be freed, because we do an rcu free and here we are
+         * irq disabled
+         */
+        pdshift = PUD_SHIFT;
+        pudp = pud_offset(&p4d, ea);
+        pud  = READ_ONCE(*pudp);
+
+        if (pud_none(pud))
+                return NULL;
+
+        if (pud_is_leaf(pud)) {
+                ret_pte = (pte_t *)pudp;
+                goto out;
+        }
+
+        if (is_hugepd(__hugepd(pud_val(pud)))) {
+                hpdp = (hugepd_t *)&pud;
+                goto out_huge;
+        }
+
+        pdshift = PMD_SHIFT;
+        pmdp = pmd_offset(&pud, ea);
+        pmd  = READ_ONCE(*pmdp);
+
+        /*
+         * A hugepage collapse is captured by this condition, see
+         * pmdp_collapse_flush.
+         */
+        if (pmd_none(pmd))
+                return NULL;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+        /*
+         * A hugepage split is captured by this condition, see
+         * pmdp_invalidate.
+         *
+         * Huge page modification can be caught here too.
+         */
+        if (pmd_is_serializing(pmd))
+                return NULL;
+#endif
+
+        if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
+                if (is_thp)
+                        *is_thp = true;
+                ret_pte = (pte_t *)pmdp;
+                goto out;
+        }
+
+        if (pmd_is_leaf(pmd)) {
+                ret_pte = (pte_t *)pmdp;
+                goto out;
+        }
+
+        if (is_hugepd(__hugepd(pmd_val(pmd)))) {
+                hpdp = (hugepd_t *)&pmd;
+                goto out_huge;
+        }
+
+        return pte_offset_kernel(&pmd, ea);
+
+out_huge:
+        if (!hpdp)
+                return NULL;
+
+        ret_pte = hugepte_offset(*hpdp, ea, pdshift);
+        pdshift = hugepd_shift(*hpdp);
+out:
+        if (hpage_shift)
+                *hpage_shift = pdshift;
+        return ret_pte;
+}
+EXPORT_SYMBOL_GPL(__find_linux_pte);
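
Note (not part of the commit): the comment above __find_linux_pte() says the lockless walk is only safe with interrupts disabled, because powerpc frees page tables via RCU and the irq-disabled window acts as the read-side critical section. The sketch below illustrates that calling convention under those assumptions; example_lookup_pfn() is a hypothetical helper, not a function in this file, while local_irq_save()/local_irq_restore(), READ_ONCE(), pte_present() and pte_pfn() are the usual kernel primitives.

/* Illustrative sketch only: lockless PTE lookup with interrupts disabled. */
static unsigned long example_lookup_pfn(struct mm_struct *mm, unsigned long ea)
{
        unsigned long flags, pfn = 0;
        unsigned int shift;
        bool is_thp;
        pte_t *ptep, pte;

        local_irq_save(flags);          /* no page table can be RCU-freed under us */
        ptep = __find_linux_pte(mm->pgd, ea, &is_thp, &shift);
        if (ptep) {
                pte = READ_ONCE(*ptep);  /* pointer is stable, the entry itself may still change */
                if (pte_present(pte))
                        pfn = pte_pfn(pte);
        }
        local_irq_restore(flags);

        return pfn;
}

As the function's own comment notes, the returned pte_t * is still not stable, so callers snapshot the entry with READ_ONCE() and re-check its state before using it.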