forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/arch/x86/mm/dump_pagetables.c
....@@ -1,3 +1,4 @@
1
+// SPDX-License-Identifier: GPL-2.0-only
12 /*
23 * Debug helper to dump the current kernel pagetables of the system
34 * so that we can see what the various memory ranges are set to.
....@@ -5,11 +6,6 @@
56 * (C) Copyright 2008 Intel Corporation
67 *
78 * Author: Arjan van de Ven <arjan@linux.intel.com>
8
- *
9
- * This program is free software; you can redistribute it and/or
10
- * modify it under the terms of the GNU General Public License
11
- * as published by the Free Software Foundation; version 2
12
- * of the License.
139 */
1410
1511 #include <linux/debugfs.h>
....@@ -20,9 +16,9 @@
2016 #include <linux/seq_file.h>
2117 #include <linux/highmem.h>
2218 #include <linux/pci.h>
19
+#include <linux/ptdump.h>
2320
2421 #include <asm/e820/types.h>
25
-#include <asm/pgtable.h>
2622
2723 /*
2824 * The dumper groups pagetable entries of the same type into one, and for
....@@ -30,16 +26,18 @@
3026 * when a "break" in the continuity is found.
3127 */
3228 struct pg_state {
29
+ struct ptdump_state ptdump;
3330 int level;
34
- pgprot_t current_prot;
31
+ pgprotval_t current_prot;
3532 pgprotval_t effective_prot;
33
+ pgprotval_t prot_levels[5];
3634 unsigned long start_address;
37
- unsigned long current_address;
3835 const struct addr_marker *marker;
3936 unsigned long lines;
4037 bool to_dmesg;
4138 bool check_wx;
4239 unsigned long wx_pages;
40
+ struct seq_file *seq;
4341 };
4442
4543 struct addr_marker {
....@@ -178,11 +176,10 @@
178176 /*
179177 * Print a readable form of a pgprot_t to the seq_file
180178 */
181
-static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
179
+static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg)
182180 {
183
- pgprotval_t pr = pgprot_val(prot);
184181 static const char * const level_name[] =
185
- { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
182
+ { "pgd", "p4d", "pud", "pmd", "pte" };
186183
187184 if (!(pr & _PAGE_PRESENT)) {
188185 /* Not present */
....@@ -206,12 +203,12 @@
206203 pt_dump_cont_printf(m, dmsg, " ");
207204
208205 /* Bit 7 has a different meaning on level 3 vs 4 */
209
- if (level <= 4 && pr & _PAGE_PSE)
206
+ if (level <= 3 && pr & _PAGE_PSE)
210207 pt_dump_cont_printf(m, dmsg, "PSE ");
211208 else
212209 pt_dump_cont_printf(m, dmsg, " ");
213
- if ((level == 5 && pr & _PAGE_PAT) ||
214
- ((level == 4 || level == 3) && pr & _PAGE_PAT_LARGE))
210
+ if ((level == 4 && pr & _PAGE_PAT) ||
211
+ ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
215212 pt_dump_cont_printf(m, dmsg, "PAT ");
216213 else
217214 pt_dump_cont_printf(m, dmsg, " ");
....@@ -227,24 +224,11 @@
227224 pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
228225 }
229226
230
-/*
231
- * On 64 bits, sign-extend the 48 bit address to 64 bit
232
- */
233
-static unsigned long normalize_addr(unsigned long u)
234
-{
235
- int shift;
236
- if (!IS_ENABLED(CONFIG_X86_64))
237
- return u;
238
-
239
- shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
240
- return (signed long)(u << shift) >> shift;
241
-}
242
-
243
-static void note_wx(struct pg_state *st)
227
+static void note_wx(struct pg_state *st, unsigned long addr)
244228 {
245229 unsigned long npages;
246230
247
- npages = (st->current_address - st->start_address) / PAGE_SIZE;
231
+ npages = (addr - st->start_address) / PAGE_SIZE;
248232
249233 #ifdef CONFIG_PCI_BIOS
250234 /*
....@@ -252,15 +236,34 @@
252236 * Inform about it, but avoid the warning.
253237 */
254238 if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN &&
255
- st->current_address <= PAGE_OFFSET + BIOS_END) {
239
+ addr <= PAGE_OFFSET + BIOS_END) {
256240 pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages);
257241 return;
258242 }
259243 #endif
260244 /* Account the WX pages */
261245 st->wx_pages += npages;
262
- WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n",
246
+ WARN_ONCE(__supported_pte_mask & _PAGE_NX,
247
+ "x86/mm: Found insecure W+X mapping at address %pS\n",
263248 (void *)st->start_address);
249
+}
250
+
251
+static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
252
+{
253
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
254
+ pgprotval_t prot = val & PTE_FLAGS_MASK;
255
+ pgprotval_t effective;
256
+
257
+ if (level > 0) {
258
+ pgprotval_t higher_prot = st->prot_levels[level - 1];
259
+
260
+ effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) |
261
+ ((higher_prot | prot) & _PAGE_NX);
262
+ } else {
263
+ effective = prot;
264
+ }
265
+
266
+ st->prot_levels[level] = effective;
264267 }
265268
266269 /*
....@@ -268,22 +271,30 @@
268271 * of PTE entries; the next one is different so we need to
269272 * print what we collected so far.
270273 */
271
-static void note_page(struct seq_file *m, struct pg_state *st,
272
- pgprot_t new_prot, pgprotval_t new_eff, int level)
274
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
275
+ u64 val)
273276 {
274
- pgprotval_t prot, cur, eff;
277
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
278
+ pgprotval_t new_prot, new_eff;
279
+ pgprotval_t cur, eff;
275280 static const char units[] = "BKMGTPE";
281
+ struct seq_file *m = st->seq;
282
+
283
+ new_prot = val & PTE_FLAGS_MASK;
284
+ if (!val)
285
+ new_eff = 0;
286
+ else
287
+ new_eff = st->prot_levels[level];
276288
277289 /*
278290 * If we have a "break" in the series, we need to flush the state that
279291 * we have now. "break" is either changing perms, levels or
280292 * address space marker.
281293 */
282
- prot = pgprot_val(new_prot);
283
- cur = pgprot_val(st->current_prot);
294
+ cur = st->current_prot;
284295 eff = st->effective_prot;
285296
286
- if (!st->level) {
297
+ if (st->level == -1) {
287298 /* First entry */
288299 st->current_prot = new_prot;
289300 st->effective_prot = new_eff;
....@@ -292,14 +303,14 @@
292303 st->lines = 0;
293304 pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
294305 st->marker->name);
295
- } else if (prot != cur || new_eff != eff || level != st->level ||
296
- st->current_address >= st->marker[1].start_address) {
306
+ } else if (new_prot != cur || new_eff != eff || level != st->level ||
307
+ addr >= st->marker[1].start_address) {
297308 const char *unit = units;
298309 unsigned long delta;
299310 int width = sizeof(unsigned long) * 2;
300311
301312 if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX))
302
- note_wx(st);
313
+ note_wx(st, addr);
303314
304315 /*
305316 * Now print the actual finished series
....@@ -309,9 +320,9 @@
309320 pt_dump_seq_printf(m, st->to_dmesg,
310321 "0x%0*lx-0x%0*lx ",
311322 width, st->start_address,
312
- width, st->current_address);
323
+ width, addr);
313324
314
- delta = st->current_address - st->start_address;
325
+ delta = addr - st->start_address;
315326 while (!(delta & 1023) && unit[1]) {
316327 delta >>= 10;
317328 unit++;
....@@ -328,7 +339,7 @@
328339 * such as the start of vmalloc space etc.
329340 * This helps in the interpretation.
330341 */
331
- if (st->current_address >= st->marker[1].start_address) {
342
+ if (addr >= st->marker[1].start_address) {
332343 if (st->marker->max_lines &&
333344 st->lines > st->marker->max_lines) {
334345 unsigned long nskip =
....@@ -344,224 +355,41 @@
344355 st->marker->name);
345356 }
346357
347
- st->start_address = st->current_address;
358
+ st->start_address = addr;
348359 st->current_prot = new_prot;
349360 st->effective_prot = new_eff;
350361 st->level = level;
351362 }
352363 }
353364
354
-static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
355
-{
356
- return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
357
- ((prot1 | prot2) & _PAGE_NX);
358
-}
359
-
360
-static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
361
- pgprotval_t eff_in, unsigned long P)
362
-{
363
- int i;
364
- pte_t *pte;
365
- pgprotval_t prot, eff;
366
-
367
- for (i = 0; i < PTRS_PER_PTE; i++) {
368
- st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
369
- pte = pte_offset_map(&addr, st->current_address);
370
- prot = pte_flags(*pte);
371
- eff = effective_prot(eff_in, prot);
372
- note_page(m, st, __pgprot(prot), eff, 5);
373
- pte_unmap(pte);
374
- }
375
-}
376
-#ifdef CONFIG_KASAN
377
-
378
-/*
379
- * This is an optimization for KASAN=y case. Since all kasan page tables
380
- * eventually point to the kasan_early_shadow_page we could call note_page()
381
- * right away without walking through lower level page tables. This saves
382
- * us dozens of seconds (minutes for 5-level config) while checking for
383
- * W+X mapping or reading kernel_page_tables debugfs file.
384
- */
385
-static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
386
- void *pt)
387
-{
388
- if (__pa(pt) == __pa(kasan_early_shadow_pmd) ||
389
- (pgtable_l5_enabled() &&
390
- __pa(pt) == __pa(kasan_early_shadow_p4d)) ||
391
- __pa(pt) == __pa(kasan_early_shadow_pud)) {
392
- pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]);
393
- note_page(m, st, __pgprot(prot), 0, 5);
394
- return true;
395
- }
396
- return false;
397
-}
398
-#else
399
-static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
400
- void *pt)
401
-{
402
- return false;
403
-}
404
-#endif
405
-
406
-#if PTRS_PER_PMD > 1
407
-
408
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
409
- pgprotval_t eff_in, unsigned long P)
410
-{
411
- int i;
412
- pmd_t *start, *pmd_start;
413
- pgprotval_t prot, eff;
414
-
415
- pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
416
- for (i = 0; i < PTRS_PER_PMD; i++) {
417
- st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
418
- if (!pmd_none(*start)) {
419
- prot = pmd_flags(*start);
420
- eff = effective_prot(eff_in, prot);
421
- if (pmd_large(*start) || !pmd_present(*start)) {
422
- note_page(m, st, __pgprot(prot), eff, 4);
423
- } else if (!kasan_page_table(m, st, pmd_start)) {
424
- walk_pte_level(m, st, *start, eff,
425
- P + i * PMD_LEVEL_MULT);
426
- }
427
- } else
428
- note_page(m, st, __pgprot(0), 0, 4);
429
- start++;
430
- }
431
-}
432
-
433
-#else
434
-#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p)
435
-#define pud_large(a) pmd_large(__pmd(pud_val(a)))
436
-#define pud_none(a) pmd_none(__pmd(pud_val(a)))
437
-#endif
438
-
439
-#if PTRS_PER_PUD > 1
440
-
441
-static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
442
- pgprotval_t eff_in, unsigned long P)
443
-{
444
- int i;
445
- pud_t *start, *pud_start;
446
- pgprotval_t prot, eff;
447
- pud_t *prev_pud = NULL;
448
-
449
- pud_start = start = (pud_t *)p4d_page_vaddr(addr);
450
-
451
- for (i = 0; i < PTRS_PER_PUD; i++) {
452
- st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
453
- if (!pud_none(*start)) {
454
- prot = pud_flags(*start);
455
- eff = effective_prot(eff_in, prot);
456
- if (pud_large(*start) || !pud_present(*start)) {
457
- note_page(m, st, __pgprot(prot), eff, 3);
458
- } else if (!kasan_page_table(m, st, pud_start)) {
459
- walk_pmd_level(m, st, *start, eff,
460
- P + i * PUD_LEVEL_MULT);
461
- }
462
- } else
463
- note_page(m, st, __pgprot(0), 0, 3);
464
-
465
- prev_pud = start;
466
- start++;
467
- }
468
-}
469
-
470
-#else
471
-#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p)
472
-#define p4d_large(a) pud_large(__pud(p4d_val(a)))
473
-#define p4d_none(a) pud_none(__pud(p4d_val(a)))
474
-#endif
475
-
476
-static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
477
- pgprotval_t eff_in, unsigned long P)
478
-{
479
- int i;
480
- p4d_t *start, *p4d_start;
481
- pgprotval_t prot, eff;
482
-
483
- if (PTRS_PER_P4D == 1)
484
- return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P);
485
-
486
- p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
487
-
488
- for (i = 0; i < PTRS_PER_P4D; i++) {
489
- st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
490
- if (!p4d_none(*start)) {
491
- prot = p4d_flags(*start);
492
- eff = effective_prot(eff_in, prot);
493
- if (p4d_large(*start) || !p4d_present(*start)) {
494
- note_page(m, st, __pgprot(prot), eff, 2);
495
- } else if (!kasan_page_table(m, st, p4d_start)) {
496
- walk_pud_level(m, st, *start, eff,
497
- P + i * P4D_LEVEL_MULT);
498
- }
499
- } else
500
- note_page(m, st, __pgprot(0), 0, 2);
501
-
502
- start++;
503
- }
504
-}
505
-
506
-#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
507
-#define pgd_none(a) (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
508
-
509
-static inline bool is_hypervisor_range(int idx)
510
-{
511
-#ifdef CONFIG_X86_64
512
- /*
513
- * A hole in the beginning of kernel address space reserved
514
- * for a hypervisor.
515
- */
516
- return (idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) &&
517
- (idx < pgd_index(GUARD_HOLE_END_ADDR));
518
-#else
519
- return false;
520
-#endif
521
-}
522
-
523
-static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
365
+static void ptdump_walk_pgd_level_core(struct seq_file *m,
366
+ struct mm_struct *mm, pgd_t *pgd,
524367 bool checkwx, bool dmesg)
525368 {
526
- pgd_t *start = INIT_PGD;
527
- pgprotval_t prot, eff;
528
- int i;
529
- struct pg_state st = {};
530
-
531
- if (pgd) {
532
- start = pgd;
533
- st.to_dmesg = dmesg;
534
- }
535
-
536
- st.check_wx = checkwx;
537
- if (checkwx)
538
- st.wx_pages = 0;
539
-
540
- for (i = 0; i < PTRS_PER_PGD; i++) {
541
- st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
542
- if (!pgd_none(*start) && !is_hypervisor_range(i)) {
543
- prot = pgd_flags(*start);
544
-#ifdef CONFIG_X86_PAE
545
- eff = _PAGE_USER | _PAGE_RW;
369
+ const struct ptdump_range ptdump_ranges[] = {
370
+#ifdef CONFIG_X86_64
371
+ {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
372
+ {GUARD_HOLE_END_ADDR, ~0UL},
546373 #else
547
- eff = prot;
374
+ {0, ~0UL},
548375 #endif
549
- if (pgd_large(*start) || !pgd_present(*start)) {
550
- note_page(m, &st, __pgprot(prot), eff, 1);
551
- } else {
552
- walk_p4d_level(m, &st, *start, eff,
553
- i * PGD_LEVEL_MULT);
554
- }
555
- } else
556
- note_page(m, &st, __pgprot(0), 0, 1);
376
+ {0, 0}
377
+};
557378
558
- cond_resched();
559
- start++;
560
- }
379
+ struct pg_state st = {
380
+ .ptdump = {
381
+ .note_page = note_page,
382
+ .effective_prot = effective_prot,
383
+ .range = ptdump_ranges
384
+ },
385
+ .level = -1,
386
+ .to_dmesg = dmesg,
387
+ .check_wx = checkwx,
388
+ .seq = m
389
+ };
561390
562
- /* Flush out the last page */
563
- st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
564
- note_page(m, &st, __pgprot(0), 0, 0);
391
+ ptdump_walk_pgd(&st.ptdump, mm, pgd);
392
+
565393 if (!checkwx)
566394 return;
567395 if (st.wx_pages)
....@@ -571,18 +399,20 @@
571399 pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
572400 }
573401
574
-void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
402
+void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm)
575403 {
576
- ptdump_walk_pgd_level_core(m, pgd, false, true);
404
+ ptdump_walk_pgd_level_core(m, mm, mm->pgd, false, true);
577405 }
578406
579
-void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
407
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
408
+ bool user)
580409 {
410
+ pgd_t *pgd = mm->pgd;
581411 #ifdef CONFIG_PAGE_TABLE_ISOLATION
582
- if (user && static_cpu_has(X86_FEATURE_PTI))
412
+ if (user && boot_cpu_has(X86_FEATURE_PTI))
583413 pgd = kernel_to_user_pgdp(pgd);
584414 #endif
585
- ptdump_walk_pgd_level_core(m, pgd, false, false);
415
+ ptdump_walk_pgd_level_core(m, mm, pgd, false, false);
586416 }
587417 EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
588418
....@@ -592,18 +422,18 @@
592422 pgd_t *pgd = INIT_PGD;
593423
594424 if (!(__supported_pte_mask & _PAGE_NX) ||
595
- !static_cpu_has(X86_FEATURE_PTI))
425
+ !boot_cpu_has(X86_FEATURE_PTI))
596426 return;
597427
598428 pr_info("x86/mm: Checking user space page tables\n");
599429 pgd = kernel_to_user_pgdp(pgd);
600
- ptdump_walk_pgd_level_core(NULL, pgd, true, false);
430
+ ptdump_walk_pgd_level_core(NULL, &init_mm, pgd, true, false);
601431 #endif
602432 }
603433
604434 void ptdump_walk_pgd_level_checkwx(void)
605435 {
606
- ptdump_walk_pgd_level_core(NULL, NULL, true, false);
436
+ ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false);
607437 }
608438
609439 static int __init pt_dump_init(void)