2024-05-16 8d2a02b24d66aa359e83eebc1ed3c0f85367a1cb
kernel/mm/page_owner.c
@@ -3,13 +3,14 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/stacktrace.h>
 #include <linux/page_owner.h>
 #include <linux/jump_label.h>
 #include <linux/migrate.h>
 #include <linux/stackdepot.h>
 #include <linux/seq_file.h>
+#include <linux/sched/clock.h>
 
 #include "internal.h"
 
@@ -24,10 +25,15 @@
 	short last_migrate_reason;
 	gfp_t gfp_mask;
 	depot_stack_handle_t handle;
+	depot_stack_handle_t free_handle;
+	u64 ts_nsec;
+	u64 free_ts_nsec;
+	pid_t pid;
 };
 
-static bool page_owner_disabled = true;
+bool page_owner_enabled;
 DEFINE_STATIC_KEY_FALSE(page_owner_inited);
+EXPORT_SYMBOL_GPL(page_owner_inited);
 
 static depot_stack_handle_t dummy_handle;
 static depot_stack_handle_t failure_handle;
@@ -41,7 +47,7 @@
 		return -EINVAL;
 
 	if (strcmp(buf, "on") == 0)
-		page_owner_disabled = false;
+		page_owner_enabled = true;
 
 	return 0;
 }
@@ -49,24 +55,16 @@
 
 static bool need_page_owner(void)
 {
-	if (page_owner_disabled)
-		return false;
-
-	return true;
+	return page_owner_enabled;
 }
 
 static __always_inline depot_stack_handle_t create_dummy_stack(void)
 {
 	unsigned long entries[4];
-	struct stack_trace dummy;
+	unsigned int nr_entries;
 
-	dummy.nr_entries = 0;
-	dummy.max_entries = ARRAY_SIZE(entries);
-	dummy.entries = &entries[0];
-	dummy.skip = 0;
-
-	save_stack_trace(&dummy);
-	return depot_save_stack(&dummy, GFP_KERNEL);
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
+	return stack_depot_save(entries, nr_entries, GFP_KERNEL);
 }
 
 static noinline void register_dummy_stack(void)
@@ -86,7 +84,7 @@
 
 static void init_page_owner(void)
 {
-	if (page_owner_disabled)
+	if (!page_owner_enabled)
 		return;
 
 	register_dummy_stack();
@@ -102,103 +100,134 @@
 	.init = init_page_owner,
 };
 
-static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
+struct page_owner *get_page_owner(struct page_ext *page_ext)
 {
 	return (void *)page_ext + page_owner_ops.offset;
 }
+EXPORT_SYMBOL_GPL(get_page_owner);
 
-void __reset_page_owner(struct page *page, unsigned int order)
+depot_stack_handle_t get_page_owner_handle(struct page_ext *page_ext, unsigned long pfn)
 {
-	int i;
-	struct page_ext *page_ext;
+	struct page_owner *page_owner;
+	depot_stack_handle_t handle;
 
-	for (i = 0; i < (1 << order); i++) {
-		page_ext = lookup_page_ext(page + i);
-		if (unlikely(!page_ext))
-			continue;
-		__clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
-	}
+	if (!page_owner_enabled)
+		return 0;
+
+	page_owner = get_page_owner(page_ext);
+
+	/* skip handle for tail pages of higher order allocations */
+	if (!IS_ALIGNED(pfn, 1 << page_owner->order))
+		return 0;
+
+	handle = READ_ONCE(page_owner->handle);
+	return handle;
 }
+EXPORT_SYMBOL_GPL(get_page_owner_handle);
 
-static inline bool check_recursive_alloc(struct stack_trace *trace,
-					unsigned long ip)
+static inline bool check_recursive_alloc(unsigned long *entries,
+					 unsigned int nr_entries,
+					 unsigned long ip)
 {
-	int i;
+	unsigned int i;
 
-	if (!trace->nr_entries)
-		return false;
-
-	for (i = 0; i < trace->nr_entries; i++) {
-		if (trace->entries[i] == ip)
+	for (i = 0; i < nr_entries; i++) {
+		if (entries[i] == ip)
 			return true;
 	}
-
 	return false;
 }
 
 static noinline depot_stack_handle_t save_stack(gfp_t flags)
 {
 	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
-	struct stack_trace trace = {
-		.nr_entries = 0,
-		.entries = entries,
-		.max_entries = PAGE_OWNER_STACK_DEPTH,
-		.skip = 2
-	};
 	depot_stack_handle_t handle;
+	unsigned int nr_entries;
 
-	save_stack_trace(&trace);
-	if (trace.nr_entries != 0 &&
-	    trace.entries[trace.nr_entries-1] == ULONG_MAX)
-		trace.nr_entries--;
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
 
 	/*
-	 * We need to check recursion here because our request to stackdepot
-	 * could trigger memory allocation to save new entry. New memory
-	 * allocation would reach here and call depot_save_stack() again
-	 * if we don't catch it. There is still not enough memory in stackdepot
-	 * so it would try to allocate memory again and loop forever.
+	 * We need to check recursion here because our request to
+	 * stackdepot could trigger memory allocation to save new
+	 * entry. New memory allocation would reach here and call
+	 * stack_depot_save_entries() again if we don't catch it. There is
+	 * still not enough memory in stackdepot so it would try to
+	 * allocate memory again and loop forever.
 	 */
-	if (check_recursive_alloc(&trace, _RET_IP_))
+	if (check_recursive_alloc(entries, nr_entries, _RET_IP_))
 		return dummy_handle;
 
-	handle = depot_save_stack(&trace, flags);
+	handle = stack_depot_save(entries, nr_entries, flags);
 	if (!handle)
 		handle = failure_handle;
 
 	return handle;
 }
 
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
-	depot_stack_handle_t handle, unsigned int order, gfp_t gfp_mask)
+void __reset_page_owner(struct page *page, unsigned int order)
+{
+	int i;
+	struct page_ext *page_ext;
+	depot_stack_handle_t handle = 0;
+	struct page_owner *page_owner;
+	u64 free_ts_nsec = local_clock();
+
+	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+
+	page_ext = page_ext_get(page);
+	if (unlikely(!page_ext))
+		return;
+	for (i = 0; i < (1 << order); i++) {
+		__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+		page_owner = get_page_owner(page_ext);
+		page_owner->free_handle = handle;
+		page_owner->free_ts_nsec = free_ts_nsec;
+		page_ext = page_ext_next(page_ext);
+	}
+	page_ext_put(page_ext);
+}
+
+static inline void __set_page_owner_handle(struct page *page,
+	struct page_ext *page_ext, depot_stack_handle_t handle,
+	unsigned int order, gfp_t gfp_mask)
 {
 	struct page_owner *page_owner;
+	int i;
 
-	page_owner = get_page_owner(page_ext);
-	page_owner->handle = handle;
-	page_owner->order = order;
-	page_owner->gfp_mask = gfp_mask;
-	page_owner->last_migrate_reason = -1;
+	for (i = 0; i < (1 << order); i++) {
+		page_owner = get_page_owner(page_ext);
+		page_owner->handle = handle;
+		page_owner->order = order;
+		page_owner->gfp_mask = gfp_mask;
+		page_owner->last_migrate_reason = -1;
+		page_owner->pid = current->pid;
+		page_owner->ts_nsec = local_clock();
+		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+		__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
 
-	__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+		page_ext = page_ext_next(page_ext);
+	}
 }
 
 noinline void __set_page_owner(struct page *page, unsigned int order,
 					gfp_t gfp_mask)
 {
-	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_ext *page_ext;
 	depot_stack_handle_t handle;
 
+	handle = save_stack(gfp_mask);
+
+	page_ext = page_ext_get(page);
 	if (unlikely(!page_ext))
 		return;
-
-	handle = save_stack(gfp_mask);
-	__set_page_owner_handle(page_ext, handle, order, gfp_mask);
+	__set_page_owner_handle(page, page_ext, handle, order, gfp_mask);
+	page_ext_put(page_ext);
 }
+EXPORT_SYMBOL_GPL(__set_page_owner);
 
 void __set_page_owner_migrate_reason(struct page *page, int reason)
 {
-	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_ext *page_ext = page_ext_get(page);
 	struct page_owner *page_owner;
 
 	if (unlikely(!page_ext))
@@ -206,31 +235,41 @@
 
 	page_owner = get_page_owner(page_ext);
 	page_owner->last_migrate_reason = reason;
+	page_ext_put(page_ext);
 }
 
-void __split_page_owner(struct page *page, unsigned int order)
+void __split_page_owner(struct page *page, unsigned int nr)
 {
 	int i;
-	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_ext *page_ext = page_ext_get(page);
 	struct page_owner *page_owner;
 
 	if (unlikely(!page_ext))
 		return;
 
-	page_owner = get_page_owner(page_ext);
-	page_owner->order = 0;
-	for (i = 1; i < (1 << order); i++)
-		__copy_page_owner(page, page + i);
+	for (i = 0; i < nr; i++) {
+		page_owner = get_page_owner(page_ext);
+		page_owner->order = 0;
+		page_ext = page_ext_next(page_ext);
+	}
+	page_ext_put(page_ext);
 }
 
 void __copy_page_owner(struct page *oldpage, struct page *newpage)
 {
-	struct page_ext *old_ext = lookup_page_ext(oldpage);
-	struct page_ext *new_ext = lookup_page_ext(newpage);
+	struct page_ext *old_ext;
+	struct page_ext *new_ext;
 	struct page_owner *old_page_owner, *new_page_owner;
 
-	if (unlikely(!old_ext || !new_ext))
+	old_ext = page_ext_get(oldpage);
+	if (unlikely(!old_ext))
 		return;
+
+	new_ext = page_ext_get(newpage);
+	if (unlikely(!new_ext)) {
+		page_ext_put(old_ext);
+		return;
+	}
 
 	old_page_owner = get_page_owner(old_ext);
 	new_page_owner = get_page_owner(new_ext);
@@ -239,6 +278,9 @@
 	new_page_owner->last_migrate_reason =
 		old_page_owner->last_migrate_reason;
 	new_page_owner->handle = old_page_owner->handle;
+	new_page_owner->pid = old_page_owner->pid;
+	new_page_owner->ts_nsec = old_page_owner->ts_nsec;
+	new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
 
 	/*
 	 * We don't clear the bit on the oldpage as it's going to be freed
@@ -250,6 +292,9 @@
 	 * the new page, which will be freed.
 	 */
 	__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
+	__set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
+	page_ext_put(new_ext);
+	page_ext_put(old_ext);
 }
 
 void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -297,7 +342,7 @@
 			if (PageBuddy(page)) {
 				unsigned long freepage_order;
 
-				freepage_order = page_order_unsafe(page);
+				freepage_order = buddy_order_unsafe(page);
 				if (freepage_order < MAX_ORDER)
 					pfn += (1UL << freepage_order) - 1;
 				continue;
@@ -306,16 +351,15 @@
 			if (PageReserved(page))
 				continue;
 
-			page_ext = lookup_page_ext(page);
+			page_ext = page_ext_get(page);
 			if (unlikely(!page_ext))
 				continue;
 
-			if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
-				continue;
+			if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
+				goto ext_put_continue;
 
 			page_owner = get_page_owner(page_ext);
-			page_mt = gfpflags_to_migratetype(
-					page_owner->gfp_mask);
+			page_mt = gfp_migratetype(page_owner->gfp_mask);
 			if (pageblock_mt != page_mt) {
 				if (is_migrate_cma(pageblock_mt))
 					count[MIGRATE_MOVABLE]++;
@@ -323,9 +367,12 @@
 					count[pageblock_mt]++;
 
 				pfn = block_end_pfn;
+				page_ext_put(page_ext);
 				break;
 			}
 			pfn += (1UL << page_owner->order) - 1;
+ext_put_continue:
+			page_ext_put(page_ext);
 		}
 	}
 
@@ -341,32 +388,28 @@
 		struct page *page, struct page_owner *page_owner,
 		depot_stack_handle_t handle)
 {
-	int ret;
-	int pageblock_mt, page_mt;
+	int ret, pageblock_mt, page_mt;
+	unsigned long *entries;
+	unsigned int nr_entries;
 	char *kbuf;
-	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
-	struct stack_trace trace = {
-		.nr_entries = 0,
-		.entries = entries,
-		.max_entries = PAGE_OWNER_STACK_DEPTH,
-		.skip = 0
-	};
 
+	count = min_t(size_t, count, PAGE_SIZE);
 	kbuf = kmalloc(count, GFP_KERNEL);
 	if (!kbuf)
 		return -ENOMEM;
 
 	ret = snprintf(kbuf, count,
-			"Page allocated via order %u, mask %#x(%pGg)\n",
+			"Page allocated via order %u, mask %#x(%pGg), pid %d, ts %llu ns, free_ts %llu ns\n",
 			page_owner->order, page_owner->gfp_mask,
-			&page_owner->gfp_mask);
+			&page_owner->gfp_mask, page_owner->pid,
+			page_owner->ts_nsec, page_owner->free_ts_nsec);
 
 	if (ret >= count)
 		goto err;
 
 	/* Print information relevant to grouping pages by mobility */
 	pageblock_mt = get_pageblock_migratetype(page);
-	page_mt = gfpflags_to_migratetype(page_owner->gfp_mask);
+	page_mt = gfp_migratetype(page_owner->gfp_mask);
 	ret += snprintf(kbuf + ret, count - ret,
 			"PFN %lu type %s Block %lu type %s Flags %#lx(%pGp)\n",
 			pfn,
@@ -378,8 +421,8 @@
 	if (ret >= count)
 		goto err;
 
-	depot_fetch_stack(handle, &trace);
-	ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0);
+	nr_entries = stack_depot_fetch(handle, &entries);
+	ret += stack_trace_snprint(kbuf + ret, count - ret, entries, nr_entries, 0);
 	if (ret >= count)
 		goto err;
 
@@ -408,16 +451,11 @@
 
 void __dump_page_owner(struct page *page)
 {
-	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_ext *page_ext = page_ext_get((void *)page);
 	struct page_owner *page_owner;
-	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
-	struct stack_trace trace = {
-		.nr_entries = 0,
-		.entries = entries,
-		.max_entries = PAGE_OWNER_STACK_DEPTH,
-		.skip = 0
-	};
 	depot_stack_handle_t handle;
+	unsigned long *entries;
+	unsigned int nr_entries;
 	gfp_t gfp_mask;
 	int mt;
 
@@ -428,28 +466,44 @@
 
 	page_owner = get_page_owner(page_ext);
 	gfp_mask = page_owner->gfp_mask;
-	mt = gfpflags_to_migratetype(gfp_mask);
+	mt = gfp_migratetype(gfp_mask);
 
 	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
-		pr_alert("page_owner info is not active (free page?)\n");
+		pr_alert("page_owner info is not present (never set?)\n");
+		page_ext_put(page_ext);
 		return;
 	}
+
+	if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
+		pr_alert("page_owner tracks the page as allocated\n");
+	else
+		pr_alert("page_owner tracks the page as freed\n");
+
+	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, ts %llu, free_ts %llu\n",
+		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
+		 page_owner->pid, page_owner->ts_nsec, page_owner->free_ts_nsec);
 
 	handle = READ_ONCE(page_owner->handle);
 	if (!handle) {
-		pr_alert("page_owner info is not active (free page?)\n");
-		return;
+		pr_alert("page_owner allocation stack trace missing\n");
+	} else {
+		nr_entries = stack_depot_fetch(handle, &entries);
+		stack_trace_print(entries, nr_entries, 0);
 	}
 
-	depot_fetch_stack(handle, &trace);
-	pr_alert("PFN 0x%lx allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
-		 page_to_pfn(page),
-		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
-	print_stack_trace(&trace, 0);
+	handle = READ_ONCE(page_owner->free_handle);
+	if (!handle) {
+		pr_alert("page_owner free stack trace missing\n");
+	} else {
+		nr_entries = stack_depot_fetch(handle, &entries);
+		pr_alert("page last free stack trace:\n");
+		stack_trace_print(entries, nr_entries, 0);
+	}
 
 	if (page_owner->last_migrate_reason != -1)
 		pr_alert("page has been migrated, last migrate reason: %s\n",
 			migrate_reason_names[page_owner->last_migrate_reason]);
+	page_ext_put(page_ext);
 }
 
 static ssize_t
@@ -476,6 +530,14 @@
 	/* Find an allocated page */
 	for (; pfn < max_pfn; pfn++) {
 		/*
+		 * This temporary page_owner is required so
+		 * that we can avoid the context switches while holding
+		 * the rcu lock and copying the page owner information to
+		 * user through copy_to_user() or GFP_KERNEL allocations.
+		 */
+		struct page_owner page_owner_tmp;
+
+		/*
 		 * If the new page is in a new MAX_ORDER_NR_PAGES area,
 		 * validate the area as existing, skip it if not
 		 */
@@ -490,14 +552,14 @@
 
 		page = pfn_to_page(pfn);
 		if (PageBuddy(page)) {
-			unsigned long freepage_order = page_order_unsafe(page);
+			unsigned long freepage_order = buddy_order_unsafe(page);
 
 			if (freepage_order < MAX_ORDER)
 				pfn += (1UL << freepage_order) - 1;
 			continue;
 		}
 
-		page_ext = lookup_page_ext(page);
+		page_ext = page_ext_get(page);
 		if (unlikely(!page_ext))
 			continue;
 
@@ -506,9 +568,23 @@
 		 * because we don't hold the zone lock.
 		 */
 		if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
-			continue;
+			goto ext_put_continue;
+
+		/*
+		 * Although we do have the info about past allocation of free
+		 * pages, it's not relevant for current memory usage.
+		 */
+		if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
+			goto ext_put_continue;
 
 		page_owner = get_page_owner(page_ext);
+
+		/*
+		 * Don't print "tail" pages of high-order allocations as that
+		 * would inflate the stats.
+		 */
+		if (!IS_ALIGNED(pfn, 1 << page_owner->order))
+			goto ext_put_continue;
 
 		/*
 		 * Access to page_ext->handle isn't synchronous so we should
@@ -516,13 +592,17 @@
 		 */
 		handle = READ_ONCE(page_owner->handle);
 		if (!handle)
-			continue;
+			goto ext_put_continue;
 
 		/* Record the next PFN to read in the file offset */
 		*ppos = (pfn - min_low_pfn) + 1;
 
+		page_owner_tmp = *page_owner;
+		page_ext_put(page_ext);
 		return print_page_owner(buf, count, pfn, page,
-				page_owner, handle);
+				&page_owner_tmp, handle);
+ext_put_continue:
+		page_ext_put(page_ext);
 	}
 
 	return 0;
@@ -570,7 +650,7 @@
 			 * heavy lock contention.
 			 */
 			if (PageBuddy(page)) {
-				unsigned long order = page_order_unsafe(page);
+				unsigned long order = buddy_order_unsafe(page);
 
 				if (order > 0 && order < MAX_ORDER)
 					pfn += (1UL << order) - 1;
@@ -580,17 +660,20 @@
 			if (PageReserved(page))
 				continue;
 
-			page_ext = lookup_page_ext(page);
+			page_ext = page_ext_get(page);
 			if (unlikely(!page_ext))
 				continue;
 
 			/* Maybe overlapping zone */
 			if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
-				continue;
+				goto ext_put_continue;
 
 			/* Found early allocated page */
-			__set_page_owner_handle(page_ext, early_handle, 0, 0);
+			__set_page_owner_handle(page, page_ext, early_handle,
+						0, 0);
 			count++;
+ext_put_continue:
+			page_ext_put(page_ext);
 		}
 		cond_resched();
 	}
@@ -626,16 +709,14 @@
 
 static int __init pageowner_init(void)
 {
-	struct dentry *dentry;
-
 	if (!static_branch_unlikely(&page_owner_inited)) {
 		pr_info("page_owner is disabled\n");
 		return 0;
 	}
 
-	dentry = debugfs_create_file("page_owner", 0400, NULL,
-			NULL, &proc_page_owner_operations);
+	debugfs_create_file("page_owner", 0400, NULL, NULL,
+			    &proc_page_owner_operations);
 
-	return PTR_ERR_OR_ZERO(dentry);
+	return 0;
 }
 late_initcall(pageowner_init)