2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/mm/memfd.c
@@ -21,46 +21,45 @@
 #include <uapi/linux/memfd.h>
 
 /*
- * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
+ * We need a tag: a new tag would expand every xa_node by 8 bytes,
  * so reuse a tag which we firmly believe is never set or cleared on tmpfs
  * or hugetlbfs because they are memory only filesystems.
  */
 #define MEMFD_TAG_PINNED	PAGECACHE_TAG_TOWRITE
 #define LAST_SCAN		4	/* about 150ms max */
 
-static void memfd_tag_pins(struct address_space *mapping)
+static void memfd_tag_pins(struct xa_state *xas)
 {
-	struct radix_tree_iter iter;
-	void __rcu **slot;
-	pgoff_t start;
 	struct page *page;
-	unsigned int tagged = 0;
+	int latency = 0;
+	int cache_count;
 
 	lru_add_drain();
-	start = 0;
 
-	xa_lock_irq(&mapping->i_pages);
-	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
-		page = radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-		if (!page || radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-		} else if (page_count(page) - page_mapcount(page) > 1) {
-			radix_tree_tag_set(&mapping->i_pages, iter.index,
-					   MEMFD_TAG_PINNED);
-		}
+	xas_lock_irq(xas);
+	xas_for_each(xas, page, ULONG_MAX) {
+		cache_count = 1;
+		if (!xa_is_value(page) &&
+		    PageTransHuge(page) && !PageHuge(page))
+			cache_count = HPAGE_PMD_NR;
 
-		if (++tagged % 1024)
+		if (!xa_is_value(page) &&
+		    page_count(page) - total_mapcount(page) != cache_count)
+			xas_set_mark(xas, MEMFD_TAG_PINNED);
+		if (cache_count != 1)
+			xas_set(xas, page->index + cache_count);
+
+		latency += cache_count;
+		if (latency < XA_CHECK_SCHED)
 			continue;
+		latency = 0;
 
-		slot = radix_tree_iter_resume(slot, &iter);
-		xa_unlock_irq(&mapping->i_pages);
+		xas_pause(xas);
+		xas_unlock_irq(xas);
 		cond_resched();
-		xa_lock_irq(&mapping->i_pages);
+		xas_lock_irq(xas);
 	}
-	xa_unlock_irq(&mapping->i_pages);
+	xas_unlock_irq(xas);
 }
 
 /*
@@ -74,17 +73,18 @@
  */
 static int memfd_wait_for_pins(struct address_space *mapping)
 {
-	struct radix_tree_iter iter;
-	void __rcu **slot;
-	pgoff_t start;
+	XA_STATE(xas, &mapping->i_pages, 0);
 	struct page *page;
 	int error, scan;
 
-	memfd_tag_pins(mapping);
+	memfd_tag_pins(&xas);
 
 	error = 0;
 	for (scan = 0; scan <= LAST_SCAN; scan++) {
-		if (!radix_tree_tagged(&mapping->i_pages, MEMFD_TAG_PINNED))
+		int latency = 0;
+		int cache_count;
+
+		if (!xas_marked(&xas, MEMFD_TAG_PINNED))
 			break;
 
 		if (!scan)
@@ -92,45 +92,42 @@
 		else if (schedule_timeout_killable((HZ << scan) / 200))
 			scan = LAST_SCAN;
 
-		start = 0;
-		rcu_read_lock();
-		radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter,
-					   start, MEMFD_TAG_PINNED) {
+		xas_set(&xas, 0);
+		xas_lock_irq(&xas);
+		xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
+			bool clear = true;
 
-			page = radix_tree_deref_slot(slot);
-			if (radix_tree_exception(page)) {
-				if (radix_tree_deref_retry(page)) {
-					slot = radix_tree_iter_retry(&iter);
-					continue;
-				}
+			cache_count = 1;
+			if (!xa_is_value(page) &&
+			    PageTransHuge(page) && !PageHuge(page))
+				cache_count = HPAGE_PMD_NR;
 
-				page = NULL;
-			}
-
-			if (page &&
-			    page_count(page) - page_mapcount(page) != 1) {
-				if (scan < LAST_SCAN)
-					goto continue_resched;
-
+			if (!xa_is_value(page) && cache_count !=
+			    page_count(page) - total_mapcount(page)) {
 				/*
 				 * On the last scan, we clean up all those tags
 				 * we inserted; but make a note that we still
 				 * found pages pinned.
 				 */
-				error = -EBUSY;
+				if (scan == LAST_SCAN)
+					error = -EBUSY;
+				else
+					clear = false;
 			}
+			if (clear)
+				xas_clear_mark(&xas, MEMFD_TAG_PINNED);
 
-			xa_lock_irq(&mapping->i_pages);
-			radix_tree_tag_clear(&mapping->i_pages,
-					     iter.index, MEMFD_TAG_PINNED);
-			xa_unlock_irq(&mapping->i_pages);
-continue_resched:
-			if (need_resched()) {
-				slot = radix_tree_iter_resume(slot, &iter);
-				cond_resched_rcu();
-			}
+			latency += cache_count;
+			if (latency < XA_CHECK_SCHED)
+				continue;
+			latency = 0;
+
+			xas_pause(&xas);
+			xas_unlock_irq(&xas);
+			cond_resched();
+			xas_lock_irq(&xas);
 		}
-		rcu_read_unlock();
+		xas_unlock_irq(&xas);
 	}
 
 	return error;
@@ -333,7 +330,8 @@
 
 	if (flags & MFD_ALLOW_SEALING) {
 		file_seals = memfd_file_seals_ptr(file);
-		*file_seals &= ~F_SEAL_SEAL;
+		if (file_seals)
+			*file_seals &= ~F_SEAL_SEAL;
 	}
 
 	fd_install(fd, file);
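
Both converted loops follow the same bounded-latency XArray walk: work is accumulated in latency, and once it reaches XA_CHECK_SCHED (4096, from <linux/xarray.h>) the iterator is paused, the lock dropped, and the CPU yielded. The pin check itself now compares page_count() against total_mapcount() plus the expected number of page-cache references (HPAGE_PMD_NR for a transparent huge page), so a THP is no longer misreported as pinned. A minimal sketch of the walk pattern, assuming a hypothetical count_entries() helper that is not part of this patch:

/*
 * Sketch only: the bounded-latency walk used by memfd_tag_pins() and
 * memfd_wait_for_pins() above, applied to a generic entry count.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/xarray.h>

static unsigned long count_entries(struct xarray *xa)
{
	XA_STATE(xas, xa, 0);
	unsigned long count = 0;
	int latency = 0;
	void *entry;

	xas_lock_irq(&xas);
	xas_for_each(&xas, entry, ULONG_MAX) {
		count++;
		if (++latency < XA_CHECK_SCHED)
			continue;
		latency = 0;
		/* Make the iterator safe across the lock drop, then yield. */
		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);

	return count;
}

xas_pause() is what makes the unlock legal: it records the position so the walk restarts at the next index once the lock is retaken, instead of resuming through a node that may have been freed in the meantime.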