@@ -21,46 +21,45 @@
 #include <uapi/linux/memfd.h>
 
 /*
- * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
+ * We need a tag: a new tag would expand every xa_node by 8 bytes,
  * so reuse a tag which we firmly believe is never set or cleared on tmpfs
  * or hugetlbfs because they are memory only filesystems.
  */
 #define MEMFD_TAG_PINNED        PAGECACHE_TAG_TOWRITE
 #define LAST_SCAN               4	/* about 150ms max */
 
-static void memfd_tag_pins(struct address_space *mapping)
+static void memfd_tag_pins(struct xa_state *xas)
 {
-	struct radix_tree_iter iter;
-	void __rcu **slot;
-	pgoff_t start;
 	struct page *page;
-	unsigned int tagged = 0;
+	int latency = 0;
+	int cache_count;
 
 	lru_add_drain();
-	start = 0;
 
-	xa_lock_irq(&mapping->i_pages);
-	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
-		page = radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-		if (!page || radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-		} else if (page_count(page) - page_mapcount(page) > 1) {
-			radix_tree_tag_set(&mapping->i_pages, iter.index,
-					   MEMFD_TAG_PINNED);
-		}
+	xas_lock_irq(xas);
+	xas_for_each(xas, page, ULONG_MAX) {
+		cache_count = 1;
+		if (!xa_is_value(page) &&
+		    PageTransHuge(page) && !PageHuge(page))
+			cache_count = HPAGE_PMD_NR;
 
-		if (++tagged % 1024)
+		if (!xa_is_value(page) &&
+		    page_count(page) - total_mapcount(page) != cache_count)
+			xas_set_mark(xas, MEMFD_TAG_PINNED);
+		if (cache_count != 1)
+			xas_set(xas, page->index + cache_count);
+
+		latency += cache_count;
+		if (latency < XA_CHECK_SCHED)
 			continue;
+		latency = 0;
 
-		slot = radix_tree_iter_resume(slot, &iter);
-		xa_unlock_irq(&mapping->i_pages);
+		xas_pause(xas);
+		xas_unlock_irq(xas);
 		cond_resched();
-		xa_lock_irq(&mapping->i_pages);
+		xas_lock_irq(xas);
 	}
-	xa_unlock_irq(&mapping->i_pages);
+	xas_unlock_irq(xas);
 }
 
 /*
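The contract that memfd_tag_pins() serves is visible from userspace: F_SEAL_WRITE may only be added once no other writers can reach the pages, otherwise the caller gets EBUSY. Below is a minimal sketch of that behavior, assuming Linux with glibc 2.27+ for memfd_create(); strictly, the writable mapping here is refused by an earlier check in memfd_add_seals(), while the scan above catches transient page pins (gup users such as AIO or vmsplice), but the user-visible EBUSY is the same.

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = memfd_create("seal-demo", MFD_ALLOW_SEALING);
	if (fd < 0 || ftruncate(fd, 4096) < 0)
		return 1;

	/* A shared writable mapping is an outstanding writer. */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE) < 0)
		printf("seal while mapped: %s\n", strerror(errno)); /* EBUSY */

	munmap(p, 4096);	/* drop the writer... */
	if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE) == 0)
		printf("seal after unmap: ok\n");

	close(fd);
	return 0;
}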
@@ -74,63 +73,61 @@
  */
 static int memfd_wait_for_pins(struct address_space *mapping)
 {
-	struct radix_tree_iter iter;
-	void __rcu **slot;
-	pgoff_t start;
+	XA_STATE(xas, &mapping->i_pages, 0);
 	struct page *page;
 	int error, scan;
 
-	memfd_tag_pins(mapping);
+	memfd_tag_pins(&xas);
 
 	error = 0;
 	for (scan = 0; scan <= LAST_SCAN; scan++) {
-		if (!radix_tree_tagged(&mapping->i_pages, MEMFD_TAG_PINNED))
+		int latency = 0;
+		int cache_count;
+
+		if (!xas_marked(&xas, MEMFD_TAG_PINNED))
 			break;
 
 		if (!scan)
 			lru_add_drain_all();
 		else if (schedule_timeout_killable((HZ << scan) / 200))
 			scan = LAST_SCAN;
 
-		start = 0;
-		rcu_read_lock();
-		radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter,
-					   start, MEMFD_TAG_PINNED) {
+		xas_set(&xas, 0);
+		xas_lock_irq(&xas);
+		xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
+			bool clear = true;
 
-			page = radix_tree_deref_slot(slot);
-			if (radix_tree_exception(page)) {
-				if (radix_tree_deref_retry(page)) {
-					slot = radix_tree_iter_retry(&iter);
-					continue;
-				}
+			cache_count = 1;
+			if (!xa_is_value(page) &&
+			    PageTransHuge(page) && !PageHuge(page))
+				cache_count = HPAGE_PMD_NR;
 
-				page = NULL;
-			}
-
-			if (page &&
-			    page_count(page) - page_mapcount(page) != 1) {
-				if (scan < LAST_SCAN)
-					goto continue_resched;
-
+			if (!xa_is_value(page) && cache_count !=
+			    page_count(page) - total_mapcount(page)) {
 				/*
 				 * On the last scan, we clean up all those tags
 				 * we inserted; but make a note that we still
 				 * found pages pinned.
 				 */
-				error = -EBUSY;
+				if (scan == LAST_SCAN)
+					error = -EBUSY;
+				else
+					clear = false;
 			}
+			if (clear)
+				xas_clear_mark(&xas, MEMFD_TAG_PINNED);
 
-			xa_lock_irq(&mapping->i_pages);
-			radix_tree_tag_clear(&mapping->i_pages,
-					     iter.index, MEMFD_TAG_PINNED);
-			xa_unlock_irq(&mapping->i_pages);
-continue_resched:
-			if (need_resched()) {
-				slot = radix_tree_iter_resume(slot, &iter);
-				cond_resched_rcu();
-			}
+			latency += cache_count;
+			if (latency < XA_CHECK_SCHED)
+				continue;
+			latency = 0;
+
+			xas_pause(&xas);
+			xas_unlock_irq(&xas);
+			cond_resched();
+			xas_lock_irq(&xas);
 		}
-		rcu_read_unlock();
+		xas_unlock_irq(&xas);
	}
 
 	return error;
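Stripped of the XArray and struct page details, the wait loop is a two-phase scan: tag every entry that looks pinned, then rescan with exponential backoff, clearing tags as entries drain and reporting -EBUSY only if the final pass still finds pins. The (HZ << scan) / 200 sleep is 5 ms times 2^scan, about 10+20+40+80 ms, which is where the "about 150ms max" comment comes from. Here is a standalone toy model of that control flow; every name in it (toy_wait_for_pins, struct entry, extra_refs) is invented for illustration and none of it is kernel API.

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define LAST_SCAN 4
#define NENTRIES  8

struct entry {
	int extra_refs;	/* stands in for references beyond the expected cache count */
	bool tagged;	/* stands in for MEMFD_TAG_PINNED */
};

static int toy_wait_for_pins(struct entry *tbl, int n)
{
	int error = 0;

	/* Phase 1: tag everything that currently looks pinned. */
	for (int i = 0; i < n; i++)
		tbl[i].tagged = tbl[i].extra_refs != 0;

	/* Phase 2: rescan tagged entries with exponential backoff. */
	for (int scan = 0; scan <= LAST_SCAN; scan++) {
		bool busy = false;

		if (scan)
			usleep(5000 << scan);	/* ~10, 20, 40, 80 ms */

		for (int i = 0; i < n; i++) {
			if (!tbl[i].tagged)
				continue;
			if (tbl[i].extra_refs != 0) {
				busy = true;
				if (scan < LAST_SCAN)
					continue;	/* keep tag, retry */
				error = -1;		/* -EBUSY analog */
			}
			tbl[i].tagged = false;	/* xas_clear_mark() analog */
		}
		if (!busy)
			break;
	}
	return error;
}

int main(void)
{
	struct entry tbl[NENTRIES] = { [2] = { .extra_refs = 1 } };

	/* Entry 2 never drains, so after ~150ms we give up with -EBUSY. */
	printf("result: %d\n", toy_wait_for_pins(tbl, NENTRIES));
	return 0;
}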
@@ -333,7 +330,8 @@
 
 	if (flags & MFD_ALLOW_SEALING) {
 		file_seals = memfd_file_seals_ptr(file);
-		*file_seals &= ~F_SEAL_SEAL;
+		if (file_seals)
+			*file_seals &= ~F_SEAL_SEAL;
 	}
 
 	fd_install(fd, file);
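The final hunk hardens memfd_create() itself: memfd_file_seals_ptr() can return NULL when the backing file is not one it knows how to seal (a hugetlbfs-backed memfd on some kernels), so clearing F_SEAL_SEAL must be guarded rather than dereferenced unconditionally. The success path is unchanged from userspace; a short sketch of the intended sealing setup, using only documented memfd_create()/fcntl() behavior:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* MFD_ALLOW_SEALING is what makes the hunk above clear F_SEAL_SEAL. */
	int fd = memfd_create("seals-demo", MFD_ALLOW_SEALING);
	if (fd < 0)
		return 1;

	printf("initial seals: %#x\n", fcntl(fd, F_GET_SEALS)); /* 0 */

	if (fcntl(fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK) == 0)
		printf("seals now:     %#x\n", fcntl(fd, F_GET_SEALS));

	/* Without MFD_ALLOW_SEALING the fd would start out F_SEAL_SEAL'ed
	 * and F_ADD_SEALS would fail with EPERM. */
	close(fd);
	return 0;
}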