.. | .. |
---|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
---|
1 | 2 | /* |
---|
2 | 3 | * linux/mm/mmu_notifier.c |
---|
3 | 4 | * |
---|
4 | 5 | * Copyright (C) 2008 Qumranet, Inc. |
---|
5 | 6 | * Copyright (C) 2008 SGI |
---|
6 | 7 | * Christoph Lameter <cl@linux.com> |
---|
7 | | - * |
---|
8 | | - * This work is licensed under the terms of the GNU GPL, version 2. See |
---|
9 | | - * the COPYING file in the top-level directory. |
---|
10 | 8 | */ |
---|
11 | 9 | |
---|
12 | 10 | #include <linux/rculist.h> |
---|
.. | .. |
---|
14 | 12 | #include <linux/export.h> |
---|
15 | 13 | #include <linux/mm.h> |
---|
16 | 14 | #include <linux/err.h> |
---|
| 15 | +#include <linux/interval_tree.h> |
---|
17 | 16 | #include <linux/srcu.h> |
---|
18 | 17 | #include <linux/rcupdate.h> |
---|
19 | 18 | #include <linux/sched.h> |
---|
.. | .. |
---|
23 | 22 | /* global SRCU for all MMs */ |
---|
24 | 23 | DEFINE_STATIC_SRCU(srcu); |
---|
25 | 24 | |
---|
26 | | -/* |
---|
27 | | - * This function allows mmu_notifier::release callback to delay a call to |
---|
28 | | - * a function that will free appropriate resources. The function must be |
---|
29 | | - * quick and must not block. |
---|
30 | | - */ |
---|
31 | | -void mmu_notifier_call_srcu(struct rcu_head *rcu, |
---|
32 | | - void (*func)(struct rcu_head *rcu)) |
---|
33 | | -{ |
---|
34 | | - call_srcu(&srcu, rcu, func); |
---|
35 | | -} |
---|
36 | | -EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu); |
---|
| 25 | +#ifdef CONFIG_LOCKDEP |
---|
| 26 | +struct lockdep_map __mmu_notifier_invalidate_range_start_map = { |
---|
| 27 | + .name = "mmu_notifier_invalidate_range_start" |
---|
| 28 | +}; |
---|
| 29 | +#endif |
---|
37 | 30 | |
---|
38 | | -void mmu_notifier_synchronize(void) |
---|
| 31 | +/* |
---|
| 32 | + * The mmu_notifier_subscriptions structure is allocated and installed in |
---|
| 33 | + * mm->notifier_subscriptions inside the mm_take_all_locks() protected |
---|
| 34 | + * critical section and it's released only when mm_count reaches zero |
---|
| 35 | + * in mmdrop(). |
---|
| 36 | + */ |
---|
| 37 | +struct mmu_notifier_subscriptions { |
---|
| 38 | + /* |
---|
| 39 | + * WARNING: hdr should be the first member of this structure |
---|
| 40 | + * so that it can be typecasted into mmu_notifier_subscriptions_hdr. |
---|
| 41 | + * This is required to avoid KMI CRC breakage. |
---|
| 42 | + */ |
---|
| 43 | + struct mmu_notifier_subscriptions_hdr hdr; |
---|
| 44 | + /* all mmu notifiers registered in this mm are queued in this list */ |
---|
| 45 | + struct hlist_head list; |
---|
| 46 | + bool has_itree; |
---|
| 47 | + /* to serialize the list modifications and hlist_unhashed */ |
---|
| 48 | + spinlock_t lock; |
---|
| 49 | + unsigned long invalidate_seq; |
---|
| 50 | + unsigned long active_invalidate_ranges; |
---|
| 51 | + struct rb_root_cached itree; |
---|
| 52 | + wait_queue_head_t wq; |
---|
| 53 | + struct hlist_head deferred_list; |
---|
| 54 | +}; |
---|
| 55 | + |
---|
| 56 | +/* |
---|
| 57 | + * This is a collision-retry read-side/write-side 'lock', a lot like a |
---|
| 58 | + * seqcount, however this allows multiple write-sides to hold it at |
---|
| 59 | + * once. Conceptually the write side is protecting the values of the PTEs in |
---|
| 60 | + * this mm, such that PTEs cannot be read into SPTEs (shadow PTEs) while any |
---|
| 61 | + * writer exists. |
---|
| 62 | + * |
---|
| 63 | + * Note that the core mm creates nested invalidate_range_start()/end() regions |
---|
| 64 | + * within the same thread, and runs invalidate_range_start()/end() in parallel |
---|
| 65 | + * on multiple CPUs. This is designed to not reduce concurrency or block |
---|
| 66 | + * progress on the mm side. |
---|
| 67 | + * |
---|
| 68 | + * As a secondary function, holding the full write side also serves to prevent |
---|
| 69 | + * writers for the itree, this is an optimization to avoid extra locking |
---|
| 70 | + * during invalidate_range_start/end notifiers. |
---|
| 71 | + * |
---|
| 72 | + * The write side has two states, fully excluded: |
---|
| 73 | + * - mm->active_invalidate_ranges != 0 |
---|
| 74 | + * - subscriptions->invalidate_seq & 1 == True (odd) |
---|
| 75 | + * - some range on the mm_struct is being invalidated |
---|
| 76 | + * - the itree is not allowed to change |
---|
| 77 | + * |
---|
| 78 | + * And partially excluded: |
---|
| 79 | + * - mm->active_invalidate_ranges != 0 |
---|
| 80 | + * - subscriptions->invalidate_seq & 1 == False (even) |
---|
| 81 | + * - some range on the mm_struct is being invalidated |
---|
| 82 | + * - the itree is allowed to change |
---|
| 83 | + * |
---|
| 84 | + * Operations on notifier_subscriptions->invalidate_seq (under spinlock): |
---|
| 85 | + * seq |= 1 # Begin writing |
---|
| 86 | + * seq++ # Release the writing state |
---|
| 87 | + * seq & 1 # True if a writer exists |
---|
| 88 | + * |
---|
| 89 | + * The latter state avoids some expensive work on inv_end in the common case of |
---|
| 90 | + * no mmu_interval_notifier monitoring the VA. |
---|
| 91 | + */ |
---|
| 92 | +static bool |
---|
| 93 | +mn_itree_is_invalidating(struct mmu_notifier_subscriptions *subscriptions) |
---|
39 | 94 | { |
---|
40 | | - /* Wait for any running method to finish. */ |
---|
41 | | - srcu_barrier(&srcu); |
---|
| 95 | + lockdep_assert_held(&subscriptions->lock); |
---|
| 96 | + return subscriptions->invalidate_seq & 1; |
---|
42 | 97 | } |
---|
43 | | -EXPORT_SYMBOL_GPL(mmu_notifier_synchronize); |
---|
| 98 | + |
---|
| 99 | +static struct mmu_interval_notifier * |
---|
| 100 | +mn_itree_inv_start_range(struct mmu_notifier_subscriptions *subscriptions, |
---|
| 101 | + const struct mmu_notifier_range *range, |
---|
| 102 | + unsigned long *seq) |
---|
| 103 | +{ |
---|
| 104 | + struct interval_tree_node *node; |
---|
| 105 | + struct mmu_interval_notifier *res = NULL; |
---|
| 106 | + |
---|
| 107 | + spin_lock(&subscriptions->lock); |
---|
| 108 | + subscriptions->active_invalidate_ranges++; |
---|
| 109 | + node = interval_tree_iter_first(&subscriptions->itree, range->start, |
---|
| 110 | + range->end - 1); |
---|
| 111 | + if (node) { |
---|
| 112 | + subscriptions->invalidate_seq |= 1; |
---|
| 113 | + res = container_of(node, struct mmu_interval_notifier, |
---|
| 114 | + interval_tree); |
---|
| 115 | + } |
---|
| 116 | + |
---|
| 117 | + *seq = subscriptions->invalidate_seq; |
---|
| 118 | + spin_unlock(&subscriptions->lock); |
---|
| 119 | + return res; |
---|
| 120 | +} |
---|
| 121 | + |
---|
| 122 | +static struct mmu_interval_notifier * |
---|
| 123 | +mn_itree_inv_next(struct mmu_interval_notifier *interval_sub, |
---|
| 124 | + const struct mmu_notifier_range *range) |
---|
| 125 | +{ |
---|
| 126 | + struct interval_tree_node *node; |
---|
| 127 | + |
---|
| 128 | + node = interval_tree_iter_next(&interval_sub->interval_tree, |
---|
| 129 | + range->start, range->end - 1); |
---|
| 130 | + if (!node) |
---|
| 131 | + return NULL; |
---|
| 132 | + return container_of(node, struct mmu_interval_notifier, interval_tree); |
---|
| 133 | +} |
---|
| 134 | + |
---|
| 135 | +static void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions) |
---|
| 136 | +{ |
---|
| 137 | + struct mmu_interval_notifier *interval_sub; |
---|
| 138 | + struct hlist_node *next; |
---|
| 139 | + |
---|
| 140 | + spin_lock(&subscriptions->lock); |
---|
| 141 | + if (--subscriptions->active_invalidate_ranges || |
---|
| 142 | + !mn_itree_is_invalidating(subscriptions)) { |
---|
| 143 | + spin_unlock(&subscriptions->lock); |
---|
| 144 | + return; |
---|
| 145 | + } |
---|
| 146 | + |
---|
| 147 | + /* Make invalidate_seq even */ |
---|
| 148 | + subscriptions->invalidate_seq++; |
---|
| 149 | + |
---|
| 150 | + /* |
---|
| 151 | + * The inv_end incorporates a deferred mechanism like rtnl_unlock(). |
---|
| 152 | + * Adds and removes are queued until the final inv_end happens then |
---|
| 153 | + * they are progressed. This arrangement for tree updates is used to |
---|
| 154 | + * avoid using a blocking lock during invalidate_range_start. |
---|
| 155 | + */ |
---|
| 156 | + hlist_for_each_entry_safe(interval_sub, next, |
---|
| 157 | + &subscriptions->deferred_list, |
---|
| 158 | + deferred_item) { |
---|
| 159 | + if (RB_EMPTY_NODE(&interval_sub->interval_tree.rb)) |
---|
| 160 | + interval_tree_insert(&interval_sub->interval_tree, |
---|
| 161 | + &subscriptions->itree); |
---|
| 162 | + else |
---|
| 163 | + interval_tree_remove(&interval_sub->interval_tree, |
---|
| 164 | + &subscriptions->itree); |
---|
| 165 | + hlist_del(&interval_sub->deferred_item); |
---|
| 166 | + } |
---|
| 167 | + spin_unlock(&subscriptions->lock); |
---|
| 168 | + |
---|
| 169 | + wake_up_all(&subscriptions->wq); |
---|
| 170 | +} |
---|
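For orientation only (not part of the patch): the write side described in the block comment above is entered and left by core-mm callers through the invalidate_range_start()/end() pair. A hedged sketch, assuming the mmu_notifier_range_init() signature of this kernel generation; example_clear_range() is hypothetical:

```c
static void example_clear_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	struct mmu_notifier_range range;

	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma,
				vma->vm_mm, start, end);
	/* Write side begins: mn_itree_inv_start_range() may set the odd seq. */
	mmu_notifier_invalidate_range_start(&range);
	/* ... tear down the PTEs under the page table lock ... */
	/* Write side ends: mn_itree_inv_end() flushes the deferred list. */
	mmu_notifier_invalidate_range_end(&range);
}
```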
| 171 | + |
---|
| 172 | +/** |
---|
| 173 | + * mmu_interval_read_begin - Begin a read side critical section against a VA |
---|
| 174 | + * range |
---|
| 175 | + * @interval_sub: The interval subscription |
---|
| 176 | + * |
---|
| 177 | + * mmu_interval_read_begin()/mmu_interval_read_retry() implement a |
---|
| 178 | + * collision-retry scheme similar to seqcount for the VA range under |
---|
| 179 | + * subscription. If the mm invokes invalidation during the critical section |
---|
| 180 | + * then mmu_interval_read_retry() will return true. |
---|
| 181 | + * |
---|
| 182 | + * This is useful to obtain shadow PTEs where teardown or setup of the SPTEs |
---|
| 183 | + * requires a blocking context. The critical region formed by this can sleep, |
---|
| 184 | + * and the required 'user_lock' can also be a sleeping lock. |
---|
| 185 | + * |
---|
| 186 | + * The caller is required to provide a 'user_lock' to serialize both teardown |
---|
| 187 | + * and setup. |
---|
| 188 | + * |
---|
| 189 | + * The return value should be passed to mmu_interval_read_retry(). |
---|
| 190 | + */ |
---|
| 191 | +unsigned long |
---|
| 192 | +mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub) |
---|
| 193 | +{ |
---|
| 194 | + struct mmu_notifier_subscriptions *subscriptions = |
---|
| 195 | + interval_sub->mm->notifier_subscriptions; |
---|
| 196 | + unsigned long seq; |
---|
| 197 | + bool is_invalidating; |
---|
| 198 | + |
---|
| 199 | + /* |
---|
| 200 | + * If the subscription has a different seq value under the user_lock |
---|
| 201 | + * than we started with then it has collided. |
---|
| 202 | + * |
---|
| 203 | + * If the subscription currently has the same seq value as the |
---|
| 204 | + * subscriptions seq, then it is currently between |
---|
| 205 | + * invalidate_start/end and is colliding. |
---|
| 206 | + * |
---|
| 207 | + * The locking looks broadly like this: |
---|
| 208 | + * mn_tree_invalidate_start(): mmu_interval_read_begin(): |
---|
| 209 | + * spin_lock |
---|
| 210 | + * seq = READ_ONCE(interval_sub->invalidate_seq); |
---|
| 211 | + * seq == subs->invalidate_seq |
---|
| 212 | + * spin_unlock |
---|
| 213 | + * spin_lock |
---|
| 214 | + * seq = ++subscriptions->invalidate_seq |
---|
| 215 | + * spin_unlock |
---|
| 216 | + * op->invalidate_range(): |
---|
| 217 | + * user_lock |
---|
| 218 | + * mmu_interval_set_seq() |
---|
| 219 | + * interval_sub->invalidate_seq = seq |
---|
| 220 | + * user_unlock |
---|
| 221 | + * |
---|
| 222 | + * [Required: mmu_interval_read_retry() == true] |
---|
| 223 | + * |
---|
| 224 | + * mn_itree_inv_end(): |
---|
| 225 | + * spin_lock |
---|
| 226 | + * seq = ++subscriptions->invalidate_seq |
---|
| 227 | + * spin_unlock |
---|
| 228 | + * |
---|
| 229 | + * user_lock |
---|
| 230 | + * mmu_interval_read_retry(): |
---|
| 231 | + * interval_sub->invalidate_seq != seq |
---|
| 232 | + * user_unlock |
---|
| 233 | + * |
---|
| 234 | + * Barriers are not needed here as any races here are closed by an |
---|
| 235 | + * eventual mmu_interval_read_retry(), which provides a barrier via the |
---|
| 236 | + * user_lock. |
---|
| 237 | + */ |
---|
| 238 | + spin_lock(&subscriptions->lock); |
---|
| 239 | + /* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */ |
---|
| 240 | + seq = READ_ONCE(interval_sub->invalidate_seq); |
---|
| 241 | + is_invalidating = seq == subscriptions->invalidate_seq; |
---|
| 242 | + spin_unlock(&subscriptions->lock); |
---|
| 243 | + |
---|
| 244 | + /* |
---|
| 245 | + * interval_sub->invalidate_seq must always be set to an odd value via |
---|
| 246 | + * mmu_interval_set_seq() using the provided cur_seq from |
---|
| 247 | + * mn_itree_inv_start_range(). This ensures that if seq does wrap we |
---|
| 248 | + * will always clear the below sleep in some reasonable time as |
---|
| 249 | + * subscriptions->invalidate_seq is even in the idle state. |
---|
| 250 | + */ |
---|
| 251 | + lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); |
---|
| 252 | + lock_map_release(&__mmu_notifier_invalidate_range_start_map); |
---|
| 253 | + if (is_invalidating) |
---|
| 254 | + wait_event(subscriptions->wq, |
---|
| 255 | + READ_ONCE(subscriptions->invalidate_seq) != seq); |
---|
| 256 | + |
---|
| 257 | + /* |
---|
| 258 | + * Notice that mmu_interval_read_retry() can already be true at this |
---|
| 259 | + * point, avoiding loops here allows the caller to provide a global |
---|
| 260 | + * time bound. |
---|
| 261 | + */ |
---|
| 262 | + |
---|
| 263 | + return seq; |
---|
| 264 | +} |
---|
| 265 | +EXPORT_SYMBOL_GPL(mmu_interval_read_begin); |
---|
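A hedged sketch of the collision-retry loop a driver is expected to build around this function (example_mirror, driver_populate_sptes() and driver_install_sptes() are hypothetical, not defined by this patch):

```c
static void example_fault(struct example_mirror *mirror)
{
	unsigned long seq;

	for (;;) {
		seq = mmu_interval_read_begin(&mirror->notifier);

		/* May sleep: gather the pages / build the SPTE values. */
		driver_populate_sptes(mirror);

		mutex_lock(&mirror->lock);		/* the 'user_lock' */
		if (!mmu_interval_read_retry(&mirror->notifier, seq))
			break;				/* no collision, commit */
		mutex_unlock(&mirror->lock);		/* collided, redo */
	}

	/* Install the SPTEs while still holding the user_lock. */
	driver_install_sptes(mirror);
	mutex_unlock(&mirror->lock);
}
```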
| 266 | + |
---|
| 267 | +static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions, |
---|
| 268 | + struct mm_struct *mm) |
---|
| 269 | +{ |
---|
| 270 | + struct mmu_notifier_range range = { |
---|
| 271 | + .flags = MMU_NOTIFIER_RANGE_BLOCKABLE, |
---|
| 272 | + .event = MMU_NOTIFY_RELEASE, |
---|
| 273 | + .mm = mm, |
---|
| 274 | + .start = 0, |
---|
| 275 | + .end = ULONG_MAX, |
---|
| 276 | + }; |
---|
| 277 | + struct mmu_interval_notifier *interval_sub; |
---|
| 278 | + unsigned long cur_seq; |
---|
| 279 | + bool ret; |
---|
| 280 | + |
---|
| 281 | + for (interval_sub = |
---|
| 282 | + mn_itree_inv_start_range(subscriptions, &range, &cur_seq); |
---|
| 283 | + interval_sub; |
---|
| 284 | + interval_sub = mn_itree_inv_next(interval_sub, &range)) { |
---|
| 285 | + ret = interval_sub->ops->invalidate(interval_sub, &range, |
---|
| 286 | + cur_seq); |
---|
| 287 | + WARN_ON(!ret); |
---|
| 288 | + } |
---|
| 289 | + |
---|
| 290 | + mn_itree_inv_end(subscriptions); |
---|
| 291 | +} |
---|
44 | 292 | |
---|
45 | 293 | /* |
---|
46 | 294 | * This function can't run concurrently against mmu_notifier_register |
---|
.. | .. |
---|
49 | 297 | * in parallel despite there being no task using this mm any more, |
---|
50 | 298 | * through the vmas outside of the exit_mmap context, such as with |
---|
51 | 299 | * vmtruncate. This serializes against mmu_notifier_unregister with |
---|
52 | | - * the mmu_notifier_mm->lock in addition to SRCU and it serializes |
---|
53 | | - * against the other mmu notifiers with SRCU. struct mmu_notifier_mm |
---|
| 300 | + * the notifier_subscriptions->lock in addition to SRCU and it serializes |
---|
| 301 | + * against the other mmu notifiers with SRCU. struct mmu_notifier_subscriptions |
---|
54 | 302 | * can't go away from under us as exit_mmap holds an mm_count pin |
---|
55 | 303 | * itself. |
---|
56 | 304 | */ |
---|
57 | | -void __mmu_notifier_release(struct mm_struct *mm) |
---|
| 305 | +static void mn_hlist_release(struct mmu_notifier_subscriptions *subscriptions, |
---|
| 306 | + struct mm_struct *mm) |
---|
58 | 307 | { |
---|
59 | | - struct mmu_notifier *mn; |
---|
| 308 | + struct mmu_notifier *subscription; |
---|
60 | 309 | int id; |
---|
61 | 310 | |
---|
62 | 311 | /* |
---|
.. | .. |
---|
64 | 313 | * ->release returns. |
---|
65 | 314 | */ |
---|
66 | 315 | id = srcu_read_lock(&srcu); |
---|
67 | | - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) |
---|
| 316 | + hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, |
---|
| 317 | + srcu_read_lock_held(&srcu)) |
---|
68 | 318 | /* |
---|
69 | 319 | * If ->release runs before mmu_notifier_unregister it must be |
---|
70 | 320 | * handled, as it's the only way for the driver to flush all |
---|
71 | 321 | * existing sptes and stop the driver from establishing any more |
---|
72 | 322 | * sptes before all the pages in the mm are freed. |
---|
73 | 323 | */ |
---|
74 | | - if (mn->ops->release) |
---|
75 | | - mn->ops->release(mn, mm); |
---|
| 324 | + if (subscription->ops->release) |
---|
| 325 | + subscription->ops->release(subscription, mm); |
---|
76 | 326 | |
---|
77 | | - spin_lock(&mm->mmu_notifier_mm->lock); |
---|
78 | | - while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { |
---|
79 | | - mn = hlist_entry(mm->mmu_notifier_mm->list.first, |
---|
80 | | - struct mmu_notifier, |
---|
81 | | - hlist); |
---|
| 327 | + spin_lock(&subscriptions->lock); |
---|
| 328 | + while (unlikely(!hlist_empty(&subscriptions->list))) { |
---|
| 329 | + subscription = hlist_entry(subscriptions->list.first, |
---|
| 330 | + struct mmu_notifier, hlist); |
---|
82 | 331 | /* |
---|
83 | 332 | * We arrived before mmu_notifier_unregister so |
---|
84 | 333 | * mmu_notifier_unregister will do nothing other than to wait |
---|
85 | 334 | * for ->release to finish and for mmu_notifier_unregister to |
---|
86 | 335 | * return. |
---|
87 | 336 | */ |
---|
88 | | - hlist_del_init_rcu(&mn->hlist); |
---|
| 337 | + hlist_del_init_rcu(&subscription->hlist); |
---|
89 | 338 | } |
---|
90 | | - spin_unlock(&mm->mmu_notifier_mm->lock); |
---|
| 339 | + spin_unlock(&subscriptions->lock); |
---|
91 | 340 | srcu_read_unlock(&srcu, id); |
---|
92 | 341 | |
---|
93 | 342 | /* |
---|
.. | .. |
---|
96 | 345 | * until the ->release method returns, if it was invoked by |
---|
97 | 346 | * mmu_notifier_unregister. |
---|
98 | 347 | * |
---|
99 | | - * The mmu_notifier_mm can't go away from under us because one mm_count |
---|
100 | | - * is held by exit_mmap. |
---|
| 348 | + * The notifier_subscriptions can't go away from under us because |
---|
| 349 | + * one mm_count is held by exit_mmap. |
---|
101 | 350 | */ |
---|
102 | 351 | synchronize_srcu(&srcu); |
---|
| 352 | +} |
---|
| 353 | + |
---|
| 354 | +void __mmu_notifier_release(struct mm_struct *mm) |
---|
| 355 | +{ |
---|
| 356 | + struct mmu_notifier_subscriptions *subscriptions = |
---|
| 357 | + mm->notifier_subscriptions; |
---|
| 358 | + |
---|
| 359 | + if (subscriptions->has_itree) |
---|
| 360 | + mn_itree_release(subscriptions, mm); |
---|
| 361 | + |
---|
| 362 | + if (!hlist_empty(&subscriptions->list)) |
---|
| 363 | + mn_hlist_release(subscriptions, mm); |
---|
103 | 364 | } |
---|
104 | 365 | |
---|
105 | 366 | /* |
---|
.. | .. |
---|
111 | 372 | unsigned long start, |
---|
112 | 373 | unsigned long end) |
---|
113 | 374 | { |
---|
114 | | - struct mmu_notifier *mn; |
---|
| 375 | + struct mmu_notifier *subscription; |
---|
115 | 376 | int young = 0, id; |
---|
116 | 377 | |
---|
117 | 378 | id = srcu_read_lock(&srcu); |
---|
118 | | - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { |
---|
119 | | - if (mn->ops->clear_flush_young) |
---|
120 | | - young |= mn->ops->clear_flush_young(mn, mm, start, end); |
---|
| 379 | + hlist_for_each_entry_rcu(subscription, |
---|
| 380 | + &mm->notifier_subscriptions->list, hlist, |
---|
| 381 | + srcu_read_lock_held(&srcu)) { |
---|
| 382 | + if (subscription->ops->clear_flush_young) |
---|
| 383 | + young |= subscription->ops->clear_flush_young( |
---|
| 384 | + subscription, mm, start, end); |
---|
121 | 385 | } |
---|
122 | 386 | srcu_read_unlock(&srcu, id); |
---|
123 | 387 | |
---|
.. | .. |
---|
128 | 392 | unsigned long start, |
---|
129 | 393 | unsigned long end) |
---|
130 | 394 | { |
---|
131 | | - struct mmu_notifier *mn; |
---|
| 395 | + struct mmu_notifier *subscription; |
---|
132 | 396 | int young = 0, id; |
---|
133 | 397 | |
---|
134 | 398 | id = srcu_read_lock(&srcu); |
---|
135 | | - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { |
---|
136 | | - if (mn->ops->clear_young) |
---|
137 | | - young |= mn->ops->clear_young(mn, mm, start, end); |
---|
| 399 | + hlist_for_each_entry_rcu(subscription, |
---|
| 400 | + &mm->notifier_subscriptions->list, hlist, |
---|
| 401 | + srcu_read_lock_held(&srcu)) { |
---|
| 402 | + if (subscription->ops->clear_young) |
---|
| 403 | + young |= subscription->ops->clear_young(subscription, |
---|
| 404 | + mm, start, end); |
---|
138 | 405 | } |
---|
139 | 406 | srcu_read_unlock(&srcu, id); |
---|
140 | 407 | |
---|
.. | .. |
---|
144 | 411 | int __mmu_notifier_test_young(struct mm_struct *mm, |
---|
145 | 412 | unsigned long address) |
---|
146 | 413 | { |
---|
147 | | - struct mmu_notifier *mn; |
---|
| 414 | + struct mmu_notifier *subscription; |
---|
148 | 415 | int young = 0, id; |
---|
149 | 416 | |
---|
150 | 417 | id = srcu_read_lock(&srcu); |
---|
151 | | - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { |
---|
152 | | - if (mn->ops->test_young) { |
---|
153 | | - young = mn->ops->test_young(mn, mm, address); |
---|
| 418 | + hlist_for_each_entry_rcu(subscription, |
---|
| 419 | + &mm->notifier_subscriptions->list, hlist, |
---|
| 420 | + srcu_read_lock_held(&srcu)) { |
---|
| 421 | + if (subscription->ops->test_young) { |
---|
| 422 | + young = subscription->ops->test_young(subscription, mm, |
---|
| 423 | + address); |
---|
154 | 424 | if (young) |
---|
155 | 425 | break; |
---|
156 | 426 | } |
---|
.. | .. |
---|
163 | 433 | void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, |
---|
164 | 434 | pte_t pte) |
---|
165 | 435 | { |
---|
166 | | - struct mmu_notifier *mn; |
---|
| 436 | + struct mmu_notifier *subscription; |
---|
167 | 437 | int id; |
---|
168 | 438 | |
---|
169 | 439 | id = srcu_read_lock(&srcu); |
---|
170 | | - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { |
---|
171 | | - if (mn->ops->change_pte) |
---|
172 | | - mn->ops->change_pte(mn, mm, address, pte); |
---|
| 440 | + hlist_for_each_entry_rcu(subscription, |
---|
| 441 | + &mm->notifier_subscriptions->list, hlist, |
---|
| 442 | + srcu_read_lock_held(&srcu)) { |
---|
| 443 | + if (subscription->ops->change_pte) |
---|
| 444 | + subscription->ops->change_pte(subscription, mm, address, |
---|
| 445 | + pte); |
---|
173 | 446 | } |
---|
174 | 447 | srcu_read_unlock(&srcu, id); |
---|
175 | 448 | } |
---|
176 | 449 | |
---|
177 | | -int __mmu_notifier_invalidate_range_start(struct mm_struct *mm, |
---|
178 | | - unsigned long start, unsigned long end, |
---|
179 | | - bool blockable) |
---|
| 450 | +static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions, |
---|
| 451 | + const struct mmu_notifier_range *range) |
---|
180 | 452 | { |
---|
181 | | - struct mmu_notifier *mn; |
---|
| 453 | + struct mmu_interval_notifier *interval_sub; |
---|
| 454 | + unsigned long cur_seq; |
---|
| 455 | + |
---|
| 456 | + for (interval_sub = |
---|
| 457 | + mn_itree_inv_start_range(subscriptions, range, &cur_seq); |
---|
| 458 | + interval_sub; |
---|
| 459 | + interval_sub = mn_itree_inv_next(interval_sub, range)) { |
---|
| 460 | + bool ret; |
---|
| 461 | + |
---|
| 462 | + ret = interval_sub->ops->invalidate(interval_sub, range, |
---|
| 463 | + cur_seq); |
---|
| 464 | + if (!ret) { |
---|
| 465 | + if (WARN_ON(mmu_notifier_range_blockable(range))) |
---|
| 466 | + continue; |
---|
| 467 | + goto out_would_block; |
---|
| 468 | + } |
---|
| 469 | + } |
---|
| 470 | + return 0; |
---|
| 471 | + |
---|
| 472 | +out_would_block: |
---|
| 473 | + /* |
---|
| 474 | + * On -EAGAIN the non-blocking caller is not allowed to call |
---|
| 475 | + * invalidate_range_end() |
---|
| 476 | + */ |
---|
| 477 | + mn_itree_inv_end(subscriptions); |
---|
| 478 | + return -EAGAIN; |
---|
| 479 | +} |
---|
| 480 | + |
---|
| 481 | +static int mn_hlist_invalidate_range_start( |
---|
| 482 | + struct mmu_notifier_subscriptions *subscriptions, |
---|
| 483 | + struct mmu_notifier_range *range) |
---|
| 484 | +{ |
---|
| 485 | + struct mmu_notifier *subscription; |
---|
182 | 486 | int ret = 0; |
---|
183 | 487 | int id; |
---|
184 | 488 | |
---|
185 | 489 | id = srcu_read_lock(&srcu); |
---|
186 | | - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { |
---|
187 | | - if (mn->ops->invalidate_range_start) { |
---|
188 | | - int _ret = mn->ops->invalidate_range_start(mn, mm, start, end, blockable); |
---|
| 490 | + hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, |
---|
| 491 | + srcu_read_lock_held(&srcu)) { |
---|
| 492 | + const struct mmu_notifier_ops *ops = subscription->ops; |
---|
| 493 | + |
---|
| 494 | + if (ops->invalidate_range_start) { |
---|
| 495 | + int _ret; |
---|
| 496 | + |
---|
| 497 | + if (!mmu_notifier_range_blockable(range)) |
---|
| 498 | + non_block_start(); |
---|
| 499 | + _ret = ops->invalidate_range_start(subscription, range); |
---|
| 500 | + if (!mmu_notifier_range_blockable(range)) |
---|
| 501 | + non_block_end(); |
---|
189 | 502 | if (_ret) { |
---|
190 | 503 | pr_info("%pS callback failed with %d in %sblockable context.\n", |
---|
191 | | - mn->ops->invalidate_range_start, _ret, |
---|
192 | | - !blockable ? "non-" : ""); |
---|
| 504 | + ops->invalidate_range_start, _ret, |
---|
| 505 | + !mmu_notifier_range_blockable(range) ? |
---|
| 506 | + "non-" : |
---|
| 507 | + ""); |
---|
| 508 | + WARN_ON(mmu_notifier_range_blockable(range) || |
---|
| 509 | + _ret != -EAGAIN); |
---|
| 510 | + /* |
---|
| 511 | + * We call all the notifiers on any EAGAIN, |
---|
| 512 | + * there is no way for a notifier to know if |
---|
| 513 | + * its start method failed, thus a start that |
---|
| 514 | + * does EAGAIN can't also do end. |
---|
| 515 | + */ |
---|
| 516 | + WARN_ON(ops->invalidate_range_end); |
---|
193 | 517 | ret = _ret; |
---|
194 | 518 | } |
---|
| 519 | + } |
---|
| 520 | + } |
---|
| 521 | + |
---|
| 522 | + if (ret) { |
---|
| 523 | + /* |
---|
| 524 | + * Must be non-blocking to get here. If there are multiple |
---|
| 525 | + * notifiers and one or more failed start, any that succeeded |
---|
| 526 | + * start are expecting their end to be called. Do so now. |
---|
| 527 | + */ |
---|
| 528 | + hlist_for_each_entry_rcu(subscription, &subscriptions->list, |
---|
| 529 | + hlist, srcu_read_lock_held(&srcu)) { |
---|
| 530 | + if (!subscription->ops->invalidate_range_end) |
---|
| 531 | + continue; |
---|
| 532 | + |
---|
| 533 | + subscription->ops->invalidate_range_end(subscription, |
---|
| 534 | + range); |
---|
195 | 535 | } |
---|
196 | 536 | } |
---|
197 | 537 | srcu_read_unlock(&srcu, id); |
---|
198 | 538 | |
---|
199 | 539 | return ret; |
---|
200 | 540 | } |
---|
201 | | -EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start); |
---|
202 | 541 | |
---|
203 | | -void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, |
---|
204 | | - unsigned long start, |
---|
205 | | - unsigned long end, |
---|
206 | | - bool only_end) |
---|
| 542 | +int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) |
---|
207 | 543 | { |
---|
208 | | - struct mmu_notifier *mn; |
---|
| 544 | + struct mmu_notifier_subscriptions *subscriptions = |
---|
| 545 | + range->mm->notifier_subscriptions; |
---|
| 546 | + int ret; |
---|
| 547 | + |
---|
| 548 | + if (subscriptions->has_itree) { |
---|
| 549 | + ret = mn_itree_invalidate(subscriptions, range); |
---|
| 550 | + if (ret) |
---|
| 551 | + return ret; |
---|
| 552 | + } |
---|
| 553 | + if (!hlist_empty(&subscriptions->list)) |
---|
| 554 | + return mn_hlist_invalidate_range_start(subscriptions, range); |
---|
| 555 | + return 0; |
---|
| 556 | +} |
---|
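A hedged sketch of how an hlist subscriber is expected to honour the blockable flag checked above (example_ctx and its mutex are hypothetical); note that an implementation whose start can return -EAGAIN must not also provide invalidate_range_end(), per the WARN_ON above:

```c
static int example_invalidate_range_start(struct mmu_notifier *subscription,
					  const struct mmu_notifier_range *range)
{
	struct example_ctx *ctx =
		container_of(subscription, struct example_ctx, mn);

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&ctx->lock);
	else if (!mutex_trylock(&ctx->lock))
		return -EAGAIN;		/* only legal in non-blockable context */

	/* ... zap shadow PTEs overlapping [range->start, range->end) ... */
	mutex_unlock(&ctx->lock);
	return 0;
}
```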
| 557 | + |
---|
| 558 | +static void |
---|
| 559 | +mn_hlist_invalidate_end(struct mmu_notifier_subscriptions *subscriptions, |
---|
| 560 | + struct mmu_notifier_range *range, bool only_end) |
---|
| 561 | +{ |
---|
| 562 | + struct mmu_notifier *subscription; |
---|
209 | 563 | int id; |
---|
210 | 564 | |
---|
211 | 565 | id = srcu_read_lock(&srcu); |
---|
212 | | - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { |
---|
| 566 | + hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, |
---|
| 567 | + srcu_read_lock_held(&srcu)) { |
---|
213 | 568 | /* |
---|
214 | 569 | * Call invalidate_range here too to avoid the need for the |
---|
215 | 570 | * subsystem of having to register an invalidate_range_end |
---|
.. | .. |
---|
223 | 578 | * is safe to do when we know that a call to invalidate_range() |
---|
224 | 579 | * already happen under page table lock. |
---|
225 | 580 | */ |
---|
226 | | - if (!only_end && mn->ops->invalidate_range) |
---|
227 | | - mn->ops->invalidate_range(mn, mm, start, end); |
---|
228 | | - if (mn->ops->invalidate_range_end) |
---|
229 | | - mn->ops->invalidate_range_end(mn, mm, start, end); |
---|
| 581 | + if (!only_end && subscription->ops->invalidate_range) |
---|
| 582 | + subscription->ops->invalidate_range(subscription, |
---|
| 583 | + range->mm, |
---|
| 584 | + range->start, |
---|
| 585 | + range->end); |
---|
| 586 | + if (subscription->ops->invalidate_range_end) { |
---|
| 587 | + if (!mmu_notifier_range_blockable(range)) |
---|
| 588 | + non_block_start(); |
---|
| 589 | + subscription->ops->invalidate_range_end(subscription, |
---|
| 590 | + range); |
---|
| 591 | + if (!mmu_notifier_range_blockable(range)) |
---|
| 592 | + non_block_end(); |
---|
| 593 | + } |
---|
230 | 594 | } |
---|
231 | 595 | srcu_read_unlock(&srcu, id); |
---|
232 | 596 | } |
---|
233 | | -EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_end); |
---|
| 597 | + |
---|
| 598 | +void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range, |
---|
| 599 | + bool only_end) |
---|
| 600 | +{ |
---|
| 601 | + struct mmu_notifier_subscriptions *subscriptions = |
---|
| 602 | + range->mm->notifier_subscriptions; |
---|
| 603 | + |
---|
| 604 | + lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); |
---|
| 605 | + if (subscriptions->has_itree) |
---|
| 606 | + mn_itree_inv_end(subscriptions); |
---|
| 607 | + |
---|
| 608 | + if (!hlist_empty(&subscriptions->list)) |
---|
| 609 | + mn_hlist_invalidate_end(subscriptions, range, only_end); |
---|
| 610 | + lock_map_release(&__mmu_notifier_invalidate_range_start_map); |
---|
| 611 | +} |
---|
234 | 612 | |
---|
235 | 613 | void __mmu_notifier_invalidate_range(struct mm_struct *mm, |
---|
236 | 614 | unsigned long start, unsigned long end) |
---|
237 | 615 | { |
---|
238 | | - struct mmu_notifier *mn; |
---|
| 616 | + struct mmu_notifier *subscription; |
---|
239 | 617 | int id; |
---|
240 | 618 | |
---|
241 | 619 | id = srcu_read_lock(&srcu); |
---|
242 | | - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { |
---|
243 | | - if (mn->ops->invalidate_range) |
---|
244 | | - mn->ops->invalidate_range(mn, mm, start, end); |
---|
| 620 | + hlist_for_each_entry_rcu(subscription, |
---|
| 621 | + &mm->notifier_subscriptions->list, hlist, |
---|
| 622 | + srcu_read_lock_held(&srcu)) { |
---|
| 623 | + if (subscription->ops->invalidate_range) |
---|
| 624 | + subscription->ops->invalidate_range(subscription, mm, |
---|
| 625 | + start, end); |
---|
245 | 626 | } |
---|
246 | 627 | srcu_read_unlock(&srcu, id); |
---|
247 | 628 | } |
---|
248 | | -EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range); |
---|
| 629 | + |
---|
| 630 | +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT |
---|
| 631 | + |
---|
| 632 | +static inline void mmu_notifier_write_lock(struct mm_struct *mm) |
---|
| 633 | +{ |
---|
| 634 | + percpu_down_write( |
---|
| 635 | + &mm->notifier_subscriptions->hdr.mmu_notifier_lock->rw_sem); |
---|
| 636 | +} |
---|
| 637 | + |
---|
| 638 | +static inline void mmu_notifier_write_unlock(struct mm_struct *mm) |
---|
| 639 | +{ |
---|
| 640 | + percpu_up_write( |
---|
| 641 | + &mm->notifier_subscriptions->hdr.mmu_notifier_lock->rw_sem); |
---|
| 642 | +} |
---|
| 643 | + |
---|
| 644 | +#else /* CONFIG_SPECULATIVE_PAGE_FAULT */ |
---|
| 645 | + |
---|
| 646 | +static inline void mmu_notifier_write_lock(struct mm_struct *mm) {} |
---|
| 647 | +static inline void mmu_notifier_write_unlock(struct mm_struct *mm) {} |
---|
| 648 | + |
---|
| 649 | +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ |
---|
| 650 | + |
---|
| 651 | +static void init_subscriptions(struct mmu_notifier_subscriptions *subscriptions) |
---|
| 652 | +{ |
---|
| 653 | + INIT_HLIST_HEAD(&subscriptions->list); |
---|
| 654 | + spin_lock_init(&subscriptions->lock); |
---|
| 655 | + subscriptions->invalidate_seq = 2; |
---|
| 656 | + subscriptions->itree = RB_ROOT_CACHED; |
---|
| 657 | + init_waitqueue_head(&subscriptions->wq); |
---|
| 658 | + INIT_HLIST_HEAD(&subscriptions->deferred_list); |
---|
| 659 | +} |
---|
249 | 660 | |
---|
250 | 661 | /* |
---|
251 | | - * Must be called while holding mm->mmap_sem for either read or write. |
---|
252 | | - * The result is guaranteed to be valid until mm->mmap_sem is dropped. |
---|
| 662 | + * Same as mmu_notifier_register but here the caller must hold the mmap_lock in |
---|
| 663 | + * write mode. A NULL mn signals the notifier is being registered for itree |
---|
| 664 | + * mode. |
---|
253 | 665 | */ |
---|
254 | | -bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm) |
---|
| 666 | +int __mmu_notifier_register(struct mmu_notifier *subscription, |
---|
| 667 | + struct mm_struct *mm) |
---|
255 | 668 | { |
---|
256 | | - struct mmu_notifier *mn; |
---|
257 | | - int id; |
---|
258 | | - bool ret = false; |
---|
259 | | - |
---|
260 | | - WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem)); |
---|
261 | | - |
---|
262 | | - if (!mm_has_notifiers(mm)) |
---|
263 | | - return ret; |
---|
264 | | - |
---|
265 | | - id = srcu_read_lock(&srcu); |
---|
266 | | - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { |
---|
267 | | - if (!mn->ops->invalidate_range && |
---|
268 | | - !mn->ops->invalidate_range_start && |
---|
269 | | - !mn->ops->invalidate_range_end) |
---|
270 | | - continue; |
---|
271 | | - |
---|
272 | | - if (!(mn->ops->flags & MMU_INVALIDATE_DOES_NOT_BLOCK)) { |
---|
273 | | - ret = true; |
---|
274 | | - break; |
---|
275 | | - } |
---|
276 | | - } |
---|
277 | | - srcu_read_unlock(&srcu, id); |
---|
278 | | - return ret; |
---|
279 | | -} |
---|
280 | | - |
---|
281 | | -static int do_mmu_notifier_register(struct mmu_notifier *mn, |
---|
282 | | - struct mm_struct *mm, |
---|
283 | | - int take_mmap_sem) |
---|
284 | | -{ |
---|
285 | | - struct mmu_notifier_mm *mmu_notifier_mm; |
---|
| 669 | + struct mmu_notifier_subscriptions *subscriptions = NULL; |
---|
286 | 670 | int ret; |
---|
287 | 671 | |
---|
| 672 | + mmap_assert_write_locked(mm); |
---|
288 | 673 | BUG_ON(atomic_read(&mm->mm_users) <= 0); |
---|
289 | 674 | |
---|
290 | | - ret = -ENOMEM; |
---|
291 | | - mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL); |
---|
292 | | - if (unlikely(!mmu_notifier_mm)) |
---|
293 | | - goto out; |
---|
294 | | - |
---|
295 | | - if (take_mmap_sem) |
---|
296 | | - down_write(&mm->mmap_sem); |
---|
297 | | - ret = mm_take_all_locks(mm); |
---|
298 | | - if (unlikely(ret)) |
---|
299 | | - goto out_clean; |
---|
300 | | - |
---|
301 | | - if (!mm_has_notifiers(mm)) { |
---|
302 | | - INIT_HLIST_HEAD(&mmu_notifier_mm->list); |
---|
303 | | - spin_lock_init(&mmu_notifier_mm->lock); |
---|
304 | | - |
---|
305 | | - mm->mmu_notifier_mm = mmu_notifier_mm; |
---|
306 | | - mmu_notifier_mm = NULL; |
---|
| 675 | + if (IS_ENABLED(CONFIG_LOCKDEP)) { |
---|
| 676 | + fs_reclaim_acquire(GFP_KERNEL); |
---|
| 677 | + lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); |
---|
| 678 | + lock_map_release(&__mmu_notifier_invalidate_range_start_map); |
---|
| 679 | + fs_reclaim_release(GFP_KERNEL); |
---|
307 | 680 | } |
---|
308 | | - mmgrab(mm); |
---|
| 681 | + |
---|
| 682 | + if (!mm->notifier_subscriptions) { |
---|
| 683 | + /* |
---|
| 684 | + * kmalloc cannot be called under mm_take_all_locks(), but we |
---|
| 685 | + * know that mm->notifier_subscriptions can't change while we |
---|
| 686 | + * hold the write side of the mmap_lock. |
---|
| 687 | + */ |
---|
| 688 | + subscriptions = kzalloc( |
---|
| 689 | + sizeof(struct mmu_notifier_subscriptions), GFP_KERNEL); |
---|
| 690 | + if (!subscriptions) |
---|
| 691 | + return -ENOMEM; |
---|
| 692 | + |
---|
| 693 | + init_subscriptions(subscriptions); |
---|
| 694 | + } |
---|
| 695 | + |
---|
| 696 | + mmu_notifier_write_lock(mm); |
---|
| 697 | + |
---|
| 698 | + ret = mm_take_all_locks(mm); |
---|
| 699 | + if (unlikely(ret)) { |
---|
| 700 | + mmu_notifier_write_unlock(mm); |
---|
| 701 | + goto out_clean; |
---|
| 702 | + } |
---|
309 | 703 | |
---|
310 | 704 | /* |
---|
311 | 705 | * Serialize the update against mmu_notifier_unregister. A |
---|
.. | .. |
---|
314 | 708 | * current->mm or explicitly with get_task_mm() or similar). |
---|
315 | 709 | * We can't race against any other mmu notifier method either |
---|
316 | 710 | * thanks to mm_take_all_locks(). |
---|
| 711 | + * |
---|
| 712 | + * release semantics on the initialization of the |
---|
| 713 | + * mmu_notifier_subscriptions's contents are provided for unlocked |
---|
| 714 | + * readers. acquire can only be used while holding the mmgrab or |
---|
| 715 | + * mmget, and is safe because once created the |
---|
| 716 | + * mmu_notifier_subscriptions is not freed until the mm is destroyed. |
---|
| 717 | + * As above, users holding the mmap_lock or one of the |
---|
| 718 | + * mm_take_all_locks() do not need to use acquire semantics. |
---|
317 | 719 | */ |
---|
318 | | - spin_lock(&mm->mmu_notifier_mm->lock); |
---|
319 | | - hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier_mm->list); |
---|
320 | | - spin_unlock(&mm->mmu_notifier_mm->lock); |
---|
| 720 | + if (subscriptions) |
---|
| 721 | + smp_store_release(&mm->notifier_subscriptions, subscriptions); |
---|
| 722 | + mm->notifier_subscriptions->hdr.valid = true; |
---|
| 723 | + |
---|
| 724 | + if (subscription) { |
---|
| 725 | + /* Pairs with the mmdrop in mmu_notifier_unregister_* */ |
---|
| 726 | + mmgrab(mm); |
---|
| 727 | + subscription->mm = mm; |
---|
| 728 | + subscription->users = 1; |
---|
| 729 | + |
---|
| 730 | + spin_lock(&mm->notifier_subscriptions->lock); |
---|
| 731 | + hlist_add_head_rcu(&subscription->hlist, |
---|
| 732 | + &mm->notifier_subscriptions->list); |
---|
| 733 | + spin_unlock(&mm->notifier_subscriptions->lock); |
---|
| 734 | + } else |
---|
| 735 | + mm->notifier_subscriptions->has_itree = true; |
---|
321 | 736 | |
---|
322 | 737 | mm_drop_all_locks(mm); |
---|
323 | | -out_clean: |
---|
324 | | - if (take_mmap_sem) |
---|
325 | | - up_write(&mm->mmap_sem); |
---|
326 | | - kfree(mmu_notifier_mm); |
---|
327 | | -out: |
---|
| 738 | + mmu_notifier_write_unlock(mm); |
---|
328 | 739 | BUG_ON(atomic_read(&mm->mm_users) <= 0); |
---|
| 740 | + return 0; |
---|
| 741 | + |
---|
| 742 | +out_clean: |
---|
| 743 | + kfree(subscriptions); |
---|
329 | 744 | return ret; |
---|
330 | 745 | } |
---|
| 746 | +EXPORT_SYMBOL_GPL(__mmu_notifier_register); |
---|
331 | 747 | |
---|
332 | | -/* |
---|
333 | | - * Must not hold mmap_sem nor any other VM related lock when calling |
---|
| 748 | +/** |
---|
| 749 | + * mmu_notifier_register - Register a notifier on a mm |
---|
| 750 | + * @subscription: The notifier to attach |
---|
| 751 | + * @mm: The mm to attach the notifier to |
---|
| 752 | + * |
---|
| 753 | + * Must not hold mmap_lock nor any other VM related lock when calling |
---|
334 | 754 | * this registration function. Must also ensure mm_users can't go down |
---|
335 | 755 | * to zero while this runs to avoid races with mmu_notifier_release, |
---|
336 | 756 | * so mm has to be current->mm or the mm should be pinned safely such |
---|
337 | 757 | * as with get_task_mm(). If the mm is not current->mm, the mm_users |
---|
338 | 758 | * pin should be released by calling mmput after mmu_notifier_register |
---|
339 | | - * returns. mmu_notifier_unregister must be always called to |
---|
340 | | - * unregister the notifier. mm_count is automatically pinned to allow |
---|
341 | | - * mmu_notifier_unregister to safely run at any time later, before or |
---|
342 | | - * after exit_mmap. ->release will always be called before exit_mmap |
---|
343 | | - * frees the pages. |
---|
| 759 | + * returns. |
---|
| 760 | + * |
---|
| 761 | + * mmu_notifier_unregister() or mmu_notifier_put() must be always called to |
---|
| 762 | + * unregister the notifier. |
---|
| 763 | + * |
---|
| 764 | + * While the caller holds a mmu_notifier get, the subscription->mm pointer will remain |
---|
| 765 | + * valid, and can be converted to an active mm pointer via mmget_not_zero(). |
---|
344 | 766 | */ |
---|
345 | | -int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) |
---|
| 767 | +int mmu_notifier_register(struct mmu_notifier *subscription, |
---|
| 768 | + struct mm_struct *mm) |
---|
346 | 769 | { |
---|
347 | | - return do_mmu_notifier_register(mn, mm, 1); |
---|
| 770 | + int ret; |
---|
| 771 | + |
---|
| 772 | + mmap_write_lock(mm); |
---|
| 773 | + ret = __mmu_notifier_register(subscription, mm); |
---|
| 774 | + mmap_write_unlock(mm); |
---|
| 775 | + return ret; |
---|
348 | 776 | } |
---|
349 | 777 | EXPORT_SYMBOL_GPL(mmu_notifier_register); |
---|
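A hedged sketch of the classic register/unregister flow this wrapper serves (example_reg_ops, example_mn and the attach/detach helpers are hypothetical):

```c
static const struct mmu_notifier_ops example_reg_ops = {
	/* .release, .invalidate_range_start, ... as needed */
};

static struct mmu_notifier example_mn = {
	.ops = &example_reg_ops,
};

static int example_attach(struct mm_struct *mm)
{
	/* Takes mmap_write_lock(mm) internally and pins mm_count. */
	return mmu_notifier_register(&example_mn, mm);
}

static void example_detach(struct mm_struct *mm)
{
	/* Waits for running callbacks, then drops the mm_count pin. */
	mmu_notifier_unregister(&example_mn, mm);
}
```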
350 | 778 | |
---|
351 | | -/* |
---|
352 | | - * Same as mmu_notifier_register but here the caller must hold the |
---|
353 | | - * mmap_sem in write mode. |
---|
354 | | - */ |
---|
355 | | -int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) |
---|
| 779 | +static struct mmu_notifier * |
---|
| 780 | +find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops) |
---|
356 | 781 | { |
---|
357 | | - return do_mmu_notifier_register(mn, mm, 0); |
---|
| 782 | + struct mmu_notifier *subscription; |
---|
| 783 | + |
---|
| 784 | + spin_lock(&mm->notifier_subscriptions->lock); |
---|
| 785 | + hlist_for_each_entry_rcu(subscription, |
---|
| 786 | + &mm->notifier_subscriptions->list, hlist, |
---|
| 787 | + lockdep_is_held(&mm->notifier_subscriptions->lock)) { |
---|
| 788 | + if (subscription->ops != ops) |
---|
| 789 | + continue; |
---|
| 790 | + |
---|
| 791 | + if (likely(subscription->users != UINT_MAX)) |
---|
| 792 | + subscription->users++; |
---|
| 793 | + else |
---|
| 794 | + subscription = ERR_PTR(-EOVERFLOW); |
---|
| 795 | + spin_unlock(&mm->notifier_subscriptions->lock); |
---|
| 796 | + return subscription; |
---|
| 797 | + } |
---|
| 798 | + spin_unlock(&mm->notifier_subscriptions->lock); |
---|
| 799 | + return NULL; |
---|
358 | 800 | } |
---|
359 | | -EXPORT_SYMBOL_GPL(__mmu_notifier_register); |
---|
| 801 | + |
---|
| 802 | +/** |
---|
| 803 | + * mmu_notifier_get_locked - Return the single struct mmu_notifier for |
---|
| 804 | + * the mm & ops |
---|
| 805 | + * @ops: The operations struct being subscribed with |
---|
| 806 | + * @mm: The mm to attach notifiers to |
---|
| 807 | + * |
---|
| 808 | + * This function either allocates a new mmu_notifier via |
---|
| 809 | + * ops->alloc_notifier(), or returns an already existing notifier on the |
---|
| 810 | + * list. The value of the ops pointer is used to determine when two notifiers |
---|
| 811 | + * are the same. |
---|
| 812 | + * |
---|
| 813 | + * Each call to mmu_notifier_get() must be paired with a call to |
---|
| 814 | + * mmu_notifier_put(). The caller must hold the write side of mm->mmap_lock. |
---|
| 815 | + * |
---|
| 816 | + * While the caller has a mmu_notifier get the mm pointer will remain valid, |
---|
| 817 | + * and can be converted to an active mm pointer via mmget_not_zero(). |
---|
| 818 | + */ |
---|
| 819 | +struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops, |
---|
| 820 | + struct mm_struct *mm) |
---|
| 821 | +{ |
---|
| 822 | + struct mmu_notifier *subscription; |
---|
| 823 | + int ret; |
---|
| 824 | + |
---|
| 825 | + mmap_assert_write_locked(mm); |
---|
| 826 | + |
---|
| 827 | + if (mm->notifier_subscriptions) { |
---|
| 828 | + subscription = find_get_mmu_notifier(mm, ops); |
---|
| 829 | + if (subscription) |
---|
| 830 | + return subscription; |
---|
| 831 | + } |
---|
| 832 | + |
---|
| 833 | + subscription = ops->alloc_notifier(mm); |
---|
| 834 | + if (IS_ERR(subscription)) |
---|
| 835 | + return subscription; |
---|
| 836 | + subscription->ops = ops; |
---|
| 837 | + ret = __mmu_notifier_register(subscription, mm); |
---|
| 838 | + if (ret) |
---|
| 839 | + goto out_free; |
---|
| 840 | + return subscription; |
---|
| 841 | +out_free: |
---|
| 842 | + subscription->ops->free_notifier(subscription); |
---|
| 843 | + return ERR_PTR(ret); |
---|
| 844 | +} |
---|
| 845 | +EXPORT_SYMBOL_GPL(mmu_notifier_get_locked); |
---|
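A hedged sketch of the allocation side this function relies on (example_ctx and example_ops are hypothetical; mmu_notifier_get() is the mmap_lock-taking wrapper declared in the header):

```c
static struct mmu_notifier *example_alloc_notifier(struct mm_struct *mm)
{
	struct example_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return ERR_PTR(-ENOMEM);
	return &ctx->mn;		/* mmu_notifier embedded in the context */
}

static void example_free_notifier(struct mmu_notifier *subscription)
{
	kfree(container_of(subscription, struct example_ctx, mn));
}

static const struct mmu_notifier_ops example_ops = {
	.alloc_notifier	= example_alloc_notifier,
	.free_notifier	= example_free_notifier,
};

static struct example_ctx *example_ctx_get(struct mm_struct *mm)
{
	struct mmu_notifier *mn = mmu_notifier_get(&example_ops, mm);

	if (IS_ERR(mn))
		return ERR_CAST(mn);
	/* Shared per (mm, ops) pair; drop with mmu_notifier_put() when done. */
	return container_of(mn, struct example_ctx, mn);
}
```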
360 | 846 | |
---|
361 | 847 | /* this is called after the last mmu_notifier_unregister() returned */ |
---|
362 | | -void __mmu_notifier_mm_destroy(struct mm_struct *mm) |
---|
| 848 | +void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm) |
---|
363 | 849 | { |
---|
364 | | - BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list)); |
---|
365 | | - kfree(mm->mmu_notifier_mm); |
---|
366 | | - mm->mmu_notifier_mm = LIST_POISON1; /* debug */ |
---|
| 850 | + BUG_ON(!hlist_empty(&mm->notifier_subscriptions->list)); |
---|
| 851 | + kfree(mm->notifier_subscriptions); |
---|
| 852 | + mm->notifier_subscriptions = LIST_POISON1; /* debug */ |
---|
367 | 853 | } |
---|
368 | 854 | |
---|
369 | 855 | /* |
---|
.. | .. |
---|
376 | 862 | * and only after mmu_notifier_unregister returned we're guaranteed |
---|
377 | 863 | * that ->release or any other method can't run anymore. |
---|
378 | 864 | */ |
---|
379 | | -void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) |
---|
| 865 | +void mmu_notifier_unregister(struct mmu_notifier *subscription, |
---|
| 866 | + struct mm_struct *mm) |
---|
380 | 867 | { |
---|
381 | 868 | BUG_ON(atomic_read(&mm->mm_count) <= 0); |
---|
382 | 869 | |
---|
383 | | - if (!hlist_unhashed(&mn->hlist)) { |
---|
| 870 | + if (!hlist_unhashed(&subscription->hlist)) { |
---|
384 | 871 | /* |
---|
385 | 872 | * SRCU here will force exit_mmap to wait for ->release to |
---|
386 | 873 | * finish before freeing the pages. |
---|
.. | .. |
---|
392 | 879 | * exit_mmap will block in mmu_notifier_release to guarantee |
---|
393 | 880 | * that ->release is called before freeing the pages. |
---|
394 | 881 | */ |
---|
395 | | - if (mn->ops->release) |
---|
396 | | - mn->ops->release(mn, mm); |
---|
| 882 | + if (subscription->ops->release) |
---|
| 883 | + subscription->ops->release(subscription, mm); |
---|
397 | 884 | srcu_read_unlock(&srcu, id); |
---|
398 | 885 | |
---|
399 | | - spin_lock(&mm->mmu_notifier_mm->lock); |
---|
| 886 | + spin_lock(&mm->notifier_subscriptions->lock); |
---|
400 | 887 | /* |
---|
401 | 888 | * Can not use list_del_rcu() since __mmu_notifier_release |
---|
402 | 889 | * can delete it before we hold the lock. |
---|
403 | 890 | */ |
---|
404 | | - hlist_del_init_rcu(&mn->hlist); |
---|
405 | | - spin_unlock(&mm->mmu_notifier_mm->lock); |
---|
| 891 | + hlist_del_init_rcu(&subscription->hlist); |
---|
| 892 | + spin_unlock(&mm->notifier_subscriptions->lock); |
---|
406 | 893 | } |
---|
407 | 894 | |
---|
408 | 895 | /* |
---|
.. | .. |
---|
417 | 904 | } |
---|
418 | 905 | EXPORT_SYMBOL_GPL(mmu_notifier_unregister); |
---|
419 | 906 | |
---|
420 | | -/* |
---|
421 | | - * Same as mmu_notifier_unregister but no callback and no srcu synchronization. |
---|
422 | | - */ |
---|
423 | | -void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, |
---|
424 | | - struct mm_struct *mm) |
---|
| 907 | +static void mmu_notifier_free_rcu(struct rcu_head *rcu) |
---|
425 | 908 | { |
---|
426 | | - spin_lock(&mm->mmu_notifier_mm->lock); |
---|
427 | | - /* |
---|
428 | | - * Can not use list_del_rcu() since __mmu_notifier_release |
---|
429 | | - * can delete it before we hold the lock. |
---|
430 | | - */ |
---|
431 | | - hlist_del_init_rcu(&mn->hlist); |
---|
432 | | - spin_unlock(&mm->mmu_notifier_mm->lock); |
---|
| 909 | + struct mmu_notifier *subscription = |
---|
| 910 | + container_of(rcu, struct mmu_notifier, rcu); |
---|
| 911 | + struct mm_struct *mm = subscription->mm; |
---|
433 | 912 | |
---|
434 | | - BUG_ON(atomic_read(&mm->mm_count) <= 0); |
---|
| 913 | + subscription->ops->free_notifier(subscription); |
---|
| 914 | + /* Pairs with the get in __mmu_notifier_register() */ |
---|
435 | 915 | mmdrop(mm); |
---|
436 | 916 | } |
---|
437 | | -EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release); |
---|
| 917 | + |
---|
| 918 | +/** |
---|
| 919 | + * mmu_notifier_put - Release the reference on the notifier |
---|
| 920 | + * @subscription: The notifier to act on |
---|
| 921 | + * |
---|
| 922 | + * This function must be paired with each mmu_notifier_get(), it releases the |
---|
| 923 | + * reference obtained by the get. If this is the last reference then the |
---|
| 924 | + * process to free the notifier will be run asynchronously. |
---|
| 925 | + * |
---|
| 926 | + * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release |
---|
| 927 | + * when the mm_struct is destroyed. Instead free_notifier is always called to |
---|
| 928 | + * release any resources held by the user. |
---|
| 929 | + * |
---|
| 930 | + * As ops->release is not guaranteed to be called, the user must ensure that |
---|
| 931 | + * all sptes are dropped, and no new sptes can be established before |
---|
| 932 | + * mmu_notifier_put() is called. |
---|
| 933 | + * |
---|
| 934 | + * This function can be called from the ops->release callback, however the |
---|
| 935 | + * caller must still ensure it is called pairwise with mmu_notifier_get(). |
---|
| 936 | + * |
---|
| 937 | + * Modules calling this function must call mmu_notifier_synchronize() in |
---|
| 938 | + * their __exit functions to ensure the async work is completed. |
---|
| 939 | + */ |
---|
| 940 | +void mmu_notifier_put(struct mmu_notifier *subscription) |
---|
| 941 | +{ |
---|
| 942 | + struct mm_struct *mm = subscription->mm; |
---|
| 943 | + |
---|
| 944 | + spin_lock(&mm->notifier_subscriptions->lock); |
---|
| 945 | + if (WARN_ON(!subscription->users) || --subscription->users) |
---|
| 946 | + goto out_unlock; |
---|
| 947 | + hlist_del_init_rcu(&subscription->hlist); |
---|
| 948 | + spin_unlock(&mm->notifier_subscriptions->lock); |
---|
| 949 | + |
---|
| 950 | + call_srcu(&srcu, &subscription->rcu, mmu_notifier_free_rcu); |
---|
| 951 | + return; |
---|
| 952 | + |
---|
| 953 | +out_unlock: |
---|
| 954 | + spin_unlock(&mm->notifier_subscriptions->lock); |
---|
| 955 | +} |
---|
| 956 | +EXPORT_SYMBOL_GPL(mmu_notifier_put); |
---|
| 957 | + |
---|
| 958 | +static int __mmu_interval_notifier_insert( |
---|
| 959 | + struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, |
---|
| 960 | + struct mmu_notifier_subscriptions *subscriptions, unsigned long start, |
---|
| 961 | + unsigned long length, const struct mmu_interval_notifier_ops *ops) |
---|
| 962 | +{ |
---|
| 963 | + interval_sub->mm = mm; |
---|
| 964 | + interval_sub->ops = ops; |
---|
| 965 | + RB_CLEAR_NODE(&interval_sub->interval_tree.rb); |
---|
| 966 | + interval_sub->interval_tree.start = start; |
---|
| 967 | + /* |
---|
| 968 | + * Note that the representation of the intervals in the interval tree |
---|
| 969 | + * considers the ending point as contained in the interval. |
---|
| 970 | + */ |
---|
| 971 | + if (length == 0 || |
---|
| 972 | + check_add_overflow(start, length - 1, |
---|
| 973 | + &interval_sub->interval_tree.last)) |
---|
| 974 | + return -EOVERFLOW; |
---|
| 975 | + |
---|
| 976 | + /* Must call with a mmget() held */ |
---|
| 977 | + if (WARN_ON(atomic_read(&mm->mm_users) <= 0)) |
---|
| 978 | + return -EINVAL; |
---|
| 979 | + |
---|
| 980 | + /* pairs with mmdrop in mmu_interval_notifier_remove() */ |
---|
| 981 | + mmgrab(mm); |
---|
| 982 | + |
---|
| 983 | + /* |
---|
| 984 | + * If some invalidate_range_start/end region is going on in parallel |
---|
| 985 | + * we don't know what VA ranges are affected, so we must assume this |
---|
| 986 | + * new range is included. |
---|
| 987 | + * |
---|
| 988 | + * If the itree is invalidating then we are not allowed to change |
---|
| 989 | + * it. Retrying until invalidation is done is tricky due to the |
---|
| 990 | + * possibility for live lock, instead defer the add to |
---|
| 991 | + * mn_itree_inv_end() so this algorithm is deterministic. |
---|
| 992 | + * |
---|
| 993 | + * In all cases the value for the interval_sub->invalidate_seq should be |
---|
| 994 | + * odd, see mmu_interval_read_begin() |
---|
| 995 | + */ |
---|
| 996 | + spin_lock(&subscriptions->lock); |
---|
| 997 | + if (subscriptions->active_invalidate_ranges) { |
---|
| 998 | + if (mn_itree_is_invalidating(subscriptions)) |
---|
| 999 | + hlist_add_head(&interval_sub->deferred_item, |
---|
| 1000 | + &subscriptions->deferred_list); |
---|
| 1001 | + else { |
---|
| 1002 | + subscriptions->invalidate_seq |= 1; |
---|
| 1003 | + interval_tree_insert(&interval_sub->interval_tree, |
---|
| 1004 | + &subscriptions->itree); |
---|
| 1005 | + } |
---|
| 1006 | + interval_sub->invalidate_seq = subscriptions->invalidate_seq; |
---|
| 1007 | + } else { |
---|
| 1008 | + WARN_ON(mn_itree_is_invalidating(subscriptions)); |
---|
| 1009 | + /* |
---|
| 1010 | + * The starting seq for a subscription not under invalidation |
---|
| 1011 | + * should be odd, not equal to the current invalidate_seq and |
---|
| 1012 | + * invalidate_seq should not 'wrap' to the new seq any time |
---|
| 1013 | + * soon. |
---|
| 1014 | + */ |
---|
| 1015 | + interval_sub->invalidate_seq = |
---|
| 1016 | + subscriptions->invalidate_seq - 1; |
---|
| 1017 | + interval_tree_insert(&interval_sub->interval_tree, |
---|
| 1018 | + &subscriptions->itree); |
---|
| 1019 | + } |
---|
| 1020 | + spin_unlock(&subscriptions->lock); |
---|
| 1021 | + return 0; |
---|
| 1022 | +} |
---|
| 1023 | + |
---|
| 1024 | +/** |
---|
| 1025 | + * mmu_interval_notifier_insert - Insert an interval notifier |
---|
| 1026 | + * @interval_sub: Interval subscription to register |
---|
| 1027 | + * @start: Starting virtual address to monitor |
---|
| 1028 | + * @length: Length of the range to monitor |
---|
| 1029 | + * @mm: mm_struct to attach to |
---|
| 1030 | + * @ops: Interval notifier operations to be called on matching events |
---|
| 1031 | + * |
---|
| 1032 | + * This function subscribes the interval notifier for notifications from the |
---|
| 1033 | + * mm. Upon return the ops related to mmu_interval_notifier will be called |
---|
| 1034 | + * whenever an event that intersects with the given range occurs. |
---|
| 1035 | + * |
---|
| 1036 | + * Upon return the range_notifier may not be present in the interval tree yet. |
---|
| 1037 | + * The caller must use the normal interval notifier read flow via |
---|
| 1038 | + * mmu_interval_read_begin() to establish SPTEs for this range. |
---|
| 1039 | + */ |
---|
| 1040 | +int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub, |
---|
| 1041 | + struct mm_struct *mm, unsigned long start, |
---|
| 1042 | + unsigned long length, |
---|
| 1043 | + const struct mmu_interval_notifier_ops *ops) |
---|
| 1044 | +{ |
---|
| 1045 | + struct mmu_notifier_subscriptions *subscriptions; |
---|
| 1046 | + int ret; |
---|
| 1047 | + |
---|
| 1048 | + might_lock(&mm->mmap_lock); |
---|
| 1049 | + |
---|
| 1050 | + subscriptions = smp_load_acquire(&mm->notifier_subscriptions); |
---|
| 1051 | + if (!subscriptions || !subscriptions->has_itree) { |
---|
| 1052 | + ret = mmu_notifier_register(NULL, mm); |
---|
| 1053 | + if (ret) |
---|
| 1054 | + return ret; |
---|
| 1055 | + subscriptions = mm->notifier_subscriptions; |
---|
| 1056 | + } |
---|
| 1057 | + return __mmu_interval_notifier_insert(interval_sub, mm, subscriptions, |
---|
| 1058 | + start, length, ops); |
---|
| 1059 | +} |
---|
| 1060 | +EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert); |
---|
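A hedged sketch of an interval subscription built on this API (example_range, its mutex and the device-side teardown are hypothetical):

```c
static bool example_invalidate(struct mmu_interval_notifier *interval_sub,
			       const struct mmu_notifier_range *range,
			       unsigned long cur_seq)
{
	struct example_range *r =
		container_of(interval_sub, struct example_range, notifier);

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&r->lock);
	else if (!mutex_trylock(&r->lock))
		return false;	/* false is only legal for non-blockable ranges */

	mmu_interval_set_seq(interval_sub, cur_seq);	/* under the user_lock */
	/* ... invalidate the device mapping for the overlapping range ... */
	mutex_unlock(&r->lock);
	return true;
}

static const struct mmu_interval_notifier_ops example_interval_ops = {
	.invalidate = example_invalidate,
};

/*
 * Registration, with an mmget() held:
 *	mmu_interval_notifier_insert(&r->notifier, current->mm,
 *				     start, length, &example_interval_ops);
 */
```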
| 1061 | + |
---|
| 1062 | +int mmu_interval_notifier_insert_locked( |
---|
| 1063 | + struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, |
---|
| 1064 | + unsigned long start, unsigned long length, |
---|
| 1065 | + const struct mmu_interval_notifier_ops *ops) |
---|
| 1066 | +{ |
---|
| 1067 | + struct mmu_notifier_subscriptions *subscriptions = |
---|
| 1068 | + mm->notifier_subscriptions; |
---|
| 1069 | + int ret; |
---|
| 1070 | + |
---|
| 1071 | + mmap_assert_write_locked(mm); |
---|
| 1072 | + |
---|
| 1073 | + if (!subscriptions || !subscriptions->has_itree) { |
---|
| 1074 | + ret = __mmu_notifier_register(NULL, mm); |
---|
| 1075 | + if (ret) |
---|
| 1076 | + return ret; |
---|
| 1077 | + subscriptions = mm->notifier_subscriptions; |
---|
| 1078 | + } |
---|
| 1079 | + return __mmu_interval_notifier_insert(interval_sub, mm, subscriptions, |
---|
| 1080 | + start, length, ops); |
---|
| 1081 | +} |
---|
| 1082 | +EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked); |
---|
| 1083 | + |
---|
| 1084 | +static bool |
---|
| 1085 | +mmu_interval_seq_released(struct mmu_notifier_subscriptions *subscriptions, |
---|
| 1086 | + unsigned long seq) |
---|
| 1087 | +{ |
---|
| 1088 | + bool ret; |
---|
| 1089 | + |
---|
| 1090 | + spin_lock(&subscriptions->lock); |
---|
| 1091 | + ret = subscriptions->invalidate_seq != seq; |
---|
| 1092 | + spin_unlock(&subscriptions->lock); |
---|
| 1093 | + return ret; |
---|
| 1094 | +} |
---|
| 1095 | + |
---|
| 1096 | +/** |
---|
| 1097 | + * mmu_interval_notifier_remove - Remove an interval notifier |
---|
| 1098 | + * @interval_sub: Interval subscription to unregister |
---|
| 1099 | + * |
---|
| 1100 | + * This function must be paired with mmu_interval_notifier_insert(). It cannot |
---|
| 1101 | + * be called from any ops callback. |
---|
| 1102 | + * |
---|
| 1103 | + * Once this returns ops callbacks are no longer running on other CPUs and |
---|
| 1104 | + * will not be called in future. |
---|
| 1105 | + */ |
---|
| 1106 | +void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub) |
---|
| 1107 | +{ |
---|
| 1108 | + struct mm_struct *mm = interval_sub->mm; |
---|
| 1109 | + struct mmu_notifier_subscriptions *subscriptions = |
---|
| 1110 | + mm->notifier_subscriptions; |
---|
| 1111 | + unsigned long seq = 0; |
---|
| 1112 | + |
---|
| 1113 | + might_sleep(); |
---|
| 1114 | + |
---|
| 1115 | + spin_lock(&subscriptions->lock); |
---|
| 1116 | + if (mn_itree_is_invalidating(subscriptions)) { |
---|
| 1117 | + /* |
---|
| 1118 | + * remove is being called after an insert put this subscription on |
---|
| 1119 | + * the deferred list, but before the deferred list was processed. |
---|
| 1120 | + */ |
---|
| 1121 | + if (RB_EMPTY_NODE(&interval_sub->interval_tree.rb)) { |
---|
| 1122 | + hlist_del(&interval_sub->deferred_item); |
---|
| 1123 | + } else { |
---|
| 1124 | + hlist_add_head(&interval_sub->deferred_item, |
---|
| 1125 | + &subscriptions->deferred_list); |
---|
| 1126 | + seq = subscriptions->invalidate_seq; |
---|
| 1127 | + } |
---|
| 1128 | + } else { |
---|
| 1129 | + WARN_ON(RB_EMPTY_NODE(&interval_sub->interval_tree.rb)); |
---|
| 1130 | + interval_tree_remove(&interval_sub->interval_tree, |
---|
| 1131 | + &subscriptions->itree); |
---|
| 1132 | + } |
---|
| 1133 | + spin_unlock(&subscriptions->lock); |
---|
| 1134 | + |
---|
| 1135 | + /* |
---|
| 1136 | + * The possible sleep on progress in the invalidation requires the |
---|
| 1137 | + * caller not hold any locks held by invalidation callbacks. |
---|
| 1138 | + */ |
---|
| 1139 | + lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); |
---|
| 1140 | + lock_map_release(&__mmu_notifier_invalidate_range_start_map); |
---|
| 1141 | + if (seq) |
---|
| 1142 | + wait_event(subscriptions->wq, |
---|
| 1143 | + mmu_interval_seq_released(subscriptions, seq)); |
---|
| 1144 | + |
---|
| 1145 | + /* pairs with mmgrab in mmu_interval_notifier_insert() */ |
---|
| 1146 | + mmdrop(mm); |
---|
| 1147 | +} |
---|
| 1148 | +EXPORT_SYMBOL_GPL(mmu_interval_notifier_remove); |
---|
| 1149 | + |
---|
| 1150 | +/** |
---|
| 1151 | + * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed |
---|
| 1152 | + * |
---|
| 1153 | + * This function ensures that all outstanding async SRCU work from |
---|
| 1154 | + * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops |
---|
| 1155 | + * associated with an unused mmu_notifier will no longer be called. |
---|
| 1156 | + * |
---|
| 1157 | + * Before using the caller must ensure that all of its mmu_notifiers have been |
---|
| 1158 | + * fully released via mmu_notifier_put(). |
---|
| 1159 | + * |
---|
| 1160 | + * Modules using the mmu_notifier_put() API should call this in their __exit |
---|
| 1161 | + * function to avoid module unloading races. |
---|
| 1162 | + */ |
---|
| 1163 | +void mmu_notifier_synchronize(void) |
---|
| 1164 | +{ |
---|
| 1165 | + synchronize_srcu(&srcu); |
---|
| 1166 | +} |
---|
| 1167 | +EXPORT_SYMBOL_GPL(mmu_notifier_synchronize); |
---|
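A hedged sketch of the module-teardown rule stated above (example_exit() is hypothetical):

```c
static void __exit example_exit(void)
{
	/*
	 * Every mmu_notifier_put() has been issued by now; wait for the
	 * SRCU-deferred ops->free_notifier() calls to finish before the
	 * module text can be unloaded.
	 */
	mmu_notifier_synchronize();
}
module_exit(example_exit);
```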
| 1168 | + |
---|
| 1169 | +bool |
---|
| 1170 | +mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range) |
---|
| 1171 | +{ |
---|
| 1172 | + if (!range->vma || range->event != MMU_NOTIFY_PROTECTION_VMA) |
---|
| 1173 | + return false; |
---|
| 1174 | + * Return true if the vma still has the read flag set. |
---|
| 1175 | + return range->vma->vm_flags & VM_READ; |
---|
| 1176 | +} |
---|
| 1177 | +EXPORT_SYMBOL_GPL(mmu_notifier_range_update_to_read_only); |
---|
| 1178 | + |
---|
| 1179 | +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT |
---|
| 1180 | + |
---|
| 1181 | +bool mmu_notifier_subscriptions_init(struct mm_struct *mm) |
---|
| 1182 | +{ |
---|
| 1183 | + struct mmu_notifier_subscriptions *subscriptions; |
---|
| 1184 | + struct percpu_rw_semaphore_atomic *sem; |
---|
| 1185 | + |
---|
| 1186 | + subscriptions = kzalloc( |
---|
| 1187 | + sizeof(struct mmu_notifier_subscriptions), GFP_KERNEL); |
---|
| 1188 | + if (!subscriptions) |
---|
| 1189 | + return false; |
---|
| 1190 | + |
---|
| 1191 | + sem = kzalloc(sizeof(struct percpu_rw_semaphore_atomic), GFP_KERNEL); |
---|
| 1192 | + if (!sem) { |
---|
| 1193 | + kfree(subscriptions); |
---|
| 1194 | + return false; |
---|
| 1195 | + } |
---|
| 1196 | + percpu_init_rwsem(&sem->rw_sem); |
---|
| 1197 | + |
---|
| 1198 | + init_subscriptions(subscriptions); |
---|
| 1199 | + subscriptions->has_itree = true; |
---|
| 1200 | + subscriptions->hdr.valid = false; |
---|
| 1201 | + subscriptions->hdr.mmu_notifier_lock = sem; |
---|
| 1202 | + mm->notifier_subscriptions = subscriptions; |
---|
| 1203 | + |
---|
| 1204 | + return true; |
---|
| 1205 | +} |
---|
| 1206 | + |
---|
| 1207 | +void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) |
---|
| 1208 | +{ |
---|
| 1209 | + percpu_rwsem_async_destroy( |
---|
| 1210 | + mm->notifier_subscriptions->hdr.mmu_notifier_lock); |
---|
| 1211 | + kfree(mm->notifier_subscriptions); |
---|
| 1212 | + mm->notifier_subscriptions = NULL; |
---|
| 1213 | +} |
---|
| 1214 | + |
---|
| 1215 | +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ |
---|