@@ -1,4 +1,5 @@
 /*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
  * Copyright(c) 2016 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license. When using or
@@ -48,36 +49,21 @@
 #include <linux/rculist.h>
 #include <linux/mmu_notifier.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/sched/mm.h>
 
 #include "mmu_rb.h"
 #include "trace.h"
 
-struct mmu_rb_handler {
-	struct mmu_notifier mn;
-	struct rb_root_cached root;
-	void *ops_arg;
-	spinlock_t lock; /* protect the RB tree */
-	struct mmu_rb_ops *ops;
-	struct mm_struct *mm;
-	struct list_head lru_list;
-	struct work_struct del_work;
-	struct list_head del_list;
-	struct workqueue_struct *wq;
-};
-
 static unsigned long mmu_node_start(struct mmu_rb_node *);
 static unsigned long mmu_node_last(struct mmu_rb_node *);
 static int mmu_notifier_range_start(struct mmu_notifier *,
-		struct mm_struct *,
-		unsigned long, unsigned long, bool);
+		const struct mmu_notifier_range *);
 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
 					   unsigned long, unsigned long);
-static void do_remove(struct mmu_rb_handler *handler,
-		      struct list_head *del_list);
+static void release_immediate(struct kref *refcount);
 static void handle_remove(struct work_struct *work);
 
 static const struct mmu_notifier_ops mn_opts = {
-	.flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
 	.invalidate_range_start = mmu_notifier_range_start,
 };
 
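The handler definition deleted above is not simply dropped: later hunks use
mnode->handler and rbnode->refcount, so the struct has evidently moved where
mmu_rb.h users can see it (presumably into mmu_rb.h itself), with the private
"struct mm_struct *mm" field replaced by the mm that mmu_notifier_register()
records in mn.mm. A sketch of the layout this diff implies; the real header
may differ:

	/* Assumed to live in mmu_rb.h after this change. */
	struct mmu_rb_handler {
		struct mmu_notifier mn;		/* mn.mm replaces the old mm field */
		struct rb_root_cached root;
		void *ops_arg;
		spinlock_t lock;		/* protect the RB tree */
		struct mmu_rb_ops *ops;
		struct list_head lru_list;
		struct work_struct del_work;
		struct list_head del_list;
		struct workqueue_struct *wq;
	};

	struct mmu_rb_node {
		unsigned long addr;
		unsigned long len;
		unsigned long __last;
		struct rb_node node;
		struct mmu_rb_handler *handler;	/* set by hfi1_mmu_rb_insert() */
		struct list_head list;		/* lru_list or del_list linkage */
		struct kref refcount;		/* dropped via release_* callbacks */
	};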
@@ -94,37 +80,36 @@
 	return PAGE_ALIGN(node->addr + node->len) - 1;
 }
 
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
+int hfi1_mmu_rb_register(void *ops_arg,
 			 struct mmu_rb_ops *ops,
 			 struct workqueue_struct *wq,
 			 struct mmu_rb_handler **handler)
 {
-	struct mmu_rb_handler *handlr;
+	struct mmu_rb_handler *h;
 	int ret;
 
-	handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
-	if (!handlr)
+	h = kzalloc(sizeof(*h), GFP_KERNEL);
+	if (!h)
 		return -ENOMEM;
 
-	handlr->root = RB_ROOT_CACHED;
-	handlr->ops = ops;
-	handlr->ops_arg = ops_arg;
-	INIT_HLIST_NODE(&handlr->mn.hlist);
-	spin_lock_init(&handlr->lock);
-	handlr->mn.ops = &mn_opts;
-	handlr->mm = mm;
-	INIT_WORK(&handlr->del_work, handle_remove);
-	INIT_LIST_HEAD(&handlr->del_list);
-	INIT_LIST_HEAD(&handlr->lru_list);
-	handlr->wq = wq;
+	h->root = RB_ROOT_CACHED;
+	h->ops = ops;
+	h->ops_arg = ops_arg;
+	INIT_HLIST_NODE(&h->mn.hlist);
+	spin_lock_init(&h->lock);
+	h->mn.ops = &mn_opts;
+	INIT_WORK(&h->del_work, handle_remove);
+	INIT_LIST_HEAD(&h->del_list);
+	INIT_LIST_HEAD(&h->lru_list);
+	h->wq = wq;
 
-	ret = mmu_notifier_register(&handlr->mn, handlr->mm);
+	ret = mmu_notifier_register(&h->mn, current->mm);
 	if (ret) {
-		kfree(handlr);
+		kfree(h);
 		return ret;
 	}
 
-	*handler = handlr;
+	*handler = h;
 	return 0;
 }
 
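hfi1_mmu_rb_register() no longer takes an mm argument: it registers against
current->mm, and mmu_notifier_register() records that mm in h->mn.mm, which
the rest of the file now uses in place of the deleted handler->mm field. A
minimal caller sketch (example_setup and its handler storage are
hypothetical):

	static struct mmu_rb_handler *example_handler;

	static int example_setup(void *ops_arg, struct mmu_rb_ops *ops,
				 struct workqueue_struct *wq)
	{
		/*
		 * Must run in task context: the handler binds to current->mm,
		 * and hfi1_mmu_rb_insert()/hfi1_mmu_rb_evict() below refuse
		 * to operate on behalf of any other mm.
		 */
		return hfi1_mmu_rb_register(ops_arg, ops, wq, &example_handler);
	}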
@@ -135,8 +120,11 @@
 	unsigned long flags;
 	struct list_head del_list;
 
+	/* Prevent freeing of mm until we are completely finished. */
+	mmgrab(handler->mn.mm);
+
 	/* Unregister first so we don't get any more notifications. */
-	mmu_notifier_unregister(&handler->mn, handler->mm);
+	mmu_notifier_unregister(&handler->mn, handler->mn.mm);
 
 	/*
 	 * Make sure the wq delete handler is finished running. It will not
@@ -155,7 +143,14 @@
 	}
 	spin_unlock_irqrestore(&handler->lock, flags);
 
-	do_remove(handler, &del_list);
+	while (!list_empty(&del_list)) {
+		rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
+		list_del(&rbnode->list);
+		kref_put(&rbnode->refcount, release_immediate);
+	}
+
+	/* Now the mm may be freed. */
+	mmdrop(handler->mn.mm);
 
 	kfree(handler);
 }
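The teardown above is now bracketed by mmgrab()/mmdrop(), which is what the
new <linux/sched/mm.h> include provides. mmgrab() pins the mm_struct itself
(mm_count) rather than the address space (mm_users), so handler->mn.mm stays
a valid pointer across mmu_notifier_unregister() even if the owning process
has already exited. The general shape of the pattern, as a sketch
(teardown_window is illustrative):

	#include <linux/mmu_notifier.h>
	#include <linux/sched/mm.h>

	static void teardown_window(struct mmu_notifier *mn)
	{
		struct mm_struct *mm = mn->mm;

		mmgrab(mm);	/* mm_struct cannot be freed from here... */
		mmu_notifier_unregister(mn, mm);
		/* ... other teardown that still dereferences mm ... */
		mmdrop(mm);	/* ...to here; may free it on last reference */
	}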
@@ -168,23 +163,35 @@
 	int ret = 0;
 
 	trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);
+
+	if (current->mm != handler->mn.mm)
+		return -EPERM;
+
 	spin_lock_irqsave(&handler->lock, flags);
 	node = __mmu_rb_search(handler, mnode->addr, mnode->len);
 	if (node) {
-		ret = -EINVAL;
+		ret = -EEXIST;
 		goto unlock;
 	}
 	__mmu_int_rb_insert(mnode, &handler->root);
-	list_add(&mnode->list, &handler->lru_list);
-
-	ret = handler->ops->insert(handler->ops_arg, mnode);
-	if (ret) {
-		__mmu_int_rb_remove(mnode, &handler->root);
-		list_del(&mnode->list); /* remove from LRU list */
-	}
+	list_add_tail(&mnode->list, &handler->lru_list);
+	mnode->handler = handler;
 unlock:
 	spin_unlock_irqrestore(&handler->lock, flags);
 	return ret;
+}
+
+/* Caller must hold handler lock */
+struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
+					  unsigned long addr, unsigned long len)
+{
+	struct mmu_rb_node *node;
+
+	trace_hfi1_mmu_rb_search(addr, len);
+	node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
+	if (node)
+		list_move_tail(&node->list, &handler->lru_list);
+	return node;
 }
 
 /* Caller must hold handler lock */
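hfi1_mmu_rb_get_first() hands back a node while the caller holds
handler->lock and bumps it to the LRU tail, but takes no reference of its
own. A plausible caller pattern, assuming users pin the node with kref_get()
before dropping the lock (example_lookup is illustrative, not part of this
diff):

	static struct mmu_rb_node *example_lookup(struct mmu_rb_handler *handler,
						  unsigned long addr,
						  unsigned long len)
	{
		struct mmu_rb_node *node;
		unsigned long flags;

		spin_lock_irqsave(&handler->lock, flags);
		node = hfi1_mmu_rb_get_first(handler, addr, len);
		if (node)
			kref_get(&node->refcount);	/* pin before unlock */
		spin_unlock_irqrestore(&handler->lock, flags);
		return node;
	}

	/* When finished with the node (handler->lock not held): */
	/*	kref_put(&node->refcount, hfi1_mmu_rb_release);    */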
@@ -211,27 +218,46 @@
 	return node;
 }
 
-bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
-				     unsigned long addr, unsigned long len,
-				     struct mmu_rb_node **rb_node)
+/*
+ * Must NOT call while holding mnode->handler->lock.
+ * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
+ * spinlock.
+ */
+static void release_immediate(struct kref *refcount)
 {
-	struct mmu_rb_node *node;
+	struct mmu_rb_node *mnode =
+		container_of(refcount, struct mmu_rb_node, refcount);
+	mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
+}
+
+/* Caller must hold mnode->handler->lock */
+static void release_nolock(struct kref *refcount)
+{
+	struct mmu_rb_node *mnode =
+		container_of(refcount, struct mmu_rb_node, refcount);
+	list_move(&mnode->list, &mnode->handler->del_list);
+	queue_work(mnode->handler->wq, &mnode->handler->del_work);
+}
+
+/*
+ * struct mmu_rb_node->refcount kref_put() callback.
+ * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
+ * handler->del_work on handler->wq.
+ * Does not remove mmu_rb_node from handler->lru_list or handler->rb_root.
+ * Acquires mmu_rb_node->handler->lock; do not call while already holding
+ * handler->lock.
+ */
+void hfi1_mmu_rb_release(struct kref *refcount)
+{
+	struct mmu_rb_node *mnode =
+		container_of(refcount, struct mmu_rb_node, refcount);
+	struct mmu_rb_handler *handler = mnode->handler;
 	unsigned long flags;
-	bool ret = false;
 
 	spin_lock_irqsave(&handler->lock, flags);
-	node = __mmu_rb_search(handler, addr, len);
-	if (node) {
-		if (node->addr == addr && node->len == len)
-			goto unlock;
-		__mmu_int_rb_remove(node, &handler->root);
-		list_del(&node->list); /* remove from LRU list */
-		ret = true;
-	}
-unlock:
+	list_move(&mnode->list, &mnode->handler->del_list);
 	spin_unlock_irqrestore(&handler->lock, flags);
-	*rb_node = node;
-	return ret;
+	queue_work(handler->wq, &handler->del_work);
 }
 
 void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
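All three functions above are kref_put() release callbacks; kref_put() only
invokes its callback when the count reaches zero, so the choice of callback
encodes the caller's locking context rather than any difference in node
state. As implied by the comments, the intended mapping is roughly:

	/* Sleepable context, handler->lock NOT held: remove synchronously. */
	kref_put(&node->refcount, release_immediate);

	/* handler->lock held: defer ops->remove() to handler->wq. */
	kref_put(&node->refcount, release_nolock);

	/*
	 * Lock not held, but removal must still be deferred: takes the
	 * lock itself, then queues the workqueue.
	 */
	kref_put(&node->refcount, hfi1_mmu_rb_release);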
@@ -241,11 +267,17 @@
 	unsigned long flags;
 	bool stop = false;
 
+	if (current->mm != handler->mn.mm)
+		return;
+
 	INIT_LIST_HEAD(&del_list);
 
 	spin_lock_irqsave(&handler->lock, flags);
-	list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list,
-					 list) {
+	list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
+		/* refcount == 1 implies mmu_rb_handler has only rbnode ref */
+		if (kref_read(&rbnode->refcount) > 1)
+			continue;
+
 		if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
 					&stop)) {
 			__mmu_int_rb_remove(rbnode, &handler->root);
@@ -257,87 +289,40 @@
 	}
 	spin_unlock_irqrestore(&handler->lock, flags);
 
-	while (!list_empty(&del_list)) {
-		rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
-		list_del(&rbnode->list);
-		handler->ops->remove(handler->ops_arg, rbnode);
+	list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
+		kref_put(&rbnode->refcount, release_immediate);
 	}
 }
 
-/*
- * It is up to the caller to ensure that this function does not race with the
- * mmu invalidate notifier which may be calling the users remove callback on
- * 'node'.
- */
-void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
-			struct mmu_rb_node *node)
-{
-	unsigned long flags;
-
-	/* Validity of handler and node pointers has been checked by caller. */
-	trace_hfi1_mmu_rb_remove(node->addr, node->len);
-	spin_lock_irqsave(&handler->lock, flags);
-	__mmu_int_rb_remove(node, &handler->root);
-	list_del(&node->list); /* remove from LRU list */
-	spin_unlock_irqrestore(&handler->lock, flags);
-
-	handler->ops->remove(handler->ops_arg, node);
-}
-
 static int mmu_notifier_range_start(struct mmu_notifier *mn,
-				    struct mm_struct *mm,
-				    unsigned long start,
-				    unsigned long end,
-				    bool blockable)
+				    const struct mmu_notifier_range *range)
 {
 	struct mmu_rb_handler *handler =
 		container_of(mn, struct mmu_rb_handler, mn);
 	struct rb_root_cached *root = &handler->root;
 	struct mmu_rb_node *node, *ptr = NULL;
 	unsigned long flags;
-	bool added = false;
 
 	spin_lock_irqsave(&handler->lock, flags);
-	for (node = __mmu_int_rb_iter_first(root, start, end - 1);
+	for (node = __mmu_int_rb_iter_first(root, range->start, range->end - 1);
 	     node; node = ptr) {
 		/* Guard against node removal. */
-		ptr = __mmu_int_rb_iter_next(node, start, end - 1);
+		ptr = __mmu_int_rb_iter_next(node, range->start,
					     range->end - 1);
 		trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
-		if (handler->ops->invalidate(handler->ops_arg, node)) {
-			__mmu_int_rb_remove(node, root);
-			/* move from LRU list to delete list */
-			list_move(&node->list, &handler->del_list);
-			added = true;
-		}
+		/* Remove from rb tree and lru_list. */
+		__mmu_int_rb_remove(node, root);
+		list_del_init(&node->list);
+		kref_put(&node->refcount, release_nolock);
 	}
 	spin_unlock_irqrestore(&handler->lock, flags);
-
-	if (added)
-		queue_work(handler->wq, &handler->del_work);
 
 	return 0;
 }
 
 /*
- * Call the remove function for the given handler and the list. This
- * is expected to be called with a delete list extracted from handler.
- * The caller should not be holding the handler lock.
- */
-static void do_remove(struct mmu_rb_handler *handler,
-		      struct list_head *del_list)
-{
-	struct mmu_rb_node *node;
-
-	while (!list_empty(del_list)) {
-		node = list_first_entry(del_list, struct mmu_rb_node, list);
-		list_del(&node->list);
-		handler->ops->remove(handler->ops_arg, node);
-	}
-}
-
-/*
  * Work queue function to remove all nodes that have been queued up to
- * be removed. The key feature is that mm->mmap_sem is not being held
+ * be removed. The key feature is that mm->mmap_lock is not being held
  * and the remove callback can sleep while taking it, if needed.
  */
 static void handle_remove(struct work_struct *work)
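Two details of the notifier hunk deserve a note. First, mmu_notifier ranges
are half-open ([start, end)) while the tree generated by
interval_tree_generic.h operates on closed intervals [start, last], hence
range->end - 1 at both iterator calls. Second, list_del_init() (rather than
list_del()) leaves node->list self-linked, so the list_move() in
release_nolock() is safe regardless of where the node was queued. The
__mmu_int_rb_* helpers are presumably generated earlier in this file by an
invocation along these lines:

	#include <linux/interval_tree_generic.h>

	/*
	 * Presumed generator for __mmu_int_rb_{insert,remove,iter_first,
	 * iter_next}; mmu_node_start()/mmu_node_last() return the closed
	 * bounds, which is why callers pass range->end - 1.
	 */
	INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
			     mmu_node_start, mmu_node_last, static,
			     __mmu_int_rb);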
@@ -347,11 +332,16 @@
 						  del_work);
 	struct list_head del_list;
 	unsigned long flags;
+	struct mmu_rb_node *node;
 
 	/* remove anything that is queued to get removed */
 	spin_lock_irqsave(&handler->lock, flags);
 	list_replace_init(&handler->del_list, &del_list);
 	spin_unlock_irqrestore(&handler->lock, flags);
 
-	do_remove(handler, &del_list);
+	while (!list_empty(&del_list)) {
+		node = list_first_entry(&del_list, struct mmu_rb_node, list);
+		list_del(&node->list);
+		handler->ops->remove(handler->ops_arg, node);
+	}
 }
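Taken together, the new lifecycle appears to be: a node carries one reference
owned by the handler from insertion; the invalidate notifier, the eviction
path, or a final user put drops references; and whichever kref_put() reaches
zero triggers ops->remove(), either synchronously (release_immediate) or via
handler->wq (release_nolock, hfi1_mmu_rb_release). A hypothetical
creation-side sketch (example_create and the kref_init() placement are
assumptions about the callers, which are outside this diff):

	static int example_create(struct mmu_rb_handler *handler,
				  unsigned long addr, unsigned long len)
	{
		struct mmu_rb_node *node;

		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (!node)
			return -ENOMEM;
		node->addr = addr;
		node->len = len;
		kref_init(&node->refcount);	/* the handler's reference */
		return hfi1_mmu_rb_insert(handler, node);
	}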