.. | ..
 | 1 | +// SPDX-License-Identifier: GPL-2.0+
1 | 2 | /*
2 | 3 |  * User-space Probes (UProbes)
3 | | - *
4 | | - * This program is free software; you can redistribute it and/or modify
5 | | - * it under the terms of the GNU General Public License as published by
6 | | - * the Free Software Foundation; either version 2 of the License, or
7 | | - * (at your option) any later version.
8 | | - *
9 | | - * This program is distributed in the hope that it will be useful,
10 | | - * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | | - * GNU General Public License for more details.
13 | | - *
14 | | - * You should have received a copy of the GNU General Public License
15 | | - * along with this program; if not, write to the Free Software
16 | | - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 | 4 |  *
18 | 5 |  * Copyright (C) IBM Corporation, 2008-2012
19 | 6 |  * Authors:
.. | ..
39 | 26 | #include <linux/percpu-rwsem.h>
40 | 27 | #include <linux/task_work.h>
41 | 28 | #include <linux/shmem_fs.h>
 | 29 | +#include <linux/khugepaged.h>
42 | 30 |
43 | 31 | #include <linux/uprobes.h>
44 | 32 |
.. | ..
59 | 47 | static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
60 | 48 | #define uprobes_mmap_hash(v)	(&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
61 | 49 |
62 | | -static struct percpu_rw_semaphore dup_mmap_sem;
 | 50 | +DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
63 | 51 |
64 | 52 | /* Have a copy of original instruction */
65 | 53 | #define UPROBE_COPY_INSN	0
66 | 54 |
67 | 55 | struct uprobe {
68 | 56 | 	struct rb_node		rb_node;	/* node in the rb tree */
69 | | -	atomic_t		ref;
 | 57 | +	refcount_t		ref;
70 | 58 | 	struct rw_semaphore	register_rwsem;
71 | 59 | 	struct rw_semaphore	consumer_rwsem;
72 | 60 | 	struct list_head	pending_list;
73 | 61 | 	struct uprobe_consumer	*consumers;
74 | 62 | 	struct inode		*inode;		/* Also hold a ref to inode */
75 | 63 | 	loff_t			offset;
 | 64 | +	loff_t			ref_ctr_offset;
76 | 65 | 	unsigned long		flags;
77 | 66 |
78 | 67 | 	/*
.. | ..
87 | 76 | 	 */
88 | 77 | 	struct arch_uprobe	arch;
89 | 78 | };
 | 79 | +
 | 80 | +struct delayed_uprobe {
 | 81 | +	struct list_head list;
 | 82 | +	struct uprobe *uprobe;
 | 83 | +	struct mm_struct *mm;
 | 84 | +};
 | 85 | +
 | 86 | +static DEFINE_MUTEX(delayed_uprobe_lock);
 | 87 | +static LIST_HEAD(delayed_uprobe_list);
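The new ref_ctr_offset field and the delayed_uprobe list above implement SDT-style probe semaphores: a 16-bit counter inside the probed binary that the kernel increments while a tracer is attached, so the application can skip expensive probe-argument setup when nobody is tracing. A minimal user-space sketch of such a counter (the variable name is hypothetical; real binaries emit these via systemtap's sdt.h macros):

	#include <stdio.h>

	/* A tracer passes this variable's file offset to the kernel as
	 * ref_ctr_offset; the kernel then updates it remotely. */
	unsigned short my_probe_semaphore;

	int main(void)
	{
		if (my_probe_semaphore)	/* nonzero only while a probe is attached */
			printf("expensive probe-argument setup goes here\n");
		return 0;
	}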
90 | 88 |
91 | 89 | /*
92 | 90 |  * Execute out of line area: anonymous executable mapping installed
.. | ..
146 | 144 |  *
147 | 145 |  * @vma:      vma that holds the pte pointing to page
148 | 146 |  * @addr:     address the old @page is mapped at
149 | | - * @page:     the cowed page we are replacing by kpage
150 | | - * @kpage:    the modified page we replace page by
 | 147 | + * @old_page: the page we are replacing by new_page
 | 148 | + * @new_page: the modified page we replace page by
151 | 149 |  *
152 | | - * Returns 0 on success, -EFAULT on failure.
 | 150 | + * If @new_page is NULL, only unmap @old_page.
 | 151 | + *
 | 152 | + * Returns 0 on success, negative error code otherwise.
153 | 153 |  */
154 | 154 | static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
155 | 155 | 				struct page *old_page, struct page *new_page)
156 | 156 | {
157 | 157 | 	struct mm_struct *mm = vma->vm_mm;
158 | 158 | 	struct page_vma_mapped_walk pvmw = {
159 | | -		.page = old_page,
 | 159 | +		.page = compound_head(old_page),
160 | 160 | 		.vma = vma,
161 | 161 | 		.address = addr,
162 | 162 | 	};
163 | 163 | 	int err;
164 | | -	/* For mmu_notifiers */
165 | | -	const unsigned long mmun_start = addr;
166 | | -	const unsigned long mmun_end = addr + PAGE_SIZE;
167 | | -	struct mem_cgroup *memcg;
 | 164 | +	struct mmu_notifier_range range;
168 | 165 |
169 | | -	VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
 | 166 | +	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
 | 167 | +				addr + PAGE_SIZE);
170 | 168 |
171 | | -	err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
172 | | -			false);
173 | | -	if (err)
174 | | -		return err;
 | 169 | +	if (new_page) {
 | 170 | +		err = mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL);
 | 171 | +		if (err)
 | 172 | +			return err;
 | 173 | +	}
175 | 174 |
176 | 175 | 	/* For try_to_free_swap() and munlock_vma_page() below */
177 | 176 | 	lock_page(old_page);
178 | 177 |
179 | | -	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 | 178 | +	mmu_notifier_invalidate_range_start(&range);
180 | 179 | 	err = -EAGAIN;
181 | | -	if (!page_vma_mapped_walk(&pvmw)) {
182 | | -		mem_cgroup_cancel_charge(new_page, memcg, false);
 | 180 | +	if (!page_vma_mapped_walk(&pvmw))
183 | 181 | 		goto unlock;
184 | | -	}
185 | 182 | 	VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
186 | 183 |
187 | | -	get_page(new_page);
188 | | -	page_add_new_anon_rmap(new_page, vma, addr, false);
189 | | -	mem_cgroup_commit_charge(new_page, memcg, false, false);
190 | | -	lru_cache_add_active_or_unevictable(new_page, vma);
 | 184 | +	if (new_page) {
 | 185 | +		get_page(new_page);
 | 186 | +		page_add_new_anon_rmap(new_page, vma, addr, false);
 | 187 | +		lru_cache_add_inactive_or_unevictable(new_page, vma);
 | 188 | +	} else
 | 189 | +		/* no new page, just dec_mm_counter for old_page */
 | 190 | +		dec_mm_counter(mm, MM_ANONPAGES);
191 | 191 |
192 | 192 | 	if (!PageAnon(old_page)) {
193 | 193 | 		dec_mm_counter(mm, mm_counter_file(old_page));
.. | ..
196 | 196 |
197 | 197 | 	flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
198 | 198 | 	ptep_clear_flush_notify(vma, addr, pvmw.pte);
199 | | -	set_pte_at_notify(mm, addr, pvmw.pte,
200 | | -			mk_pte(new_page, vma->vm_page_prot));
 | 199 | +	if (new_page)
 | 200 | +		set_pte_at_notify(mm, addr, pvmw.pte,
 | 201 | +				  mk_pte(new_page, vma->vm_page_prot));
201 | 202 |
202 | 203 | 	page_remove_rmap(old_page, false);
203 | 204 | 	if (!page_mapped(old_page))
204 | 205 | 		try_to_free_swap(old_page);
205 | 206 | 	page_vma_mapped_walk_done(&pvmw);
206 | 207 |
207 | | -	if (vma->vm_flags & VM_LOCKED)
 | 208 | +	if ((vma->vm_flags & VM_LOCKED) && !PageCompound(old_page))
208 | 209 | 		munlock_vma_page(old_page);
209 | 210 | 	put_page(old_page);
210 | 211 |
211 | 212 | 	err = 0;
212 | 213 | unlock:
213 | | -	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 | 214 | +	mmu_notifier_invalidate_range_end(&range);
214 | 215 | 	unlock_page(old_page);
215 | 216 | 	return err;
216 | 217 | }
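Besides tolerating a NULL @new_page (unmap-only, used by the unregister path further down), __replace_page() is ported here to the range-based MMU-notifier API, where the invalidated span and the event type travel together in one struct. The bare pattern, as a sketch for a single-page PTE update (not uprobes-specific; assumes a kernel of roughly this vintage):

	#include <linux/mmu_notifier.h>

	/* Sketch: notify secondary MMUs around clearing one page's PTE. */
	static void invalidate_one_page(struct vm_area_struct *vma,
					struct mm_struct *mm, unsigned long addr)
	{
		struct mmu_notifier_range range;

		mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
					addr, addr + PAGE_SIZE);
		mmu_notifier_invalidate_range_start(&range);
		/* ... clear or replace the PTE here ... */
		mmu_notifier_invalidate_range_end(&range);
	}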
.. | ..
282 | 283 | 	return 1;
283 | 284 | }
284 | 285 |
 | 286 | +static struct delayed_uprobe *
 | 287 | +delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
 | 288 | +{
 | 289 | +	struct delayed_uprobe *du;
 | 290 | +
 | 291 | +	list_for_each_entry(du, &delayed_uprobe_list, list)
 | 292 | +		if (du->uprobe == uprobe && du->mm == mm)
 | 293 | +			return du;
 | 294 | +	return NULL;
 | 295 | +}
 | 296 | +
 | 297 | +static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
 | 298 | +{
 | 299 | +	struct delayed_uprobe *du;
 | 300 | +
 | 301 | +	if (delayed_uprobe_check(uprobe, mm))
 | 302 | +		return 0;
 | 303 | +
 | 304 | +	du = kzalloc(sizeof(*du), GFP_KERNEL);
 | 305 | +	if (!du)
 | 306 | +		return -ENOMEM;
 | 307 | +
 | 308 | +	du->uprobe = uprobe;
 | 309 | +	du->mm = mm;
 | 310 | +	list_add(&du->list, &delayed_uprobe_list);
 | 311 | +	return 0;
 | 312 | +}
 | 313 | +
 | 314 | +static void delayed_uprobe_delete(struct delayed_uprobe *du)
 | 315 | +{
 | 316 | +	if (WARN_ON(!du))
 | 317 | +		return;
 | 318 | +	list_del(&du->list);
 | 319 | +	kfree(du);
 | 320 | +}
 | 321 | +
 | 322 | +static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
 | 323 | +{
 | 324 | +	struct list_head *pos, *q;
 | 325 | +	struct delayed_uprobe *du;
 | 326 | +
 | 327 | +	if (!uprobe && !mm)
 | 328 | +		return;
 | 329 | +
 | 330 | +	list_for_each_safe(pos, q, &delayed_uprobe_list) {
 | 331 | +		du = list_entry(pos, struct delayed_uprobe, list);
 | 332 | +
 | 333 | +		if (uprobe && du->uprobe != uprobe)
 | 334 | +			continue;
 | 335 | +		if (mm && du->mm != mm)
 | 336 | +			continue;
 | 337 | +
 | 338 | +		delayed_uprobe_delete(du);
 | 339 | +	}
 | 340 | +}
 | 341 | +
 | 342 | +static bool valid_ref_ctr_vma(struct uprobe *uprobe,
 | 343 | +			      struct vm_area_struct *vma)
 | 344 | +{
 | 345 | +	unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset);
 | 346 | +
 | 347 | +	return uprobe->ref_ctr_offset &&
 | 348 | +		vma->vm_file &&
 | 349 | +		file_inode(vma->vm_file) == uprobe->inode &&
 | 350 | +		(vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
 | 351 | +		vma->vm_start <= vaddr &&
 | 352 | +		vma->vm_end > vaddr;
 | 353 | +}
 | 354 | +
 | 355 | +static struct vm_area_struct *
 | 356 | +find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm)
 | 357 | +{
 | 358 | +	struct vm_area_struct *tmp;
 | 359 | +
 | 360 | +	for (tmp = mm->mmap; tmp; tmp = tmp->vm_next)
 | 361 | +		if (valid_ref_ctr_vma(uprobe, tmp))
 | 362 | +			return tmp;
 | 363 | +
 | 364 | +	return NULL;
 | 365 | +}
 | 366 | +
 | 367 | +static int
 | 368 | +__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
 | 369 | +{
 | 370 | +	void *kaddr;
 | 371 | +	struct page *page;
 | 372 | +	struct vm_area_struct *vma;
 | 373 | +	int ret;
 | 374 | +	short *ptr;
 | 375 | +
 | 376 | +	if (!vaddr || !d)
 | 377 | +		return -EINVAL;
 | 378 | +
 | 379 | +	ret = get_user_pages_remote(mm, vaddr, 1,
 | 380 | +				    FOLL_WRITE, &page, &vma, NULL);
 | 381 | +	if (unlikely(ret <= 0)) {
 | 382 | +		/*
 | 383 | +		 * We are asking for 1 page. If get_user_pages_remote() fails,
 | 384 | +		 * it may return 0, in that case we have to return error.
 | 385 | +		 */
 | 386 | +		return ret == 0 ? -EBUSY : ret;
 | 387 | +	}
 | 388 | +
 | 389 | +	kaddr = kmap_atomic(page);
 | 390 | +	ptr = kaddr + (vaddr & ~PAGE_MASK);
 | 391 | +
 | 392 | +	if (unlikely(*ptr + d < 0)) {
 | 393 | +		pr_warn("ref_ctr going negative. vaddr: 0x%lx, "
 | 394 | +			"curr val: %d, delta: %d\n", vaddr, *ptr, d);
 | 395 | +		ret = -EINVAL;
 | 396 | +		goto out;
 | 397 | +	}
 | 398 | +
 | 399 | +	*ptr += d;
 | 400 | +	ret = 0;
 | 401 | +out:
 | 402 | +	kunmap_atomic(kaddr);
 | 403 | +	put_page(page);
 | 404 | +	return ret;
 | 405 | +}
 | 406 | +
 | 407 | +static void update_ref_ctr_warn(struct uprobe *uprobe,
 | 408 | +				struct mm_struct *mm, short d)
 | 409 | +{
 | 410 | +	pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
 | 411 | +		"0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n",
 | 412 | +		d > 0 ? "increment" : "decrement", uprobe->inode->i_ino,
 | 413 | +		(unsigned long long) uprobe->offset,
 | 414 | +		(unsigned long long) uprobe->ref_ctr_offset, mm);
 | 415 | +}
 | 416 | +
 | 417 | +static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
 | 418 | +			  short d)
 | 419 | +{
 | 420 | +	struct vm_area_struct *rc_vma;
 | 421 | +	unsigned long rc_vaddr;
 | 422 | +	int ret = 0;
 | 423 | +
 | 424 | +	rc_vma = find_ref_ctr_vma(uprobe, mm);
 | 425 | +
 | 426 | +	if (rc_vma) {
 | 427 | +		rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset);
 | 428 | +		ret = __update_ref_ctr(mm, rc_vaddr, d);
 | 429 | +		if (ret)
 | 430 | +			update_ref_ctr_warn(uprobe, mm, d);
 | 431 | +
 | 432 | +		if (d > 0)
 | 433 | +			return ret;
 | 434 | +	}
 | 435 | +
 | 436 | +	mutex_lock(&delayed_uprobe_lock);
 | 437 | +	if (d > 0)
 | 438 | +		ret = delayed_uprobe_add(uprobe, mm);
 | 439 | +	else
 | 440 | +		delayed_uprobe_remove(uprobe, mm);
 | 441 | +	mutex_unlock(&delayed_uprobe_lock);
 | 442 | +
 | 443 | +	return ret;
 | 444 | +}
 | 445 | +
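__update_ref_ctr() maps the counter's page with kmap_atomic() and indexes into it with vaddr & ~PAGE_MASK, which only works if the two-byte counter never straddles a page boundary; __uprobe_register() below rejects unaligned ref_ctr_offset values for exactly this reason. A small user-space illustration of the masking arithmetic (4 KiB pages and the sample address are assumptions):

	#include <stdio.h>

	#define PAGE_SIZE 4096UL
	#define PAGE_MASK (~(PAGE_SIZE - 1))

	int main(void)
	{
		/* hypothetical counter address, 2 bytes before a page end */
		unsigned long vaddr = 0x7f0012345ffeUL;

		/* a 2-byte-aligned short sits at most at offset PAGE_SIZE - 2 */
		printf("page base: %#lx, in-page offset: %#lx\n",
		       vaddr & PAGE_MASK, vaddr & ~PAGE_MASK);
		return 0;
	}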
285 | 446 | /* |
---|
286 | 447 | * NOTE: |
---|
287 | 448 | * Expect the breakpoint instruction to be the smallest size instruction for |
---|
.. | .. |
---|
296 | 457 | * @vaddr: the virtual address to store the opcode. |
---|
297 | 458 | * @opcode: opcode to be written at @vaddr. |
---|
298 | 459 | * |
---|
299 | | - * Called with mm->mmap_sem held for write. |
---|
| 460 | + * Called with mm->mmap_lock held for write. |
---|
300 | 461 | * Return 0 (success) or a negative errno. |
---|
301 | 462 | */ |
---|
302 | 463 | int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, |
---|
303 | 464 | unsigned long vaddr, uprobe_opcode_t opcode) |
---|
304 | 465 | { |
---|
| 466 | + struct uprobe *uprobe; |
---|
305 | 467 | struct page *old_page, *new_page; |
---|
306 | 468 | struct vm_area_struct *vma; |
---|
307 | | - int ret; |
---|
| 469 | + int ret, is_register, ref_ctr_updated = 0; |
---|
| 470 | + bool orig_page_huge = false; |
---|
| 471 | + unsigned int gup_flags = FOLL_FORCE; |
---|
| 472 | + |
---|
| 473 | + is_register = is_swbp_insn(&opcode); |
---|
| 474 | + uprobe = container_of(auprobe, struct uprobe, arch); |
---|
308 | 475 | |
---|
309 | 476 | retry: |
---|
| 477 | + if (is_register) |
---|
| 478 | + gup_flags |= FOLL_SPLIT_PMD; |
---|
310 | 479 | /* Read the page with vaddr into memory */ |
---|
311 | | - ret = get_user_pages_remote(NULL, mm, vaddr, 1, |
---|
312 | | - FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL); |
---|
| 480 | + ret = get_user_pages_remote(mm, vaddr, 1, gup_flags, |
---|
| 481 | + &old_page, &vma, NULL); |
---|
313 | 482 | if (ret <= 0) |
---|
314 | 483 | return ret; |
---|
315 | 484 | |
---|
316 | 485 | ret = verify_opcode(old_page, vaddr, &opcode); |
---|
317 | 486 | if (ret <= 0) |
---|
| 487 | + goto put_old; |
---|
| 488 | + |
---|
| 489 | + if (WARN(!is_register && PageCompound(old_page), |
---|
| 490 | + "uprobe unregister should never work on compound page\n")) { |
---|
| 491 | + ret = -EINVAL; |
---|
| 492 | + goto put_old; |
---|
| 493 | + } |
---|
| 494 | + |
---|
| 495 | + /* We are going to replace instruction, update ref_ctr. */ |
---|
| 496 | + if (!ref_ctr_updated && uprobe->ref_ctr_offset) { |
---|
| 497 | + ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1); |
---|
| 498 | + if (ret) |
---|
| 499 | + goto put_old; |
---|
| 500 | + |
---|
| 501 | + ref_ctr_updated = 1; |
---|
| 502 | + } |
---|
| 503 | + |
---|
| 504 | + ret = 0; |
---|
| 505 | + if (!is_register && !PageAnon(old_page)) |
---|
318 | 506 | goto put_old; |
---|
319 | 507 | |
---|
320 | 508 | ret = anon_vma_prepare(vma); |
---|
.. | .. |
---|
330 | 518 | copy_highpage(new_page, old_page); |
---|
331 | 519 | copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); |
---|
332 | 520 | |
---|
| 521 | + if (!is_register) { |
---|
| 522 | + struct page *orig_page; |
---|
| 523 | + pgoff_t index; |
---|
| 524 | + |
---|
| 525 | + VM_BUG_ON_PAGE(!PageAnon(old_page), old_page); |
---|
| 526 | + |
---|
| 527 | + index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT; |
---|
| 528 | + orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, |
---|
| 529 | + index); |
---|
| 530 | + |
---|
| 531 | + if (orig_page) { |
---|
| 532 | + if (PageUptodate(orig_page) && |
---|
| 533 | + pages_identical(new_page, orig_page)) { |
---|
| 534 | + /* let go new_page */ |
---|
| 535 | + put_page(new_page); |
---|
| 536 | + new_page = NULL; |
---|
| 537 | + |
---|
| 538 | + if (PageCompound(orig_page)) |
---|
| 539 | + orig_page_huge = true; |
---|
| 540 | + } |
---|
| 541 | + put_page(orig_page); |
---|
| 542 | + } |
---|
| 543 | + } |
---|
| 544 | + |
---|
333 | 545 | ret = __replace_page(vma, vaddr, old_page, new_page); |
---|
334 | | - put_page(new_page); |
---|
| 546 | + if (new_page) |
---|
| 547 | + put_page(new_page); |
---|
335 | 548 | put_old: |
---|
336 | 549 | put_page(old_page); |
---|
337 | 550 | |
---|
338 | 551 | if (unlikely(ret == -EAGAIN)) |
---|
339 | 552 | goto retry; |
---|
| 553 | + |
---|
| 554 | + /* Revert back reference counter if instruction update failed. */ |
---|
| 555 | + if (ret && is_register && ref_ctr_updated) |
---|
| 556 | + update_ref_ctr(uprobe, mm, -1); |
---|
| 557 | + |
---|
| 558 | + /* try collapse pmd for compound page */ |
---|
| 559 | + if (!ret && orig_page_huge) |
---|
| 560 | + collapse_pte_mapped_thp(mm, vaddr); |
---|
| 561 | + |
---|
340 | 562 | return ret; |
---|
341 | 563 | } |
---|
342 | 564 | |
---|
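The uprobe_write_opcode() rework above makes breakpoints coexist with transparent huge pages: FOLL_SPLIT_PMD splits only the page-table mapping while keeping the compound page intact (the old FOLL_SPLIT shattered the THP itself), and after an unregister that restores the original bytes, collapse_pte_mapped_thp() lets khugepaged rebuild the huge mapping — hence the new <linux/khugepaged.h> include. A hedged sketch of just the pinning step (pin_probe_page is a made-up name; error handling trimmed):

	/* Pin the page holding the probed instruction for modification. */
	static struct page *pin_probe_page(struct mm_struct *mm,
					   unsigned long vaddr, bool is_register)
	{
		struct vm_area_struct *vma;
		struct page *page;
		unsigned int gup_flags = FOLL_FORCE;
		int ret;

		if (is_register)
			gup_flags |= FOLL_SPLIT_PMD;	/* split mapping, keep THP */

		ret = get_user_pages_remote(mm, vaddr, 1, gup_flags,
					    &page, &vma, NULL);
		return ret <= 0 ? NULL : page;
	}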
.. | ..
372 | 594 |
373 | 595 | static struct uprobe *get_uprobe(struct uprobe *uprobe)
374 | 596 | {
375 | | -	atomic_inc(&uprobe->ref);
 | 597 | +	refcount_inc(&uprobe->ref);
376 | 598 | 	return uprobe;
377 | 599 | }
378 | 600 |
379 | 601 | static void put_uprobe(struct uprobe *uprobe)
380 | 602 | {
381 | | -	if (atomic_dec_and_test(&uprobe->ref))
 | 603 | +	if (refcount_dec_and_test(&uprobe->ref)) {
 | 604 | +		/*
 | 605 | +		 * If application munmap(exec_vma) before uprobe_unregister()
 | 606 | +		 * gets called, we don't get a chance to remove uprobe from
 | 607 | +		 * delayed_uprobe_list from remove_breakpoint(). Do it here.
 | 608 | +		 */
 | 609 | +		mutex_lock(&delayed_uprobe_lock);
 | 610 | +		delayed_uprobe_remove(uprobe, NULL);
 | 611 | +		mutex_unlock(&delayed_uprobe_lock);
382 | 612 | 		kfree(uprobe);
 | 613 | +	}
383 | 614 | }
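The atomic_t to refcount_t conversion in get_uprobe()/put_uprobe() is mostly mechanical but buys hardening: refcount_t saturates instead of wrapping on overflow and WARNs on misuse, turning a potential use-after-free into a loud warning. The generic shape of the pattern:

	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct obj {
		refcount_t ref;
	};

	static struct obj *obj_get(struct obj *o)
	{
		refcount_inc(&o->ref);	/* saturates and WARNs on overflow */
		return o;
	}

	static void obj_put(struct obj *o)
	{
		if (refcount_dec_and_test(&o->ref))
			kfree(o);
	}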
384 | 615 |
385 | 616 | static int match_uprobe(struct uprobe *l, struct uprobe *r)
.. | ..
460 | 691 | 	rb_link_node(&uprobe->rb_node, parent, p);
461 | 692 | 	rb_insert_color(&uprobe->rb_node, &uprobes_tree);
462 | 693 | 	/* get access + creation ref */
463 | | -	atomic_set(&uprobe->ref, 2);
 | 694 | +	refcount_set(&uprobe->ref, 2);
464 | 695 |
465 | 696 | 	return u;
466 | 697 | }
.. | ..
484 | 715 | 	return u;
485 | 716 | }
486 | 717 |
487 | | -static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 | 718 | +static void
 | 719 | +ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe)
 | 720 | +{
 | 721 | +	pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx "
 | 722 | +		"ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n",
 | 723 | +		uprobe->inode->i_ino, (unsigned long long) uprobe->offset,
 | 724 | +		(unsigned long long) cur_uprobe->ref_ctr_offset,
 | 725 | +		(unsigned long long) uprobe->ref_ctr_offset);
 | 726 | +}
 | 727 | +
 | 728 | +static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
 | 729 | +				   loff_t ref_ctr_offset)
488 | 730 | {
489 | 731 | 	struct uprobe *uprobe, *cur_uprobe;
490 | 732 |
.. | ..
494 | 736 |
495 | 737 | 	uprobe->inode = inode;
496 | 738 | 	uprobe->offset = offset;
 | 739 | +	uprobe->ref_ctr_offset = ref_ctr_offset;
497 | 740 | 	init_rwsem(&uprobe->register_rwsem);
498 | 741 | 	init_rwsem(&uprobe->consumer_rwsem);
499 | 742 |
.. | ..
501 | 744 | 	cur_uprobe = insert_uprobe(uprobe);
502 | 745 | 	/* a uprobe exists for this inode:offset combination */
503 | 746 | 	if (cur_uprobe) {
 | 747 | +		if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) {
 | 748 | +			ref_ctr_mismatch_warn(cur_uprobe, uprobe);
 | 749 | +			put_uprobe(cur_uprobe);
 | 750 | +			kfree(uprobe);
 | 751 | +			return ERR_PTR(-EINVAL);
 | 752 | +		}
504 | 753 | 		kfree(uprobe);
505 | 754 | 		uprobe = cur_uprobe;
506 | 755 | 	}
.. | ..
805 | 1054 | 	if (err && is_register)
806 | 1055 | 		goto free;
807 | 1056 |
808 | | -	down_write(&mm->mmap_sem);
 | 1057 | +	mmap_write_lock(mm);
809 | 1058 | 	vma = find_vma(mm, info->vaddr);
810 | 1059 | 	if (!vma || !valid_vma(vma, is_register) ||
811 | 1060 | 	    file_inode(vma->vm_file) != uprobe->inode)
.. | ..
827 | 1076 | 	}
828 | 1077 |
829 | 1078 | unlock:
830 | | -	up_write(&mm->mmap_sem);
 | 1079 | +	mmap_write_unlock(mm);
831 | 1080 | free:
832 | 1081 | 	mmput(mm);
833 | 1082 | 	info = free_map_info(info);
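The down_write(&mm->mmap_sem)/up_write() pairs here, and the read-side pairs further down, come from the v5.8 mmap locking API conversion: mmap_read_lock(), mmap_write_lock_killable(), and friends hide the lock's implementation behind one interface. The read-side idiom, sketched (inspect_vma is a made-up name):

	#include <linux/mm.h>
	#include <linux/mmap_lock.h>

	static void inspect_vma(struct mm_struct *mm, unsigned long addr)
	{
		struct vm_area_struct *vma;

		mmap_read_lock(mm);
		vma = find_vma(mm, addr);
		/* ... use vma; it is only stable while the lock is held ... */
		mmap_read_unlock(mm);
	}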
.. | ..
891 | 1140 |  * else return 0 (success)
892 | 1141 |  */
893 | 1142 | static int __uprobe_register(struct inode *inode, loff_t offset,
894 | | -			struct uprobe_consumer *uc)
 | 1143 | +			     loff_t ref_ctr_offset, struct uprobe_consumer *uc)
895 | 1144 | {
896 | 1145 | 	struct uprobe *uprobe;
897 | 1146 | 	int ret;
.. | ..
908 | 1157 | 		return -EINVAL;
909 | 1158 |
910 | 1159 | 	/*
911 | | -	 * This ensures that copy_from_page() and copy_to_page()
912 | | -	 * can't cross page boundary.
 | 1160 | +	 * This ensures that copy_from_page(), copy_to_page() and
 | 1161 | +	 * __update_ref_ctr() can't cross page boundary.
913 | 1162 | 	 */
914 | 1163 | 	if (!IS_ALIGNED(offset, UPROBE_SWBP_INSN_SIZE))
915 | 1164 | 		return -EINVAL;
 | 1165 | +	if (!IS_ALIGNED(ref_ctr_offset, sizeof(short)))
 | 1166 | +		return -EINVAL;
916 | 1167 |
917 | 1168 | retry:
918 | | -	uprobe = alloc_uprobe(inode, offset);
 | 1169 | +	uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
919 | 1170 | 	if (!uprobe)
920 | 1171 | 		return -ENOMEM;
 | 1172 | +	if (IS_ERR(uprobe))
 | 1173 | +		return PTR_ERR(uprobe);
 | 1174 | +
921 | 1175 | 	/*
922 | 1176 | 	 * We can race with uprobe_unregister()->delete_uprobe().
923 | 1177 | 	 * Check uprobe_is_active() and retry if it is false.
.. | ..
941 | 1195 | int uprobe_register(struct inode *inode, loff_t offset,
942 | 1196 | 		    struct uprobe_consumer *uc)
943 | 1197 | {
944 | | -	return __uprobe_register(inode, offset, uc);
 | 1198 | +	return __uprobe_register(inode, offset, 0, uc);
945 | 1199 | }
946 | 1200 | EXPORT_SYMBOL_GPL(uprobe_register);
 | 1201 | +
 | 1202 | +int uprobe_register_refctr(struct inode *inode, loff_t offset,
 | 1203 | +			   loff_t ref_ctr_offset, struct uprobe_consumer *uc)
 | 1204 | +{
 | 1205 | +	return __uprobe_register(inode, offset, ref_ctr_offset, uc);
 | 1206 | +}
 | 1207 | +EXPORT_SYMBOL_GPL(uprobe_register_refctr);
947 | 1208 |
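uprobe_register_refctr() is the one new exported entry point: identical to uprobe_register() except for the extra file offset of the probe's reference counter. A hedged usage sketch from a hypothetical in-kernel client (both offsets and all names here are made up):

	static int my_handler(struct uprobe_consumer *uc, struct pt_regs *regs)
	{
		return 0;	/* 0 == keep the probe installed */
	}

	static struct uprobe_consumer my_consumer = {
		.handler = my_handler,
	};

	static int attach_probe(struct inode *inode)
	{
		/* 0x530: instruction offset; 0x10036: counter offset in file */
		return uprobe_register_refctr(inode, 0x530, 0x10036, &my_consumer);
		/* detach later with uprobe_unregister(inode, 0x530, &my_consumer) */
	}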
948 | 1209 | /*
949 | 1210 |  * uprobe_apply - unregister an already registered probe.
.. | ..
979 | 1240 | 	struct vm_area_struct *vma;
980 | 1241 | 	int err = 0;
981 | 1242 |
982 | | -	down_read(&mm->mmap_sem);
 | 1243 | +	mmap_read_lock(mm);
983 | 1244 | 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
984 | 1245 | 		unsigned long vaddr;
985 | 1246 | 		loff_t offset;
.. | ..
996 | 1257 | 		vaddr = offset_to_vaddr(vma, uprobe->offset);
997 | 1258 | 		err |= remove_breakpoint(uprobe, mm, vaddr);
998 | 1259 | 	}
999 | | -	up_read(&mm->mmap_sem);
 | 1260 | +	mmap_read_unlock(mm);
1000 | 1261 |
1001 | 1262 | 	return err;
1002 | 1263 | }
.. | ..
1063 | 1324 | 	spin_unlock(&uprobes_treelock);
1064 | 1325 | }
1065 | 1326 |
 | 1327 | +/* @vma contains reference counter, not the probed instruction. */
 | 1328 | +static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
 | 1329 | +{
 | 1330 | +	struct list_head *pos, *q;
 | 1331 | +	struct delayed_uprobe *du;
 | 1332 | +	unsigned long vaddr;
 | 1333 | +	int ret = 0, err = 0;
 | 1334 | +
 | 1335 | +	mutex_lock(&delayed_uprobe_lock);
 | 1336 | +	list_for_each_safe(pos, q, &delayed_uprobe_list) {
 | 1337 | +		du = list_entry(pos, struct delayed_uprobe, list);
 | 1338 | +
 | 1339 | +		if (du->mm != vma->vm_mm ||
 | 1340 | +		    !valid_ref_ctr_vma(du->uprobe, vma))
 | 1341 | +			continue;
 | 1342 | +
 | 1343 | +		vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset);
 | 1344 | +		ret = __update_ref_ctr(vma->vm_mm, vaddr, 1);
 | 1345 | +		if (ret) {
 | 1346 | +			update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1);
 | 1347 | +			if (!err)
 | 1348 | +				err = ret;
 | 1349 | +		}
 | 1350 | +		delayed_uprobe_delete(du);
 | 1351 | +	}
 | 1352 | +	mutex_unlock(&delayed_uprobe_lock);
 | 1353 | +	return err;
 | 1354 | +}
 | 1355 | +
1066 | 1356 | /*
1067 | | - * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
 | 1357 | + * Called from mmap_region/vma_adjust with mm->mmap_lock acquired.
1068 | 1358 |  *
1069 | 1359 |  * Currently we ignore all errors and always return 0, the callers
1070 | 1360 |  * can't handle the failure anyway.
.. | ..
1075 | 1365 | 	struct uprobe *uprobe, *u;
1076 | 1366 | 	struct inode *inode;
1077 | 1367 |
1078 | | -	if (no_uprobe_events() || !valid_vma(vma, true))
 | 1368 | +	if (no_uprobe_events())
 | 1369 | +		return 0;
 | 1370 | +
 | 1371 | +	if (vma->vm_file &&
 | 1372 | +	    (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
 | 1373 | +	    test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags))
 | 1374 | +		delayed_ref_ctr_inc(vma);
 | 1375 | +
 | 1376 | +	if (!valid_vma(vma, true))
1079 | 1377 | 		return 0;
1080 | 1378 |
1081 | 1379 | 	inode = file_inode(vma->vm_file);
.. | ..
1146 | 1444 | 	struct vm_area_struct *vma;
1147 | 1445 | 	int ret;
1148 | 1446 |
1149 | | -	if (down_write_killable(&mm->mmap_sem))
 | 1447 | +	if (mmap_write_lock_killable(mm))
1150 | 1448 | 		return -EINTR;
1151 | 1449 |
1152 | 1450 | 	if (mm->uprobes_state.xol_area) {
.. | ..
1158 | 1456 | 	/* Try to map as high as possible, this is only a hint. */
1159 | 1457 | 	area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
1160 | 1458 | 					PAGE_SIZE, 0, 0);
1161 | | -	if (area->vaddr & ~PAGE_MASK) {
 | 1459 | +	if (IS_ERR_VALUE(area->vaddr)) {
1162 | 1460 | 		ret = area->vaddr;
1163 | 1461 | 		goto fail;
1164 | 1462 | 	}
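The xol_add_vma() change above swaps a subtle check for an explicit one: get_unmapped_area() returns either a page-aligned address or a negative errno cast to unsigned long, so the old `area->vaddr & ~PAGE_MASK` test caught errors only because errnos are never page-aligned. IS_ERR_VALUE() states the intent directly. Sketch of the pattern (check_addr is a made-up name):

	#include <linux/err.h>

	/* errno values occupy the top MAX_ERRNO unsigned-long values */
	static long check_addr(unsigned long addr)
	{
		if (IS_ERR_VALUE(addr))
			return (long)addr;	/* e.g. -ENOMEM */
		return 0;			/* a real address */
	}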
.. | ..
1176 | 1474 | 	/* pairs with get_xol_area() */
1177 | 1475 | 	smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */
1178 | 1476 | fail:
1179 | | -	up_write(&mm->mmap_sem);
 | 1477 | +	mmap_write_unlock(mm);
1180 | 1478 |
1181 | 1479 | 	return ret;
1182 | 1480 | }
.. | ..
1248 | 1546 | void uprobe_clear_state(struct mm_struct *mm)
1249 | 1547 | {
1250 | 1548 | 	struct xol_area *area = mm->uprobes_state.xol_area;
 | 1549 | +
 | 1550 | +	mutex_lock(&delayed_uprobe_lock);
 | 1551 | +	delayed_uprobe_remove(NULL, mm);
 | 1552 | +	mutex_unlock(&delayed_uprobe_lock);
1251 | 1553 |
1252 | 1554 | 	if (!area)
1253 | 1555 | 		return;
.. | ..
1371 | 1673 | 	copy_to_page(page, vaddr, src, len);
1372 | 1674 |
1373 | 1675 | 	/*
1374 | | -	 * We probably need flush_icache_user_range() but it needs vma.
 | 1676 | +	 * We probably need flush_icache_user_page() but it needs vma.
1375 | 1677 | 	 * This should work on most of architectures by default. If
1376 | 1678 | 	 * architecture needs to do something different it can define
1377 | 1679 | 	 * its own version of the function.
.. | ..
1433 | 1735 | }
1434 | 1736 |
1435 | 1737 | /*
1436 | | - * Allocate a uprobe_task object for the task if if necessary.
 | 1738 | + * Allocate a uprobe_task object for the task if necessary.
1437 | 1739 |  * Called when the thread hits a breakpoint.
1438 | 1740 |  *
1439 | 1741 |  * Returns:
.. | ..
1521 | 1823 |
1522 | 1824 | 	t->utask->dup_xol_addr = area->vaddr;
1523 | 1825 | 	init_task_work(&t->utask->dup_xol_work, dup_xol_work);
1524 | | -	task_work_add(t, &t->utask->dup_xol_work, true);
 | 1826 | +	task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME);
1525 | 1827 | }
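task_work_add()'s last parameter changed from a bool to an explicit notify mode: TWA_RESUME (used above) runs the callback on the task's next return to user space, TWA_SIGNAL additionally kicks the task with a signal, and TWA_NONE just queues. The idiom (names made up):

	#include <linux/task_work.h>

	static void my_callback(struct callback_head *work)
	{
		/* runs in the target task's own context */
	}

	static struct callback_head my_work;

	static int queue_on(struct task_struct *t)
	{
		init_task_work(&my_work, my_callback);
		/* fails with -ESRCH if the task is already exiting */
		return task_work_add(t, &my_work, TWA_RESUME);
	}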
1526 | 1828 |
1527 | 1829 | /*
.. | ..
1671 | 1973 |
1672 | 1974 | 	WARN_ON_ONCE(utask->state != UTASK_SSTEP);
1673 | 1975 |
1674 | | -	if (signal_pending(t)) {
 | 1976 | +	if (task_sigpending(t)) {
1675 | 1977 | 		spin_lock_irq(&t->sighand->siglock);
1676 | 1978 | 		clear_tsk_thread_flag(t, TIF_SIGPENDING);
1677 | 1979 | 		spin_unlock_irq(&t->sighand->siglock);
.. | ..
1727 | 2029 | 	 * but we treat this as a 'remote' access since it is
1728 | 2030 | 	 * essentially a kernel access to the memory.
1729 | 2031 | 	 */
1730 | | -	result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
 | 2032 | +	result = get_user_pages_remote(mm, vaddr, 1, FOLL_FORCE, &page,
1731 | 2033 | 				       NULL, NULL);
1732 | 2034 | 	if (result < 0)
1733 | 2035 | 		return result;
.. | ..
1745 | 2047 | 	struct uprobe *uprobe = NULL;
1746 | 2048 | 	struct vm_area_struct *vma;
1747 | 2049 |
1748 | | -	down_read(&mm->mmap_sem);
 | 2050 | +	mmap_read_lock(mm);
1749 | 2051 | 	vma = find_vma(mm, bp_vaddr);
1750 | 2052 | 	if (vma && vma->vm_start <= bp_vaddr) {
1751 | 2053 | 		if (valid_vma(vma, false)) {
.. | ..
1763 | 2065 |
1764 | 2066 | 	if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
1765 | 2067 | 		mmf_recalc_uprobes(mm);
1766 | | -	up_read(&mm->mmap_sem);
 | 2068 | +	mmap_read_unlock(mm);
1767 | 2069 |
1768 | 2070 | 	return uprobe;
1769 | 2071 | }
.. | ..
1781 | 2083 | 	if (uc->handler) {
1782 | 2084 | 		rc = uc->handler(uc, regs);
1783 | 2085 | 		WARN(rc & ~UPROBE_HANDLER_MASK,
1784 | | -			"bad rc=0x%x from %pf()\n", rc, uc->handler);
 | 2086 | +			"bad rc=0x%x from %ps()\n", rc, uc->handler);
1785 | 2087 | 	}
1786 | 2088 |
1787 | 2089 | 	if (uc->ret_handler)
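The %pf to %ps change follows printk dropping the old "function pointer" specifiers: %ps/%pS took over dereferencing function descriptors on the few architectures where they exist (ia64, ppc64 ABIv1, parisc), making %pf/%pF redundant, and they were removed. For reference (sketch; show_handler is a made-up name):

	static void show_handler(struct uprobe_consumer *uc)
	{
		pr_info("handler: %ps\n", uc->handler);	/* symbol name only */
		pr_info("handler: %pS\n", uc->handler);	/* symbol + offset */
	}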
.. | ..
1864 | 2166 |
1865 | 2167 | sigill:
1866 | 2168 | 	uprobe_warn(current, "handle uretprobe, sending SIGILL.");
1867 | | -	force_sig(SIGILL, current);
 | 2169 | +	force_sig(SIGILL);
1868 | 2170 |
1869 | 2171 | }
1870 | 2172 |
.. | ..
1887 | 2189 | {
1888 | 2190 | 	struct uprobe *uprobe;
1889 | 2191 | 	unsigned long bp_vaddr;
1890 | | -	int uninitialized_var(is_swbp);
 | 2192 | +	int is_swbp;
1891 | 2193 |
1892 | 2194 | 	bp_vaddr = uprobe_get_swbp_addr(regs);
1893 | 2195 | 	if (bp_vaddr == get_trampoline_vaddr())
.. | ..
1897 | 2199 | 	if (!uprobe) {
1898 | 2200 | 		if (is_swbp > 0) {
1899 | 2201 | 			/* No matching uprobe; signal SIGTRAP. */
1900 | | -			force_sig(SIGTRAP, current);
 | 2202 | +			force_sig(SIGTRAP);
1901 | 2203 | 		} else {
1902 | 2204 | 			/*
1903 | 2205 | 			 * Either we raced with uprobe_unregister() or we can't
.. | ..
1980 | 2282 |
1981 | 2283 | 	if (unlikely(err)) {
1982 | 2284 | 		uprobe_warn(current, "execute the probed insn, sending SIGILL.");
1983 | | -		force_sig(SIGILL, current);
 | 2285 | +		force_sig(SIGILL);
1984 | 2286 | 	}
1985 | 2287 | }
1986 | 2288 |
.. | ..
2047 | 2349 | 	.priority		= INT_MAX-1,	/* notified after kprobes, kgdb */
2048 | 2350 | };
2049 | 2351 |
2050 | | -static int __init init_uprobes(void)
 | 2352 | +void __init uprobes_init(void)
2051 | 2353 | {
2052 | 2354 | 	int i;
2053 | 2355 |
2054 | 2356 | 	for (i = 0; i < UPROBES_HASH_SZ; i++)
2055 | 2357 | 		mutex_init(&uprobes_mmap_mutex[i]);
2056 | 2358 |
2057 | | -	if (percpu_init_rwsem(&dup_mmap_sem))
2058 | | -		return -ENOMEM;
2059 | | -
2060 | | -	return register_die_notifier(&uprobe_exception_nb);
 | 2359 | +	BUG_ON(register_die_notifier(&uprobe_exception_nb));
2061 | 2360 | }
2062 | | -__initcall(init_uprobes);
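This last hunk pairs with the DEFINE_STATIC_PERCPU_RWSEM() change at the top of the diff: with dup_mmap_sem statically initialized, nothing in init can fail anymore, so init_uprobes() loses its error path, becomes void uprobes_init(), and is called explicitly during early boot (from fork_init() in upstream kernels) rather than via __initcall(). The static-initialization idiom, sketched (my_sem is a made-up name):

	#include <linux/percpu-rwsem.h>

	DEFINE_STATIC_PERCPU_RWSEM(my_sem);	/* no runtime percpu_init_rwsem() */

	static void read_side(void)
	{
		percpu_down_read(&my_sem);
		/* ... cheap, mostly uncontended read-side section ... */
		percpu_up_read(&my_sem);
	}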