2024-10-12 a5969cabbb4660eab42b6ef0412cbbd1200cf14d
kernel/events/uprobes.c

@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * User-space Probes (UProbes)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
  * Copyright (C) IBM Corporation, 2008-2012
  * Authors:
@@ -39,6 +26,7 @@
 #include <linux/percpu-rwsem.h>
 #include <linux/task_work.h>
 #include <linux/shmem_fs.h>
+#include <linux/khugepaged.h>
 
 #include <linux/uprobes.h>
 
@@ -59,20 +47,21 @@
 static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
 #define uprobes_mmap_hash(v)    (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
 
-static struct percpu_rw_semaphore dup_mmap_sem;
+DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
 
 /* Have a copy of original instruction */
 #define UPROBE_COPY_INSN        0
 
 struct uprobe {
        struct rb_node          rb_node;        /* node in the rb tree */
-       atomic_t                ref;
+       refcount_t              ref;
        struct rw_semaphore     register_rwsem;
        struct rw_semaphore     consumer_rwsem;
        struct list_head        pending_list;
        struct uprobe_consumer  *consumers;
        struct inode            *inode;         /* Also hold a ref to inode */
        loff_t                  offset;
+       loff_t                  ref_ctr_offset;
        unsigned long           flags;
 
        /*
@@ -87,6 +76,15 @@
         */
        struct arch_uprobe      arch;
 };
+
+struct delayed_uprobe {
+       struct list_head list;
+       struct uprobe *uprobe;
+       struct mm_struct *mm;
+};
+
+static DEFINE_MUTEX(delayed_uprobe_lock);
+static LIST_HEAD(delayed_uprobe_list);
 
 /*
  * Execute out of line area: anonymous executable mapping installed
@@ -146,48 +144,50 @@
  *
  * @vma:      vma that holds the pte pointing to page
  * @addr:     address the old @page is mapped at
- * @page:     the cowed page we are replacing by kpage
- * @kpage:    the modified page we replace page by
+ * @old_page: the page we are replacing by new_page
+ * @new_page: the modified page we replace page by
  *
- * Returns 0 on success, -EFAULT on failure.
+ * If @new_page is NULL, only unmap @old_page.
+ *
+ * Returns 0 on success, negative error code otherwise.
  */
 static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
                           struct page *old_page, struct page *new_page)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct page_vma_mapped_walk pvmw = {
-               .page = old_page,
+               .page = compound_head(old_page),
                .vma = vma,
                .address = addr,
        };
        int err;
-       /* For mmu_notifiers */
-       const unsigned long mmun_start = addr;
-       const unsigned long mmun_end = addr + PAGE_SIZE;
-       struct mem_cgroup *memcg;
+       struct mmu_notifier_range range;
 
-       VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
+                               addr + PAGE_SIZE);
 
-       err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
-                       false);
-       if (err)
-               return err;
+       if (new_page) {
+               err = mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL);
+               if (err)
+                       return err;
+       }
 
        /* For try_to_free_swap() and munlock_vma_page() below */
        lock_page(old_page);
 
-       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+       mmu_notifier_invalidate_range_start(&range);
        err = -EAGAIN;
-       if (!page_vma_mapped_walk(&pvmw)) {
-               mem_cgroup_cancel_charge(new_page, memcg, false);
+       if (!page_vma_mapped_walk(&pvmw))
                goto unlock;
-       }
        VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
 
-       get_page(new_page);
-       page_add_new_anon_rmap(new_page, vma, addr, false);
-       mem_cgroup_commit_charge(new_page, memcg, false, false);
-       lru_cache_add_active_or_unevictable(new_page, vma);
+       if (new_page) {
+               get_page(new_page);
+               page_add_new_anon_rmap(new_page, vma, addr, false);
+               lru_cache_add_inactive_or_unevictable(new_page, vma);
+       } else
+               /* no new page, just dec_mm_counter for old_page */
+               dec_mm_counter(mm, MM_ANONPAGES);
 
        if (!PageAnon(old_page)) {
                dec_mm_counter(mm, mm_counter_file(old_page));
@@ -196,21 +196,22 @@
 
        flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
        ptep_clear_flush_notify(vma, addr, pvmw.pte);
-       set_pte_at_notify(mm, addr, pvmw.pte,
-                         mk_pte(new_page, vma->vm_page_prot));
+       if (new_page)
+               set_pte_at_notify(mm, addr, pvmw.pte,
+                                 mk_pte(new_page, vma->vm_page_prot));
 
        page_remove_rmap(old_page, false);
        if (!page_mapped(old_page))
               try_to_free_swap(old_page);
        page_vma_mapped_walk_done(&pvmw);
 
-       if (vma->vm_flags & VM_LOCKED)
+       if ((vma->vm_flags & VM_LOCKED) && !PageCompound(old_page))
               munlock_vma_page(old_page);
        put_page(old_page);
 
        err = 0;
 unlock:
-       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+       mmu_notifier_invalidate_range_end(&range);
        unlock_page(old_page);
        return err;
 }
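The mmu_notifier conversion in the hunk above follows the range-struct API: the (mm, start, end) triple moves into a struct mmu_notifier_range that is initialized once and passed to both invalidate calls. A minimal sketch of the pattern as __replace_page() uses it, with the same one-page span:

    /* Sketch of the range-based mmu_notifier calls used above. */
    struct mmu_notifier_range range;

    mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
                            addr, addr + PAGE_SIZE);
    mmu_notifier_invalidate_range_start(&range);
    /* ... clear or replace the PTE under the page table lock ... */
    mmu_notifier_invalidate_range_end(&range);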
@@ -282,6 +283,166 @@
        return 1;
 }
 
+static struct delayed_uprobe *
+delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
+{
+       struct delayed_uprobe *du;
+
+       list_for_each_entry(du, &delayed_uprobe_list, list)
+               if (du->uprobe == uprobe && du->mm == mm)
+                       return du;
+       return NULL;
+}
+
+static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
+{
+       struct delayed_uprobe *du;
+
+       if (delayed_uprobe_check(uprobe, mm))
+               return 0;
+
+       du = kzalloc(sizeof(*du), GFP_KERNEL);
+       if (!du)
+               return -ENOMEM;
+
+       du->uprobe = uprobe;
+       du->mm = mm;
+       list_add(&du->list, &delayed_uprobe_list);
+       return 0;
+}
+
+static void delayed_uprobe_delete(struct delayed_uprobe *du)
+{
+       if (WARN_ON(!du))
+               return;
+       list_del(&du->list);
+       kfree(du);
+}
+
+static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
+{
+       struct list_head *pos, *q;
+       struct delayed_uprobe *du;
+
+       if (!uprobe && !mm)
+               return;
+
+       list_for_each_safe(pos, q, &delayed_uprobe_list) {
+               du = list_entry(pos, struct delayed_uprobe, list);
+
+               if (uprobe && du->uprobe != uprobe)
+                       continue;
+               if (mm && du->mm != mm)
+                       continue;
+
+               delayed_uprobe_delete(du);
+       }
+}
+
+static bool valid_ref_ctr_vma(struct uprobe *uprobe,
+                             struct vm_area_struct *vma)
+{
+       unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset);
+
+       return uprobe->ref_ctr_offset &&
+               vma->vm_file &&
+               file_inode(vma->vm_file) == uprobe->inode &&
+               (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
+               vma->vm_start <= vaddr &&
+               vma->vm_end > vaddr;
+}
+
+static struct vm_area_struct *
+find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm)
+{
+       struct vm_area_struct *tmp;
+
+       for (tmp = mm->mmap; tmp; tmp = tmp->vm_next)
+               if (valid_ref_ctr_vma(uprobe, tmp))
+                       return tmp;
+
+       return NULL;
+}
+
+static int
+__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
+{
+       void *kaddr;
+       struct page *page;
+       struct vm_area_struct *vma;
+       int ret;
+       short *ptr;
+
+       if (!vaddr || !d)
+               return -EINVAL;
+
+       ret = get_user_pages_remote(mm, vaddr, 1,
+                                   FOLL_WRITE, &page, &vma, NULL);
+       if (unlikely(ret <= 0)) {
+               /*
+                * We are asking for 1 page. If get_user_pages_remote() fails,
+                * it may return 0, in that case we have to return error.
+                */
+               return ret == 0 ? -EBUSY : ret;
+       }
+
+       kaddr = kmap_atomic(page);
+       ptr = kaddr + (vaddr & ~PAGE_MASK);
+
+       if (unlikely(*ptr + d < 0)) {
+               pr_warn("ref_ctr going negative. vaddr: 0x%lx, "
+                       "curr val: %d, delta: %d\n", vaddr, *ptr, d);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       *ptr += d;
+       ret = 0;
+out:
+       kunmap_atomic(kaddr);
+       put_page(page);
+       return ret;
+}
+
+static void update_ref_ctr_warn(struct uprobe *uprobe,
+                               struct mm_struct *mm, short d)
+{
+       pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
+               "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n",
+               d > 0 ? "increment" : "decrement", uprobe->inode->i_ino,
+               (unsigned long long) uprobe->offset,
+               (unsigned long long) uprobe->ref_ctr_offset, mm);
+}
+
+static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
+                         short d)
+{
+       struct vm_area_struct *rc_vma;
+       unsigned long rc_vaddr;
+       int ret = 0;
+
+       rc_vma = find_ref_ctr_vma(uprobe, mm);
+
+       if (rc_vma) {
+               rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset);
+               ret = __update_ref_ctr(mm, rc_vaddr, d);
+               if (ret)
+                       update_ref_ctr_warn(uprobe, mm, d);
+
+               if (d > 0)
+                       return ret;
+       }
+
+       mutex_lock(&delayed_uprobe_lock);
+       if (d > 0)
+               ret = delayed_uprobe_add(uprobe, mm);
+       else
+               delayed_uprobe_remove(uprobe, mm);
+       mutex_unlock(&delayed_uprobe_lock);
+
+       return ret;
+}
+
 /*
  * NOTE:
  * Expect the breakpoint instruction to be the smallest size instruction for
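Context for the ref_ctr machinery added above: ref_ctr_offset locates an SDT-style semaphore (a short) inside the traced binary's writable data, and update_ref_ctr() bumps it whenever a breakpoint is installed or removed in an mm, so the application can cheaply test whether anyone is tracing it. If no VMA maps the semaphore page yet, the (uprobe, mm) pair is parked on delayed_uprobe_list until uprobe_mmap() sees the VMA. A hedged user-space sketch of the consumer side (names are illustrative; real USDT probes emit the semaphore and its address via the sys/sdt.h macros, from which ref_ctr_offset is derived):

    /* What the short at ref_ctr_offset looks like from user space. */
    unsigned short my_probe_sema;   /* incremented by the kernel while the probe is enabled */

    static void do_work(int x)
    {
            if (my_probe_sema > 0) {
                    /* a tracer is attached: build the (possibly
                     * expensive) probe arguments only in this case */
            }
    }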
@@ -296,25 +457,52 @@
  * @vaddr: the virtual address to store the opcode.
  * @opcode: opcode to be written at @vaddr.
  *
- * Called with mm->mmap_sem held for write.
+ * Called with mm->mmap_lock held for write.
  * Return 0 (success) or a negative errno.
  */
 int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
                        unsigned long vaddr, uprobe_opcode_t opcode)
 {
+       struct uprobe *uprobe;
        struct page *old_page, *new_page;
        struct vm_area_struct *vma;
-       int ret;
+       int ret, is_register, ref_ctr_updated = 0;
+       bool orig_page_huge = false;
+       unsigned int gup_flags = FOLL_FORCE;
+
+       is_register = is_swbp_insn(&opcode);
+       uprobe = container_of(auprobe, struct uprobe, arch);
 
 retry:
+       if (is_register)
+               gup_flags |= FOLL_SPLIT_PMD;
        /* Read the page with vaddr into memory */
-       ret = get_user_pages_remote(NULL, mm, vaddr, 1,
-                       FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
+       ret = get_user_pages_remote(mm, vaddr, 1, gup_flags,
+                                   &old_page, &vma, NULL);
        if (ret <= 0)
                return ret;
 
        ret = verify_opcode(old_page, vaddr, &opcode);
        if (ret <= 0)
+               goto put_old;
+
+       if (WARN(!is_register && PageCompound(old_page),
+                "uprobe unregister should never work on compound page\n")) {
+               ret = -EINVAL;
+               goto put_old;
+       }
+
+       /* We are going to replace instruction, update ref_ctr. */
+       if (!ref_ctr_updated && uprobe->ref_ctr_offset) {
+               ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1);
+               if (ret)
+                       goto put_old;
+
+               ref_ctr_updated = 1;
+       }
+
+       ret = 0;
+       if (!is_register && !PageAnon(old_page))
                goto put_old;
 
        ret = anon_vma_prepare(vma);
@@ -330,13 +518,47 @@
        copy_highpage(new_page, old_page);
        copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
+       if (!is_register) {
+               struct page *orig_page;
+               pgoff_t index;
+
+               VM_BUG_ON_PAGE(!PageAnon(old_page), old_page);
+
+               index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
+               orig_page = find_get_page(vma->vm_file->f_inode->i_mapping,
+                                         index);
+
+               if (orig_page) {
+                       if (PageUptodate(orig_page) &&
+                           pages_identical(new_page, orig_page)) {
+                               /* let go new_page */
+                               put_page(new_page);
+                               new_page = NULL;
+
+                               if (PageCompound(orig_page))
+                                       orig_page_huge = true;
+                       }
+                       put_page(orig_page);
+               }
+       }
+
        ret = __replace_page(vma, vaddr, old_page, new_page);
-       put_page(new_page);
+       if (new_page)
+               put_page(new_page);
 put_old:
        put_page(old_page);
 
        if (unlikely(ret == -EAGAIN))
                goto retry;
+
+       /* Revert back reference counter if instruction update failed. */
+       if (ret && is_register && ref_ctr_updated)
+               update_ref_ctr(uprobe, mm, -1);
+
+       /* try collapse pmd for compound page */
+       if (!ret && orig_page_huge)
+               collapse_pte_mapped_thp(mm, vaddr);
+
        return ret;
 }
 
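The register/unregister asymmetry above is the THP-friendly part of the patch: FOLL_SPLIT_PMD splits a huge PMD only when installing a breakpoint, and on removal, if the restored page is byte-identical to the backing file page, the private anonymous copy is dropped (new_page = NULL means "unmap only" in __replace_page()) and collapse_pte_mapped_thp() is asked to rebuild the huge mapping. A condensed sketch of that test (assumption: kernel context with linux/mm.h; pages_identical() is a kmap-and-memcmp helper):

    /* Sketch: when the anon copy can be dropped on unregister. */
    static bool can_drop_private_copy(struct page *new_page,
                                      struct page *orig_page)
    {
            /* after restoring the original insn, new_page matches the
             * file page, so the file page can simply be mapped back */
            return PageUptodate(orig_page) &&
                   pages_identical(new_page, orig_page);
    }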
@@ -372,14 +594,23 @@
 
 static struct uprobe *get_uprobe(struct uprobe *uprobe)
 {
-       atomic_inc(&uprobe->ref);
+       refcount_inc(&uprobe->ref);
        return uprobe;
 }
 
 static void put_uprobe(struct uprobe *uprobe)
 {
-       if (atomic_dec_and_test(&uprobe->ref))
+       if (refcount_dec_and_test(&uprobe->ref)) {
+               /*
+                * If application munmap(exec_vma) before uprobe_unregister()
+                * gets called, we don't get a chance to remove uprobe from
+                * delayed_uprobe_list from remove_breakpoint(). Do it here.
+                */
+               mutex_lock(&delayed_uprobe_lock);
+               delayed_uprobe_remove(uprobe, NULL);
+               mutex_unlock(&delayed_uprobe_lock);
                kfree(uprobe);
+       }
 }
 
 static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -460,7 +691,7 @@
        rb_link_node(&uprobe->rb_node, parent, p);
        rb_insert_color(&uprobe->rb_node, &uprobes_tree);
        /* get access + creation ref */
-       atomic_set(&uprobe->ref, 2);
+       refcount_set(&uprobe->ref, 2);
 
        return u;
 }
@@ -484,7 +715,18 @@
        return u;
 }
 
-static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
+static void
+ref_ctr_mismatch_warn(struct uprobe *cur_uprobe, struct uprobe *uprobe)
+{
+       pr_warn("ref_ctr_offset mismatch. inode: 0x%lx offset: 0x%llx "
+               "ref_ctr_offset(old): 0x%llx ref_ctr_offset(new): 0x%llx\n",
+               uprobe->inode->i_ino, (unsigned long long) uprobe->offset,
+               (unsigned long long) cur_uprobe->ref_ctr_offset,
+               (unsigned long long) uprobe->ref_ctr_offset);
+}
+
+static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
+                                  loff_t ref_ctr_offset)
 {
        struct uprobe *uprobe, *cur_uprobe;
 
@@ -494,6 +736,7 @@
 
        uprobe->inode = inode;
        uprobe->offset = offset;
+       uprobe->ref_ctr_offset = ref_ctr_offset;
        init_rwsem(&uprobe->register_rwsem);
        init_rwsem(&uprobe->consumer_rwsem);
 
@@ -501,6 +744,12 @@
        cur_uprobe = insert_uprobe(uprobe);
        /* a uprobe exists for this inode:offset combination */
        if (cur_uprobe) {
+               if (cur_uprobe->ref_ctr_offset != uprobe->ref_ctr_offset) {
+                       ref_ctr_mismatch_warn(cur_uprobe, uprobe);
+                       put_uprobe(cur_uprobe);
+                       kfree(uprobe);
+                       return ERR_PTR(-EINVAL);
+               }
                kfree(uprobe);
                uprobe = cur_uprobe;
        }
@@ -805,7 +1054,7 @@
        if (err && is_register)
                goto free;
 
-       down_write(&mm->mmap_sem);
+       mmap_write_lock(mm);
        vma = find_vma(mm, info->vaddr);
        if (!vma || !valid_vma(vma, is_register) ||
            file_inode(vma->vm_file) != uprobe->inode)
@@ -827,7 +1076,7 @@
        }
 
 unlock:
-       up_write(&mm->mmap_sem);
+       mmap_write_unlock(mm);
 free:
        mmput(mm);
        info = free_map_info(info);
@@ -891,7 +1140,7 @@
  * else return 0 (success)
  */
 static int __uprobe_register(struct inode *inode, loff_t offset,
-                            struct uprobe_consumer *uc)
+                            loff_t ref_ctr_offset, struct uprobe_consumer *uc)
 {
        struct uprobe *uprobe;
        int ret;
@@ -908,16 +1157,21 @@
                return -EINVAL;
 
        /*
-        * This ensures that copy_from_page() and copy_to_page()
-        * can't cross page boundary.
+        * This ensures that copy_from_page(), copy_to_page() and
+        * __update_ref_ctr() can't cross page boundary.
         */
        if (!IS_ALIGNED(offset, UPROBE_SWBP_INSN_SIZE))
                return -EINVAL;
+       if (!IS_ALIGNED(ref_ctr_offset, sizeof(short)))
+               return -EINVAL;
 
 retry:
-       uprobe = alloc_uprobe(inode, offset);
+       uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
        if (!uprobe)
                return -ENOMEM;
+       if (IS_ERR(uprobe))
+               return PTR_ERR(uprobe);
+
        /*
        * We can race with uprobe_unregister()->delete_uprobe().
        * Check uprobe_is_active() and retry if it is false.
@@ -941,9 +1195,16 @@
 int uprobe_register(struct inode *inode, loff_t offset,
                    struct uprobe_consumer *uc)
 {
-       return __uprobe_register(inode, offset, uc);
+       return __uprobe_register(inode, offset, 0, uc);
 }
 EXPORT_SYMBOL_GPL(uprobe_register);
+
+int uprobe_register_refctr(struct inode *inode, loff_t offset,
+                          loff_t ref_ctr_offset, struct uprobe_consumer *uc)
+{
+       return __uprobe_register(inode, offset, ref_ctr_offset, uc);
+}
+EXPORT_SYMBOL_GPL(uprobe_register_refctr);
 
 /*
  * uprobe_apply - unregister an already registered probe.
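A hedged usage sketch for the new export (not part of this patch): a module installing a probe at inode:offset whose SDT semaphore sits at ref_ctr_offset. The offsets would come from parsing the target ELF; the values below are placeholders.

    #include <linux/uprobes.h>

    static int my_handler(struct uprobe_consumer *uc, struct pt_regs *regs)
    {
            pr_info("probe hit, ip=%lx\n", instruction_pointer(regs));
            return 0;                       /* keep the probe installed */
    }

    static struct uprobe_consumer my_consumer = {
            .handler = my_handler,
    };

    static int install_probe(struct inode *inode)
    {
            /* 0x530: probed insn file offset; 0x10040: semaphore file
             * offset. Both hypothetical. */
            return uprobe_register_refctr(inode, 0x530, 0x10040,
                                          &my_consumer);
    }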
@@ -979,7 +1240,7 @@
        struct vm_area_struct *vma;
        int err = 0;
 
-       down_read(&mm->mmap_sem);
+       mmap_read_lock(mm);
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                unsigned long vaddr;
                loff_t offset;
@@ -996,7 +1257,7 @@
                vaddr = offset_to_vaddr(vma, uprobe->offset);
                err |= remove_breakpoint(uprobe, mm, vaddr);
        }
-       up_read(&mm->mmap_sem);
+       mmap_read_unlock(mm);
 
        return err;
 }
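The mmap_sem conversions visible throughout this patch are mechanical; the wrappers (from linux/mmap_lock.h) map one-to-one onto the old rwsem calls:

    /* Reference mapping, not part of the diff:
     *   down_read(&mm->mmap_sem)           -> mmap_read_lock(mm)
     *   up_read(&mm->mmap_sem)             -> mmap_read_unlock(mm)
     *   down_write(&mm->mmap_sem)          -> mmap_write_lock(mm)
     *   up_write(&mm->mmap_sem)            -> mmap_write_unlock(mm)
     *   down_write_killable(&mm->mmap_sem) -> mmap_write_lock_killable(mm)
     */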
@@ -1063,8 +1324,37 @@
        spin_unlock(&uprobes_treelock);
 }
 
+/* @vma contains reference counter, not the probed instruction. */
+static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
+{
+       struct list_head *pos, *q;
+       struct delayed_uprobe *du;
+       unsigned long vaddr;
+       int ret = 0, err = 0;
+
+       mutex_lock(&delayed_uprobe_lock);
+       list_for_each_safe(pos, q, &delayed_uprobe_list) {
+               du = list_entry(pos, struct delayed_uprobe, list);
+
+               if (du->mm != vma->vm_mm ||
+                   !valid_ref_ctr_vma(du->uprobe, vma))
+                       continue;
+
+               vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset);
+               ret = __update_ref_ctr(vma->vm_mm, vaddr, 1);
+               if (ret) {
+                       update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1);
+                       if (!err)
+                               err = ret;
+               }
+               delayed_uprobe_delete(du);
+       }
+       mutex_unlock(&delayed_uprobe_lock);
+       return err;
+}
+
 /*
- * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
+ * Called from mmap_region/vma_adjust with mm->mmap_lock acquired.
  *
  * Currently we ignore all errors and always return 0, the callers
  * can't handle the failure anyway.
@@ -1075,7 +1365,15 @@
        struct uprobe *uprobe, *u;
        struct inode *inode;
 
-       if (no_uprobe_events() || !valid_vma(vma, true))
+       if (no_uprobe_events())
+               return 0;
+
+       if (vma->vm_file &&
+           (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
+           test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags))
+               delayed_ref_ctr_inc(vma);
+
+       if (!valid_vma(vma, true))
                return 0;
 
        inode = file_inode(vma->vm_file);
@@ -1146,7 +1444,7 @@
        struct vm_area_struct *vma;
        int ret;
 
-       if (down_write_killable(&mm->mmap_sem))
+       if (mmap_write_lock_killable(mm))
                return -EINTR;
 
        if (mm->uprobes_state.xol_area) {
@@ -1158,7 +1456,7 @@
        /* Try to map as high as possible, this is only a hint. */
        area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
                                        PAGE_SIZE, 0, 0);
-       if (area->vaddr & ~PAGE_MASK) {
+       if (IS_ERR_VALUE(area->vaddr)) {
                ret = area->vaddr;
                goto fail;
        }
@@ -1176,7 +1474,7 @@
        /* pairs with get_xol_area() */
        smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */
 fail:
-       up_write(&mm->mmap_sem);
+       mmap_write_unlock(mm);
 
        return ret;
 }
@@ -1248,6 +1546,10 @@
 void uprobe_clear_state(struct mm_struct *mm)
 {
        struct xol_area *area = mm->uprobes_state.xol_area;
+
+       mutex_lock(&delayed_uprobe_lock);
+       delayed_uprobe_remove(NULL, mm);
+       mutex_unlock(&delayed_uprobe_lock);
 
        if (!area)
                return;
@@ -1371,7 +1673,7 @@
        copy_to_page(page, vaddr, src, len);
 
        /*
-        * We probably need flush_icache_user_range() but it needs vma.
+        * We probably need flush_icache_user_page() but it needs vma.
         * This should work on most of architectures by default. If
         * architecture needs to do something different it can define
         * its own version of the function.
@@ -1433,7 +1735,7 @@
 }
 
 /*
- * Allocate a uprobe_task object for the task if if necessary.
+ * Allocate a uprobe_task object for the task if necessary.
  * Called when the thread hits a breakpoint.
  *
  * Returns:
@@ -1521,7 +1823,7 @@
 
        t->utask->dup_xol_addr = area->vaddr;
        init_task_work(&t->utask->dup_xol_work, dup_xol_work);
-       task_work_add(t, &t->utask->dup_xol_work, true);
+       task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME);
 }
 
 /*
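The task_work_add() change above tracks an API rename: the bool notify argument became enum task_work_notify_mode, and true (notify on return to user mode) maps to TWA_RESUME. A minimal sketch of the current calling convention (my_func is illustrative):

    static struct callback_head work;

    init_task_work(&work, my_func);
    if (task_work_add(task, &work, TWA_RESUME))
            pr_warn("task already exiting, work not queued\n");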
@@ -1671,7 +1973,7 @@
 
        WARN_ON_ONCE(utask->state != UTASK_SSTEP);
 
-       if (signal_pending(t)) {
+       if (task_sigpending(t)) {
                spin_lock_irq(&t->sighand->siglock);
                clear_tsk_thread_flag(t, TIF_SIGPENDING);
                spin_unlock_irq(&t->sighand->siglock);
@@ -1727,7 +2029,7 @@
         * but we treat this as a 'remote' access since it is
         * essentially a kernel access to the memory.
         */
-       result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
+       result = get_user_pages_remote(mm, vaddr, 1, FOLL_FORCE, &page,
                                       NULL, NULL);
        if (result < 0)
                return result;
@@ -1745,7 +2047,7 @@
        struct uprobe *uprobe = NULL;
        struct vm_area_struct *vma;
 
-       down_read(&mm->mmap_sem);
+       mmap_read_lock(mm);
        vma = find_vma(mm, bp_vaddr);
        if (vma && vma->vm_start <= bp_vaddr) {
                if (valid_vma(vma, false)) {
@@ -1763,7 +2065,7 @@
 
        if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
                mmf_recalc_uprobes(mm);
-       up_read(&mm->mmap_sem);
+       mmap_read_unlock(mm);
 
        return uprobe;
 }
@@ -1781,7 +2083,7 @@
        if (uc->handler) {
                rc = uc->handler(uc, regs);
                WARN(rc & ~UPROBE_HANDLER_MASK,
-                    "bad rc=0x%x from %pf()\n", rc, uc->handler);
+                    "bad rc=0x%x from %ps()\n", rc, uc->handler);
        }
 
        if (uc->ret_handler)
@@ -1864,7 +2166,7 @@
 
 sigill:
        uprobe_warn(current, "handle uretprobe, sending SIGILL.");
-       force_sig(SIGILL, current);
+       force_sig(SIGILL);
 
 }
 
@@ -1887,7 +2189,7 @@
 {
        struct uprobe *uprobe;
        unsigned long bp_vaddr;
-       int uninitialized_var(is_swbp);
+       int is_swbp;
 
        bp_vaddr = uprobe_get_swbp_addr(regs);
        if (bp_vaddr == get_trampoline_vaddr())
@@ -1897,7 +2199,7 @@
        if (!uprobe) {
                if (is_swbp > 0) {
                        /* No matching uprobe; signal SIGTRAP. */
-                       force_sig(SIGTRAP, current);
+                       force_sig(SIGTRAP);
                } else {
                        /*
                         * Either we raced with uprobe_unregister() or we can't
@@ -1980,7 +2282,7 @@
 
        if (unlikely(err)) {
                uprobe_warn(current, "execute the probed insn, sending SIGILL.");
-               force_sig(SIGILL, current);
+               force_sig(SIGILL);
        }
 }
 
@@ -2047,16 +2349,12 @@
        .priority               = INT_MAX-1,    /* notified after kprobes, kgdb */
 };
 
-static int __init init_uprobes(void)
+void __init uprobes_init(void)
 {
        int i;
 
        for (i = 0; i < UPROBES_HASH_SZ; i++)
                mutex_init(&uprobes_mmap_mutex[i]);
 
-       if (percpu_init_rwsem(&dup_mmap_sem))
-               return -ENOMEM;
-
-       return register_die_notifier(&uprobe_exception_nb);
+       BUG_ON(register_die_notifier(&uprobe_exception_nb));
 }
-__initcall(init_uprobes);
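Two earlier hunks meet here: dup_mmap_sem no longer needs a runtime percpu_init_rwsem() because the DEFINE_STATIC_PERCPU_RWSEM() near the top of the file initializes it statically, and the __initcall becomes an explicit uprobes_init() so the die notifier is registered earlier in boot. A sketch of the companion call site (assumption: the matching change outside this file wires uprobes_init() into fork_init() in kernel/fork.c):

    /* kernel/fork.c (sketch, not part of this diff) */
    void __init fork_init(void)
    {
            /* ... existing fork_init() body ... */
            uprobes_init();
    }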