hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/kernel/kprobes.c
....@@ -1,20 +1,7 @@
1
+// SPDX-License-Identifier: GPL-2.0-or-later
12 /*
23 * Kernel Probes (KProbes)
34 * kernel/kprobes.c
4
- *
5
- * This program is free software; you can redistribute it and/or modify
6
- * it under the terms of the GNU General Public License as published by
7
- * the Free Software Foundation; either version 2 of the License, or
8
- * (at your option) any later version.
9
- *
10
- * This program is distributed in the hope that it will be useful,
11
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- * GNU General Public License for more details.
14
- *
15
- * You should have received a copy of the GNU General Public License
16
- * along with this program; if not, write to the Free Software
17
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
185 *
196 * Copyright (C) IBM Corporation, 2002, 2004
207 *
....@@ -48,6 +35,8 @@
4835 #include <linux/ftrace.h>
4936 #include <linux/cpu.h>
5037 #include <linux/jump_label.h>
38
+#include <linux/perf_event.h>
39
+#include <linux/static_call.h>
5140
5241 #include <asm/sections.h>
5342 #include <asm/cacheflush.h>
....@@ -59,6 +48,11 @@
5948
6049
6150 static int kprobes_initialized;
51
+/* kprobe_table can be accessed by:
52
+ * - normal hlist traversal and RCU add/del while kprobe_mutex is held,
53
+ * or
54
+ * - RCU hlist traversal with preemption disabled (breakpoint handlers).
55
+ */
6256 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
6357 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
6458
....@@ -131,6 +125,7 @@
131125 .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
132126 .alloc = alloc_insn_page,
133127 .free = free_insn_page,
128
+ .sym = KPROBE_INSN_PAGE_SYM,
134129 .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
135130 .insn_size = MAX_INSN_SIZE,
136131 .nr_garbage = 0,
....@@ -196,6 +191,10 @@
196191 kip->cache = c;
197192 list_add_rcu(&kip->list, &c->pages);
198193 slot = kip->insns;
194
+
195
+ /* Record the perf ksymbol register event after adding the page */
196
+ perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
197
+ PAGE_SIZE, false, c->sym);
199198 out:
200199 mutex_unlock(&c->mutex);
201200 return slot;
....@@ -214,6 +213,13 @@
214213 * next time somebody inserts a probe.
215214 */
216215 if (!list_is_singular(&kip->list)) {
216
+ /*
217
+ * Record perf ksymbol unregister event before removing
218
+ * the page.
219
+ */
220
+ perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
221
+ (unsigned long)kip->insns, PAGE_SIZE, true,
222
+ kip->cache->sym);
217223 list_del_rcu(&kip->list);
218224 synchronize_rcu();
219225 kip->cache->free(kip->insns);
....@@ -229,7 +235,7 @@
229235 struct kprobe_insn_page *kip, *next;
230236
231237 /* Ensure no-one is interrupted on the garbages */
232
- synchronize_sched();
238
+ synchronize_rcu();
233239
234240 list_for_each_entry_safe(kip, next, &c->pages, list) {
235241 int i;
....@@ -303,12 +309,34 @@
303309 return ret;
304310 }
305311
312
+int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
313
+ unsigned long *value, char *type, char *sym)
314
+{
315
+ struct kprobe_insn_page *kip;
316
+ int ret = -ERANGE;
317
+
318
+ rcu_read_lock();
319
+ list_for_each_entry_rcu(kip, &c->pages, list) {
320
+ if ((*symnum)--)
321
+ continue;
322
+ strlcpy(sym, c->sym, KSYM_NAME_LEN);
323
+ *type = 't';
324
+ *value = (unsigned long)kip->insns;
325
+ ret = 0;
326
+ break;
327
+ }
328
+ rcu_read_unlock();
329
+
330
+ return ret;
331
+}
332
+
306333 #ifdef CONFIG_OPTPROBES
307334 /* For optimized_kprobe buffer */
308335 struct kprobe_insn_cache kprobe_optinsn_slots = {
309336 .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
310337 .alloc = alloc_insn_page,
311338 .free = free_insn_page,
339
+ .sym = KPROBE_OPTINSN_PAGE_SYM,
312340 .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
313341 /* .insn_size is initialized later */
314342 .nr_garbage = 0,
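
The new .sym field names each instruction-slot cache, and kprobe_cache_get_kallsym() above walks the cache pages so they can be reported as kallsyms entries. As a rough, hypothetical sketch (not part of this patch, assuming a kernel context with <linux/kprobes.h> and <linux/kallsyms.h>), a consumer could enumerate all of these out-of-line symbols through kprobe_get_kallsym(), added further down in this patch, until it returns -ERANGE; dump_kprobe_ool_symbols() and its pr_info() format are made up for illustration:

/* Hypothetical helper: list every kprobe out-of-line page symbol. */
static void dump_kprobe_ool_symbols(void)
{
	char sym[KSYM_NAME_LEN];
	unsigned long value;
	unsigned int i = 0;
	char type;

	while (!kprobe_get_kallsym(i++, &value, &type, sym))
		pr_info("%lx %c %s\n", value, type, sym);
}
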
....@@ -339,7 +367,8 @@
339367 struct kprobe *p;
340368
341369 head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
342
- hlist_for_each_entry_rcu(p, head, hlist) {
370
+ hlist_for_each_entry_rcu(p, head, hlist,
371
+ lockdep_is_held(&kprobe_mutex)) {
343372 if (p->addr == addr)
344373 return p;
345374 }
....@@ -570,8 +599,6 @@
570599 mutex_lock(&kprobe_mutex);
571600 cpus_read_lock();
572601 mutex_lock(&text_mutex);
573
- /* Lock modules while optimizing kprobes */
574
- mutex_lock(&module_mutex);
575602
576603 /*
577604 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
....@@ -596,7 +623,6 @@
596623 /* Step 4: Free cleaned kprobes after quiesence period */
597624 do_free_cleaned_kprobes();
598625
599
- mutex_unlock(&module_mutex);
600626 mutex_unlock(&text_mutex);
601627 cpus_read_unlock();
602628
....@@ -682,8 +708,6 @@
682708 lockdep_assert_cpus_held();
683709 arch_unoptimize_kprobe(op);
684710 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
685
- if (kprobe_disabled(&op->kp))
686
- arch_disarm_kprobe(&op->kp);
687711 }
688712
689713 /* Unoptimize a kprobe if p is optimized */
....@@ -732,7 +756,6 @@
732756 {
733757 struct optimized_kprobe *op;
734758
735
- BUG_ON(!kprobe_unused(ap));
736759 /*
737760 * Unused kprobe MUST be on the way of delayed unoptimizing (means
738761 * there is still a relative jump) and disabled.
....@@ -848,7 +871,6 @@
848871 cpus_read_unlock();
849872 }
850873
851
-#ifdef CONFIG_SYSCTL
852874 static void optimize_all_kprobes(void)
853875 {
854876 struct hlist_head *head;
....@@ -864,7 +886,7 @@
864886 kprobes_allow_optimization = true;
865887 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
866888 head = &kprobe_table[i];
867
- hlist_for_each_entry_rcu(p, head, hlist)
889
+ hlist_for_each_entry(p, head, hlist)
868890 if (!kprobe_disabled(p))
869891 optimize_kprobe(p);
870892 }
....@@ -874,6 +896,7 @@
874896 mutex_unlock(&kprobe_mutex);
875897 }
876898
899
+#ifdef CONFIG_SYSCTL
877900 static void unoptimize_all_kprobes(void)
878901 {
879902 struct hlist_head *head;
....@@ -891,7 +914,7 @@
891914 kprobes_allow_optimization = false;
892915 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
893916 head = &kprobe_table[i];
894
- hlist_for_each_entry_rcu(p, head, hlist) {
917
+ hlist_for_each_entry(p, head, hlist) {
895918 if (!kprobe_disabled(p))
896919 unoptimize_kprobe(p, false);
897920 }
....@@ -907,7 +930,7 @@
907930 static DEFINE_MUTEX(kprobe_sysctl_mutex);
908931 int sysctl_kprobes_optimization;
909932 int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
910
- void __user *buffer, size_t *length,
933
+ void *buffer, size_t *length,
911934 loff_t *ppos)
912935 {
913936 int ret;
....@@ -998,8 +1021,15 @@
9981021 #ifdef CONFIG_KPROBES_ON_FTRACE
9991022 static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
10001023 .func = kprobe_ftrace_handler,
1024
+ .flags = FTRACE_OPS_FL_SAVE_REGS,
1025
+};
1026
+
1027
+static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
1028
+ .func = kprobe_ftrace_handler,
10011029 .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
10021030 };
1031
+
1032
+static int kprobe_ipmodify_enabled;
10031033 static int kprobe_ftrace_enabled;
10041034
10051035 /* Must ensure p->addr is really on ftrace */
....@@ -1012,57 +1042,74 @@
10121042 }
10131043
10141044 /* Caller must lock kprobe_mutex */
1015
-static int arm_kprobe_ftrace(struct kprobe *p)
1045
+static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1046
+ int *cnt)
10161047 {
10171048 int ret = 0;
10181049
1019
- ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
1020
- (unsigned long)p->addr, 0, 0);
1050
+ ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
10211051 if (ret) {
10221052 pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n",
10231053 p->addr, ret);
10241054 return ret;
10251055 }
10261056
1027
- if (kprobe_ftrace_enabled == 0) {
1028
- ret = register_ftrace_function(&kprobe_ftrace_ops);
1057
+ if (*cnt == 0) {
1058
+ ret = register_ftrace_function(ops);
10291059 if (ret) {
10301060 pr_debug("Failed to init kprobe-ftrace (%d)\n", ret);
10311061 goto err_ftrace;
10321062 }
10331063 }
10341064
1035
- kprobe_ftrace_enabled++;
1065
+ (*cnt)++;
10361066 return ret;
10371067
10381068 err_ftrace:
10391069 /*
1040
- * Note: Since kprobe_ftrace_ops has IPMODIFY set, and ftrace requires a
1041
- * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental
1042
- * empty filter_hash which would undesirably trace all functions.
1070
+ * At this point, since the ops is not registered, we should be safe
1071
+ * from registering an empty filter.
10431072 */
1044
- ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0);
1073
+ ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
10451074 return ret;
10461075 }
10471076
1077
+static int arm_kprobe_ftrace(struct kprobe *p)
1078
+{
1079
+ bool ipmodify = (p->post_handler != NULL);
1080
+
1081
+ return __arm_kprobe_ftrace(p,
1082
+ ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1083
+ ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1084
+}
1085
+
10481086 /* Caller must lock kprobe_mutex */
1049
-static int disarm_kprobe_ftrace(struct kprobe *p)
1087
+static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1088
+ int *cnt)
10501089 {
10511090 int ret = 0;
10521091
1053
- if (kprobe_ftrace_enabled == 1) {
1054
- ret = unregister_ftrace_function(&kprobe_ftrace_ops);
1092
+ if (*cnt == 1) {
1093
+ ret = unregister_ftrace_function(ops);
10551094 if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret))
10561095 return ret;
10571096 }
10581097
1059
- kprobe_ftrace_enabled--;
1098
+ (*cnt)--;
10601099
1061
- ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
1062
- (unsigned long)p->addr, 1, 0);
1100
+ ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
10631101 WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n",
10641102 p->addr, ret);
10651103 return ret;
1104
+}
1105
+
1106
+static int disarm_kprobe_ftrace(struct kprobe *p)
1107
+{
1108
+ bool ipmodify = (p->post_handler != NULL);
1109
+
1110
+ return __disarm_kprobe_ftrace(p,
1111
+ ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1112
+ ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
10661113 }
10671114 #else /* !CONFIG_KPROBES_ON_FTRACE */
10681115 static inline int prepare_kprobe(struct kprobe *p)
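
The split above means a kprobe only pays the IPMODIFY cost when it actually needs to regain control after the probed function (i.e. it has a post_handler), so post_handler-less kprobes can share an ftrace location with live-patching. A minimal sketch of the user-visible difference, written as an ordinary kprobes module; the target symbol and the handlers are illustrative assumptions, not something this patch adds:

#include <linux/kprobes.h>
#include <linux/module.h>

static int demo_pre(struct kprobe *p, struct pt_regs *regs)
{
	return 0;			/* continue normally, do not modify regs */
}

static void demo_post(struct kprobe *p, struct pt_regs *regs,
		      unsigned long flags)
{
}

static struct kprobe demo_kp = {
	.symbol_name	= "kernel_clone",	/* assumed ftrace-able target */
	.pre_handler	= demo_pre,
	.post_handler	= demo_post,	/* presence selects kprobe_ipmodify_ops */
};

static int __init demo_init(void)
{
	/* Without .post_handler this would be armed via kprobe_ftrace_ops. */
	return register_kprobe(&demo_kp);
}

static void __exit demo_exit(void)
{
	unregister_kprobe(&demo_kp);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
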
....@@ -1177,8 +1224,7 @@
11771224 }
11781225 NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
11791226
1180
-void recycle_rp_inst(struct kretprobe_instance *ri,
1181
- struct hlist_head *head)
1227
+static void recycle_rp_inst(struct kretprobe_instance *ri)
11821228 {
11831229 struct kretprobe *rp = ri->rp;
11841230
....@@ -1190,12 +1236,11 @@
11901236 hlist_add_head(&ri->hlist, &rp->free_instances);
11911237 raw_spin_unlock(&rp->lock);
11921238 } else
1193
- /* Unregistering */
1194
- hlist_add_head(&ri->hlist, head);
1239
+ kfree_rcu(ri, rcu);
11951240 }
11961241 NOKPROBE_SYMBOL(recycle_rp_inst);
11971242
1198
-void kretprobe_hash_lock(struct task_struct *tsk,
1243
+static void kretprobe_hash_lock(struct task_struct *tsk,
11991244 struct hlist_head **head, unsigned long *flags)
12001245 __acquires(hlist_lock)
12011246 {
....@@ -1204,7 +1249,13 @@
12041249
12051250 *head = &kretprobe_inst_table[hash];
12061251 hlist_lock = kretprobe_table_lock_ptr(hash);
1207
- raw_spin_lock_irqsave(hlist_lock, *flags);
1252
+ /*
1253
+ * Nested is a workaround that will soon not be needed.
1254
+ * There are other protections, which lockdep is unaware of, that
1255
+ * make sure the same lock is not taken twice on the same CPU.
1256
+ * Differentiate when it is taken in NMI context.
1257
+ */
1258
+ raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
12081259 }
12091260 NOKPROBE_SYMBOL(kretprobe_hash_lock);
12101261
....@@ -1213,11 +1264,17 @@
12131264 __acquires(hlist_lock)
12141265 {
12151266 raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
1216
- raw_spin_lock_irqsave(hlist_lock, *flags);
1267
+ /*
1268
+ * Nested is a workaround that will soon not be needed.
1269
+ * There are other protections, which lockdep is unaware of, that
1270
+ * make sure the same lock is not taken twice on the same CPU.
1271
+ * Differentiate when it is taken in NMI context.
1272
+ */
1273
+ raw_spin_lock_irqsave_nested(hlist_lock, *flags, !!in_nmi());
12171274 }
12181275 NOKPROBE_SYMBOL(kretprobe_table_lock);
12191276
1220
-void kretprobe_hash_unlock(struct task_struct *tsk,
1277
+static void kretprobe_hash_unlock(struct task_struct *tsk,
12211278 unsigned long *flags)
12221279 __releases(hlist_lock)
12231280 {
....@@ -1238,7 +1295,7 @@
12381295 }
12391296 NOKPROBE_SYMBOL(kretprobe_table_unlock);
12401297
1241
-struct kprobe kprobe_busy = {
1298
+static struct kprobe kprobe_busy = {
12421299 .addr = (void *) get_kprobe,
12431300 };
12441301
....@@ -1267,7 +1324,7 @@
12671324 void kprobe_flush_task(struct task_struct *tk)
12681325 {
12691326 struct kretprobe_instance *ri;
1270
- struct hlist_head *head, empty_rp;
1327
+ struct hlist_head *head;
12711328 struct hlist_node *tmp;
12721329 unsigned long hash, flags = 0;
12731330
....@@ -1277,19 +1334,14 @@
12771334
12781335 kprobe_busy_begin();
12791336
1280
- INIT_HLIST_HEAD(&empty_rp);
12811337 hash = hash_ptr(tk, KPROBE_HASH_BITS);
12821338 head = &kretprobe_inst_table[hash];
12831339 kretprobe_table_lock(hash, &flags);
12841340 hlist_for_each_entry_safe(ri, tmp, head, hlist) {
12851341 if (ri->task == tk)
1286
- recycle_rp_inst(ri, &empty_rp);
1342
+ recycle_rp_inst(ri);
12871343 }
12881344 kretprobe_table_unlock(hash, &flags);
1289
- hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
1290
- hlist_del(&ri->hlist);
1291
- kfree(ri);
1292
- }
12931345
12941346 kprobe_busy_end();
12951347 }
....@@ -1313,7 +1365,8 @@
13131365 struct hlist_node *next;
13141366 struct hlist_head *head;
13151367
1316
- /* No race here */
1368
+ /* To avoid recursive kretprobe by NMI, set kprobe busy here */
1369
+ kprobe_busy_begin();
13171370 for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
13181371 kretprobe_table_lock(hash, &flags);
13191372 head = &kretprobe_inst_table[hash];
....@@ -1323,6 +1376,8 @@
13231376 }
13241377 kretprobe_table_unlock(hash, &flags);
13251378 }
1379
+ kprobe_busy_end();
1380
+
13261381 free_rp_inst(rp);
13271382 }
13281383 NOKPROBE_SYMBOL(cleanup_rp_inst);
....@@ -1330,8 +1385,6 @@
13301385 /* Add the new probe to ap->list */
13311386 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
13321387 {
1333
- BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
1334
-
13351388 if (p->post_handler)
13361389 unoptimize_kprobe(ap, true); /* Fall back to normal kprobe */
13371390
....@@ -1440,7 +1493,7 @@
14401493 if (ret) {
14411494 ap->flags |= KPROBE_FLAG_DISABLED;
14421495 list_del_rcu(&p->list);
1443
- synchronize_sched();
1496
+ synchronize_rcu();
14441497 }
14451498 }
14461499 }
....@@ -1454,7 +1507,7 @@
14541507 addr < (unsigned long)__kprobes_text_end;
14551508 }
14561509
1457
-bool within_kprobe_blacklist(unsigned long addr)
1510
+static bool __within_kprobe_blacklist(unsigned long addr)
14581511 {
14591512 struct kprobe_blacklist_entry *ent;
14601513
....@@ -1468,7 +1521,26 @@
14681521 if (addr >= ent->start_addr && addr < ent->end_addr)
14691522 return true;
14701523 }
1524
+ return false;
1525
+}
14711526
1527
+bool within_kprobe_blacklist(unsigned long addr)
1528
+{
1529
+ char symname[KSYM_NAME_LEN], *p;
1530
+
1531
+ if (__within_kprobe_blacklist(addr))
1532
+ return true;
1533
+
1534
+ /* Check if the address is on a suffixed-symbol */
1535
+ if (!lookup_symbol_name(addr, symname)) {
1536
+ p = strchr(symname, '.');
1537
+ if (!p)
1538
+ return false;
1539
+ *p = '\0';
1540
+ addr = (unsigned long)kprobe_lookup_name(symname, 0);
1541
+ if (addr)
1542
+ return __within_kprobe_blacklist(addr);
1543
+ }
14721544 return false;
14731545 }
14741546
....@@ -1508,12 +1580,14 @@
15081580 {
15091581 struct kprobe *ap, *list_p;
15101582
1583
+ lockdep_assert_held(&kprobe_mutex);
1584
+
15111585 ap = get_kprobe(p->addr);
15121586 if (unlikely(!ap))
15131587 return NULL;
15141588
15151589 if (p != ap) {
1516
- list_for_each_entry_rcu(list_p, &ap->list, list)
1590
+ list_for_each_entry(list_p, &ap->list, list)
15171591 if (list_p == p)
15181592 /* kprobe p is a valid probe */
15191593 goto valid;
....@@ -1566,9 +1640,12 @@
15661640 preempt_disable();
15671641
15681642 /* Ensure it is not in reserved area nor out of text */
1569
- if (!kernel_text_address((unsigned long) p->addr) ||
1643
+ if (!(core_kernel_text((unsigned long) p->addr) ||
1644
+ is_module_text_address((unsigned long) p->addr)) ||
1645
+ in_gate_area_no_mm((unsigned long) p->addr) ||
15701646 within_kprobe_blacklist((unsigned long) p->addr) ||
15711647 jump_label_text_reserved(p->addr, p->addr) ||
1648
+ static_call_text_reserved(p->addr, p->addr) ||
15721649 find_bug((unsigned long)p->addr)) {
15731650 ret = -EINVAL;
15741651 goto out;
....@@ -1656,7 +1733,7 @@
16561733 ret = arm_kprobe(p);
16571734 if (ret) {
16581735 hlist_del_rcu(&p->hlist);
1659
- synchronize_sched();
1736
+ synchronize_rcu();
16601737 goto out;
16611738 }
16621739 }
....@@ -1678,7 +1755,9 @@
16781755 {
16791756 struct kprobe *kp;
16801757
1681
- list_for_each_entry_rcu(kp, &ap->list, list)
1758
+ lockdep_assert_held(&kprobe_mutex);
1759
+
1760
+ list_for_each_entry(kp, &ap->list, list)
16821761 if (!kprobe_disabled(kp))
16831762 /*
16841763 * There is an active probe on the list.
....@@ -1708,11 +1787,12 @@
17081787 /* Try to disarm and disable this/parent probe */
17091788 if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
17101789 /*
1711
- * If kprobes_all_disarmed is set, orig_p
1712
- * should have already been disarmed, so
1713
- * skip unneed disarming process.
1790
+ * Don't be lazy here. Even if 'kprobes_all_disarmed'
1791
+ * is false, 'orig_p' might not have been armed yet.
1792
+ * Note arm_all_kprobes() __tries__ to arm all kprobes
1793
+ * on a best-effort basis.
17141794 */
1715
- if (!kprobes_all_disarmed) {
1795
+ if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
17161796 ret = disarm_kprobe(orig_p, true);
17171797 if (ret) {
17181798 p->flags &= ~KPROBE_FLAG_DISABLED;
....@@ -1757,11 +1837,17 @@
17571837 else {
17581838 /* If disabling probe has special handlers, update aggrprobe */
17591839 if (p->post_handler && !kprobe_gone(p)) {
1760
- list_for_each_entry_rcu(list_p, &ap->list, list) {
1840
+ list_for_each_entry(list_p, &ap->list, list) {
17611841 if ((list_p != p) && (list_p->post_handler))
17621842 goto noclean;
17631843 }
1764
- ap->post_handler = NULL;
1844
+ /*
1845
+ * For the kprobe-on-ftrace case, we keep the
1846
+ * post_handler setting to identify that this aggrprobe is
1847
+ * armed with kprobe_ipmodify_ops.
1848
+ */
1849
+ if (!kprobe_ftrace(ap))
1850
+ ap->post_handler = NULL;
17651851 }
17661852 noclean:
17671853 /*
....@@ -1779,7 +1865,6 @@
17791865 return 0;
17801866
17811867 disarmed:
1782
- BUG_ON(!kprobe_disarmed(ap));
17831868 hlist_del_rcu(&ap->hlist);
17841869 return 0;
17851870 }
....@@ -1836,7 +1921,7 @@
18361921 kps[i]->addr = NULL;
18371922 mutex_unlock(&kprobe_mutex);
18381923
1839
- synchronize_sched();
1924
+ synchronize_rcu();
18401925 for (i = 0; i < num; i++)
18411926 if (kps[i]->addr)
18421927 __unregister_kprobe_bottom(kps[i]);
....@@ -1861,6 +1946,97 @@
18611946 }
18621947
18631948 #ifdef CONFIG_KRETPROBES
1949
+
1950
+unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
1951
+ void *trampoline_address,
1952
+ void *frame_pointer)
1953
+{
1954
+ struct kretprobe_instance *ri = NULL, *last = NULL;
1955
+ struct hlist_head *head;
1956
+ struct hlist_node *tmp;
1957
+ unsigned long flags;
1958
+ kprobe_opcode_t *correct_ret_addr = NULL;
1959
+ bool skipped = false;
1960
+
1961
+ kretprobe_hash_lock(current, &head, &flags);
1962
+
1963
+ /*
1964
+ * It is possible to have multiple instances associated with a given
1965
+ * task either because multiple functions in the call path have
1966
+ * return probes installed on them, and/or more than one
1967
+ * return probe was registered for a target function.
1968
+ *
1969
+ * We can handle this because:
1970
+ * - instances are always pushed into the head of the list
1971
+ * - when multiple return probes are registered for the same
1972
+ * function, the (chronologically) first instance's ret_addr
1973
+ * will be the real return address, and all the rest will
1974
+ * point to kretprobe_trampoline.
1975
+ */
1976
+ hlist_for_each_entry(ri, head, hlist) {
1977
+ if (ri->task != current)
1978
+ /* another task is sharing our hash bucket */
1979
+ continue;
1980
+ /*
1981
+ * Return probes must be pushed onto this hash list in the
1982
+ * correct order (same as the return order) so that they can
1983
+ * be popped correctly. However, if we find an entry pushed in
1984
+ * the wrong order, it means we hit a function that should not
1985
+ * have been probed, because the mis-ordered entry was pushed
1986
+ * while another kretprobe was itself being processed.
1987
+ */
1988
+ if (ri->fp != frame_pointer) {
1989
+ if (!skipped)
1990
+ pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
1991
+ skipped = true;
1992
+ continue;
1993
+ }
1994
+
1995
+ correct_ret_addr = ri->ret_addr;
1996
+ if (skipped)
1997
+ pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
1998
+ ri->rp->kp.addr);
1999
+
2000
+ if (correct_ret_addr != trampoline_address)
2001
+ /*
2002
+ * This is the real return address. Any other
2003
+ * instances associated with this task are for
2004
+ * other calls deeper on the call stack
2005
+ */
2006
+ break;
2007
+ }
2008
+
2009
+ BUG_ON(!correct_ret_addr || (correct_ret_addr == trampoline_address));
2010
+ last = ri;
2011
+
2012
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
2013
+ if (ri->task != current)
2014
+ /* another task is sharing our hash bucket */
2015
+ continue;
2016
+ if (ri->fp != frame_pointer)
2017
+ continue;
2018
+
2019
+ if (ri->rp && ri->rp->handler) {
2020
+ struct kprobe *prev = kprobe_running();
2021
+
2022
+ __this_cpu_write(current_kprobe, &ri->rp->kp);
2023
+ ri->ret_addr = correct_ret_addr;
2024
+ ri->rp->handler(ri, regs);
2025
+ __this_cpu_write(current_kprobe, prev);
2026
+ }
2027
+
2028
+ recycle_rp_inst(ri);
2029
+
2030
+ if (ri == last)
2031
+ break;
2032
+ }
2033
+
2034
+ kretprobe_hash_unlock(current, &flags);
2035
+
2036
+ return (unsigned long)correct_ret_addr;
2037
+}
2038
+NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
2039
+
18642040 /*
18652041 * This kprobe pre_handler is registered with every kretprobe. When probe
18662042 * hits it will set up the return probe.
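
This generic handler factors out the bookkeeping that every architecture's kretprobe trampoline used to duplicate. A sketch of how an arch side is expected to call it, loosely modeled on the x86 conversion in the same series; the __visible annotation, the &regs->sp frame-pointer cookie, and the kretprobe_trampoline asm stub are architecture details assumed here for illustration only:

/* Arch asm stub that the hijacked return address points to (assumed). */
void kretprobe_trampoline(void);

/* Called from the asm stub with a fabricated pt_regs frame; returns the
 * real return address so the stub can jump back to the caller. */
__visible __used void *trampoline_handler(struct pt_regs *regs)
{
	/* &regs->sp serves as the frame_pointer cookie matching ri->fp. */
	return (void *)__kretprobe_trampoline_handler(regs,
				&kretprobe_trampoline, &regs->sp);
}
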
....@@ -1871,20 +2047,14 @@
18712047 unsigned long hash, flags = 0;
18722048 struct kretprobe_instance *ri;
18732049
1874
- /*
1875
- * To avoid deadlocks, prohibit return probing in NMI contexts,
1876
- * just skip the probe and increase the (inexact) 'nmissed'
1877
- * statistical counter, so that the user is informed that
1878
- * something happened:
1879
- */
1880
- if (unlikely(in_nmi())) {
1881
- rp->nmissed++;
1882
- return 0;
1883
- }
1884
-
18852050 /* TODO: consider to only swap the RA after the last pre_handler fired */
18862051 hash = hash_ptr(current, KPROBE_HASH_BITS);
1887
- raw_spin_lock_irqsave(&rp->lock, flags);
2052
+ /*
2053
+ * Nested is a workaround that will soon not be needed.
2054
+ * There are other protections, which lockdep is unaware of, that
2055
+ * make sure the same lock is not taken twice on the same CPU.
2056
+ */
2057
+ raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
18882058 if (!hlist_empty(&rp->free_instances)) {
18892059 ri = hlist_entry(rp->free_instances.first,
18902060 struct kretprobe_instance, hlist);
....@@ -1895,7 +2065,7 @@
18952065 ri->task = current;
18962066
18972067 if (rp->entry_handler && rp->entry_handler(ri, regs)) {
1898
- raw_spin_lock_irqsave(&rp->lock, flags);
2068
+ raw_spin_lock_irqsave_nested(&rp->lock, flags, 1);
18992069 hlist_add_head(&ri->hlist, &rp->free_instances);
19002070 raw_spin_unlock_irqrestore(&rp->lock, flags);
19012071 return 0;
....@@ -1985,7 +2155,7 @@
19852155
19862156 /* Pre-allocate memory for max kretprobe instances */
19872157 if (rp->maxactive <= 0) {
1988
-#ifdef CONFIG_PREEMPT
2158
+#ifdef CONFIG_PREEMPTION
19892159 rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
19902160 #else
19912161 rp->maxactive = num_possible_cpus();
....@@ -2049,7 +2219,7 @@
20492219 rps[i]->kp.addr = NULL;
20502220 mutex_unlock(&kprobe_mutex);
20512221
2052
- synchronize_sched();
2222
+ synchronize_rcu();
20532223 for (i = 0; i < num; i++) {
20542224 if (rps[i]->kp.addr) {
20552225 __unregister_kprobe_bottom(&rps[i]->kp);
....@@ -2095,6 +2265,8 @@
20952265 {
20962266 struct kprobe *kp;
20972267
2268
+ lockdep_assert_held(&kprobe_mutex);
2269
+
20982270 if (WARN_ON_ONCE(kprobe_gone(p)))
20992271 return;
21002272
....@@ -2104,7 +2276,7 @@
21042276 * If this is an aggr_kprobe, we have to list all the
21052277 * chained probes and mark them GONE.
21062278 */
2107
- list_for_each_entry_rcu(kp, &p->list, list)
2279
+ list_for_each_entry(kp, &p->list, list)
21082280 kp->flags |= KPROBE_FLAG_GONE;
21092281 p->post_handler = NULL;
21102282 kill_optimized_kprobe(p);
....@@ -2169,8 +2341,11 @@
21692341 if (!kprobes_all_disarmed && kprobe_disabled(p)) {
21702342 p->flags &= ~KPROBE_FLAG_DISABLED;
21712343 ret = arm_kprobe(p);
2172
- if (ret)
2344
+ if (ret) {
21732345 p->flags |= KPROBE_FLAG_DISABLED;
2346
+ if (p != kp)
2347
+ kp->flags |= KPROBE_FLAG_DISABLED;
2348
+ }
21742349 }
21752350 out:
21762351 mutex_unlock(&kprobe_mutex);
....@@ -2223,6 +2398,46 @@
22232398 return 0;
22242399 }
22252400
2401
+/* Remove all symbols in given area from kprobe blacklist */
2402
+static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
2403
+{
2404
+ struct kprobe_blacklist_entry *ent, *n;
2405
+
2406
+ list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
2407
+ if (ent->start_addr < start || ent->start_addr >= end)
2408
+ continue;
2409
+ list_del(&ent->list);
2410
+ kfree(ent);
2411
+ }
2412
+}
2413
+
2414
+static void kprobe_remove_ksym_blacklist(unsigned long entry)
2415
+{
2416
+ kprobe_remove_area_blacklist(entry, entry + 1);
2417
+}
2418
+
2419
+int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
2420
+ char *type, char *sym)
2421
+{
2422
+ return -ERANGE;
2423
+}
2424
+
2425
+int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
2426
+ char *sym)
2427
+{
2428
+#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
2429
+ if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym))
2430
+ return 0;
2431
+#ifdef CONFIG_OPTPROBES
2432
+ if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym))
2433
+ return 0;
2434
+#endif
2435
+#endif
2436
+ if (!arch_kprobe_get_kallsym(&symnum, value, type, sym))
2437
+ return 0;
2438
+ return -ERANGE;
2439
+}
2440
+
22262441 int __init __weak arch_populate_kprobe_blacklist(void)
22272442 {
22282443 return 0;
....@@ -2255,8 +2470,60 @@
22552470 /* Symbols in __kprobes_text are blacklisted */
22562471 ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
22572472 (unsigned long)__kprobes_text_end);
2473
+ if (ret)
2474
+ return ret;
2475
+
2476
+ /* Symbols in noinstr section are blacklisted */
2477
+ ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start,
2478
+ (unsigned long)__noinstr_text_end);
22582479
22592480 return ret ? : arch_populate_kprobe_blacklist();
2481
+}
2482
+
2483
+static void add_module_kprobe_blacklist(struct module *mod)
2484
+{
2485
+ unsigned long start, end;
2486
+ int i;
2487
+
2488
+ if (mod->kprobe_blacklist) {
2489
+ for (i = 0; i < mod->num_kprobe_blacklist; i++)
2490
+ kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]);
2491
+ }
2492
+
2493
+ start = (unsigned long)mod->kprobes_text_start;
2494
+ if (start) {
2495
+ end = start + mod->kprobes_text_size;
2496
+ kprobe_add_area_blacklist(start, end);
2497
+ }
2498
+
2499
+ start = (unsigned long)mod->noinstr_text_start;
2500
+ if (start) {
2501
+ end = start + mod->noinstr_text_size;
2502
+ kprobe_add_area_blacklist(start, end);
2503
+ }
2504
+}
2505
+
2506
+static void remove_module_kprobe_blacklist(struct module *mod)
2507
+{
2508
+ unsigned long start, end;
2509
+ int i;
2510
+
2511
+ if (mod->kprobe_blacklist) {
2512
+ for (i = 0; i < mod->num_kprobe_blacklist; i++)
2513
+ kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]);
2514
+ }
2515
+
2516
+ start = (unsigned long)mod->kprobes_text_start;
2517
+ if (start) {
2518
+ end = start + mod->kprobes_text_size;
2519
+ kprobe_remove_area_blacklist(start, end);
2520
+ }
2521
+
2522
+ start = (unsigned long)mod->noinstr_text_start;
2523
+ if (start) {
2524
+ end = start + mod->noinstr_text_size;
2525
+ kprobe_remove_area_blacklist(start, end);
2526
+ }
22602527 }
22612528
22622529 /* Module notifier call back, checking kprobes on the module */
....@@ -2269,6 +2536,11 @@
22692536 unsigned int i;
22702537 int checkcore = (val == MODULE_STATE_GOING);
22712538
2539
+ if (val == MODULE_STATE_COMING) {
2540
+ mutex_lock(&kprobe_mutex);
2541
+ add_module_kprobe_blacklist(mod);
2542
+ mutex_unlock(&kprobe_mutex);
2543
+ }
22722544 if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
22732545 return NOTIFY_DONE;
22742546
....@@ -2281,7 +2553,7 @@
22812553 mutex_lock(&kprobe_mutex);
22822554 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
22832555 head = &kprobe_table[i];
2284
- hlist_for_each_entry_rcu(p, head, hlist) {
2556
+ hlist_for_each_entry(p, head, hlist) {
22852557 if (kprobe_gone(p))
22862558 continue;
22872559
....@@ -2303,6 +2575,8 @@
23032575 }
23042576 }
23052577 }
2578
+ if (val == MODULE_STATE_GOING)
2579
+ remove_module_kprobe_blacklist(mod);
23062580 mutex_unlock(&kprobe_mutex);
23072581 return NOTIFY_DONE;
23082582 }
....@@ -2315,6 +2589,28 @@
23152589 /* Markers of _kprobe_blacklist section */
23162590 extern unsigned long __start_kprobe_blacklist[];
23172591 extern unsigned long __stop_kprobe_blacklist[];
2592
+
2593
+void kprobe_free_init_mem(void)
2594
+{
2595
+ void *start = (void *)(&__init_begin);
2596
+ void *end = (void *)(&__init_end);
2597
+ struct hlist_head *head;
2598
+ struct kprobe *p;
2599
+ int i;
2600
+
2601
+ mutex_lock(&kprobe_mutex);
2602
+
2603
+ /* Kill all kprobes on initmem */
2604
+ for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2605
+ head = &kprobe_table[i];
2606
+ hlist_for_each_entry(p, head, hlist) {
2607
+ if (start <= (void *)p->addr && (void *)p->addr < end)
2608
+ kill_kprobe(p);
2609
+ }
2610
+ }
2611
+
2612
+ mutex_unlock(&kprobe_mutex);
2613
+}
23182614
23192615 static int __init init_kprobes(void)
23202616 {
....@@ -2346,17 +2642,13 @@
23462642 }
23472643 }
23482644
2349
-#if defined(CONFIG_OPTPROBES)
2350
-#if defined(__ARCH_WANT_KPROBES_INSN_SLOT)
2351
- /* Init kprobe_optinsn_slots */
2352
- kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
2353
-#endif
2354
- /* By default, kprobes can be optimized */
2355
- kprobes_allow_optimization = true;
2356
-#endif
2357
-
23582645 /* By default, kprobes are armed */
23592646 kprobes_all_disarmed = false;
2647
+
2648
+#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT)
2649
+ /* Init kprobe_optinsn_slots for allocation */
2650
+ kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
2651
+#endif
23602652
23612653 err = arch_init_kprobes();
23622654 if (!err)
....@@ -2370,6 +2662,22 @@
23702662 init_test_probes();
23712663 return err;
23722664 }
2665
+early_initcall(init_kprobes);
2666
+
2667
+#if defined(CONFIG_OPTPROBES)
2668
+static int __init init_optprobes(void)
2669
+{
2670
+ /*
2671
+ * Enable kprobe optimization - this kicks the optimizer which
2672
+ * depends on synchronize_rcu_tasks() and ksoftirqd, that is
2673
+ * not spawned in early initcall. So delay the optimization.
2674
+ */
2675
+ optimize_all_kprobes();
2676
+
2677
+ return 0;
2678
+}
2679
+subsys_initcall(init_optprobes);
2680
+#endif
23732681
23742682 #ifdef CONFIG_DEBUG_FS
23752683 static void report_probe(struct seq_file *pi, struct kprobe *p,
....@@ -2445,28 +2753,19 @@
24452753 return 0;
24462754 }
24472755
2448
-static const struct seq_operations kprobes_seq_ops = {
2756
+static const struct seq_operations kprobes_sops = {
24492757 .start = kprobe_seq_start,
24502758 .next = kprobe_seq_next,
24512759 .stop = kprobe_seq_stop,
24522760 .show = show_kprobe_addr
24532761 };
24542762
2455
-static int kprobes_open(struct inode *inode, struct file *filp)
2456
-{
2457
- return seq_open(filp, &kprobes_seq_ops);
2458
-}
2459
-
2460
-static const struct file_operations debugfs_kprobes_operations = {
2461
- .open = kprobes_open,
2462
- .read = seq_read,
2463
- .llseek = seq_lseek,
2464
- .release = seq_release,
2465
-};
2763
+DEFINE_SEQ_ATTRIBUTE(kprobes);
24662764
24672765 /* kprobes/blacklist -- shows which functions can not be probed */
24682766 static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
24692767 {
2768
+ mutex_lock(&kprobe_mutex);
24702769 return seq_list_start(&kprobe_blacklist, *pos);
24712770 }
24722771
....@@ -2493,24 +2792,18 @@
24932792 return 0;
24942793 }
24952794
2496
-static const struct seq_operations kprobe_blacklist_seq_ops = {
2497
- .start = kprobe_blacklist_seq_start,
2498
- .next = kprobe_blacklist_seq_next,
2499
- .stop = kprobe_seq_stop, /* Reuse void function */
2500
- .show = kprobe_blacklist_seq_show,
2501
-};
2502
-
2503
-static int kprobe_blacklist_open(struct inode *inode, struct file *filp)
2795
+static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v)
25042796 {
2505
- return seq_open(filp, &kprobe_blacklist_seq_ops);
2797
+ mutex_unlock(&kprobe_mutex);
25062798 }
25072799
2508
-static const struct file_operations debugfs_kprobe_blacklist_ops = {
2509
- .open = kprobe_blacklist_open,
2510
- .read = seq_read,
2511
- .llseek = seq_lseek,
2512
- .release = seq_release,
2800
+static const struct seq_operations kprobe_blacklist_sops = {
2801
+ .start = kprobe_blacklist_seq_start,
2802
+ .next = kprobe_blacklist_seq_next,
2803
+ .stop = kprobe_blacklist_seq_stop,
2804
+ .show = kprobe_blacklist_seq_show,
25132805 };
2806
+DEFINE_SEQ_ATTRIBUTE(kprobe_blacklist);
25142807
25152808 static int arm_all_kprobes(void)
25162809 {
....@@ -2535,7 +2828,7 @@
25352828 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
25362829 head = &kprobe_table[i];
25372830 /* Arm all kprobes on a best-effort basis */
2538
- hlist_for_each_entry_rcu(p, head, hlist) {
2831
+ hlist_for_each_entry(p, head, hlist) {
25392832 if (!kprobe_disabled(p)) {
25402833 err = arm_kprobe(p);
25412834 if (err) {
....@@ -2578,7 +2871,7 @@
25782871 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
25792872 head = &kprobe_table[i];
25802873 /* Disarm all kprobes on a best-effort basis */
2581
- hlist_for_each_entry_rcu(p, head, hlist) {
2874
+ hlist_for_each_entry(p, head, hlist) {
25822875 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
25832876 err = disarm_kprobe(p, false);
25842877 if (err) {
....@@ -2664,36 +2957,19 @@
26642957
26652958 static int __init debugfs_kprobe_init(void)
26662959 {
2667
- struct dentry *dir, *file;
2668
- unsigned int value = 1;
2960
+ struct dentry *dir;
26692961
26702962 dir = debugfs_create_dir("kprobes", NULL);
2671
- if (!dir)
2672
- return -ENOMEM;
26732963
2674
- file = debugfs_create_file("list", 0400, dir, NULL,
2675
- &debugfs_kprobes_operations);
2676
- if (!file)
2677
- goto error;
2964
+ debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops);
26782965
2679
- file = debugfs_create_file("enabled", 0600, dir,
2680
- &value, &fops_kp);
2681
- if (!file)
2682
- goto error;
2966
+ debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp);
26832967
2684
- file = debugfs_create_file("blacklist", 0400, dir, NULL,
2685
- &debugfs_kprobe_blacklist_ops);
2686
- if (!file)
2687
- goto error;
2968
+ debugfs_create_file("blacklist", 0400, dir, NULL,
2969
+ &kprobe_blacklist_fops);
26882970
26892971 return 0;
2690
-
2691
-error:
2692
- debugfs_remove(dir);
2693
- return -ENOMEM;
26942972 }
26952973
26962974 late_initcall(debugfs_kprobe_init);
26972975 #endif /* CONFIG_DEBUG_FS */
2698
-
2699
-module_init(init_kprobes);