forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/arch/x86/kernel/kprobes/opt.c
@@ -1,24 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Kernel Probes Jump Optimization (Optprobes)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
  * Copyright (C) IBM Corporation, 2002, 2004
  * Copyright (C) Hitachi Ltd., 2012
  */
 #include <linux/kprobes.h>
+#include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
 #include <linux/slab.h>
@@ -27,13 +15,15 @@
 #include <linux/extable.h>
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
+#include <linux/kgdb.h>
 #include <linux/ftrace.h>
-#include <linux/frame.h>
+#include <linux/objtool.h>
+#include <linux/pgtable.h>
+#include <linux/static_call.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
-#include <asm/pgtable.h>
 #include <linux/uaccess.h>
 #include <asm/alternative.h>
 #include <asm/insn.h>
@@ -51,13 +41,13 @@
         long offs;
         int i;
 
-        for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+        for (i = 0; i < JMP32_INSN_SIZE; i++) {
                 kp = get_kprobe((void *)addr - i);
                 /* This function only handles jump-optimized kprobe */
                 if (kp && kprobe_optimized(kp)) {
                         op = container_of(kp, struct optimized_kprobe, kp);
-                        /* If op->list is not empty, op is under optimizing */
-                        if (list_empty(&op->list))
+                        /* If op is optimized or under unoptimizing */
+                        if (list_empty(&op->list) || optprobe_queued_unopt(op))
                                 goto found;
                 }
         }
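
The hunk above widens the back-scan in terms of the new constant: an optimized probe overwrites JMP32_INSN_SIZE (5) bytes, so any address within those 5 bytes may belong to a jump-optimized kprobe and must be recovered from its copied_insn buffer. A minimal userspace model of that scan, with a made-up probe address standing in for get_kprobe()/kprobe_optimized():

#include <stdbool.h>

#define JMP32_INSN_SIZE 5

/* Hypothetical stand-in for get_kprobe() + kprobe_optimized(). */
static bool optimized_probe_at(unsigned long addr)
{
        return addr == 0x1000; /* pretend one optprobe lives at 0x1000 */
}

/*
 * The 5-byte JMP32 written at a probe covers [probe, probe + 4], so any
 * address in that window belongs to that probe's detour.
 */
static unsigned long covering_optprobe(unsigned long addr)
{
        for (int i = 0; i < JMP32_INSN_SIZE; i++)
                if (optimized_probe_at(addr - i))
                        return addr - i;
        return 0;
}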
@@ -69,19 +59,34 @@
          * overwritten by jump destination address. In this case, original
          * bytes must be recovered from op->optinsn.copied_insn buffer.
          */
-        if (probe_kernel_read(buf, (void *)addr,
+        if (copy_from_kernel_nofault(buf, (void *)addr,
                 MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
                 return 0UL;
 
         if (addr == (unsigned long)kp->addr) {
                 buf[0] = kp->opcode;
-                memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+                memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
         } else {
                 offs = addr - (unsigned long)kp->addr - 1;
-                memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
+                memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
         }
 
         return (unsigned long)buf;
+}
+
+static void synthesize_clac(kprobe_opcode_t *addr)
+{
+        /*
+         * Can't be static_cpu_has() due to how objtool treats this feature bit.
+         * This isn't a fast path anyway.
+         */
+        if (!boot_cpu_has(X86_FEATURE_SMAP))
+                return;
+
+        /* Replace the NOP3 with CLAC */
+        addr[0] = 0x0f;
+        addr[1] = 0x01;
+        addr[2] = 0xca;
 }
 
 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
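
synthesize_clac(), added above, patches a 3-byte NOP in the optprobe template with CLAC. CLAC (clear EFLAGS.AC, part of SMAP) encodes as exactly 0F 01 CA, which is why a NOP3 is reserved at that spot. A standalone sketch of the byte patching; patch_clac() and have_smap are illustrative stand-ins for synthesize_clac()/boot_cpu_has():

#include <string.h>

/* CLAC (part of SMAP) is exactly three bytes: 0F 01 CA. */
static const unsigned char clac[3] = { 0x0f, 0x01, 0xca };

/* Illustrative stand-in for synthesize_clac(): slot points at the NOP3. */
static void patch_clac(unsigned char *slot, int have_smap)
{
        if (!have_smap)
                return;                 /* keep the 3-byte NOP */
        memcpy(slot, clac, sizeof(clac));
}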
@@ -97,13 +102,25 @@
 }
 
 asm (
+        ".pushsection .rodata\n"
         "optprobe_template_func:\n"
+        ".pushsection .discard.func_stack_frame_non_standard\n"
+        "__func_stack_frame_non_standard_optprobe_template_func:\n"
+#ifdef CONFIG_64BIT
+        ".quad optprobe_template_func\n"
+#else
+        ".long optprobe_template_func\n"
+#endif
+        ".popsection\n"
         ".global optprobe_template_entry\n"
         "optprobe_template_entry:\n"
 #ifdef CONFIG_X86_64
         /* We don't bother saving the ss register */
         " pushq %rsp\n"
         " pushfq\n"
+        ".global optprobe_template_clac\n"
+        "optprobe_template_clac:\n"
+        ASM_NOP3
         SAVE_REGS_STRING
         " movq %rsp, %rsi\n"
         ".global optprobe_template_val\n"
@@ -114,14 +131,18 @@
         "optprobe_template_call:\n"
         ASM_NOP5
         /* Move flags to rsp */
-        " movq 144(%rsp), %rdx\n"
-        " movq %rdx, 152(%rsp)\n"
+        " movq 18*8(%rsp), %rdx\n"
+        " movq %rdx, 19*8(%rsp)\n"
         RESTORE_REGS_STRING
         /* Skip flags entry */
         " addq $8, %rsp\n"
         " popfq\n"
 #else /* CONFIG_X86_32 */
-        " pushf\n"
+        " pushl %esp\n"
+        " pushfl\n"
+        ".global optprobe_template_clac\n"
+        "optprobe_template_clac:\n"
+        ASM_NOP3
         SAVE_REGS_STRING
         " movl %esp, %edx\n"
         ".global optprobe_template_val\n"
@@ -130,31 +151,26 @@
         ".global optprobe_template_call\n"
         "optprobe_template_call:\n"
         ASM_NOP5
+        /* Move flags into esp */
+        " movl 14*4(%esp), %edx\n"
+        " movl %edx, 15*4(%esp)\n"
         RESTORE_REGS_STRING
-        " addl $4, %esp\n" /* skip cs */
-        " popf\n"
+        /* Skip flags entry */
+        " addl $4, %esp\n"
+        " popfl\n"
 #endif
         ".global optprobe_template_end\n"
         "optprobe_template_end:\n"
-        ".type optprobe_template_func, @function\n"
-        ".size optprobe_template_func, .-optprobe_template_func\n");
+        ".popsection\n");
 
-void optprobe_template_func(void);
-STACK_FRAME_NON_STANDARD(optprobe_template_func);
-NOKPROBE_SYMBOL(optprobe_template_func);
-NOKPROBE_SYMBOL(optprobe_template_entry);
-NOKPROBE_SYMBOL(optprobe_template_val);
-NOKPROBE_SYMBOL(optprobe_template_call);
-NOKPROBE_SYMBOL(optprobe_template_end);
-
+#define TMPL_CLAC_IDX \
+        ((long)optprobe_template_clac - (long)optprobe_template_entry)
 #define TMPL_MOVE_IDX \
         ((long)optprobe_template_val - (long)optprobe_template_entry)
 #define TMPL_CALL_IDX \
         ((long)optprobe_template_call - (long)optprobe_template_entry)
 #define TMPL_END_IDX \
         ((long)optprobe_template_end - (long)optprobe_template_entry)
-
-#define INT3_SIZE sizeof(kprobe_opcode_t)
 
 /* Optimized kprobe call back function: called from optinsn */
 static void
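
The TMPL_*_IDX macros (including the new TMPL_CLAC_IDX) turn the asm labels into byte offsets from optprobe_template_entry, so the same offsets can index into each per-probe copy of the template. A compilable model with made-up label positions:

#include <stdio.h>

/* Stands in for the template text between the asm labels. */
static const char template_buf[32];

int main(void)
{
        /* Made-up label positions; the kernel derives them from asm labels. */
        const char *entry = &template_buf[0];
        const char *clac  = &template_buf[3];
        const char *val   = &template_buf[10];
        const char *end   = &template_buf[32];

        printf("TMPL_CLAC_IDX=%ld TMPL_MOVE_IDX=%ld TMPL_END_IDX=%ld\n",
               (long)(clac - entry), (long)(val - entry), (long)(end - entry));
        return 0;
}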
@@ -170,13 +186,11 @@
         } else {
                 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
                 /* Save skipped registers */
-#ifdef CONFIG_X86_64
                 regs->cs = __KERNEL_CS;
-#else
-                regs->cs = __KERNEL_CS | get_kernel_rpl();
+#ifdef CONFIG_X86_32
                 regs->gs = 0;
 #endif
-                regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+                regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
                 regs->orig_ax = ~0UL;
 
                 __this_cpu_write(current_kprobe, &op->kp);
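
optimized_callback() hands kprobe handlers a pt_regs that looks as if a 1-byte INT3 at op->kp.addr had just trapped, which is why ip is advanced by INT3_INSN_SIZE rather than by the full 5-byte jump. A toy model of that fixup, with fake_regs standing in for the touched pt_regs fields:

#define INT3_INSN_SIZE 1

/* Toy stand-in for the handful of pt_regs fields touched above. */
struct fake_regs {
        unsigned long ip;
        unsigned long orig_ax;
};

/* Make the frame look like a 1-byte INT3 at probe_addr just trapped. */
static void fake_int3_frame(struct fake_regs *regs, unsigned long probe_addr)
{
        regs->ip = probe_addr + INT3_INSN_SIZE;
        regs->orig_ax = ~0UL;
}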
@@ -193,7 +207,7 @@
         struct insn insn;
         int len = 0, ret;
 
-        while (len < RELATIVEJUMP_SIZE) {
+        while (len < JMP32_INSN_SIZE) {
                 ret = __copy_instruction(dest + len, src + len, real + len, &insn);
                 if (!ret || !can_boost(&insn, src + len))
                         return -EINVAL;
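
copy_optimized_instructions() must relocate whole instructions until at least the 5 bytes the JMP32 will overwrite are covered. A sketch of that loop under a hypothetical decoder (insn_len() is made up for the illustration):

#define JMP32_INSN_SIZE 5

/* Hypothetical decoder: 0x0f-prefixed insns are 2 bytes here, else 1. */
static int insn_len(const unsigned char *p)
{
        return p[0] == 0x0f ? 2 : 1;
}

/*
 * Take whole instructions until the 5 bytes the JMP32 will overwrite are
 * covered; the result may exceed 5 when the last insn straddles the edge.
 */
static int bytes_to_relocate(const unsigned char *src)
{
        int len = 0;

        while (len < JMP32_INSN_SIZE)
                len += insn_len(src + len);
        return len;
}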
@@ -202,7 +216,8 @@
         /* Check whether the address range is reserved */
         if (ftrace_text_reserved(src, src + len - 1) ||
             alternatives_text_reserved(src, src + len - 1) ||
-            jump_label_text_reserved(src, src + len - 1))
+            jump_label_text_reserved(src, src + len - 1) ||
+            static_call_text_reserved(src, src + len - 1))
                 return -EBUSY;
 
         return len;
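
The reservation checks above, including the newly added static_call_text_reserved(), all answer the same question: does the patch range overlap text owned by another code-patching subsystem? The underlying predicate is a plain interval-overlap test, sketched here with an assumed inclusive-range type:

#include <stdbool.h>

struct text_range {
        unsigned long start, end;       /* inclusive, like src..src+len-1 */
};

/* Does [start, end] overlap a range another patching subsystem owns? */
static bool text_range_reserved(struct text_range owned,
                                unsigned long start, unsigned long end)
{
        return start <= owned.end && end >= owned.start;
}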
@@ -279,19 +294,19 @@
          * stack handling and registers setup.
          */
         if (((paddr >= (unsigned long)__entry_text_start) &&
-            (paddr < (unsigned long)__entry_text_end)) ||
-            ((paddr >= (unsigned long)__irqentry_text_start) &&
-            (paddr < (unsigned long)__irqentry_text_end)))
+            (paddr < (unsigned long)__entry_text_end)))
                 return 0;
 
         /* Check there is enough space for a relative jump. */
-        if (size - offset < RELATIVEJUMP_SIZE)
+        if (size - offset < JMP32_INSN_SIZE)
                 return 0;
 
         /* Decode instructions */
         addr = paddr - offset;
         while (addr < paddr - offset + size) { /* Decode until function end */
                 unsigned long recovered_insn;
+                int ret;
+
                 if (search_exception_tables(addr))
                         /*
                          * Since some fixup code will jumps into this function,
@@ -301,18 +316,26 @@
                 recovered_insn = recover_probed_instruction(buf, addr);
                 if (!recovered_insn)
                         return 0;
-                kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
-                insn_get_length(&insn);
-                /* Another subsystem puts a breakpoint */
-                if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+
+                ret = insn_decode(&insn, (void *)recovered_insn, MAX_INSN_SIZE, INSN_MODE_KERN);
+                if (ret < 0)
                         return 0;
+#ifdef CONFIG_KGDB
+                /*
+                 * If there is a dynamically installed kgdb sw breakpoint,
+                 * this function should not be probed.
+                 */
+                if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
+                    kgdb_has_hit_break(addr))
+                        return 0;
+#endif
                 /* Recover address */
                 insn.kaddr = (void *)addr;
                 insn.next_byte = (void *)(addr + insn.length);
                 /* Check any instructions don't jump into target */
                 if (insn_is_indirect_jump(&insn) ||
-                    insn_jump_into_range(&insn, paddr + INT3_SIZE,
-                                         RELATIVE_ADDR_SIZE))
+                    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
+                                         DISP32_SIZE))
                         return 0;
                 addr += insn.length;
         }
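
insn_decode(), which this hunk adopts, replaces the older kernel_insn_init()/insn_get_length() pair and signals a failed decode with a negative return instead of a zero length. A sketch of how the call is shaped here; the declarations are repeated purely for illustration, the real ones live in the kernel's <asm/insn.h>:

/* Sketch of the API shape only; the real declarations are kernel-internal. */
struct insn;
enum insn_mode { INSN_MODE_KERN };
extern int insn_decode(struct insn *insn, const void *kaddr,
                       int buf_len, enum insn_mode m);

#define MAX_INSN_SIZE 15

static int decode_kernel_insn(struct insn *insn, const void *kaddr)
{
        int ret = insn_decode(insn, kaddr, MAX_INSN_SIZE, INSN_MODE_KERN);

        return (ret < 0) ? ret : 0;     /* insn->length is valid on success */
}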
@@ -328,7 +351,7 @@
 
         for (i = 1; i < op->optinsn.size; i++) {
                 p = get_kprobe(op->kp.addr + i);
-                if (p && !kprobe_disabled(p))
+                if (p && !kprobe_disarmed(p))
                         return -EEXIST;
         }
 
@@ -347,8 +370,15 @@
 static
 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
 {
-        if (op->optinsn.insn) {
-                free_optinsn_slot(op->optinsn.insn, dirty);
+        u8 *slot = op->optinsn.insn;
+        if (slot) {
+                int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;
+
+                /* Record the perf event before freeing the slot */
+                if (dirty)
+                        perf_event_text_poke(slot, slot, len, NULL, 0);
+
+                free_optinsn_slot(slot, dirty);
                 op->optinsn.insn = NULL;
                 op->optinsn.size = 0;
         }
@@ -388,7 +418,7 @@
          * Verify if the address gap is in 2GB range, because this uses
          * a relative jump.
          */
-        rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+        rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
         if (abs(rel) > 0x7fffffff) {
                 ret = -ERANGE;
                 goto err;
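
The rel computation above enforces that the detour slot is reachable by a JMP.d32, whose signed 32-bit displacement is measured from the end of the 5-byte jump. A standalone equivalent of the range check:

#include <stdbool.h>
#include <stdint.h>

#define JMP32_INSN_SIZE 5

/*
 * JMP.d32's displacement is a signed 32-bit offset from the end of the
 * 5-byte instruction, hence the "gap within 2GB" check above.
 */
static bool reljump_reachable(const void *slot, const void *probe_addr)
{
        int64_t rel = (int64_t)(intptr_t)slot -
                      ((int64_t)(intptr_t)probe_addr + JMP32_INSN_SIZE);

        return rel >= INT32_MIN && rel <= INT32_MAX;
}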
@@ -405,6 +435,8 @@
         op->optinsn.size = ret;
         len = TMPL_END_IDX + op->optinsn.size;
 
+        synthesize_clac(buf + TMPL_CLAC_IDX);
+
         /* Set probe information */
         synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
 
410442
....@@ -415,10 +447,17 @@
415447 /* Set returning jmp instruction at the tail of out-of-line buffer */
416448 synthesize_reljump(buf + len, slot + len,
417449 (u8 *)op->kp.addr + op->optinsn.size);
418
- len += RELATIVEJUMP_SIZE;
450
+ len += JMP32_INSN_SIZE;
419451
420
- /* We have to use text_poke for instuction buffer because it is RO */
452
+ /*
453
+ * Note len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE is also
454
+ * used in __arch_remove_optimized_kprobe().
455
+ */
456
+
457
+ /* We have to use text_poke() for instruction buffer because it is RO */
458
+ perf_event_text_poke(slot, NULL, 0, buf, len);
421459 text_poke(slot, buf, len);
460
+
422461 ret = 0;
423462 out:
424463 kfree(buf);
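
After this hunk, a finished slot is laid out as [template][copied instructions][JMP32 back], so the patched length is TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE, the same value the new comment says __arch_remove_optimized_kprobe() recomputes when emitting the perf unregister event. As arithmetic:

#define JMP32_INSN_SIZE 5

/*
 * Total bytes written into the detour slot:
 *   [ template            )  TMPL_END_IDX bytes (regs save + callback call)
 *   [ copied instructions )  op->optinsn.size bytes
 *   [ JMP32 back          )  5 bytes
 */
static long slot_len(long tmpl_end_idx, long copied_size)
{
        return tmpl_end_idx + copied_size + JMP32_INSN_SIZE;
}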
@@ -430,44 +469,63 @@
 }
 
 /*
- * Replace breakpoints (int3) with relative jumps.
+ * Replace breakpoints (INT3) with relative jumps (JMP.d32).
  * Caller must call with locking kprobe_mutex and text_mutex.
+ *
+ * The caller will have installed a regular kprobe and after that issued
+ * synchronize_rcu_tasks(); this ensures that the instruction(s) that live in
+ * the 4 bytes after the INT3 are unused and can now be overwritten.
  */
 void arch_optimize_kprobes(struct list_head *oplist)
 {
         struct optimized_kprobe *op, *tmp;
-        u8 insn_buf[RELATIVEJUMP_SIZE];
+        u8 insn_buff[JMP32_INSN_SIZE];
 
         list_for_each_entry_safe(op, tmp, oplist, list) {
                 s32 rel = (s32)((long)op->optinsn.insn -
-                        ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+                        ((long)op->kp.addr + JMP32_INSN_SIZE));
 
                 WARN_ON(kprobe_disabled(&op->kp));
 
                 /* Backup instructions which will be replaced by jump address */
-                memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
-                       RELATIVE_ADDR_SIZE);
+                memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
+                       DISP32_SIZE);
 
-                insn_buf[0] = RELATIVEJUMP_OPCODE;
-                *(s32 *)(&insn_buf[1]) = rel;
+                insn_buff[0] = JMP32_INSN_OPCODE;
+                *(s32 *)(&insn_buff[1]) = rel;
 
-                text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
-                             op->optinsn.insn);
+                text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
 
                 list_del_init(&op->list);
         }
 }
 
-/* Replace a relative jump with a breakpoint (int3). */
+/*
+ * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
+ *
+ * After that, we can restore the 4 bytes after the INT3 to undo what
+ * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
+ * unused once the INT3 lands.
+ */
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
-        u8 insn_buf[RELATIVEJUMP_SIZE];
+        u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
+        u8 old[JMP32_INSN_SIZE];
+        u8 *addr = op->kp.addr;
 
-        /* Set int3 to first byte for kprobes */
-        insn_buf[0] = BREAKPOINT_INSTRUCTION;
-        memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-        text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
-                     op->optinsn.insn);
+        memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
+        memcpy(new + INT3_INSN_SIZE,
+               op->optinsn.copied_insn,
+               JMP32_INSN_SIZE - INT3_INSN_SIZE);
+
+        text_poke(addr, new, INT3_INSN_SIZE);
+        text_poke_sync();
+        text_poke(addr + INT3_INSN_SIZE,
+                  new + INT3_INSN_SIZE,
+                  JMP32_INSN_SIZE - INT3_INSN_SIZE);
+        text_poke_sync();
+
+        perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
 }
 
 /*
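
The rewritten arch_unoptimize_kprobe() above no longer uses text_poke_bp(): it first lands an INT3 on the first byte, syncs all cores, then restores the 4 displacement bytes and syncs again, so no CPU can ever execute a half-restored instruction. A userspace model of that ordering; poke() and sync_cores() are stand-ins for text_poke()/text_poke_sync():

#include <string.h>

#define INT3_INSN_OPCODE 0xcc
#define INT3_INSN_SIZE 1
#define JMP32_INSN_SIZE 5

/* Stand-ins for text_poke() / text_poke_sync(). */
static void poke(unsigned char *dst, const unsigned char *src, int len)
{
        memcpy(dst, src, len);
}
static void sync_cores(void)
{
        /* the kernel IPIs every CPU here so all cores serialize */
}

static void unoptimize(unsigned char *addr, const unsigned char *copied_insn)
{
        unsigned char new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };

        /* Rebuild INT3 + the original 4 bytes that the JMP32 clobbered. */
        memcpy(new + INT3_INSN_SIZE, copied_insn,
               JMP32_INSN_SIZE - INT3_INSN_SIZE);

        poke(addr, new, INT3_INSN_SIZE);        /* land the breakpoint first */
        sync_cores();
        poke(addr + INT3_INSN_SIZE, new + INT3_INSN_SIZE,
             JMP32_INSN_SIZE - INT3_INSN_SIZE); /* then restore the tail */
        sync_cores();
}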