forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
--- a/kernel/arch/x86/kernel/kprobes/opt.c
+++ b/kernel/arch/x86/kernel/kprobes/opt.c
@@ -1,24 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Kernel Probes Jump Optimization (Optprobes)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  *
  * Copyright (C) IBM Corporation, 2002, 2004
  * Copyright (C) Hitachi Ltd., 2012
  */
 #include <linux/kprobes.h>
+#include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
 #include <linux/slab.h>
@@ -28,12 +16,13 @@
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
-#include <linux/frame.h>
+#include <linux/objtool.h>
+#include <linux/pgtable.h>
+#include <linux/static_call.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
-#include <asm/pgtable.h>
 #include <linux/uaccess.h>
 #include <asm/alternative.h>
 #include <asm/insn.h>
@@ -51,7 +40,7 @@
 	long offs;
 	int i;
 
-	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+	for (i = 0; i < JMP32_INSN_SIZE; i++) {
 		kp = get_kprobe((void *)addr - i);
 		/* This function only handles jump-optimized kprobe */
 		if (kp && kprobe_optimized(kp)) {
@@ -69,19 +58,34 @@
 	 * overwritten by jump destination address. In this case, original
 	 * bytes must be recovered from op->optinsn.copied_insn buffer.
 	 */
-	if (probe_kernel_read(buf, (void *)addr,
+	if (copy_from_kernel_nofault(buf, (void *)addr,
 		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
 		return 0UL;
 
 	if (addr == (unsigned long)kp->addr) {
 		buf[0] = kp->opcode;
-		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+		memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
 	} else {
 		offs = addr - (unsigned long)kp->addr - 1;
-		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
+		memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
 	}
 
 	return (unsigned long)buf;
+}
+
+static void synthesize_clac(kprobe_opcode_t *addr)
+{
+	/*
+	 * Can't be static_cpu_has() due to how objtool treats this feature bit.
+	 * This isn't a fast path anyway.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_SMAP))
+		return;
+
+	/* Replace the NOP3 with CLAC */
+	addr[0] = 0x0f;
+	addr[1] = 0x01;
+	addr[2] = 0xca;
 }
 
 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
@@ -97,13 +101,25 @@
 }
 
 asm (
+	".pushsection .rodata\n"
 	"optprobe_template_func:\n"
+	".pushsection .discard.func_stack_frame_non_standard\n"
+	"__func_stack_frame_non_standard_optprobe_template_func:\n"
+#ifdef CONFIG_64BIT
+	".quad optprobe_template_func\n"
+#else
+	".long optprobe_template_func\n"
+#endif
+	".popsection\n"
 	".global optprobe_template_entry\n"
 	"optprobe_template_entry:\n"
 #ifdef CONFIG_X86_64
 	/* We don't bother saving the ss register */
 	" pushq %rsp\n"
 	" pushfq\n"
+	".global optprobe_template_clac\n"
+	"optprobe_template_clac:\n"
+	ASM_NOP3
 	SAVE_REGS_STRING
 	" movq %rsp, %rsi\n"
 	".global optprobe_template_val\n"
@@ -114,14 +130,18 @@
 	"optprobe_template_call:\n"
 	ASM_NOP5
 	/* Move flags to rsp */
-	" movq 144(%rsp), %rdx\n"
-	" movq %rdx, 152(%rsp)\n"
+	" movq 18*8(%rsp), %rdx\n"
+	" movq %rdx, 19*8(%rsp)\n"
 	RESTORE_REGS_STRING
 	/* Skip flags entry */
 	" addq $8, %rsp\n"
 	" popfq\n"
 #else /* CONFIG_X86_32 */
-	" pushf\n"
+	" pushl %esp\n"
+	" pushfl\n"
+	".global optprobe_template_clac\n"
+	"optprobe_template_clac:\n"
+	ASM_NOP3
 	SAVE_REGS_STRING
 	" movl %esp, %edx\n"
 	".global optprobe_template_val\n"
@@ -130,31 +150,26 @@
 	".global optprobe_template_call\n"
 	"optprobe_template_call:\n"
 	ASM_NOP5
+	/* Move flags into esp */
+	" movl 14*4(%esp), %edx\n"
+	" movl %edx, 15*4(%esp)\n"
 	RESTORE_REGS_STRING
-	" addl $4, %esp\n" /* skip cs */
-	" popf\n"
+	/* Skip flags entry */
+	" addl $4, %esp\n"
+	" popfl\n"
 #endif
 	".global optprobe_template_end\n"
 	"optprobe_template_end:\n"
-	".type optprobe_template_func, @function\n"
-	".size optprobe_template_func, .-optprobe_template_func\n");
+	".popsection\n");
 
-void optprobe_template_func(void);
-STACK_FRAME_NON_STANDARD(optprobe_template_func);
-NOKPROBE_SYMBOL(optprobe_template_func);
-NOKPROBE_SYMBOL(optprobe_template_entry);
-NOKPROBE_SYMBOL(optprobe_template_val);
-NOKPROBE_SYMBOL(optprobe_template_call);
-NOKPROBE_SYMBOL(optprobe_template_end);
-
+#define TMPL_CLAC_IDX \
+	((long)optprobe_template_clac - (long)optprobe_template_entry)
 #define TMPL_MOVE_IDX \
 	((long)optprobe_template_val - (long)optprobe_template_entry)
 #define TMPL_CALL_IDX \
 	((long)optprobe_template_call - (long)optprobe_template_entry)
 #define TMPL_END_IDX \
 	((long)optprobe_template_end - (long)optprobe_template_entry)
-
-#define INT3_SIZE sizeof(kprobe_opcode_t)
 
 /* Optimized kprobe call back function: called from optinsn */
 static void
@@ -170,13 +185,11 @@
 	} else {
 		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 		/* Save skipped registers */
-#ifdef CONFIG_X86_64
 		regs->cs = __KERNEL_CS;
-#else
-		regs->cs = __KERNEL_CS | get_kernel_rpl();
+#ifdef CONFIG_X86_32
 		regs->gs = 0;
 #endif
-		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+		regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
 		regs->orig_ax = ~0UL;
 
 		__this_cpu_write(current_kprobe, &op->kp);
@@ -193,7 +206,7 @@
 	struct insn insn;
 	int len = 0, ret;
 
-	while (len < RELATIVEJUMP_SIZE) {
+	while (len < JMP32_INSN_SIZE) {
 		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
 		if (!ret || !can_boost(&insn, src + len))
 			return -EINVAL;
@@ -202,7 +215,8 @@
 	/* Check whether the address range is reserved */
 	if (ftrace_text_reserved(src, src + len - 1) ||
 	    alternatives_text_reserved(src, src + len - 1) ||
-	    jump_label_text_reserved(src, src + len - 1))
+	    jump_label_text_reserved(src, src + len - 1) ||
+	    static_call_text_reserved(src, src + len - 1))
 		return -EBUSY;
 
 	return len;
@@ -263,6 +277,19 @@
 	return ret;
 }
 
+static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
+{
+	unsigned char ops;
+
+	for (; addr < eaddr; addr++) {
+		if (get_kernel_nofault(ops, (void *)addr) < 0 ||
+		    ops != INT3_INSN_OPCODE)
+			return false;
+	}
+
+	return true;
+}
+
 /* Decode whole function to ensure any instructions don't jump into target */
 static int can_optimize(unsigned long paddr)
 {
@@ -279,13 +306,11 @@
 	 * stack handling and registers setup.
 	 */
 	if (((paddr >= (unsigned long)__entry_text_start) &&
-	    (paddr < (unsigned long)__entry_text_end)) ||
-	    ((paddr >= (unsigned long)__irqentry_text_start) &&
-	    (paddr < (unsigned long)__irqentry_text_end)))
+	    (paddr < (unsigned long)__entry_text_end)))
 		return 0;
 
 	/* Check there is enough space for a relative jump. */
-	if (size - offset < RELATIVEJUMP_SIZE)
+	if (size - offset < JMP32_INSN_SIZE)
 		return 0;
 
 	/* Decode instructions */
@@ -303,16 +328,21 @@
 			return 0;
 		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
 		insn_get_length(&insn);
-		/* Another subsystem puts a breakpoint */
-		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
-			return 0;
+		/*
+		 * In the case of detecting unknown breakpoint, this could be
+		 * a padding INT3 between functions. Let's check that all the
+		 * rest of the bytes are also INT3.
+		 */
+		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
+			return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;
+
 		/* Recover address */
 		insn.kaddr = (void *)addr;
 		insn.next_byte = (void *)(addr + insn.length);
 		/* Check any instructions don't jump into target */
 		if (insn_is_indirect_jump(&insn) ||
-		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
-					 RELATIVE_ADDR_SIZE))
+		    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
					 DISP32_SIZE))
 			return 0;
 		addr += insn.length;
 	}
@@ -347,8 +377,15 @@
 static
 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
 {
-	if (op->optinsn.insn) {
-		free_optinsn_slot(op->optinsn.insn, dirty);
+	u8 *slot = op->optinsn.insn;
+	if (slot) {
+		int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;
+
+		/* Record the perf event before freeing the slot */
+		if (dirty)
+			perf_event_text_poke(slot, slot, len, NULL, 0);
+
+		free_optinsn_slot(slot, dirty);
 		op->optinsn.insn = NULL;
 		op->optinsn.size = 0;
 	}
@@ -388,7 +425,7 @@
 	 * Verify if the address gap is in 2GB range, because this uses
 	 * a relative jump.
 	 */
-	rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+	rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
 	if (abs(rel) > 0x7fffffff) {
 		ret = -ERANGE;
 		goto err;
@@ -405,6 +442,8 @@
 	op->optinsn.size = ret;
 	len = TMPL_END_IDX + op->optinsn.size;
 
+	synthesize_clac(buf + TMPL_CLAC_IDX);
+
 	/* Set probe information */
 	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
 
@@ -415,10 +454,17 @@
 	/* Set returning jmp instruction at the tail of out-of-line buffer */
 	synthesize_reljump(buf + len, slot + len,
			   (u8 *)op->kp.addr + op->optinsn.size);
-	len += RELATIVEJUMP_SIZE;
+	len += JMP32_INSN_SIZE;
 
-	/* We have to use text_poke for instuction buffer because it is RO */
+	/*
+	 * Note len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE is also
+	 * used in __arch_remove_optimized_kprobe().
+	 */
+
+	/* We have to use text_poke() for instruction buffer because it is RO */
+	perf_event_text_poke(slot, NULL, 0, buf, len);
 	text_poke(slot, buf, len);
+
 	ret = 0;
 out:
 	kfree(buf);
@@ -430,44 +476,63 @@
 }
 
 /*
- * Replace breakpoints (int3) with relative jumps.
+ * Replace breakpoints (INT3) with relative jumps (JMP.d32).
  * Caller must call with locking kprobe_mutex and text_mutex.
+ *
+ * The caller will have installed a regular kprobe and after that issued
+ * syncrhonize_rcu_tasks(), this ensures that the instruction(s) that live in
+ * the 4 bytes after the INT3 are unused and can now be overwritten.
  */
 void arch_optimize_kprobes(struct list_head *oplist)
 {
 	struct optimized_kprobe *op, *tmp;
-	u8 insn_buf[RELATIVEJUMP_SIZE];
+	u8 insn_buff[JMP32_INSN_SIZE];
 
 	list_for_each_entry_safe(op, tmp, oplist, list) {
 		s32 rel = (s32)((long)op->optinsn.insn -
-			((long)op->kp.addr + RELATIVEJUMP_SIZE));
+			((long)op->kp.addr + JMP32_INSN_SIZE));
 
 		WARN_ON(kprobe_disabled(&op->kp));
 
 		/* Backup instructions which will be replaced by jump address */
-		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
-		       RELATIVE_ADDR_SIZE);
+		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
+		       DISP32_SIZE);
 
-		insn_buf[0] = RELATIVEJUMP_OPCODE;
-		*(s32 *)(&insn_buf[1]) = rel;
+		insn_buff[0] = JMP32_INSN_OPCODE;
+		*(s32 *)(&insn_buff[1]) = rel;
 
-		text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
-			     op->optinsn.insn);
+		text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
 
 		list_del_init(&op->list);
 	}
 }
 
-/* Replace a relative jump with a breakpoint (int3). */
+/*
+ * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
+ *
+ * After that, we can restore the 4 bytes after the INT3 to undo what
+ * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
+ * unused once the INT3 lands.
+ */
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
-	u8 insn_buf[RELATIVEJUMP_SIZE];
+	u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
+	u8 old[JMP32_INSN_SIZE];
+	u8 *addr = op->kp.addr;
 
-	/* Set int3 to first byte for kprobes */
-	insn_buf[0] = BREAKPOINT_INSTRUCTION;
-	memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-	text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
-		     op->optinsn.insn);
+	memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
+	memcpy(new + INT3_INSN_SIZE,
+	       op->optinsn.copied_insn,
+	       JMP32_INSN_SIZE - INT3_INSN_SIZE);
+
+	text_poke(addr, new, INT3_INSN_SIZE);
+	text_poke_sync();
+	text_poke(addr + INT3_INSN_SIZE,
+		  new + INT3_INSN_SIZE,
+		  JMP32_INSN_SIZE - INT3_INSN_SIZE);
+	text_poke_sync();
+
+	perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
 }
 
 /*