From 072de836f53be56a70cecf70b43ae43b7ce17376 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 11 Dec 2023 10:08:36 +0000
Subject: [PATCH] mk-rootfs.sh
---
kernel/arch/x86/kernel/kprobes/opt.c | 211 ++++++++++++++++++++++++++++++++++------------------
1 files changed, 138 insertions(+), 73 deletions(-)
diff --git a/kernel/arch/x86/kernel/kprobes/opt.c b/kernel/arch/x86/kernel/kprobes/opt.c
index 544bd41..50d656d 100644
--- a/kernel/arch/x86/kernel/kprobes/opt.c
+++ b/kernel/arch/x86/kernel/kprobes/opt.c
@@ -1,24 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Kernel Probes Jump Optimization (Optprobes)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2002, 2004
* Copyright (C) Hitachi Ltd., 2012
*/
#include <linux/kprobes.h>
+#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
@@ -28,12 +16,13 @@
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
-#include <linux/frame.h>
+#include <linux/objtool.h>
+#include <linux/pgtable.h>
+#include <linux/static_call.h>
#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
-#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
@@ -51,7 +40,7 @@
long offs;
int i;
- for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+ for (i = 0; i < JMP32_INSN_SIZE; i++) {
kp = get_kprobe((void *)addr - i);
/* This function only handles jump-optimized kprobe */
if (kp && kprobe_optimized(kp)) {
@@ -69,19 +58,34 @@
* overwritten by jump destination address. In this case, original
* bytes must be recovered from op->optinsn.copied_insn buffer.
*/
- if (probe_kernel_read(buf, (void *)addr,
+ if (copy_from_kernel_nofault(buf, (void *)addr,
MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
return 0UL;
if (addr == (unsigned long)kp->addr) {
buf[0] = kp->opcode;
- memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
} else {
offs = addr - (unsigned long)kp->addr - 1;
- memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
+ memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
}
return (unsigned long)buf;
+}
+
+static void synthesize_clac(kprobe_opcode_t *addr)
+{
+ /*
+ * Can't be static_cpu_has() due to how objtool treats this feature bit.
+ * This isn't a fast path anyway.
+ */
+ if (!boot_cpu_has(X86_FEATURE_SMAP))
+ return;
+
+ /* Replace the NOP3 with CLAC */
+ addr[0] = 0x0f;
+ addr[1] = 0x01;
+ addr[2] = 0xca;
}
/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
@@ -97,13 +101,25 @@
}
asm (
+ ".pushsection .rodata\n"
"optprobe_template_func:\n"
+ ".pushsection .discard.func_stack_frame_non_standard\n"
+ "__func_stack_frame_non_standard_optprobe_template_func:\n"
+#ifdef CONFIG_64BIT
+ ".quad optprobe_template_func\n"
+#else
+ ".long optprobe_template_func\n"
+#endif
+ ".popsection\n"
".global optprobe_template_entry\n"
"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
/* We don't bother saving the ss register */
" pushq %rsp\n"
" pushfq\n"
+ ".global optprobe_template_clac\n"
+ "optprobe_template_clac:\n"
+ ASM_NOP3
SAVE_REGS_STRING
" movq %rsp, %rsi\n"
".global optprobe_template_val\n"
@@ -114,14 +130,18 @@
"optprobe_template_call:\n"
ASM_NOP5
/* Move flags to rsp */
- " movq 144(%rsp), %rdx\n"
- " movq %rdx, 152(%rsp)\n"
+ " movq 18*8(%rsp), %rdx\n"
+ " movq %rdx, 19*8(%rsp)\n"
RESTORE_REGS_STRING
/* Skip flags entry */
" addq $8, %rsp\n"
" popfq\n"
#else /* CONFIG_X86_32 */
- " pushf\n"
+ " pushl %esp\n"
+ " pushfl\n"
+ ".global optprobe_template_clac\n"
+ "optprobe_template_clac:\n"
+ ASM_NOP3
SAVE_REGS_STRING
" movl %esp, %edx\n"
".global optprobe_template_val\n"
@@ -130,31 +150,26 @@
".global optprobe_template_call\n"
"optprobe_template_call:\n"
ASM_NOP5
+ /* Move flags into esp */
+ " movl 14*4(%esp), %edx\n"
+ " movl %edx, 15*4(%esp)\n"
RESTORE_REGS_STRING
- " addl $4, %esp\n" /* skip cs */
- " popf\n"
+ /* Skip flags entry */
+ " addl $4, %esp\n"
+ " popfl\n"
#endif
".global optprobe_template_end\n"
"optprobe_template_end:\n"
- ".type optprobe_template_func, @function\n"
- ".size optprobe_template_func, .-optprobe_template_func\n");
+ ".popsection\n");
-void optprobe_template_func(void);
-STACK_FRAME_NON_STANDARD(optprobe_template_func);
-NOKPROBE_SYMBOL(optprobe_template_func);
-NOKPROBE_SYMBOL(optprobe_template_entry);
-NOKPROBE_SYMBOL(optprobe_template_val);
-NOKPROBE_SYMBOL(optprobe_template_call);
-NOKPROBE_SYMBOL(optprobe_template_end);
-
+#define TMPL_CLAC_IDX \
+ ((long)optprobe_template_clac - (long)optprobe_template_entry)
#define TMPL_MOVE_IDX \
((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
((long)optprobe_template_end - (long)optprobe_template_entry)
-
-#define INT3_SIZE sizeof(kprobe_opcode_t)
/* Optimized kprobe call back function: called from optinsn */
static void
@@ -170,13 +185,11 @@
} else {
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
/* Save skipped registers */
-#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
-#else
- regs->cs = __KERNEL_CS | get_kernel_rpl();
+#ifdef CONFIG_X86_32
regs->gs = 0;
#endif
- regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+ regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
regs->orig_ax = ~0UL;
__this_cpu_write(current_kprobe, &op->kp);
@@ -193,7 +206,7 @@
struct insn insn;
int len = 0, ret;
- while (len < RELATIVEJUMP_SIZE) {
+ while (len < JMP32_INSN_SIZE) {
ret = __copy_instruction(dest + len, src + len, real + len, &insn);
if (!ret || !can_boost(&insn, src + len))
return -EINVAL;
@@ -202,7 +215,8 @@
/* Check whether the address range is reserved */
if (ftrace_text_reserved(src, src + len - 1) ||
alternatives_text_reserved(src, src + len - 1) ||
- jump_label_text_reserved(src, src + len - 1))
+ jump_label_text_reserved(src, src + len - 1) ||
+ static_call_text_reserved(src, src + len - 1))
return -EBUSY;
return len;
@@ -263,6 +277,19 @@
return ret;
}
+static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
+{
+ unsigned char ops;
+
+ for (; addr < eaddr; addr++) {
+ if (get_kernel_nofault(ops, (void *)addr) < 0 ||
+ ops != INT3_INSN_OPCODE)
+ return false;
+ }
+
+ return true;
+}
+
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
@@ -279,13 +306,11 @@
* stack handling and registers setup.
*/
if (((paddr >= (unsigned long)__entry_text_start) &&
- (paddr < (unsigned long)__entry_text_end)) ||
- ((paddr >= (unsigned long)__irqentry_text_start) &&
- (paddr < (unsigned long)__irqentry_text_end)))
+ (paddr < (unsigned long)__entry_text_end)))
return 0;
/* Check there is enough space for a relative jump. */
- if (size - offset < RELATIVEJUMP_SIZE)
+ if (size - offset < JMP32_INSN_SIZE)
return 0;
/* Decode instructions */
@@ -303,16 +328,21 @@
return 0;
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
- /* Another subsystem puts a breakpoint */
- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
- return 0;
+ /*
+ * In the case of detecting unknown breakpoint, this could be
+ * a padding INT3 between functions. Let's check that all the
+ * rest of the bytes are also INT3.
+ */
+ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
+ return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;
+
/* Recover address */
insn.kaddr = (void *)addr;
insn.next_byte = (void *)(addr + insn.length);
/* Check any instructions don't jump into target */
if (insn_is_indirect_jump(&insn) ||
- insn_jump_into_range(&insn, paddr + INT3_SIZE,
- RELATIVE_ADDR_SIZE))
+ insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
+ DISP32_SIZE))
return 0;
addr += insn.length;
}
@@ -347,8 +377,15 @@
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
- if (op->optinsn.insn) {
- free_optinsn_slot(op->optinsn.insn, dirty);
+ u8 *slot = op->optinsn.insn;
+ if (slot) {
+ int len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE;
+
+ /* Record the perf event before freeing the slot */
+ if (dirty)
+ perf_event_text_poke(slot, slot, len, NULL, 0);
+
+ free_optinsn_slot(slot, dirty);
op->optinsn.insn = NULL;
op->optinsn.size = 0;
}
@@ -388,7 +425,7 @@
* Verify if the address gap is in 2GB range, because this uses
* a relative jump.
*/
- rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+ rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE;
if (abs(rel) > 0x7fffffff) {
ret = -ERANGE;
goto err;
@@ -405,6 +442,8 @@
op->optinsn.size = ret;
len = TMPL_END_IDX + op->optinsn.size;
+ synthesize_clac(buf + TMPL_CLAC_IDX);
+
/* Set probe information */
synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
@@ -415,10 +454,17 @@
/* Set returning jmp instruction at the tail of out-of-line buffer */
synthesize_reljump(buf + len, slot + len,
(u8 *)op->kp.addr + op->optinsn.size);
- len += RELATIVEJUMP_SIZE;
+ len += JMP32_INSN_SIZE;
- /* We have to use text_poke for instuction buffer because it is RO */
+ /*
+ * Note len = TMPL_END_IDX + op->optinsn.size + JMP32_INSN_SIZE is also
+ * used in __arch_remove_optimized_kprobe().
+ */
+
+ /* We have to use text_poke() for instruction buffer because it is RO */
+ perf_event_text_poke(slot, NULL, 0, buf, len);
text_poke(slot, buf, len);
+
ret = 0;
out:
kfree(buf);
@@ -430,44 +476,63 @@
}
/*
- * Replace breakpoints (int3) with relative jumps.
+ * Replace breakpoints (INT3) with relative jumps (JMP.d32).
* Caller must call with locking kprobe_mutex and text_mutex.
+ *
+ * The caller will have installed a regular kprobe and after that issued
+ * syncrhonize_rcu_tasks(), this ensures that the instruction(s) that live in
+ * the 4 bytes after the INT3 are unused and can now be overwritten.
*/
void arch_optimize_kprobes(struct list_head *oplist)
{
struct optimized_kprobe *op, *tmp;
- u8 insn_buf[RELATIVEJUMP_SIZE];
+ u8 insn_buff[JMP32_INSN_SIZE];
list_for_each_entry_safe(op, tmp, oplist, list) {
s32 rel = (s32)((long)op->optinsn.insn -
- ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+ ((long)op->kp.addr + JMP32_INSN_SIZE));
WARN_ON(kprobe_disabled(&op->kp));
/* Backup instructions which will be replaced by jump address */
- memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
- RELATIVE_ADDR_SIZE);
+ memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE,
+ DISP32_SIZE);
- insn_buf[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&insn_buf[1]) = rel;
+ insn_buff[0] = JMP32_INSN_OPCODE;
+ *(s32 *)(&insn_buff[1]) = rel;
- text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
list_del_init(&op->list);
}
}
-/* Replace a relative jump with a breakpoint (int3). */
+/*
+ * Replace a relative jump (JMP.d32) with a breakpoint (INT3).
+ *
+ * After that, we can restore the 4 bytes after the INT3 to undo what
+ * arch_optimize_kprobes() scribbled. This is safe since those bytes will be
+ * unused once the INT3 lands.
+ */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
- u8 insn_buf[RELATIVEJUMP_SIZE];
+ u8 new[JMP32_INSN_SIZE] = { INT3_INSN_OPCODE, };
+ u8 old[JMP32_INSN_SIZE];
+ u8 *addr = op->kp.addr;
- /* Set int3 to first byte for kprobes */
- insn_buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
- text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ memcpy(old, op->kp.addr, JMP32_INSN_SIZE);
+ memcpy(new + INT3_INSN_SIZE,
+ op->optinsn.copied_insn,
+ JMP32_INSN_SIZE - INT3_INSN_SIZE);
+
+ text_poke(addr, new, INT3_INSN_SIZE);
+ text_poke_sync();
+ text_poke(addr + INT3_INSN_SIZE,
+ new + INT3_INSN_SIZE,
+ JMP32_INSN_SIZE - INT3_INSN_SIZE);
+ text_poke_sync();
+
+ perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
}
/*
--
Gitblit v1.6.2